BigQueryへエクスポートされるGA4のentrancesパラメータについて検証したことをメモ
目次
entrancesパラメータの概要
entrancesパラメータは入口ページであることのフラグであり、セッションの最初のpage_viewイベントのentrancesパラメータに数値の1が付く。つまりランディングページに付くのがentrancesパラメータとなる。基本的にGA4でどのパラメータを元に指標を集計しているとの明記はないが、entrancesパラメータを元に「閲覧開始数」が集計されているものと思われる。
閲覧開始数とentrancesパラメータ値の検証クエリ
結果は探索で閲覧開始数を表示させた値と合致する。
CREATE TEMP FUNCTION date_from() RETURNS STRING AS ('20240401');
CREATE TEMP FUNCTION date_to() RETURNS STRING AS ('20240731');
WITH
VerifyEntrances AS (
SELECT
page_location,
SUM(entrances) AS _entrances
FROM (
SELECT
page_location,
entrances
FROM (
SELECT
REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location,
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'entrances') AS entrances
FROM
`<project>.<dataset>.events_*`
WHERE
_TABLE_SUFFIX BETWEEN date_from() AND date_to()
AND
event_name = 'page_view'
)
)
GROUP BY
page_location
ORDER BY
_entrances DESC
)
SELECT
*
FROM
VerifyEntrances
entrancesパラメータの検証
entrancesパラメータの値は、そのページがランディングページであればINT型の整数「1」が入り、それ以外はNULLが入る。またランディングページであってもentrancesパラメータの値が入っていないケースもあり、その割合は0.3%程であった。
entrancesパラメータの型を検証
CREATE TEMP FUNCTION date_from() RETURNS STRING AS ('20240401');
SELECT
entrances_int,
entrances_string,
entrances_float,
entrances_double
FROM (
SELECT
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'entrances') AS entrances_int,
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'entrances') AS entrances_string,
(SELECT value.float_value FROM UNNEST(event_params) WHERE key = 'entrances') AS entrances_float,
(SELECT value.double_value FROM UNNEST(event_params) WHERE key = 'entrances') AS entrances_double
FROM
`<project>.<dataset>.events_*`
WHERE
_TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
)
WHERE
entrances_int IS NOT NULL
OR
entrances_string IS NOT NULL
OR
entrances_float IS NOT NULL
OR
entrances_double IS NOT NULL
GROUP BY
entrances_int,
entrances_string,
entrances_float,
entrances_double
entrancesパラメータの欠損の割合を検証
CREATE TEMP FUNCTION date_from() RETURNS STRING AS ('20240316');
WITH
VerifyEntrances AS (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
prev_page_location,
entrances
FROM (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
LAG(page_location) OVER(PARTITION BY ssid ORDER BY event_timestamp ASC) AS prev_page_location,
entrances
FROM (
SELECT
CONCAT(user_pseudo_id, '-', CAST((SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS STRING)) AS ssid,
PARSE_DATE("%Y%m%d", event_date) AS ymd,
event_timestamp,
REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location,
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'entrances') AS entrances
FROM
`<project>.<dataset>.events_*`
WHERE
_TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
AND
event_name = 'page_view'
)
)
WHERE
prev_page_location IS NULL
)
SELECT
*
FROM
VerifyEntrances