GA4のページ遷移の取得に以下のようなLEAD関数が用いられることが多く、この方法でユーザーが遷移した2ページ目を取得することが可能になる。
LEAD(page_location) OVER(PARTITION BY ga_session_id ORDER BY event_timestamp ASC) AS next_page_location
この取得方法で「正確」に次のページを取得することができるのだが、’page_view’が同一のページで送信された場合、同一のページが2ページ目として扱われてしまう問題点がある。現にこのように同一のページで’page_view’が複数回送信されることは実際にBigQueryにエクスポートされたテーブルをみてみるとかなりの割合である。
ただ、このカウント方法だと「正確」ではあるが、意味のある数値とはいえないケースがほとんどだろう。本来、計測したいのは「次のページ」であって、同一のページがリロードされたからといって、そのページを2ページ目としてカウントすることに意味をなさないためだ。
現在のページを除いた「次のページ」を取得するクエリ
WITH RECURSIVE
SessionData AS (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
LEAD(page_location) OVER(PARTITION BY ssid ORDER BY event_timestamp ASC) AS next_page_location,
LEAD(event_timestamp) OVER(PARTITION BY ssid ORDER BY event_timestamp ASC) AS next_event_timestamp,
1 AS depth
FROM (
SELECT
CONCAT(user_pseudo_id, '-', CAST((SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS STRING)) AS ssid,
PARSE_DATE("%Y%m%d", event_date) AS ymd,
event_timestamp,
REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location
FROM
`<project>.<dataset>.events_*`
WHERE
_TABLE_SUFFIX BETWEEN '20240401' AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
AND
event_name = 'page_view'
)
),
RecursivePages AS (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
next_page_location,
next_event_timestamp,
depth
FROM
SessionData
UNION ALL
SELECT
rp.ssid,
rp.ymd,
rp.event_timestamp,
rp.page_location,
sd.next_page_location,
sd.next_event_timestamp,
rp.depth + 1
FROM
RecursivePages rp
INNER JOIN
SessionData sd
ON
rp.ssid = sd.ssid
AND
rp.ymd = sd.ymd
AND
rp.next_event_timestamp = sd.event_timestamp
WHERE
rp.page_location = rp.next_page_location
),
Master AS (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
CASE
WHEN next_page_location IS NULL THEN 'Exit'
ELSE next_page_location
END AS final_next_page
FROM
RecursivePages
WHERE
page_location != next_page_location
OR
next_page_location IS NULL
ORDER BY
ssid,
event_timestamp
)
SELECT
*
FROM
Master
現在のページを除いた「ランディングページ」の次ページを取得
WITH RECURSIVE
SessionData AS (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
LEAD(page_location) OVER(PARTITION BY ssid ORDER BY event_timestamp ASC) AS next_page_location,
LEAD(event_timestamp) OVER(PARTITION BY ssid ORDER BY event_timestamp ASC) AS next_event_timestamp,
entrances,
1 AS depth
FROM (
SELECT
CONCAT(user_pseudo_id, '-', CAST((SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS STRING)) AS ssid,
PARSE_DATE("%Y%m%d", event_date) AS ymd,
event_timestamp,
REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location,
(SELECT value.int_value FROM UNNEST(event_params) p WHERE p.key = 'entrances') AS entrances
FROM
`<project>.<dataset>.events_*`
WHERE
_TABLE_SUFFIX BETWEEN '20240315' AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
AND
event_name = 'page_view'
)
),
RecursivePages AS (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
next_page_location,
next_event_timestamp,
entrances,
depth
FROM
SessionData
UNION ALL
SELECT
rp.ssid,
rp.ymd,
rp.event_timestamp,
rp.page_location,
sd.next_page_location,
sd.next_event_timestamp,
rp.entrances,
rp.depth + 1
FROM
RecursivePages rp
INNER JOIN
SessionData sd
ON
rp.ssid = sd.ssid
AND
rp.ymd = sd.ymd
AND
rp.next_event_timestamp = sd.event_timestamp
WHERE
rp.page_location = rp.next_page_location
),
Master_01 AS (
SELECT
ssid,
ymd,
event_timestamp,
page_location,
CASE
WHEN next_page_location IS NULL THEN 'Exit'
ELSE next_page_location
END AS final_next_page,
entrances
FROM
RecursivePages
WHERE
(
page_location != next_page_location
OR
next_page_location IS NULL
)
AND entrances = 1
ORDER BY
page_location,
ymd
)
SELECT
*
FROM
Master_01