GA4

【GA4 SQL】セッション単位のページ遷移数をBigQueryから取得

現状GA4にはユーザー単位であれば、経路データ探索を用いることによりデータ可視化することができるが、セッション単位で確認することはできない。また、モニタリング用途として日次で遷移数を確認したいケースがほとんどなので、指定した日付の遷移数を可視化できたとしても、それ単体では良いのか悪いのか評価することができない。そのため、単体でページ遷移数を取得したとしても、そこから何か示唆を得ることは難しい。

BigQueryを使うことでこのような問題を解決することができ、日次でページ間のセッション単位での遷移数を確認できるデータマートを作成しておくことが望ましい。

セッション単位のページAからページBへの遷移数を日次で取得するクエリ

WITH
  URL_A AS (
    SELECT
      ymd,
      MIN(event_timestamp) AS min_event_timestamp,
      CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS ssid,
    FROM (
      SELECT
        PARSE_DATE("%Y%m%d", event_date) AS ymd,
        event_timestamp,
        user_pseudo_id,
        (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS ga_session_id,
        REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location,
      FROM
        `<project>.<dataset>.events_*`
      WHERE
        _TABLE_SUFFIX BETWEEN '20240401' AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
    )
    WHERE
      page_location = 'Page_A'
    GROUP BY
      ymd,
      ssid
  ),
  URL_B AS (
    SELECT
      ymd,
      MIN(event_timestamp) AS min_event_timestamp,
      CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS ssid,
    FROM (
      SELECT
        PARSE_DATE("%Y%m%d", event_date) AS ymd,
        event_timestamp,
        user_pseudo_id,
        (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS ga_session_id,
        REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location,
      FROM
        `<project>.<dataset>.events_*`
      WHERE
        _TABLE_SUFFIX BETWEEN '20240401' AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
    )
    WHERE
      page_location = 'Page_B'
    GROUP BY
      ymd,
      ssid
  ),
  URL_A_to_B AS (
    SELECT
      a.ymd,
      a.ssid
    FROM
      URL_A AS a
    INNER JOIN
      URL_B AS b
    ON
        a.ymd = b.ymd
      AND 
        a.ssid = b.ssid
      AND
        a.min_event_timestamp < b.min_event_timestamp
  ),
  calc AS (
    SELECT
      a.ymd,
      COALESCE(_ssuu_A, 0) AS _ssuu_A,
      COALESCE(_ssuu_B, 0) AS _ssuu_B
    FROM (
      SELECT
        ymd,
        COUNT(DISTINCT ssid) AS _ssuu_A
      FROM
        URL_A
      GROUP BY
        ymd
    ) a
    LEFT JOIN (
      SELECT
        ymd,
        COUNT(DISTINCT ssid) AS _ssuu_B
      FROM
        URL_A_to_B
      GROUP BY
        ymd
    ) b
    ON
      a.ymd = b.ymd
    ORDER BY
      ymd ASC
  )

SELECT
  *
FROM
  calc

クエリ解説

ランディングページからフォーム入力ページへの遷移のように、逆行があまり起こり得ないケースであれば、GA4の探索などを用いて遷移率に近いものを出すことは可能ではあるが、正確な遷移数を取得することはできない。そのため、event_timestampの値を比較することで時系列順のページ訪問ページを割り出す必要がある。

セッションの集計にあたってはeventを’page_view’でより限定的に条件を指定してもよい。その場合には以下のように記述する。

WHERE
  _TABLE_SUFFIX BETWEEN '20240401' AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
AND
  event_name = 'page_view'

「数」から「率」への変換はBIツール側で行う

上記クエリでページ遷移数を取得することができたら、BigQuery側では「率」の計算までは行わない。率を算出する際は合算値に対して計算する必要があるため、単一行で率を求めてしまうと結果の値が不正確になる。そのため、Looker StudioなどのBIツールを連携するのであれば日次の遷移数まで求めたデータマートを作成し、それをLooker Studio側で読み込み、SUM関数で行の値を合算し率を計算するのがセオリーとなる。

参照元情報付きの遷移数を取得するクエリ

CREATE TEMP FUNCTION date_from() RETURNS STRING AS ('20241020');
CREATE TEMP FUNCTION date_to() RETURNS STRING AS ('20241113');
CREATE TEMP FUNCTION URL_A() RETURNS STRING AS ('URL_A');
CREATE TEMP FUNCTION URL_B() RETURNS STRING AS ('URL_B');

CREATE OR REPLACE TEMP TABLE SMC_ssid AS (
  SELECT
    ssid,
    -- manual_source_medium AS source_medium,
    -- manual_source AS source,
    -- manual_medium AS medium,
    -- manual_campaign AS campaign,
    cross_source_medium AS source_medium,   
    cross_source AS source,
    cross_medium AS medium,
    cross_campaign AS campaign         
  FROM (
    SELECT
      ssid,
      -- セッションの手動参照元
      CONCAT(manual_source,' / ', manual_medium) AS manual_source_medium,
      manual_source,
      manual_medium,
      manual_campaign,
      -- セッションの参照元
      CONCAT(cross_source,' / ', cross_medium) AS cross_source_medium,
      cross_source,
      cross_medium,
      cross_campaign
    FROM (
      SELECT
        CONCAT(user_pseudo_id, '-', CAST((SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS STRING)) AS ssid,
        -- セッションの手動参照元
        session_traffic_source_last_click.manual_campaign.source AS manual_source,
        session_traffic_source_last_click.manual_campaign.medium AS manual_medium,
        session_traffic_source_last_click.manual_campaign.campaign_name AS manual_campaign,
        -- セッションの参照元
        session_traffic_source_last_click.cross_channel_campaign.source AS cross_source,
        session_traffic_source_last_click.cross_channel_campaign.medium AS cross_medium,
        session_traffic_source_last_click.cross_channel_campaign.campaign_name AS cross_campaign
      FROM
        `<project>.<dataset>.events_*`
      WHERE
        _TABLE_SUFFIX BETWEEN date_from() AND date_to()
      AND
        event_name = 'session_start'
    )
  )
);

WITH 
  URL_A AS (
    SELECT
       urla.ymd,
       urla.min_event_timestamp,
       urla.ssid,
       id.source_medium,
       id.source,
       id.medium,
       id.campaign
    FROM (
      SELECT
        ymd,
        MIN(event_timestamp) AS min_event_timestamp,
        CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS ssid,
      FROM (
        SELECT
          PARSE_DATE("%Y%m%d", event_date) AS ymd,
          event_timestamp,
          user_pseudo_id,
          (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS ga_session_id,
          REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location,
        FROM
          `<project>.<dataset>.events_*`
        WHERE
          _TABLE_SUFFIX BETWEEN date_from() AND date_to()
      )
      WHERE
        page_location = URL_A()
      GROUP BY
        ymd,
        ssid
    ) AS urla
    LEFT JOIN (
      SELECT
        *
      FROM
        _SESSION.SMC_ssid
    ) AS id
    ON urla.ssid = id.ssid
  ),
  URL_B AS (
    SELECT
       urlb.ymd,
       urlb.min_event_timestamp,
       urlb.ssid,
       id.source_medium,
       id.source,
       id.medium,
       id.campaign
    FROM (
      SELECT
        ymd,
        MIN(event_timestamp) AS min_event_timestamp,
        CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS ssid,
      FROM (
        SELECT
          PARSE_DATE("%Y%m%d", event_date) AS ymd,
          event_timestamp,
          user_pseudo_id,
          (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS ga_session_id,
          REGEXP_REPLACE((SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location'), r"\?.*", "") AS page_location,
        FROM
          `<project>.<dataset>.events_*`
        WHERE
          _TABLE_SUFFIX BETWEEN date_from() AND date_to()
      )
      WHERE
        page_location = URL_B()
      GROUP BY
        ymd,
        ssid
    ) AS urlb
    LEFT JOIN (
      SELECT
        *
      FROM
        _SESSION.SMC_ssid
    ) AS id
    ON urlb.ssid = id.ssid
  ),
  URL_A_to_B AS (
    SELECT
      a.ymd,
      a.ssid,
      a.source_medium,
      a.source,
      a.medium,
      a.campaign
    FROM
      URL_A AS a
    INNER JOIN
      URL_B AS b
    ON
        a.ymd = b.ymd
      AND 
        a.ssid = b.ssid
      AND
        a.source_medium = b.source_medium
      AND
        a.source = b.source
      AND
        a.medium = b.medium
      AND
        a.campaign = b.campaign
      AND
        a.min_event_timestamp < b.min_event_timestamp
  ),
  calc AS (
    SELECT
      a.ymd,
      a.source_medium,
      a.source,
      a.medium,
      a.campaign,      
      COALESCE(_ssuu_A, 0) AS _ssuu_A,
      COALESCE(_ssuu_B, 0) AS _ssuu_B
    FROM (
      SELECT
        ymd,
        source_medium,
        source,
        medium,
        campaign,
        COUNT(DISTINCT ssid) AS _ssuu_A
      FROM
        URL_A
      GROUP BY
        ymd,
        source_medium,
        source,
        medium,
        campaign
    ) a
    LEFT JOIN (
      SELECT
        ymd,
        source_medium,
        source,
        medium,
        campaign,        
        COUNT(DISTINCT ssid) AS _ssuu_B
      FROM
        URL_A_to_B
      GROUP BY
        ymd,
        source_medium,
        source,
        medium,
        campaign        
    ) b
    ON
      a.ymd = b.ymd
    AND
      a.source_medium = b.source_medium
    AND
      a.source = b.source
    AND
      a.medium = b.medium
    AND
      a.campaign = b.campaign
  )

SELECT
  *
FROM
  calc
ORDER BY
  ymd ASC

関連記事

【GA4 SQL】ページ単位のエンゲージメント分析

【GA4 SQL】再帰CTEで同一ページを除いた「2ページ目」を取得 BigQuery WITH RECURSIVE

【GA4 SQL】セッションセグメントを適用させる方法

PAGE TOP