GA4

【GA4 SQL】新規セッション率をBigQueryから算出

GA4には「新規セッション率」という指標は存在しないため、UAに近しい「新規セッション率」を求めたい場合はBigQueryを活用する必要がある。

新規セッション率の定義

新規セッション率 = 新規セッション数 / セッション数 * 100

新規セッション数の定義としては、ここでは初回訪問日にサイトに訪問した全てのセッションを「新規セッション」として扱う。このようにカスタマイズしている理由としては、初回訪問日であればすべてのセッションを新規訪問とみなすことがより直感的であり、リピーターセッションを求める際はセッション数から新規セッション数を引くことで求められるため計算が合致するメリットがあるため。

新規セッション数を算出するクエリ

CREATE TEMP FUNCTION date_from() RETURNS STRING AS ('20240401');

WITH
  SEG_FirstVisitSession AS (
    SELECT
      DISTINCT ssid
    FROM (
      SELECT
        ssid,
        ymd,
        CASE
          WHEN ymd = first_visit_date THEN 'first_visit'
          ELSE 'subsequent_visit'
        END AS visit_type
      FROM (
        SELECT
          CONCAT(e.user_pseudo_id, '-', CAST((SELECT value.int_value FROM UNNEST(e.event_params) WHERE key = 'ga_session_id') AS STRING)) AS ssid,
          PARSE_DATE("%Y%m%d", e.event_date) AS ymd,
          first_visit.ymd AS first_visit_date
        FROM
          `<project>.<dataset>.events_*` AS e
        INNER JOIN (
          SELECT
            user_pseudo_id,
            MIN(PARSE_DATE("%Y%m%d", event_date)) AS ymd
          FROM
            `<project>.<dataset>.events_*`
          WHERE
            _TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
            AND event_name = 'first_visit'
          GROUP BY
            user_pseudo_id
        ) AS first_visit
        ON e.user_pseudo_id = first_visit.user_pseudo_id
        WHERE
          e._TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
      )
    )
    WHERE
      visit_type = 'first_visit'
  ),
  SessionID AS (
    SELECT
      ymd,
      CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS ssid
    FROM (
      SELECT
        PARSE_DATE("%Y%m%d", event_date) AS ymd,
        user_pseudo_id,
        (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS ga_session_id
      FROM
        `<project>.<dataset>.events_*`
      WHERE
        _TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(),INTERVAL 1 DAY))
    )
  ),
  NewSessionID AS (
    SELECT
      ymd,
      ssid
    FROM (
      SELECT
        ymd,
        ssid
      FROM
        SessionID
    ) AS event_data
    WHERE
      event_data.ssid IN (SELECT ssid FROM SEG_FirstVisitSession)
  ),
  NewSessionDaily AS (
    SELECT
      ymd,
      COUNT(DISTINCT ssid) AS _ssuu
    FROM
     NewSessionID
    GROUP BY
      ymd
  )

SELECT
  *
FROM
  NewSessionDaily
ORDER BY
  ymd ASC

新規セッション率を算出するクエリ

CREATE TEMP FUNCTION date_from() RETURNS STRING AS ('20240401');

WITH
  SessionDaily AS (
    SELECT
      ymd,
      COUNT(DISTINCT ssid) AS _ssuu
    FROM (
      SELECT
        ymd,
        CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS ssid
      FROM (
        SELECT
          PARSE_DATE("%Y%m%d", event_date) AS ymd,
          user_pseudo_id,
          (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS ga_session_id
        FROM
          `<project>.<dataset>.events_*`
        WHERE
          _TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(),INTERVAL 1 DAY))
      )
  )
  GROUP BY
    ymd
  ),
  SEG_FirstVisitSession AS (
    SELECT
      DISTINCT ssid
    FROM (
      SELECT
        ssid,
        ymd,
        CASE
          WHEN ymd = first_visit_date THEN 'first_visit'
          ELSE 'subsequent_visit'
        END AS visit_type
      FROM (
        SELECT
          CONCAT(e.user_pseudo_id, '-', CAST((SELECT value.int_value FROM UNNEST(e.event_params) WHERE key = 'ga_session_id') AS STRING)) AS ssid,
          PARSE_DATE("%Y%m%d", e.event_date) AS ymd,
          first_visit.ymd AS first_visit_date
        FROM
          `<project>.<dataset>.events_*` AS e
        INNER JOIN (
          SELECT
            user_pseudo_id,
            MIN(PARSE_DATE("%Y%m%d", event_date)) AS ymd
          FROM
            `<project>.<dataset>.events_*`
          WHERE
            _TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
            AND event_name = 'first_visit'
          GROUP BY
            user_pseudo_id
        ) AS first_visit
        ON e.user_pseudo_id = first_visit.user_pseudo_id
        WHERE
          e._TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
      )
    )
    WHERE
      visit_type = 'first_visit'
  ),
  SessionID AS (
    SELECT
      ymd,
      CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS ssid
    FROM (
      SELECT
        PARSE_DATE("%Y%m%d", event_date) AS ymd,
        user_pseudo_id,
        (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS ga_session_id
      FROM
        `<project>.<dataset>.events_*`
      WHERE
        _TABLE_SUFFIX BETWEEN date_from() AND FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(),INTERVAL 1 DAY))
    )
    ORDER BY
      ymd
  ),
  NewSessionID AS (
    SELECT
      ymd,
      ssid
    FROM (
      SELECT
        ymd,
        ssid
      FROM
        SessionID
    ) AS event_data
    WHERE
      event_data.ssid IN (SELECT ssid FROM SEG_FirstVisitSession)
  ),
  NewSessionDaily AS (
    SELECT
      ymd,
      COUNT(DISTINCT ssid) AS _ssuu
    FROM
     NewSessionID
    GROUP BY
      ymd
  ),
  JoinSession AS (
    SELECT
      ymd,
      _ssuu,
      _new_ssuu,
      ROUND(_rate_new_ssuu,2) AS _rate_new_ssuu
    FROM (
      SELECT
        t.ymd,
        t._ssuu AS _ssuu,
        COALESCE(n._ssuu, 0) AS _new_ssuu,
        COALESCE(n._ssuu, 0) / t._ssuu AS _rate_new_ssuu
      FROM
        SessionDaily AS t
      LEFT JOIN
        NewSessionDaily AS n
      ON
        t.ymd = n.ymd
    )
    ORDER BY
      ymd ASC
  )

SELECT
  *
FROM
  JoinSession

関連記事

【GA4 SQL】ページごとのページビュー数をBigQueryから集計

【GA4 SQL】ページ再訪問率をBigQueryから集計

【GA4 SQL】セッションセグメントを適用させる方法

PAGE TOP