I am trying to perform what you would think is a trivial operation in BigQuery: updating a nested field in a table that is the result of a Google Analytics 360 export.
`hits` is an array, so you need to use an array subquery to assign to it. It would look something like this:
#standardSQL
-- Overwrite eventInfo.eventLabel with a fixed string on every hit of every row.
-- hits is an array of structs, so it cannot be patched in place: the whole
-- array is rebuilt by a subquery and assigned back to the column.
UPDATE `dataset_name`.`ga_sessions_20170705`
SET
  hits = ARRAY(
    SELECT AS STRUCT
      -- Keep every hit field as-is except eventInfo.
      * REPLACE (
        (
          -- Rebuild eventInfo, swapping only eventLabel.
          SELECT AS STRUCT
            hit.eventInfo.* REPLACE ('some string' AS eventLabel)
        ) AS eventInfo
      )
    FROM UNNEST(hits) AS hit
  )
-- BigQuery UPDATE requires a WHERE clause; TRUE deliberately targets all rows.
WHERE TRUE;
Here is how to mask PII in ga_sessions rows whose page path contains an email address:
-- Mask email-like substrings in hits.page.pagePath with the literal "[EMAIL]".
--
-- hits is an array of structs, so the array is rebuilt hit by hit and assigned
-- back. Only page.pagePath is rewritten; every other field of page (and of the
-- hit) is carried over unchanged via `* REPLACE`, so the statement does not
-- depend on the order or number of fields in the page record.
--
-- NOTE(review): the pattern starts at "@", so the part of an address before
-- the "@" is left in place. pagePathLevel1..4 are also left untouched and may
-- hold the same text - confirm whether they need masking too.
UPDATE
  `<project-id>.<dataset-name>.<table-name>`
SET
  hits = ARRAY(
    SELECT AS STRUCT
      * REPLACE (
        IF(
          REGEXP_CONTAINS(hit.page.pagePath, r"@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+"),
          (
            -- Same page record with only pagePath rewritten.
            SELECT AS STRUCT
              hit.page.* REPLACE (
                REGEXP_REPLACE(
                  hit.page.pagePath,
                  r"@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+",
                  "[EMAIL]"
                ) AS pagePath
              )
          ),
          -- No match (or NULL page/pagePath): keep the original record.
          hit.page
        ) AS page
      )
    FROM UNNEST(hits) AS hit
  )
WHERE
  -- Only rewrite rows that actually need it: sessions counted as a visit
  -- with at least one PAGE hit whose path matches the pattern.
  totals.visits = 1
  AND EXISTS (
    SELECT 1
    FROM UNNEST(hits) AS hit
    WHERE hit.type = 'PAGE'
      AND REGEXP_CONTAINS(hit.page.pagePath, r"@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")
  );
If you need to modify a given custom dimension you can use this:
#standardSQL
-- Set the value of one hit-level custom dimension on every hit of every row.
--
-- Placeholders to fill in before running:
--   index_number  - the numeric index of the custom dimension to change
--   'new value'   - the replacement text
--
-- Both hits and its customDimensions field are arrays of structs, so each is
-- rebuilt by a subquery: entries whose index matches get the new value, all
-- other entries keep their current value.
UPDATE `tablename`
SET
  hits = ARRAY(
    SELECT AS STRUCT
      -- Keep every hit field as-is except customDimensions.
      * REPLACE (
        ARRAY(
          SELECT AS STRUCT
            cd.index,
            CASE
              WHEN cd.index = index_number THEN 'new value'
              ELSE cd.value
            END AS value  -- named so the rebuilt struct has (index, value) fields
          -- Qualified with hit so this reads as the hit's own array.
          FROM UNNEST(hit.customDimensions) AS cd
        ) AS customDimensions
      )
    FROM UNNEST(hits) AS hit
  )
-- BigQuery UPDATE requires a WHERE clause; TRUE deliberately targets all rows.
WHERE TRUE;
But it takes a while to run.