问题
Is there a way to delete/update nested field in bigquery?
Let's say I have this data
wives.age wives.name name
21 angel adam
20 kale
21 victoria rossi
20 jessica
or in json:
{"name":"adam","wives":[{"name":"angel","age":21},{"name":"kale","age":20}]}
{"name":"rossi","wives":[{"name":"victoria","age":21},{"name":"jessica","age":20}]}
As you can see from the data above. Adam has 2 wives, named angel and kale. How to:
- Delete kale record.
- Update jessica to dessica
I tried to google this, but can't find it. I also tried to unnest, etc but no luck.
The reason why we want to do this is because we insert the array to the wrong records and want to remove/update array data with some condition.
回答1:
Below is for BigQuery Standard SQL
#standardSQL
WITH updates AS (
SELECT 'rossi' name, 'jessica' oldname, 'dessica' newname UNION ALL
SELECT 'rossi' name, 'victoria' oldname, 'polly' newname UNION ALL
SELECT 'adam' name, 'angel' oldname, 'jen' newname
), divorces AS (
SELECT 'adam' name, 'kale' wifename UNION ALL
SELECT 'adam' name, 'milly' wifename UNION ALL
SELECT 'rossi' name, 'linda' wifename
)
SELECT t.name,
ARRAY(
SELECT AS STRUCT
age,
CASE
WHEN NOT oldname IS NULL THEN newname
ELSE name
END name
FROM UNNEST(wives)
LEFT JOIN UNNEST(updates) ON t.name = u.name AND name = oldname
LEFT JOIN UNNEST(divorces) AS wifename ON t.name = d.name AND name = wifename
WHERE wifename IS NULL
) waves
FROM `project.dataset.table` t
LEFT JOIN (
SELECT name, ARRAY_AGG(STRUCT(oldname, newname)) updates
FROM updates GROUP BY name
) u ON t.name = u.name
LEFT JOIN (
SELECT name, ARRAY_AGG(wifename) divorces
FROM divorces GROUP BY name
) d ON t.name = d.name
You can test / play with above using dummy data as below
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'adam' name, [STRUCT<age INT64, name STRING>(21, 'angel'), (20, 'kale'), (22, 'milly')] wives UNION ALL
SELECT 'rossi', [STRUCT<age INT64, name STRING>(21, 'victoria'), (20, 'jessica'), (23, 'linda')]
), updates AS (
SELECT 'rossi' name, 'jessica' oldname, 'dessica' newname UNION ALL
SELECT 'rossi' name, 'victoria' oldname, 'polly' newname UNION ALL
SELECT 'adam' name, 'angel' oldname, 'jen' newname
), divorces AS (
SELECT 'adam' name, 'kale' wifename UNION ALL
SELECT 'adam' name, 'milly' wifename UNION ALL
SELECT 'rossi' name, 'linda' wifename
)
SELECT t.name,
ARRAY(
SELECT AS STRUCT
age,
CASE
WHEN NOT oldname IS NULL THEN newname
ELSE name
END name
FROM UNNEST(wives)
LEFT JOIN UNNEST(updates) ON t.name = u.name AND name = oldname
LEFT JOIN UNNEST(divorces) AS wifename ON t.name = d.name AND name = wifename
WHERE wifename IS NULL
) waves
FROM `project.dataset.table` t
LEFT JOIN (
SELECT name, ARRAY_AGG(STRUCT(oldname, newname)) updates
FROM updates GROUP BY name
) u ON t.name = u.name
LEFT JOIN (
SELECT name, ARRAY_AGG(wifename) divorces
FROM divorces GROUP BY name
) d ON t.name = d.name
result is as expected
name waves.age waves.name
adam 21 jen
rossi 21 polly
20 dessica
I hope you will be able to apply above to your real case :o)
来源:https://stackoverflow.com/questions/47707076/how-to-delete-update-nested-data-in-bigquery