问题
Let's say I have the following table:
child_id parent_id
1 2
2 3
3 -
4 5
5 6
6 -
7 8
and I want to create the following table:
child_id parent_id branch_id
1 2 1
2 3 1
3 - 1
4 5 2
5 6 2
6 - 2
7 8 3
in which the branch_id denotes groupings that are linked together by the parent_ids.
However, the row order is not guaranteed and branches may contain hundreds of rows. This rules out a simple use of the LAG() function.
How can I achieve this given the limitations of BigQuery's SQL?
回答1:
Below example of how to employ BigQuery Scripting for such cases
DECLARE rows_count, run_away_stop INT64 DEFAULT 0;
CREATE TEMP TABLE input AS
SELECT 1 child_id, 2 parent_id UNION ALL
SELECT 2, 3 UNION ALL
SELECT 3, NULL UNION ALL
SELECT 4, 5 UNION ALL
SELECT 5, 6 UNION ALL
SELECT 6, NULL UNION ALL
SELECT 7, 8 ;
CREATE TEMP TABLE ttt AS
SELECT ARRAY(SELECT val FROM UNNEST([child_id, IFNULL(parent_id, child_id)]) val ORDER BY val ) arr FROM input;
LOOP
SET (run_away_stop, rows_count) = (SELECT AS STRUCT run_away_stop + 1, COUNT(1) FROM ttt);
CREATE OR REPLACE TEMP TABLE ttt AS
SELECT ANY_VALUE(arr) arr FROM (
SELECT ARRAY(SELECT DISTINCT val FROM UNNEST(arr) val ORDER BY val) arr
FROM (
SELECT ANY_VALUE(arr1) arr1, ARRAY_CONCAT_AGG(arr) arr
FROM (
SELECT t1.arr arr1, t2.arr arr2, ARRAY(SELECT DISTINCT val FROM UNNEST(ARRAY_CONCAT( t1.arr, t2.arr)) val ORDER BY val) arr
FROM ttt t1, ttt t2
WHERE (SELECT COUNT(1) FROM UNNEST(t1.arr) val JOIN UNNEST(t2.arr) val USING(val)) > 0
) GROUP BY FORMAT('%t', arr1)
)
) GROUP BY FORMAT('%t', arr);
IF (rows_count = (SELECT COUNT(1) FROM ttt) AND run_away_stop > 1) OR run_away_stop > 10 THEN BREAK; END IF;
END LOOP;
SELECT input.*, branch_id
FROM input
JOIN (
SELECT ROW_NUMBER() OVER() AS branch_id, arr AS IDs FROM ttt
)
ON child_id IN UNNEST(IDs)
with final output
Row child_id parent_id branch_id
1 7 8 1
2 4 5 2
3 6 null 2
4 5 6 2
5 3 null 3
6 2 3 3
7 1 2 3
来源:https://stackoverflow.com/questions/60349934/how-to-link-child-and-parent-ids-using-bigquery