问题
Now that BigQuery supports async on #standardSQL, how can I convert this #legacySQL function to run on #standardSQL?
#legacySQL
SELECT SUM(s)
FROM
js((
SELECT FLOOR(RAND()*100000) group, NEST(requests) as x
FROM (
SELECT requests, content_size
FROM [fh-bigquery:wikipedia.pagecounts_201205]
)
GROUP BY group)
, group, x
, "[{name:'s', type: 'float'}]",
"function (row, emit) {
const memory = new WebAssembly.Memory({ initial: 256, maximum: 256 });
const env = {
'abortStackOverflow': _ => { throw new Error('overflow'); },
'table': new WebAssembly.Table({ initial: 0, maximum: 0, element: 'anyfunc' }),
'tableBase': 0,
'memory': memory,
'memoryBase': 1024,
'STACKTOP': 0,
'STACK_MAX': memory.buffer.byteLength,
};
const imports = { env };
const bytes = new Uint8Array([0, 97, 115, 109, 1, 0, 0, 0, 1, 139, 128, 128, 128, 0, 2, 96, 1, 127, 0, 96, 2, 127, 127, 1, 127, 2, 254, 128, 128, 128, 0, 7, 3, 101, 110, 118, 8, 83, 84, 65, 67, 75, 84, 79, 80, 3, 127, 0, 3, 101, 110, 118, 9, 83, 84, 65, 67, 75, 95, 77, 65, 88, 3, 127, 0, 3, 101, 110, 118, 18, 97, 98, 111, 114, 116, 83, 116, 97, 99, 107, 79, 118, 101, 114, 102, 108, 111, 119, 0, 0, 3, 101, 110, 118, 6, 109, 101, 109, 111, 114, 121, 2, 1, 128, 2, 128, 2, 3, 101, 110, 118, 5, 116, 97, 98, 108, 101, 1, 112, 1, 0, 0, 3, 101, 110, 118, 10, 109, 101, 109, 111, 114, 121, 66, 97, 115, 101, 3, 127, 0, 3, 101, 110, 118, 9, 116, 97, 98, 108, 101, 66, 97, 115, 101, 3, 127, 0, 3, 130, 128, 128, 128, 0, 1, 1, 6, 147, 128, 128, 128, 0, 3, 127, 1, 35, 0, 11, 127, 1, 35, 1, 11, 125, 1, 67, 0, 0, 0, 0, 11, 7, 136, 128, 128, 128, 0, 1, 4, 95, 115, 117, 109, 0, 1, 9, 129, 128, 128, 128, 0, 0, 10, 196, 128, 128, 128, 0, 1, 190, 128, 128, 128, 0, 1, 7, 127, 2, 64, 35, 4, 33, 8, 35, 4, 65, 16, 106, 36, 4, 35, 4, 35, 5, 78, 4, 64, 65, 16, 16, 0, 11, 32, 0, 33, 2, 32, 1, 33, 3, 32, 2, 33, 4, 32, 3, 33, 5, 32, 4, 32, 5, 106, 33, 6, 32, 8, 36, 4, 32, 6, 15, 0, 11, 0, 11]);
WebAssembly.instantiate(bytes, imports).then(wa => {
const exports = wa.instance.exports;
const sum = exports._sum;
for (var i = 0, len = row.x.length; i < len; i++) {
emit({s: sum(row.x[i], row.x[i])});
}
});
}"
)
(from https://medium.com/@hoffa/bigquery-beyond-sql-and-js-running-c-and-rust-code-at-scale-33021763ee1f)
回答1:
Now you can create an async function x()
and then return x()
.
Instead of being able to emit()
multiple times, you'll have to nest the results into array.
Working example:
CREATE TEMP FUNCTION `magic_function`(x ARRAY<INT64>) RETURNS ARRAY<INT64> LANGUAGE js AS '''
const memory = new WebAssembly.Memory({ initial: 256, maximum: 256 });
const env = {
'abortStackOverflow': _ => { throw new Error('overflow'); },
'table': new WebAssembly.Table({ initial: 0, maximum: 0, element: 'anyfunc' }),
'tableBase': 0,
'memory': memory,
'memoryBase': 1024,
'STACKTOP': 0,
'STACK_MAX': memory.buffer.byteLength,
};
const imports = { env };
const bytes = new Uint8Array([0, 97, 115, 109, 1, 0, 0, 0, 1, 139, 128, 128, 128, 0, 2, 96, 1, 127, 0, 96, 2, 127, 127, 1, 127, 2, 254, 128, 128, 128, 0, 7, 3, 101, 110, 118, 8, 83, 84, 65, 67, 75, 84, 79, 80, 3, 127, 0, 3, 101, 110, 118, 9, 83, 84, 65, 67, 75, 95, 77, 65, 88, 3, 127, 0, 3, 101, 110, 118, 18, 97, 98, 111, 114, 116, 83, 116, 97, 99, 107, 79, 118, 101, 114, 102, 108, 111, 119, 0, 0, 3, 101, 110, 118, 6, 109, 101, 109, 111, 114, 121, 2, 1, 128, 2, 128, 2, 3, 101, 110, 118, 5, 116, 97, 98, 108, 101, 1, 112, 1, 0, 0, 3, 101, 110, 118, 10, 109, 101, 109, 111, 114, 121, 66, 97, 115, 101, 3, 127, 0, 3, 101, 110, 118, 9, 116, 97, 98, 108, 101, 66, 97, 115, 101, 3, 127, 0, 3, 130, 128, 128, 128, 0, 1, 1, 6, 147, 128, 128, 128, 0, 3, 127, 1, 35, 0, 11, 127, 1, 35, 1, 11, 125, 1, 67, 0, 0, 0, 0, 11, 7, 136, 128, 128, 128, 0, 1, 4, 95, 115, 117, 109, 0, 1, 9, 129, 128, 128, 128, 0, 0, 10, 196, 128, 128, 128, 0, 1, 190, 128, 128, 128, 0, 1, 7, 127, 2, 64, 35, 4, 33, 8, 35, 4, 65, 16, 106, 36, 4, 35, 4, 35, 5, 78, 4, 64, 65, 16, 16, 0, 11, 32, 0, 33, 2, 32, 1, 33, 3, 32, 2, 33, 4, 32, 3, 33, 5, 32, 4, 32, 5, 106, 33, 6, 32, 8, 36, 4, 32, 6, 15, 0, 11, 0, 11]);
async function main() {
const wa = await WebAssembly.instantiate(bytes, imports);
const exports = wa.instance.exports;
const magic_sum = exports._sum;
return x.map((val) => {
return magic_sum(val, val);
});
}
return main();
''';
SELECT SUM(s) sum_s
FROM (
SELECT FLOOR(RAND()*100000) grp, magic_function(ARRAY_AGG(views)) s
FROM `fh-bigquery.wikipedia_v3.pageviews_2019`
WHERE DATE(datehour) = '2019-01-01' AND wiki='pt'
GROUP BY grp
), UNNEST(s) s
(https://issuetracker.google.com/issues/138430827#comment9) (h/t https://twitter.com/mylesborins)
来源:https://stackoverflow.com/questions/57897905/running-async-js-functions-on-bigquery-with-standardsql