问题
I'm trying to get distinct possible combination value from single column in BigQuery.
Suppose i have this table:
+---------------------------------------------+
| date |type |payment |customer_no|status|
+---------------------------------------------+
|2019-01-02|Shirt |Cashless| 101|Cancel|
|2019-01-02|Jeans |Cashless| 133|OK |
|2019-01-02|Jeans |Cash | 102|OK |
|2019-01-02|Cap |Cash | 144|OK |
|2019-01-02|Shirt |Cash | 132|OK |
|2019-01-01|Jeans |Cash | 111|Cancel|
|2019-01-01|Cap |Cash | 141|OK |
|2019-01-01|Shirt |Cash | 101|OK |
|2019-01-01|Jeans |Cash | 105|OK |
- I wanna take with rules:
- Only status = 'OK'
- No repetition in combination like Shirt, Jeans and Jeans, Shirt is unacceptable
- Group for each payment and its combination (Cash, Cassless, Cash&Cashless)
With this code:
#standardSQL
SELECT date,
type,
COUNT(customer_no) as total_customer_per_order_type,
order_payment
FROM `blabla.order`
WHERE status = 'OK'
GROUP BY date, type , payment
ORDER BY date DESC, payment ASC
i just got total customer for single type
How to get table something like this:
http://imgur.com/7aECjpSl.png
回答1:
Below is for BigQuery Standard SQL and answers just the exact question in the title of your post which is:
How to get combination of value from single column?
#standardSQL
CREATE TEMP FUNCTION test(a ARRAY<INT64>)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
var combine = function(a) {
var fn = function(n, src, got, all) {
if (n == 0) {
if (got.length > 0) {
all[all.length] = got;
} return;
}
for (var j = 0; j < src.length; j++) {
fn(n - 1, src.slice(j + 1), got.concat([src[j]]), all);
} return;
}
var all = [];
for (var i = 1; i < a.length; i++) {
fn(i, a, [], all);
}
all.push(a);
return all;
}
return combine(a)
''';
WITH types AS (
SELECT DISTINCT type, CAST(DENSE_RANK() OVER(ORDER BY type) AS STRING) type_num
FROM `project.dataset.order`
WHERE status = 'OK'
)
SELECT items, STRING_AGG(type ORDER BY type_num) types
FROM UNNEST(test(GENERATE_ARRAY(1,(SELECT COUNT(1) FROM types)))) AS items,
UNNEST(SPLIT(items)) AS pos
JOIN types ON pos = type_num
GROUP BY items
You can test, play with above using sample data from your questions as in below
#standardSQL
CREATE TEMP FUNCTION test(a ARRAY<INT64>)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
var combine = function(a) {
var fn = function(n, src, got, all) {
if (n == 0) {
if (got.length > 0) {
all[all.length] = got;
} return;
}
for (var j = 0; j < src.length; j++) {
fn(n - 1, src.slice(j + 1), got.concat([src[j]]), all);
} return;
}
var all = [];
for (var i = 1; i < a.length; i++) {
fn(i, a, [], all);
}
all.push(a);
return all;
}
return combine(a)
''';
WITH `project.dataset.order` AS (
SELECT '2019-01-02' dt, 'Shirt' type, 'Cashless' payment, 101 customer_no, 'Cancel' status UNION ALL
SELECT '2019-01-02', 'Jeans', 'Cashless', 133, 'OK' UNION ALL
SELECT '2019-01-02', 'Jeans', 'Cash', 102, 'OK' UNION ALL
SELECT '2019-01-02', 'Cap', 'Cash', 144, 'OK' UNION ALL
SELECT '2019-01-02', 'Shirt', 'Cash', 132, 'OK' UNION ALL
SELECT '2019-01-01', 'Jeans', 'Cash', 111, 'Cancel' UNION ALL
SELECT '2019-01-01', 'Cap', 'Cash', 141, 'OK' UNION ALL
SELECT '2019-01-01', 'Shirt', 'Cash', 101, 'OK' UNION ALL
SELECT '2019-01-01', 'Jeans', 'Cash', 105, 'OK'
), types AS (
SELECT DISTINCT type, CAST(DENSE_RANK() OVER(ORDER BY type) AS STRING) type_num
FROM `project.dataset.order`
WHERE status = 'OK'
)
SELECT items, STRING_AGG(type ORDER BY type_num) types
FROM UNNEST(test(GENERATE_ARRAY(1,(SELECT COUNT(1) FROM types)))) AS items,
UNNEST(SPLIT(items)) AS pos
JOIN types ON pos = type_num
GROUP BY items
with result
Row items types
1 1 Cap
2 2 Jeans
3 3 Shirt
4 1,2 Cap,Jeans
5 1,3 Cap,Shirt
6 2,3 Jeans,Shirt
7 1,2,3 Cap,Jeans,Shirt
来源:https://stackoverflow.com/questions/54229500/how-to-get-combination-of-value-from-single-column