How to summarize all possible combinations of variables?

后端 未结 3 632
北恋
北恋 2021-02-11 11:44

I am trying to summarize counts based on all possible combinations of variables. Here is some example data:

3条回答
  •  日久生厌
    2021-02-11 12:17

    Naive approach, SQL Server version (I've assumed that we always have 3 columns, so there will be 2^3 - 1 = 7 rows):

    -- One output row per non-empty subset of {a, b, c}: 2^3 - 1 = 7 rows.
    -- Each branch counts the DISTINCT positive values (or value tuples) of
    -- the columns in that subset.
    --
    -- Single-column branches: CASE yields NULL for non-positive values and
    -- COUNT(DISTINCT ...) ignores NULLs. Each branch must return ITS OWN
    -- column from the CASE (b for 'B', c for 'C').
    SELECT 'A' AS combination, COUNT(DISTINCT CASE WHEN a > 0 THEN a ELSE NULL END) AS cnt FROM t
    UNION ALL
    SELECT 'B', COUNT(DISTINCT CASE WHEN b > 0 THEN b ELSE NULL END) FROM t
    UNION ALL
    SELECT 'C', COUNT(DISTINCT CASE WHEN c > 0 THEN c ELSE NULL END) FROM t
    UNION ALL
    -- Multi-column branches: values are cast to text and joined with ','.
    -- With SQL Server's default CONCAT_NULL_YIELDS_NULL behaviour, `+` with
    -- a NULL operand yields NULL, so a row is counted only when every
    -- column of the subset is positive.
    SELECT 'A,B', COUNT(DISTINCT CASE WHEN a > 0 THEN CAST(a AS VARCHAR(10)) ELSE NULL END
                         + ',' + CASE WHEN b > 0 THEN CAST(b AS VARCHAR(10)) ELSE NULL END) FROM t
    UNION ALL
    SELECT 'A,C', COUNT(DISTINCT CASE WHEN a > 0 THEN CAST(a AS VARCHAR(10)) ELSE NULL END
                         + ',' + CASE WHEN c > 0 THEN CAST(c AS VARCHAR(10)) ELSE NULL END) FROM t
    UNION ALL
    SELECT 'B,C', COUNT(DISTINCT CASE WHEN b > 0 THEN CAST(b AS VARCHAR(10)) ELSE NULL END
                         + ',' + CASE WHEN c > 0 THEN CAST(c AS VARCHAR(10)) ELSE NULL END) FROM t
    UNION ALL
    SELECT 'A,B,C', COUNT(DISTINCT CASE WHEN a > 0 THEN CAST(a AS VARCHAR(10)) ELSE NULL END
                         + ',' + CASE WHEN b > 0 THEN CAST(b AS VARCHAR(10)) ELSE NULL END
                         + ',' + CASE WHEN c > 0 THEN CAST(c AS VARCHAR(10)) ELSE NULL END) FROM t
    ORDER BY combination;
    
     
    

    Rextester Demo


    EDIT:

    Same as above but more concise:

    -- Counts distinct non-zero values (or value tuples) for every non-empty
    -- subset of the columns a, b, c.
    WITH nonzero AS (
        -- NULLIF maps 0 to NULL; the cast to text lets the outer query join
        -- several columns into one comma-separated key with `+`.
        SELECT
            ID,
            CAST(NULLIF(a, 0) AS VARCHAR(10)) AS a,
            CAST(NULLIF(b, 0) AS VARCHAR(10)) AS b,
            CAST(NULLIF(c, 0) AS VARCHAR(10)) AS c
        FROM t
    )
    -- A NULL operand makes the whole `+` concatenation NULL, and
    -- COUNT(DISTINCT ...) skips NULLs, so rows with a zero in any column of
    -- the subset do not contribute to that subset's count.
    SELECT 'A' AS combination, COUNT(DISTINCT a) AS cnt FROM nonzero
    UNION ALL
    SELECT 'B', COUNT(DISTINCT b) FROM nonzero
    UNION ALL
    SELECT 'C', COUNT(DISTINCT c) FROM nonzero
    UNION ALL
    SELECT 'A,B', COUNT(DISTINCT a + ',' + b) FROM nonzero
    UNION ALL
    SELECT 'A,C', COUNT(DISTINCT a + ',' + c) FROM nonzero
    UNION ALL
    SELECT 'B,C', COUNT(DISTINCT b + ',' + c) FROM nonzero
    UNION ALL
    SELECT 'A,B,C', COUNT(DISTINCT a + ',' + b + ',' + c) FROM nonzero;
    

    Rextester Demo


    EDIT 2

    Using UNPIVOT:

    -- Computes all seven counts in one pass over t, then UNPIVOT turns the
    -- single wide row into (combination, count) rows.
    WITH flagged AS (
        -- Each variable becomes the text '1' when non-zero, NULL otherwise.
        SELECT
            ID,
            CAST(CASE WHEN a <> 0 THEN 1 ELSE NULL END AS VARCHAR(10)) AS a,
            CAST(CASE WHEN b <> 0 THEN 1 ELSE NULL END AS VARCHAR(10)) AS b,
            CAST(CASE WHEN c <> 0 THEN 1 ELSE NULL END AS VARCHAR(10)) AS c
        FROM t
    )
    SELECT combination, [count]
    FROM (
        -- COUNT(expr) counts non-NULL values; concatenating flags with `+`
        -- is NULL as soon as one flag is NULL, so e.g. COUNT(a + b) counts
        -- the rows where both a and b are non-zero.
        SELECT
            COUNT(a)         AS a,
            COUNT(b)         AS b,
            COUNT(c)         AS c,
            COUNT(a + b)     AS ab,
            COUNT(a + c)     AS ac,
            COUNT(b + c)     AS bc,
            COUNT(a + b + c) AS abc
        FROM flagged
    ) AS s
    UNPIVOT ([count] FOR combination IN (a, b, c, ab, ac, bc, abc)) AS unpvt
    

    Rextester Demo


    EDIT FINAL APPROACH

    I appreciate your approach. I have more than 3 variables in my actual dataset; do you think we can generate all possible combinations programmatically rather than hard-coding them? Maybe your second approach will cover that:

    SQL is a bit clumsy for this kind of operation, but I want to show that it is possible.

    -- Sample data: one row per id with three integer variables a, b, c.
    -- A value of 0 means the variable is absent for that id.
    CREATE TABLE t (
        id INT,
        a  INT,
        b  INT,
        c  INT
    );

    INSERT INTO t (id, a, b, c)
    VALUES
        (10001, 1, 3, 3),
        (10002, 0, 0, 0),
        (10003, 3, 6, 0),
        (10004, 7, 0, 0),
        (10005, 0, 0, 0);
    
    -- Lookup of the variable columns of t. Each column gets a sequential
    -- item_id and a distinct power-of-two bit_value (1, 2, 4, ...), so any
    -- subset of columns can be represented as an integer bitmask.
    DECLARE @Sample AS TABLE 
    (
        item_id     tinyint IDENTITY(1,1) PRIMARY KEY NONCLUSTERED,
        item        nvarchar(500) NOT NULL,
        bit_value   AS  CONVERT ( integer, POWER(2, item_id - 1) )
                    PERSISTED UNIQUE CLUSTERED
    );    
    
    -- Every column of t except the key column 'id' is treated as a variable.
    -- ORDER BY column_id makes the identity (and therefore bit) assignment
    -- follow the table's column order, so the generated combination names
    -- are the same on every run instead of depending on scan order.
    INSERT INTO @Sample (item)
    SELECT name
    FROM sys.columns
    WHERE object_id = OBJECT_ID('t')
      AND name <> 'id'
    ORDER BY column_id;
    
    -- Highest bitmask to enumerate: k variables give 2^k - 1 non-empty subsets.
    DECLARE @max integer = POWER(2, ( SELECT COUNT(*) FROM @Sample AS s)) - 1;
    DECLARE @cols NVARCHAR(MAX);         -- '[a],[b],[a,b],...'  (UNPIVOT column list)
    DECLARE @cols_casted NVARCHAR(MAX);  -- filled in by a later statement
    DECLARE @cols_count NVARCHAR(MAX);   -- '[a]=COUNT(DISTINCT a),...' (aggregate list)
    
    
    -- Tally: a numbers table 1..65536 built by repeatedly cross-joining a
    -- two-row seed with itself.
    ;WITH
      Pass0 AS (SELECT 1 AS C UNION ALL SELECT 1),                      -- 2 rows
      Pass1 AS (SELECT 1 AS C FROM Pass0 AS A CROSS JOIN Pass0 AS B),   -- 4 rows
      Pass2 AS (SELECT 1 AS C FROM Pass1 AS A CROSS JOIN Pass1 AS B),   -- 16 rows
      Pass3 AS (SELECT 1 AS C FROM Pass2 AS A CROSS JOIN Pass2 AS B),   -- 256 rows
      Pass4 AS (SELECT 1 AS C FROM Pass3 AS A CROSS JOIN Pass3 AS B),   -- 65536 rows
      Tally AS (SELECT ROW_NUMBER() OVER (ORDER BY C) AS n FROM Pass4)
    , cte AS (
        -- For each bitmask n in 1..@max, join the names of the items whose
        -- bit is set in n into a comma-separated list ordered by bit_value.
        -- STUFF(..., 1, 1, '') removes the leading comma.
        SELECT
            combination =
                STUFF
                (
                    (
                        SELECT ',' + s.item
                        FROM @Sample AS s
                        WHERE
                            nums.n & s.bit_value = s.bit_value
                        ORDER BY
                            s.bit_value
                        FOR XML
                            PATH (''),
                            TYPE
                    ).value('(./text())[1]', 'varchar(8000)'), 1, 1, ''
                )
        FROM Tally AS nums
        WHERE nums.n BETWEEN 1 AND @max
    )
    -- The inputs are cast to NVARCHAR(MAX) so STRING_AGG returns a MAX type;
    -- with non-MAX input its result is limited to 8000 bytes, which the
    -- aggregate list exceeds once there are more than a few variables.
    -- REPLACE turns 'a,b' into "a + ',' +b", the concatenation expression
    -- placed inside COUNT(DISTINCT ...).
    SELECT @cols = STRING_AGG(CAST(QUOTENAME(combination) AS NVARCHAR(MAX)), ',')
          ,@cols_count = STRING_AGG(
                            CAST(FORMATMESSAGE('[%s]=COUNT(DISTINCT %s)'
                                ,combination,REPLACE(combination, ',', ' + '','' +') ) AS NVARCHAR(MAX))
                            ,',')
    FROM cte;
    
    -- Builds the projection list for the inner query: one
    -- 'CAST(NULLIF(<col>,0) AS VARCHAR(10)) <col>' entry per variable column
    -- of t (every column except 'id'), comma-separated.
    SET @cols_casted = (
        SELECT STRING_AGG(
                   FORMATMESSAGE('CAST(NULLIF(%s,0) AS VARCHAR(10)) %s', name, name),
                   ','
               )
        FROM sys.columns
        WHERE object_id = OBJECT_ID('t')
          AND name <> 'id'
    );
      
    -- Assemble and run the final statement. The template uses named
    -- placeholder tokens that are substituted with the generated lists:
    --   <cols_count>   aggregate list   ([a]=COUNT(DISTINCT a), ...)
    --   <cols_casted>  inner projection (CAST(NULLIF(a,0) AS VARCHAR(10)) a, ...)
    --   <cols>         UNPIVOT columns  ([a],[b],[a,b], ...)
    -- The tokens must be non-empty and distinct: REPLACE with an empty
    -- search pattern substitutes nothing, which would leave the template
    -- syntactically invalid.
    DECLARE @sql NVARCHAR(MAX);
    
    SET @sql =
    'SELECT combination, [count]
    FROM (SELECT  <cols_count>
          FROM (SELECT ID, <cols_casted> FROM t )cte) s
    UNPIVOT ([count] FOR combination IN (<cols>))AS unpvt';
    
    SET @sql = REPLACE(@sql, '<cols_casted>', @cols_casted);
    SET @sql = REPLACE(@sql, '<cols_count>', @cols_count);
    SET @sql = REPLACE(@sql, '<cols>', @cols);
    
    -- Show the generated statement for inspection, then execute it.
    SELECT @sql;
    EXEC (@sql);
    

    DBFiddle Demo

    DBFiddle Demo with 4 variables

提交回复
热议问题