Recursive SQL statement (PostgreSQL 9.1.4)

前端 未结 2 2211
余生分开走
余生分开走 2021-02-20 13:31

PostgreSQL 9.1

Business situation

Every month, there is a new batch of accounts given to a specific process. Every batch can be described by mon

2条回答
  •  抹茶落季
    2021-02-20 14:18

            --
            -- rank the dates.
            -- Also fetch the fields that seem to depend on them.
            -- (this should have been done in the data model)
            --
    CREATE VIEW date_rank AS (
            -- One row per distinct (granularity, entry_accounts, entry_amount)
            -- combination in vintage_data, numbered 1..N in zrank.
            --
            -- zrank is used as the matrix row index (yyy) by matrix_data and is
            -- mapped back to a granularity by the insert rule on matrix_data, so
            -- the numbering has to follow the date order and be reproducible each
            -- time the view is evaluated. Ordering the window by a constant leaves
            -- the numbering to whatever order the executor happens to produce.
            -- entry_accounts and entry_amount are tie-breakers only.
            SELECT uniq.granularity
            , uniq.entry_accounts
            , uniq.entry_amount
            , row_number() OVER (
                    ORDER BY uniq.granularity, uniq.entry_accounts, uniq.entry_amount
                    ) AS zrank
            FROM (
                    SELECT DISTINCT granularity, entry_accounts, entry_amount
                    FROM vintage_data
                 ) AS uniq
            );
    
    -- SELECT * FROM date_rank ORDER BY granularity;
            --
            -- transform to an x*y matrix, avoiding the date key and the slack columns
            --
    CREATE VIEW matrix_data AS
            -- Present vintage_data as matrix cells:
            --   xxx = distance_in_months of the row
            --   yyy = zrank that date_rank assigns to the row's granularity
            --   val = recovery_amount of the row
            SELECT
                    obs.distance_in_months AS xxx,
                    rnk.zrank              AS yyy,
                    obs.recovery_amount    AS val
            FROM date_rank AS rnk
            INNER JOIN vintage_data AS obs
                    ON obs.granularity = rnk.granularity
            ;
    -- SELECT * FROM matrix_data;
    
            --
            -- In order to perform the reversed transformation:
            -- make the view insertable.
            -- INSERTS to matrix_data will percolate back into the vintage_data table
            -- (don't try this at home ;-)
            --
    CREATE RULE magic_by_the_plasser AS
            ON INSERT TO matrix_data
            DO INSTEAD
            -- Redirect an insert of a matrix cell (yyy, xxx, val) into vintage_data.
            -- The row index NEW.yyy is looked up in date_rank to recover the
            -- granularity, entry_accounts and entry_amount of that row; NEW.xxx
            -- becomes distance_in_months and NEW.val becomes recovery_amount.
            INSERT INTO vintage_data (
                    granularity,
                    distance_in_months,
                    entry_accounts,
                    entry_amount,
                    recovery_amount
            )
            SELECT
                    rnk.granularity,
                    NEW.xxx,
                    rnk.entry_accounts,
                    rnk.entry_amount,
                    NEW.val
            FROM date_rank AS rnk
            WHERE NEW.yyy = rnk.zrank
            ;
    
            --
            -- This CTE creates the weights for a Pascal-triangle
            --
    -- EXPLAIN -- ANALYZE
    WITH RECURSIVE
            -- Smallest and largest column index present in the matrix.
            x_bounds AS (
                    SELECT MIN(xxx) AS x0, MAX(xxx) AS x1
                    FROM matrix_data
            )
            -- Every column index in that range, one row each.
            , x_axis AS (
                    SELECT generate_series(xb.x0, xb.x1) AS xxx
                    FROM x_bounds AS xb
            )
            -- Smallest and largest row index present in the matrix.
            , y_bounds AS (
                    SELECT MIN(yyy) AS y0, MAX(yyy) AS y1
                    FROM matrix_data
            )
            -- Every row index in that range, one row each.
            , y_axis AS (
                    SELECT generate_series(yb.y0, yb.y1) AS yyy
                    FROM y_bounds AS yb
            )
            -- Cartesian product of the two axes. It plays the role of a
            -- "calendar table": it enumerates every (x,y) position so that the
            -- gaps in the matrix can be found, making the matrix rectangular.
            , grid AS (
                    SELECT xa.xxx, ya.yyy
                    FROM x_axis AS xa
                    CROSS JOIN y_axis AS ya
            )
            -- The (x,y) positions that have no cell in the current matrix.
            , missing AS (
                    SELECT gr.xxx, gr.yyy
                    FROM grid AS gr
                    WHERE NOT EXISTS (
                            SELECT 1
                            FROM matrix_data AS have
                            WHERE have.xxx = gr.xxx
                            AND have.yyy = gr.yyy
                            )
            )
            -- Pascal-triangle weights: for every (source cell, destination cell)
            -- path, the share of the source value that reaches the destination.
            , pascal AS (
                    -- Seed: each existing cell maps onto itself with weight 1.
                    SELECT md.yyy AS src_y
                            , md.xxx AS src_x
                            , md.yyy AS dst_y
                            , md.xxx AS dst_x
                            , 1::numeric AS weight
                    FROM matrix_data AS md
                    UNION ALL
                    -- Step: a missing cell directly below (y+1) or directly to the
                    -- right (x+1) of a reached cell receives half of that weight.
                    SELECT pa.src_y
                            , pa.src_x
                            , em.yyy
                            , em.xxx
                            , (pa.weight / 2)
                    FROM pascal AS pa
                    INNER JOIN missing AS em
                            ON ( em.yyy = pa.dst_y + 1 AND em.xxx = pa.dst_x )
                            OR ( em.yyy = pa.dst_y AND em.xxx = pa.dst_x + 1 )
            )
    -- Write the weighted sums into the missing positions. The insert targets the
    -- matrix_data view, so it relies on the INSTEAD rule defined on that view.
    INSERT INTO matrix_data (yyy, xxx, val)
    SELECT
            pa.dst_y,
            pa.dst_x,
            SUM(ma.val * pa.weight)
    FROM pascal AS pa
    INNER JOIN matrix_data AS ma
            ON ma.yyy = pa.src_y
            AND ma.xxx = pa.src_x
            -- skip the seed rows, where a filled-in cell maps onto itself
    WHERE pa.src_y <> pa.dst_y
       OR pa.src_x <> pa.dst_x
    GROUP BY pa.dst_y, pa.dst_x
            ;
    
            --
            -- This will also get rid of the matrix_data view and the rule.
            --
    DROP VIEW date_rank CASCADE;

    -- Final check: list the table, now including the rows added through the view.
    SELECT
            granularity,
            distance_in_months,
            entry_accounts,
            entry_amount,
            recovery_amount
    FROM vintage_data
    ORDER BY granularity, distance_in_months;
    

    RESULT:

    NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "vintage_data_pkey" for table "vintage_data"
    CREATE TABLE
    NOTICE:  ALTER TABLE / ADD UNIQUE will create implicit index "mx_xy" for table "vintage_data"
    ALTER TABLE
    INSERT 0 21
    VACUUM
    CREATE VIEW
    CREATE VIEW
    CREATE RULE
    INSERT 0 15
    NOTICE:  drop cascades to view matrix_data
    DROP VIEW
     granularity | distance_in_months | entry_accounts | entry_amount |      recovery_amount      
    -------------+--------------------+----------------+--------------+---------------------------
     2012-01-31  |                  1 |            200 |       100000 |                      1000
     2012-01-31  |                  2 |            200 |       100000 |                      2000
     2012-01-31  |                  3 |            200 |       100000 |                      3000
     2012-01-31  |                  4 |            200 |       100000 |                      3500
     2012-01-31  |                  5 |            200 |       100000 |                      3400
     2012-01-31  |                  6 |            200 |       100000 |                      3300
     2012-02-28  |                  1 |            250 |       150000 |                      1200
     2012-02-28  |                  2 |            250 |       150000 |                      1600
     2012-02-28  |                  3 |            250 |       150000 |                      1800
     2012-02-28  |                  4 |            250 |       150000 |                      1200
     2012-02-28  |                  5 |            250 |       150000 |                      1600
     2012-02-28  |                  6 |            250 |       150000 | 2381.25000000000000000000
     2012-03-31  |                  1 |            200 |        90000 |                      1300
     2012-03-31  |                  2 |            200 |        90000 |                      1200
     2012-03-31  |                  3 |            200 |        90000 |                      1400
     2012-03-31  |                  4 |            200 |        90000 |                      1000
     2012-03-31  |                  5 |            200 |        90000 | 2200.00000000000000000000
     2012-03-31  |                  6 |            200 |        90000 | 2750.00000000000000000000
     2012-04-30  |                  1 |            300 |       180000 |                      1600
     2012-04-30  |                  2 |            300 |       180000 |                      1500
     2012-04-30  |                  3 |            300 |       180000 |                      4000
     2012-04-30  |                  4 |            300 |       180000 | 2500.00000000000000000000
     2012-04-30  |                  5 |            300 |       180000 | 2350.00000000000000000000
     2012-04-30  |                  6 |            300 |       180000 | 2550.00000000000000000000
     2012-05-31  |                  1 |            400 |       225000 |                      2200
     2012-05-31  |                  2 |            400 |       225000 |                      6000
     2012-05-31  |                  3 |            400 |       225000 | 5000.00000000000000000000
     2012-05-31  |                  4 |            400 |       225000 | 3750.00000000000000000000
     2012-05-31  |                  5 |            400 |       225000 | 3050.00000000000000000000
     2012-05-31  |                  6 |            400 |       225000 | 2800.00000000000000000000
     2012-06-30  |                  1 |            100 |        60000 |                      1000
     2012-06-30  |                  2 |            100 |        60000 | 3500.00000000000000000000
     2012-06-30  |                  3 |            100 |        60000 | 4250.00000000000000000000
     2012-06-30  |                  4 |            100 |        60000 | 4000.00000000000000000000
     2012-06-30  |                  5 |            100 |        60000 | 3525.00000000000000000000
     2012-06-30  |                  6 |            100 |        60000 | 3162.50000000000000000000
    (36 rows)
    

提交回复
热议问题