Joining together consecutive date validity intervals

后端 未结 4 2011
予麋鹿
予麋鹿 2021-01-14 02:59

I have a series of records containing some information (product type) with temporal validity.

I would like to meld together adjacent validity intervals, provided tha

相关标签:
4条回答
  • 2021-01-14 03:04

    This is a pretty complicated set of steps, but it's the way that I solved a similar problem:

    -- Sample Data
    CREATE TABLE AdjacentValidity 
      (
    RowID INT IDENTITY(1,1) NOT NULL,
    Product VARCHAR(1) NOT NULL,
    Start_Date DATETIME NOT NULL,
    End_Date DATETIME NOT NULL
      )
    
    INSERT INTO AdjacentValidity (Product, Start_Date, End_Date)
    
    SELECT 'A', '7/1/2013', '7/31/2013' UNION
    SELECT 'A', '8/1/2013', '8/31/2013' UNION
    SELECT 'A', '9/1/2013', '9/30/2013' UNION
    SELECT 'B', '10/1/2013', '10/31/2013' UNION
    SELECT 'B', '11/1/2013', '11/30/2013' UNION
    SELECT 'A', '12/1/2013', '12/31/2013' UNION
    SELECT 'A', '1/1/2014', '1/31/2014' UNION
    SELECT 'A', '2/1/2014', '2/28/2014' UNION
    SELECT 'A', '3/1/2014', '3/31/2014'
    
    
    -- Modify the sample data to include necessary tags
    CREATE TABLE #RawData
      (
        RawData_ID INT IDENTITY(1,1) NOT NULL,
        Product VARCHAR(1) NOT NULL,
        Start_Date DATETIME NOT NULL,
        End_Date DATETIME NOT NULL,
        isFirstOccurrence BIT NULL,
        isLastOccurrence BIT NULL,
        isFirstInstance BIT NULL,
        isLastInstance BIT NULL
      )
    
    -- Load and flag first occurrences of a natural key
    INSERT INTO #RawData
      (
        Product,
        Start_Date,
        End_Date,
        isFirstInstance
      )
    SELECT 
        Product,
        Start_Date,
        End_Date,
        CASE WHEN ROW_NUMBER() OVER
          (
            --PARTITION BY <NaturalKey>
            ORDER BY Start_date
          ) = 1 THEN 1 ELSE 0 END AS isFirstOccurrence
    FROM AdjacentValidity
    
    -- update to flag the last sequential instance of a particalar data set, and the last     occurrence of a natural key
    UPDATE a
    SET 
        a.isLastInstance = 
         CASE 
          WHEN 
            a.Product <> b.Product OR 
            DATEADD(m, 1, a.Start_Date) <> b.Start_Date  OR
            b.RawData_ID IS NULL
          THEN 1 
          ELSE 0 
         END,
        a.isLastOccurrence = 
         CASE
          WHEN 
            b.RawData_ID IS NULL
          THEN 1 
          ELSE 0 
         END 
    FROM 
        #RawData a
         LEFT JOIN
        #RawData b ON 
            b.RawData_ID = a.RawData_ID + 1 --AND 
            --b.<NaturalKey> = a.<NaturalKey>
    
    --  flag first sequential instance of a particular data set
    UPDATE b
    SET 
        b.isFirstInstance = 
         CASE 
          WHEN 
            a.isLastInstance = 1
          THEN 1 
          ELSE 0 
         END 
    FROM 
        #RawData a
         LEFT JOIN
        #RawData b ON 
            b.RawData_ID = a.RawData_ID + 1 --AND 
            --b.<NaturalKey> = a.<NaturalKey>
    
    
    -- reduce the records to only those that are the first or last occurrence of a     particular data set
    CREATE TABLE #UniqueData 
      (
        [UniqueData_ID] [int] IDENTITY(1,1) NOT NULL,
        Start_Date DATETIME NOT NULL,
        End_Date DATETIME NOT NULL,
        Product VARCHAR(1) NULL,
        isFirstOccurrence BIT NULL,
        isLastOccurrence BIT NULL,
        isFirstInstance BIT NULL,
        isLastInstance BIT NULL
      ) 
    
    INSERT INTO #UniqueData
      (
        Start_Date,
        End_Date,
        Product,
        isFirstOccurrence,
        isLastOccurrence,
        isFirstInstance,
        isLastInstance
      )
    
    SELECT 
        Start_Date,
        End_Date,
        Product,
        isFirstOccurrence,
        isLastOccurrence,
        isFirstInstance,
        isLastInstance
    FROM 
        #RawData 
    WHERE 
        isFirstOccurrence = 1 OR
        isFirstInstance = 1 OR
        isLastInstance = 1
    ORDER BY RawData_ID, Start_Date
    
    
    
    
    -- combine the first and last occurrences in any given sequence into a single row
    SELECT 
        a.Start_Date,
        ISNULL(b.Start_Date, a.End_Date) End_Date,
        a.Product
    FROM 
        #UniqueData a 
         LEFT JOIN
        #UniqueData b ON 
            b.UniqueData_ID = a.UniqueData_ID + 1 AND
            --b.<NaturalKey> = a.<NaturalKey> AND
            a.isLastInstance <> 1
    WHERE a.isFirstInstance = 1 or a.isFirstOccurrence = 1
    ORDER BY a.UniqueData_ID
    
    
    
    -- clean up
    /*
    DROP TABLE AdjacentValidity
    DROP TABLE #RawData
    DROP TABLE #UniqueData
    */
    
    0 讨论(0)
  • 2021-01-14 03:08

    It seems like there should be an easier way, but a combination of an analytical query (to find the different gaps) and a hierarchical query (to connect the rows that are continuous) works:

    with data as (
        select 'A' product, to_date('7/1/2013', 'MM/DD/YYYY') start_date, to_date('7/31/2013', 'MM/DD/YYYY') end_date from dual union all
        select 'A' product, to_date('8/1/2013', 'MM/DD/YYYY') start_date, to_date('8/31/2013', 'MM/DD/YYYY') end_date from dual union all
        select 'A' product, to_date('9/1/2013', 'MM/DD/YYYY') start_date, to_date('9/30/2013', 'MM/DD/YYYY') end_date from dual union all
        select 'B' product, to_date('10/1/2013', 'MM/DD/YYYY') start_date, to_date('10/31/2013', 'MM/DD/YYYY') end_date from dual union all
        select 'B' product, to_date('11/1/2013', 'MM/DD/YYYY') start_date, to_date('11/30/2013', 'MM/DD/YYYY') end_date from dual union all
        select 'A' product, to_date('12/1/2013', 'MM/DD/YYYY') start_date, to_date('12/31/2013', 'MM/DD/YYYY') end_date from dual union all
        select 'A' product, to_date('1/1/2014', 'MM/DD/YYYY') start_date, to_date('1/31/2014', 'MM/DD/YYYY') end_date from dual union all
        select 'A' product, to_date('2/1/2014', 'MM/DD/YYYY') start_date, to_date('2/28/2014', 'MM/DD/YYYY') end_date from dual union all
        select 'A' product, to_date('3/1/2014', 'MM/DD/YYYY') start_date, to_date('3/31/2014', 'MM/DD/YYYY') end_date from dual
    ),
    start_points as
    (
        select product, start_date, end_date, prior_end+1, case when prior_end + 1 = start_date then null else 'Y' end start_point 
        from (
            select product, start_date, end_date, lag(end_date,1) over (partition by product order by end_date) prior_end
            from data
        )
    )
    select product, min(start_date) start_date, max(end_date) end_date
    from (
        select product, start_date, end_date, level, connect_by_root(start_date) root_start
        from start_points
        start with start_point = 'Y'
        connect by prior end_date = start_date - 1
        and prior product = product
    )
    group by product, root_start;
    
    
    
    PRODUCT START_DATE END_DATE 
    ------- ---------- ---------
    A       01-JUL-13  30-SEP-13
    A       01-DEC-13  31-MAR-14
    B       01-OCT-13  30-NOV-13
    
    0 讨论(0)
  • 2021-01-14 03:12

    Try something like:

    with dat as (
    select 'A' as product, sysdate-3 as start_dte, sysdate-2 as end_dte from dual
    union all
    select 'A' as product, sysdate-2 as start_dte, sysdate-1 as end_dte from dual
    union all
    select 'B' as product, sysdate-5 as start_dte, sysdate-4 as end_dte from dual
    )
    SELECT product,
           MIN(start_dte) KEEP (DENSE_RANK FIRST ORDER BY start_dte) "Start",
           MAX(end_dte) KEEP (DENSE_RANK LAST ORDER BY end_dte) "End"
      FROM dat
      GROUP BY product
      ORDER BY product;
    

    Output

    PRODUCT Start   End
    A   2/24/2014 10:25:53 AM   2/26/2014 10:25:53 AM
    B   2/22/2014 10:25:53 AM   2/23/2014 10:25:53 AM
    
    0 讨论(0)
  • 2021-01-14 03:25

    This is a gaps-and-islands problem. There are various ways to approach it; this uses lead and lag analytic functions:

    select distinct product,
      case when start_date is null then lag(start_date)
        over (partition by product order by rn) else start_date end as start_date,
      case when end_date is null then lead(end_date)
        over (partition by product order by rn) else end_date end as end_date
    from (
      select product, start_date, end_date, rn
      from (
        select t.product,
          case when lag(end_date)
              over (partition by product order by start_date) is null
            or lag(end_date)
              over (partition by product order by start_date) != start_date - 1
            then start_date end as start_date,
          case when lead(start_date)
              over (partition by product order by start_date) is null
            or lead(start_date)
              over (partition by product order by start_date) != end_date + 1
            then end_date end as end_date,
          row_number() over (partition by product order by start_date) as rn
        from t
      )
      where start_date is not null or end_date is not null
    )
    order by start_date, product;
    
    PRODUCT START_DATE END_DATE
    ------- ---------- ---------
    A       01-JUL-13  30-SEP-13 
    B       01-OCT-13  30-NOV-13 
    A       01-DEC-13  31-MAR-14 
    

    SQL Fiddle

    The innermost query looks at the preceding and following records for the product, and only retains the start and/or end time if the records are not contiguous:

    select t.product,
      case when lag(end_date)
          over (partition by product order by start_date) is null
        or lag(end_date)
          over (partition by product order by start_date) != start_date - 1
        then start_date end as start_date,
      case when lead(start_date)
          over (partition by product order by start_date) is null
        or lead(start_date)
          over (partition by product order by start_date) != end_date + 1
        then end_date end as end_date
    from t;
    
    PRODUCT START_DATE END_DATE
    ------- ---------- ---------
    A       01-JUL-13            
    A                            
    A                  30-SEP-13 
    A       01-DEC-13            
    A                            
    A                            
    A                  31-MAR-14 
    B       01-OCT-13            
    B                  30-NOV-13 
    

    The next level of select removes those which are mid-period, where both dates were blanked by the inner query, which gives:

    PRODUCT START_DATE END_DATE
    ------- ---------- ---------
    A       01-JUL-13            
    A                  30-SEP-13 
    A       01-DEC-13            
    A                  31-MAR-14 
    B       01-OCT-13            
    B                  30-NOV-13 
    

    The outer query then collapses those adjacent pairs; I've used the easy route of creating duplicates and then eliminating them with distinct, but you can do it other ways, like putting both values into one of the pairs of rows and leaving both values in the other null, and then eliminating those with another layer of select, but I think distinct is OK here.

    If your real-world use case has times, not just dates, then you'll need to adjust the comparison in the inner query; rather than +/- 1, an interval of 1 second perhaps, or 1/86400 if you prefer, but depends on the precision of your values.

    0 讨论(0)
提交回复
热议问题