SQL to determine multiple date ranges (SQL Server 2000)

后端 未结 5 1101
小蘑菇
小蘑菇 2021-01-15 01:20

I have a table which contains an ID and a Date for an event. Each row is for one date. I am trying to determine consecutive date ranges and consoli

5条回答
  •  臣服心动
    2021-01-15 01:32

    This is the way I've done it in the past. It's a two-step process:

    1. Build the set of candidate contiguous periods
    2. If there are any overlapping periods, delete all but the longest such period.

    Here's a script that shows how it's done. You might be able to pull it off in a single [big, ugly] query, but trying to do that makes my head hurt. I'm using temp tables as it makes the debugging a whole lot easier.

    -----------------------------------------------------------------------
    -- #source: the input data, one row per (id, event date).
    --
    -- Drop any copy left over from an earlier run in this session. The
    -- existence check keeps the very first run from raising an error when
    -- the temp table has not been created yet.
    -----------------------------------------------------------------------
    if object_id('tempdb..#source') is not null
      drop table #source
    go

    create table #source
    (
      id    int      not null ,
      dtCol datetime not null ,

      -----------------------------------------------------------------------
      -- ASSUMPTION 1: Each date must be unique for a given ID value.
      -----------------------------------------------------------------------
      unique clustered ( id , dtCol ) ,

      -----------------------------------------------------------------------
      -- ASSUMPTION 2: The datetime column only represents a day.
      -- The value of the time component is always 00:00:00.000
      --
      -- Style 112 renders the value as 'yyyymmdd' (8 characters), so the
      -- round trip through char(8) strips any time-of-day; the check passes
      -- only when nothing was stripped.
      -----------------------------------------------------------------------
      check ( dtCol = convert(datetime,convert(char(8),dtCol,112),112) )

    )
    go
    
    -----------------------------------------------------------------------
    -- Sample data, set 1: four small ids covering the basic shapes.
    --   id 1 : a lone day, a gap, then a 2-day run  (Jan 1 | Jan 4-5)
    --   id 2 : a 3-day run, a gap, then a lone day  (Jan 1-3 | Jan 5)
    --   id 3 : a single day                         (Jan 1)
    --   id 4 : one unbroken 4-day run               (Jan 1-4)
    --
    -- Dates are written as unseparated 'yyyymmdd' so they mean the same
    -- thing regardless of the session's SET LANGUAGE / SET DATEFORMAT
    -- settings, and every insert names its target columns explicitly.
    -----------------------------------------------------------------------
    insert #source ( id , dtCol ) values ( 1 , '20110101' )
    insert #source ( id , dtCol ) values ( 1 , '20110104' )
    insert #source ( id , dtCol ) values ( 1 , '20110105' )
    insert #source ( id , dtCol ) values ( 2 , '20110101' )
    insert #source ( id , dtCol ) values ( 2 , '20110102' )
    insert #source ( id , dtCol ) values ( 2 , '20110103' )
    insert #source ( id , dtCol ) values ( 2 , '20110105' )
    insert #source ( id , dtCol ) values ( 3 , '20110101' )
    insert #source ( id , dtCol ) values ( 4 , '20110101' )
    insert #source ( id , dtCol ) values ( 4 , '20110102' )
    insert #source ( id , dtCol ) values ( 4 , '20110103' )
    insert #source ( id , dtCol ) values ( 4 , '20110104' )
    go

    -----------------------------------------------------------------------
    -- Sample data, set 2: a single id with three runs separated by
    -- one-day gaps (Jan 2-3 | Jan 5-13 | Jan 15-17).
    -----------------------------------------------------------------------
    insert #source ( id , dtCol ) values ( 200236 , '20110102' )
    insert #source ( id , dtCol ) values ( 200236 , '20110103' )
    insert #source ( id , dtCol ) values ( 200236 , '20110105' )
    insert #source ( id , dtCol ) values ( 200236 , '20110106' )
    insert #source ( id , dtCol ) values ( 200236 , '20110107' )
    insert #source ( id , dtCol ) values ( 200236 , '20110108' )
    insert #source ( id , dtCol ) values ( 200236 , '20110109' )
    insert #source ( id , dtCol ) values ( 200236 , '20110110' )
    insert #source ( id , dtCol ) values ( 200236 , '20110111' )
    insert #source ( id , dtCol ) values ( 200236 , '20110112' )
    insert #source ( id , dtCol ) values ( 200236 , '20110113' )
    insert #source ( id , dtCol ) values ( 200236 , '20110115' )
    insert #source ( id , dtCol ) values ( 200236 , '20110116' )
    insert #source ( id , dtCol ) values ( 200236 , '20110117' )
    go
    
    -----------------------------------------------------------------------
    -- #candidate_range: work table holding date ranges (dtFrom..dtThru,
    -- both inclusive) per id.
    --
    -- Drop any copy left over from an earlier run in this session. The
    -- existence check keeps the very first run from raising an error when
    -- the temp table has not been created yet.
    -----------------------------------------------------------------------
    if object_id('tempdb..#candidate_range') is not null
      drop table #candidate_range
    go
    create table #candidate_range
    (
      rowId   int      not null identity(1,1) ,  -- surrogate key, assigned on insert
      id      int      not null ,
      dtFrom  datetime not null ,                -- first day of the range
      dtThru  datetime not null ,                -- last day of the range
      length  as 1+datediff(day,dtFrom,dtThru) , -- computed: days covered, counting both ends

      -- Column name is spelled exactly as declared above (rowId) so the
      -- definition also works when tempdb uses a case-sensitive collation.
      primary key nonclustered ( rowId ) ,
      unique clustered (id,dtFrom,dtThru)

    )
    go
    
    --
    -- Fill #candidate_range with every contiguous date range for each id.
    --
    -- Every (start, end) pair of #source rows sharing an id, with end >= start,
    -- is considered (so single-day ranges are included). A pair is kept when
    -- the number of #source rows for that id lying between the two dates,
    -- inclusive, equals the number of calendar days the pair spans -- i.e.
    -- no day in between is missing.
    --
    -- Rows are supplied ordered by id, start date, end date.
    --
    insert into #candidate_range ( id , dtFrom , dtThru )
    select range_start.id    as id     ,
           range_start.dtCol as dtFrom ,
           range_end.dtCol   as dtThru
    from #source as range_start
    inner join #source as range_end
            on range_end.id     = range_start.id
           and range_end.dtCol >= range_start.dtCol
    where ( select count(*)
            from #source as covered
            where covered.id     = range_start.id
              and covered.dtCol >= range_start.dtCol
              and covered.dtCol <= range_end.dtCol
          ) = datediff(day,range_start.dtCol,range_end.dtCol) + 1
    order by range_start.id    ,
             range_start.dtCol ,
             range_end.dtCol
    go
    
    --
    -- Compare the table to itself. Wherever two ranges for the same id
    -- overlap, keep the longer one and delete the shorter one.
    --
    -- Two inclusive ranges overlap when each one starts on or before the day
    -- the other ends. Every contiguous sub-range of a longer contiguous range
    -- overlaps it and is strictly shorter, so a single pass removes all of
    -- them; what survives are the ranges that no longer range overlaps.
    --
    -- The rowId inequality is implied by the strict length comparison and is
    -- kept only to make "a different row" explicit. rowId is spelled exactly
    -- as the column is declared so the statement also runs when tempdb uses a
    -- case-sensitive collation.
    --
    delete shorter
    from #candidate_range longer
    join #candidate_range shorter on shorter.id      = longer.id
                                 and shorter.rowId  != longer.rowId
                                 and shorter.length <  longer.length
                                 and shorter.dtFrom <= longer.dtThru
                                 and shorter.dtThru >= longer.dtFrom
    go
    

    That's about all there is to it.

提交回复
热议问题