Group close numbers

后端 未结 3 1470
粉色の甜心
粉色の甜心 2021-02-08 08:49

I have a table with 2 columns of integers. The first column represents start index and the second column represents end index.

START END
1     8
9     13
14    20


        
相关标签:
3条回答
  • 2021-02-08 09:34

    Edited to include another version, which I think is a bit more reliable and also works with overlapping ranges.

    -- Merge integer ranges that overlap or touch into maximal contiguous ranges.
    -- Approach: expand every range into its individual integers with a recursive
    -- CTE, de-duplicate them, then collapse runs of consecutive integers.
    --
    -- Drop any leftover from an earlier run so the script can be re-executed
    -- in the same session.
    IF OBJECT_ID('tempdb..#data') IS NOT NULL
        DROP TABLE #data;
    CREATE TABLE #data (start_range INT, end_range INT);
    INSERT INTO #data (start_range, end_range) VALUES (1,8);
    INSERT INTO #data (start_range, end_range) VALUES (2,15);
    INSERT INTO #data (start_range, end_range) VALUES (9,13);
    INSERT INTO #data (start_range, end_range) VALUES (14,20);
    INSERT INTO #data (start_range, end_range) VALUES (13,26);
    INSERT INTO #data (start_range, end_range) VALUES (12,21);
    INSERT INTO #data (start_range, end_range) VALUES (9,25);
    INSERT INTO #data (start_range, end_range) VALUES (20,25);
    INSERT INTO #data (start_range, end_range) VALUES (30,42);
    INSERT INTO #data (start_range, end_range) VALUES (42,49);
    INSERT INTO #data (start_range, end_range) VALUES (60,67);
    -- ranges: one row per integer covered by each source range.
    -- The anchor emits the range start; each recursive step emits the next
    -- integer until end_range is reached.
    ;WITH ranges AS
    (
        SELECT
            start_range AS level
            ,end_range AS end_range
        FROM #data
        UNION ALL
        SELECT
            level + 1 AS level
            ,end_range AS end_range
        FROM ranges
        WHERE level < end_range
    )
    -- ranges2: overlapping source ranges yield the same integer more than once;
    -- keep each integer a single time.
    ,ranges2 AS
    (
        SELECT DISTINCT
            level
        FROM ranges
    )
    -- ranges3: within a run of consecutive integers, (row number - value) is
    -- constant, so it serves as the key identifying each merged range.
    ,ranges3 AS
    (
        SELECT
            level
            ,ROW_NUMBER() OVER (ORDER BY level) - level AS grouping_group
        FROM ranges2
    )
    SELECT
        MIN(level) AS start_number
        ,MAX(level) AS end_number
    FROM ranges3
    GROUP BY grouping_group
    ORDER BY start_number ASC
    -- A range of width w needs w recursion levels, and the default cap is 100,
    -- so wider ranges would abort the query. The recursion always terminates
    -- (level only grows towards end_range), so lifting the cap is safe.
    OPTION (MAXRECURSION 0);
    

    I think this should work - might not be especially efficient on larger sets though...

    -- Merge integer ranges that overlap or are adjacent (next start = previous
    -- end + 1) into maximal ranges, without expanding them into single values.
    --
    -- Drop any leftover from an earlier run so the script can be re-executed
    -- in the same session.
    IF OBJECT_ID('tempdb..#data') IS NOT NULL
        DROP TABLE #data;
    CREATE TABLE #data (start_range INT, end_range INT);
    INSERT INTO #data (start_range, end_range) VALUES (1,8);
    INSERT INTO #data (start_range, end_range) VALUES (2,15);
    INSERT INTO #data (start_range, end_range) VALUES (9,13);
    INSERT INTO #data (start_range, end_range) VALUES (14,20);
    INSERT INTO #data (start_range, end_range) VALUES (21,25);
    INSERT INTO #data (start_range, end_range) VALUES (30,42);
    INSERT INTO #data (start_range, end_range) VALUES (42,49);
    INSERT INTO #data (start_range, end_range) VALUES (60,67);
    -- island_starts: a start value opens a new merged range when no range that
    -- begins earlier reaches at least (start - 1). Comparing against ALL earlier
    -- ranges, not just the immediately preceding row, keeps the result correct
    -- when one range is fully contained in another.
    ;WITH island_starts AS
    (
        SELECT DISTINCT
            d.start_range AS island_start
        FROM #data AS d
        WHERE NOT EXISTS
        (
            SELECT 1
            FROM #data AS earlier
            WHERE earlier.start_range < d.start_range
              AND earlier.end_range >= d.start_range - 1
        )
    )
    -- assigned: every source range belongs to the island whose start is the
    -- greatest island start not exceeding the range's own start.
    ,assigned AS
    (
        SELECT
            d.start_range
            ,d.end_range
            ,(
                SELECT MAX(s.island_start)
                FROM island_starts AS s
                WHERE s.island_start <= d.start_range
            ) AS overlap_group
        FROM #data AS d
    )
    SELECT
        MIN(start_range) AS range_start
        ,MAX(end_range) AS range_end
        ,MAX(end_range) - MIN(start_range) AS range_span
    FROM assigned
    GROUP BY overlap_group
    ORDER BY range_start;
    -- For the fixture above this returns (1,25), (30,49), (60,67).
    
    0 讨论(0)
  • 2021-02-08 09:35

    This works for your example; let me know if it doesn't work for other data.

    -- Merge integer ranges that overlap or are adjacent into maximal ranges.
    --
    -- Drop any leftover from an earlier run so the script can be re-executed
    -- in the same session.
    if object_id('tempdb..#Range') is not null
        drop table #Range;
    create table #Range
    (
      [Start] INT,
      [End] INT
    )
    insert into #Range ([Start], [End]) Values (1, 8)
    insert into #Range ([Start], [End]) Values (9, 13)
    insert into #Range ([Start], [End]) Values (14, 20)
    insert into #Range ([Start], [End]) Values (20, 25)
    insert into #Range ([Start], [End]) Values (30, 42)
    insert into #Range ([Start], [End]) Values (42, 49)
    insert into #Range ([Start], [End]) Values (60, 67)
    -- IslandStart: start values that open a new merged range, i.e. no range
    -- beginning earlier reaches at least (Start - 1).
    -- NOT EXISTS is used instead of NOT IN so that a NULL [End] cannot empty
    -- the result, and the coverage test (rather than equality with some End or
    -- End + 1) also recognises starts that fall strictly inside another range.
    -- DISTINCT prevents duplicate output rows when two ranges share a start.
    ;with IslandStart as
    (select distinct
        r.[Start]
    from
        #Range r
    where not exists (select 1
                      from
                         #Range earlier
                      where earlier.[Start] < r.[Start]
                        and earlier.[End] >= r.[Start] - 1
                     )
    ),
    -- RangeTable: island starts numbered in ascending order so each one can be
    -- paired with the next island start.
    RangeTable as
    (select
        s.[Start],
        row_number() over (order by s.[Start]) as [Index]
    from
        IslandStart s
    )
    select
        t1.[Start],
        -- The merged range ends at the largest End lying before the next
        -- island start; the last island has no successor, so it takes the
        -- overall largest End.
        (select max(r.[End])
         from #Range r
         where t2.[Start] is null
            or r.[End] < t2.[Start]) as [End]
    from
        RangeTable t1
    left join
        RangeTable t2
    on
        t1.[Index] = t2.[Index]-1
    order by
        t1.[Start]
    
    drop table #Range;
    
    0 讨论(0)
  • You could use a number table to solve this problem. Basically, you first expand the ranges, then combine subsequent items in groups.

    Here's one implementation:

    -- Merge overlapping/adjacent integer ranges by expanding them against a
    -- number table and collapsing runs of consecutive integers.
    WITH source_ranges (START, [END]) AS (
                SELECT  1,  8
      UNION ALL SELECT  9, 13
      UNION ALL SELECT 14, 20
      UNION ALL SELECT 20, 25
      UNION ALL SELECT 30, 42
      UNION ALL SELECT 42, 49
      UNION ALL SELECT 60, 67
    ),
    -- members: every integer covered by at least one range, listed once.
    -- The type 'P' rows of master..spt_values supply the offsets 0..(END - START).
    members AS (
      SELECT DISTINCT
        r.START + nums.number AS member
      FROM source_ranges AS r
        INNER JOIN master..spt_values AS nums
          ON  nums.type = 'P'
          AND nums.number >= 0
          AND nums.number <= r.[END] - r.START
    ),
    -- tagged: (value - row number) stays constant across a run of consecutive
    -- integers, so it identifies the merged range each integer belongs to.
    tagged AS (
      SELECT
        member,
        member - ROW_NUMBER() OVER (ORDER BY member) AS island_key
      FROM members
    )
    SELECT
      MIN(member) AS START,
      MAX(member) AS [END]
    FROM tagged
    GROUP BY island_key
    

    This solution uses master..spt_values as a number table to expand the initial ranges. If any of those ranges may span more than 2048 consecutive values, you should define and use your own number table instead.

    0 讨论(0)
提交回复
热议问题