Union of multiple ranges

后端 未结 5 591
悲&欢浪女
悲&欢浪女 2020-11-27 07:28

I have these ranges:

7,10
11,13
11,15
14,20
23,39

I need to perform a union of the overlapping ranges to give ranges that are not overlappi

相关标签:
5条回答
  • 2020-11-27 07:41

    Here is a one-liner using functools.reduce (assuming (x, 10) and (11, y) overlap):

    reduce(
        lambda acc, el: acc[:-1:] + [(min(*acc[-1], *el), max(*acc[-1], *el))]
            if acc[-1][1] >= el[0] - 1
            else acc + [el],
        ranges[1::],
        ranges[0:1]
    )
    

    This begins with the first range and uses reduce to go through the rest of the ranges. It compares the last element (acc[-1]) with the next range (el). If they overlap, it replaces the last element with the min and max of the two ranges (acc[:-1:] + [min, max]). If they don't overlap, it simply puts this new range at the end of the list (acc + [el]).

    Example:

    from functools import reduce
    
    example_ranges = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]
    
    def combine_overlaps(ranges):
        return reduce(
            lambda acc, el: acc[:-1:] + [(min(*acc[-1], *el), max(*acc[-1], *el))]
                if acc[-1][1] >= el[0] - 1
                else acc + [el],
            ranges[1::],
            ranges[0:1],
        )
    
    print(combine_overlaps(example_ranges))
    

    Output:

    [(7, 20), (23, 39)]
    
    0 讨论(0)
  • 2020-11-27 07:52

    The following function works well for the given example data:

    def range_overlap_adjust(list_ranges):
        overlap_corrected = []
        for start, stop in sorted(list_ranges):
            if overlap_corrected and start-1 <= overlap_corrected[-1][1] and stop >= overlap_corrected[-1][1]:
                overlap_corrected[-1] = min(overlap_corrected[-1][0], start), stop
            elif overlap_corrected and start <= overlap_corrected[-1][1] and stop <= overlap_corrected[-1][1]:
                break
            else:
                overlap_corrected.append((start, stop))
        return overlap_corrected
    

    Usage

    list_ranges = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]   
    print(range_overlap_adjust(list_ranges))
    # prints [(7, 20), (23, 39)]
    
    0 讨论(0)
  • 2020-11-27 07:56

    Old question. But I wanted to add this answer for future references. sympy can be used to achieve union of intervals:

    from sympy import Interval, Union
    def union(data):
        """ Union of a list of intervals e.g. [(1,2),(3,4)] """
        intervals = [Interval(begin, end) for (begin, end) in data]
        u = Union(*intervals)
        return [list(u.args[:2])] if isinstance(u, Interval) \
           else list(u.args)
    

    If output of Union is more than two intervals is a Union object while when there is a single interval, the output is an Interval object. That's the reason for the if statement in the return line.

    examples:

    In [26]: union([(10, 12), (14, 16), (15, 22)])
    Out[26]: [[10, 12], [14, 22]]
    
    In [27]: union([(10, 12), (9, 16)])
    Out[27]: [[9, 16]]
    
    0 讨论(0)
  • 2020-11-27 07:58

    Let's say, (7, 10) and (11, 13) result into (7, 13):

    a = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]
    b = []
    for begin,end in sorted(a):
        if b and b[-1][1] >= begin - 1:
            b[-1] = (b[-1][0], end)
        else:
            b.append((begin, end))
    

    b is now

    [(7, 20), (23, 39)]
    

    EDIT:

    As @CentAu correctly notices, [(2,4), (1,6)] would return (1,4) instead of (1,6). Here is the new version with correct handling of this case:

    a = [(7, 10), (11, 13), (11, 15), (14, 20), (23, 39)]
    b = []
    for begin,end in sorted(a):
        if b and b[-1][1] >= begin - 1:
            b[-1][1] = max(b[-1][1], end)
        else:
            b.append([begin, end])
    
    0 讨论(0)
  • 2020-11-27 07:59

    I tried with particular cases of presence of (45, 46) and (45, 45)
    and also test cases that are unlikely to happen in your application: presence of (11,6), presence of (-1, -5), presence of (-9, 5), presence of (-3, 10).
    Anyway the results are right for all these cases, it's a point.

    The algorithm:

    def yi(li):
        gen = (x for a,b in li for x in xrange(a,b+1))
        start = p = gen.next()
        for x in gen:
            if x>p+2:
                yield (start,p)
                start = p = x
            else:
                p = x
        yield (start,x)
    

    If aff in the following code is set to True, the steps of the execution are displayed.

    def yi(li):
        aff = 0
        gen = (x for a,b in li for x in xrange(a,b+1))
        start = p = gen.next()
        for x in gen:
            if aff:
                print ('start %s     p %d  p+2 %d     '
                       'x==%s' % (start,p,p+2,x))
            if x>p+2:
                if aff:
                    print 'yield range(%d,%d)' % (start,p+1)
                yield (start,p)
                start = p = x
            else:
                p = x
        if aff:
            print 'yield range(%d,%d)' % (start,x+1)
        yield (start,x)
    
    
    
    for li in ([(7,10),(23,39),(11,13),(11,15),(14,20),(45,46)],
               [(7,10),(23,39),(11,13),(11,15),(14,20),(45,46),(45,45)],
               [(7,10),(23,39),(11,13),(11,15),(14,20),(45,45)],
    
               [(7,10),(23,39),(11,13),(11,6),(14,20),(45,46)], 
               #1 presence of (11, 6)
               [(7,10),(23,39),(11,13),(-1,-5),(14,20),(45,45)], 
               #2  presence of (-1,-5)
               [(7,10),(23,39),(11,13),(-9,-5),(14,20),(45,45)], 
               #3  presence of (-9, -5)
               [(7,10),(23,39),(11,13),(-3,10),(14,20),(45,45)]): 
               #4  presence of (-3, 10)
    
        li.sort()
        print 'sorted li    %s'%li
        print '\n'.join('  (%d,%d)   %r' % (a,b,range(a,b)) 
                         for a,b in li)
        print 'list(yi(li)) %s\n' % list(yi(li))
    

    result

    sorted li    [(7, 10), (11, 13), (11, 15), (14, 20),
                  (23, 39), (45, 46)]
      (7,10)   [7, 8, 9]
      (11,13)   [11, 12]
      (11,15)   [11, 12, 13, 14]
      (14,20)   [14, 15, 16, 17, 18, 19]
      (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
                 35, 36, 37, 38]
      (45,46)   [45]
    list(yi(li)) [(7, 20), (23, 39), (45, 46)]
    
    sorted li    [(7, 10), (11, 13), (11, 15), (14, 20), 
                  (23, 39), (45, 45), (45, 46)]
      (7,10)   [7, 8, 9]
      (11,13)   [11, 12]
      (11,15)   [11, 12, 13, 14]
      (14,20)   [14, 15, 16, 17, 18, 19]
      (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
                 35, 36, 37, 38]
      (45,45)   []
      (45,46)   [45]
    list(yi(li)) [(7, 20), (23, 39), (45, 46)]
    
    sorted li    [(7, 10), (11, 13), (11, 15), (14, 20), 
                  (23, 39), (45, 45)]
      (7,10)   [7, 8, 9]
      (11,13)   [11, 12]
      (11,15)   [11, 12, 13, 14]
      (14,20)   [14, 15, 16, 17, 18, 19]
      (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
                 35, 36, 37, 38]
      (45,45)   []
    list(yi(li)) [(7, 20), (23, 39), (45, 45)]
    
    sorted li    [(7, 10), (11, 6), (11, 13), (14, 20), 
                  (23, 39), (45, 46)]
      (7,10)   [7, 8, 9]
      (11,6)   []
      (11,13)   [11, 12]
      (14,20)   [14, 15, 16, 17, 18, 19]
      (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
                 35, 36, 37, 38]
      (45,46)   [45]
    list(yi(li)) [(7, 20), (23, 39), (45, 46)]
    
    sorted li    [(-1, -5), (7, 10), (11, 13), (14, 20), 
                  (23, 39), (45, 45)]
      (-1,-5)   []
      (7,10)   [7, 8, 9]
      (11,13)   [11, 12]
      (14,20)   [14, 15, 16, 17, 18, 19]
      (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
                 35, 36, 37, 38]
      (45,45)   []
    list(yi(li)) [(7, 20), (23, 39), (45, 45)]
    
    sorted li    [(-9, -5), (7, 10), (11, 13), (14, 20), 
                  (23, 39), (45, 45)]
      (-9,-5)   [-9, -8, -7, -6]
      (7,10)   [7, 8, 9]
      (11,13)   [11, 12]
      (14,20)   [14, 15, 16, 17, 18, 19]
      (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
                 35, 36, 37, 38]
      (45,45)   []
    list(yi(li)) [(-9, -5), (7, 20), (23, 39), (45, 45)]
    
    sorted li    [(-3, 10), (7, 10), (11, 13), (14, 20), 
                  (23, 39), (45, 45)]
      (-3,10)   [-3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
      (7,10)   [7, 8, 9]
      (11,13)   [11, 12]
      (14,20)   [14, 15, 16, 17, 18, 19]
      (23,39)   [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
                 35, 36, 37, 38]
      (45,45)   []
    list(yi(li)) [(-3, 20), (23, 39), (45, 45)]
    
    0 讨论(0)
提交回复
热议问题