How to generate all possible combinations with a given condition to make it more efficient?

后端 未结 2 1081
死守一世寂寞
死守一世寂寞 2021-01-18 05:33

(Python) I would like to generate all possible combinations with length 9 out of a sorted list list with 150 numbers. However, that\'s not very efficient, so I want to have

相关标签:
2条回答
  • 2021-01-18 06:24

    Here's a solution using a recursive generator function: the function combinations_max_diff takes a list of numbers nums, a number of elements k, and a maximum difference max_diff.

    The helper function does all of the work; it takes a partial combination comb, a number of remaining elements r, a minimum list index i for the next element to be chosen in the combination, and a max_next which controls the maximum size of that next element.

    def combinations_max_diff(nums, k, max_diff):
        # input list must be sorted
        nums = sorted(nums)
        n = len(nums)
    
        def helper(comb, r, i, max_next):
            if r == 0:
                yield comb
            else:
                for ii in range(i, n - r + 1):
                    v = nums[ii]
                    if v > max_next: break
                    comb_v = comb + (v,)
                    yield from helper(comb_v, r - 1, ii + 1, v + max_diff)
    
        return helper((), k, 0, nums[-1])
    

    Example usage:

    >>> nums = [1, 2, 3, 4, 5, 6, 7]
    >>> for c in combinations_max_diff(nums, 3, 2):
    ...     print(c)
    ... 
    (1, 2, 3)
    (1, 2, 4)
    (1, 3, 4)
    (1, 3, 5)
    (2, 3, 4)
    (2, 3, 5)
    (2, 4, 5)
    (2, 4, 6)
    (3, 4, 5)
    (3, 4, 6)
    (3, 5, 6)
    (3, 5, 7)
    (4, 5, 6)
    (4, 5, 7)
    (4, 6, 7)
    (5, 6, 7)
    

    The question asks about efficiency, so here's some idea about that:

    >>> import random, timeit
    >>> nums = sorted(random.randrange(0, 5000) for _ in range(150))
    >>> len(list(combinations_max_diff(nums, 9, 150)))
    16932905
    >>> timeit.timeit(lambda: list(combinations_max_diff(nums, 9, 150)), number=1)
    15.906288493999455
    

    So, about 16 seconds to generate about 17 million combinations, or a little under one microsecond per combination on my machine.

    0 讨论(0)
  • 2021-01-18 06:28

    Here it is:

    from itertools import combinations, islice, takewhile
    
    def mad_combinations(data, comb_lenth, diff, create_comb=tuple):
        assert comb_lenth >= 2
        sorted_nums = sorted(frozenset(data))
        stop_index = len(sorted_nums) # or use None - what is faster?
        combination = [None]*comb_lenth # common memory
    
        def last_combinator(start_index, right_max_number):
            """Last combination place loop"""
            return takewhile(right_max_number.__ge__, islice(sorted_nums, start_index, stop_index))
            # In other words:
            # for x in islice(sorted_nums, start_index, stop_index):
            #     if x <= right_max_number:
            #         yield x
            #     else: return
    
        def _create_combinator(next_place_combinator, current_combination_place):
            # this namespace should store variables above
            def combinator(start_index, right_max_number):
                """Main loop"""
                for i, combination[current_combination_place] in \
                    enumerate(
                        takewhile(
                            right_max_number.__ge__,
                            islice(sorted_nums, start_index, stop_index)),
                        start_index + 1):
                    yield from ( # it yields last combination place number
                        next_place_combinator(i, combination[current_combination_place] + diff))
    
            return combinator
    
        for combination_place in range(comb_lenth-2, 0, -1): # create chain of loops
            last_combinator = _create_combinator(last_combinator, combination_place)
    
        last_index = comb_lenth - 1
        # First combination place loop:
        for j, combination[0] in enumerate(sorted_nums, 1):
            for combination[last_index] in last_combinator(j, combination[0] + diff):
                yield create_comb(combination) # don't miss to create a copy!!!
    

    The function above is roughly equivalent to:

    def example_of_comb_length_3(data, diff):
        sorted_nums = sorted(frozenset(data))
        for i1, n1 in enumerate(sorted_nums, 1):
            for i2, n2 in enumerate(sorted_nums[i1:], i1 + 1):
                if n2 - n1 > diff:break
                for n3 in sorted_nums[i2:]:
                    if n3 - n2 > diff:break
                    yield (n1, n2, n3)
    

    Versions that use filter:

    def insane_combinations(data, comb_lenth, diff):
        assert comb_lenth >= 2
        for comb in combinations(sorted(frozenset(data)), comb_lenth):
            for left, right in zip(comb, islice(comb, 1, comb_lenth)):
                if right - left > diff:
                    break
            else:
                yield comb
    
    
    def crazy_combinations(data, comb_lenth, diff):
        assert comb_lenth >= 2
        last_index = comb_lenth - 1
        last_index_m1 = last_index - 1
        last_rule = (lambda comb: comb[last_index] - comb[last_index_m1] <= diff)
        _create_rule = (lambda next_rule, left, right:
            (lambda comb: (comb[right] - comb[left] <= diff) and next_rule(comb)))
        for combination_place in range(last_index_m1, 0, -1): 
            last_rule = _create_rule(last_rule, combination_place - 1, combination_place)
        return filter(last_rule, combinations(sorted(frozenset(data)), comb_lenth))
    

    Tests:

    def test(fetch, expected, comb_length, diff):
        fetch = tuple(fetch)
        assert list(insane_combinations(fetch, comb_length, diff)) == \
               list(crazy_combinations(fetch, comb_length, diff)) == \
               list(mad_combinations(fetch, comb_length, diff)) == list(expected)
    
    if __name__ == '__main__':
        test([1,2,3,4,5,6],
             comb_length=3, diff=2,
             expected=[
                (1, 2, 3), (1, 2, 4), (1, 3, 4), (1, 3, 5), (2, 3, 4), (2, 3, 5), (2, 4, 5),
                (2, 4, 6), (3, 4, 5), (3, 4, 6), (3, 5, 6), (4, 5, 6)])
    
        test([1, 2, 3, 8, 9, 10, 11, 12, 13],
             comb_length=3, diff=3,
             expected=[
                 (1, 2, 3), (8, 9, 10), (8, 9, 11), (8, 9, 12), (8, 10, 11), (8, 10, 12),
                 (8, 10, 13), (8, 11, 12), (8, 11, 13), (9, 10, 11), (9, 10, 12), (9, 10, 13),
                 (9, 11, 12), (9, 11, 13), (9, 12, 13), (10, 11, 12), (10, 11, 13), (10, 12, 13),
                 (11, 12, 13)])
    

    I did not bother much with edge cases!! And I've tested only these 2 fetches! If you will find my answer helpful, be sure to test all possible options and write about bugs found (many bugs, I think). To check your concrete fetch use mad_combinations(your_fetch, 9, 150).

    0 讨论(0)
提交回复
热议问题