Divide and conquer strategy to determine whether the same element makes up more than 1/3 of a list

你说的曾经没有我的故事 提交于 2019-12-04 12:58:37

Here's a rough draft I experimented with for fun. It looks like the divide and conquer may reduce the number of candidate frequency checks but I'm not sure (see the last example, where only 0 is checked against the full list).

If we divide the list in three, a valid candidate (one whose overall frequency is 1/3 or more) must have a frequency of at least 1/3 in at least one of the parts. This narrows our list of candidates for searching in other parts. Let f(A, l, r) represent candidates that could have a frequency of 1/3 or more in their parent group. Then:

from math import ceil

def f(A, l, r):
    """Return the values that occur in at least a third of ``A[l..r]``.

    Divide and conquer: the inclusive range ``l..r`` is cut into three
    nearly equal parts and each part is solved recursively.  A value that
    fills at least a third of the whole range must also fill at least a
    third of one of the parts (otherwise its three part-counts would add up
    to less than a third of the range), so only the values returned by the
    recursive calls need to be counted against the full range.

    A trace of every step is printed to stdout.

    Args:
        A: list to search.
        l: index of the first element of the range (inclusive).
        r: index of the last element of the range (inclusive).

    Returns:
        A list of the distinct values whose count in ``A[l..r]`` is at
        least ``ceil(length / 3)``.  For ranges of three elements or fewer
        the slice itself is returned unchanged, so it may hold duplicates.
    """
    length = r - l + 1

    if length <= 3:
        # Every element of such a short range already makes up a third of it.
        candidates = A[l:r+1]
        print("l, r, candidates: %s, %s, %s\n" % (l, r, candidates))
        return candidates

    third = length // 3
    # ceil(length / 3) in exact integer arithmetic.
    lg_third = (length + 2) // 3

    # i and j are the last indices of the left and middle parts; any
    # remainder of the division by three is given to the leftmost parts.
    remainder = length % 3
    if remainder == 1:
        i, j = l + third, l + 2 * third
    elif remainder == 2:
        i, j = l + third, l + 2 * third + 1
    else:
        i, j = l + third - 1, l + 2 * third - 1

    left_candidates = f(A, l, i)
    middle_candidates = f(A, i + 1, j)
    right_candidates = f(A, j + 1, r)
    print("length: %s, lg_third: %s" % (length, lg_third))
    print("Candidate parts: %s, %s, %s" % (left_candidates, middle_candidates, right_candidates))

    parts = [
        ("Left", left_candidates, A[l:i+1]),
        ("Middle", middle_candidates, A[i+1:j+1]),
        ("Right", right_candidates, A[j+1:r+1]),
    ]
    candidates = []
    # The total count of a value does not depend on which part proposed it,
    # so each distinct value is examined only once.
    seen = []

    for name, part_candidates, own_part in parts:
        for e in part_candidates:
            if e in seen:
                continue
            seen.append(e)
            count = own_part.count(e)
            if count < lg_third:
                # Not frequent enough inside its own part alone: add every
                # occurrence found in the two sibling parts.  Occurrences
                # must never be discarded, however few a sibling holds.
                for other_name, _, other_part in parts:
                    if other_name == name:
                        continue
                    other_count = other_part.count(e)
                    print("%s: counting %s in %s: %s" % (name, e, other_name.lower(), other_count))
                    count += other_count
            if count >= lg_third:
                candidates.append(e)

    print("l, r, candidates: %s, %s, %s\n" % (l, r, candidates))
    return candidates


# Sample inputs; uncomment one of the alternatives to try it instead.
#A = [1, 1, 2, 4, 5]
#A = [1, 2, 3, 1, 2, 3, 1, 2, 3]
#A = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3]
A = [2, 2, 1, 3, 3, 1, 4, 4, 1]
#A = [x for x in range(1, 13)] + [0] * 6
# print() with a single argument behaves the same on Python 2 and Python 3.
print(f(A, 0, len(A) - 1))

You can do a variation on quick-sort:

  • choose a pivot
  • partition the array into the elements smaller than the pivot and the elements larger than the pivot
  • find the most frequent element on each side
  • return the most frequent element (and its frequency count) of the two in the recursive call
  • if you only need to check for a frequency above n/3, you only have to examine a sub-array when it holds more than n/3 elements.

If you need only to check for frequency n/3, the time complexity is linear for the average case.

The time complexity to find the most frequent element is the same as quicksort.

You can use a Binary Search Tree (BST): (1) create a BST that maintains a key count at each node; (2) traverse the tree to find the maximum key count using divide and conquer; (3) test whether the maximum count is > n/3. With the data in a BST, divide and conquer is simple, since we just have to determine whether the left branch, the current node, or the right branch has the highest repeat count.

# BST node: one key, an occurrence counter and two child links.
class newNode:
    """Node of a binary search tree that also counts its key."""

    def __init__(self, data):
        """Create a childless node for *data* with its counter set to 1."""
        self.left = self.right = None
        self.key, self.count = data, 1

# Insert a key into the BST, counting duplicates instead of storing them twice.
def insert(node, key):
    """Insert *key* into the tree rooted at *node* and return the root.

    If the key is already present its node's ``count`` is incremented;
    otherwise a new leaf is attached.  The descent is iterative, so a
    degenerate (list-shaped) tree, such as one built from sorted input,
    cannot exhaust the interpreter's recursion limit.

    Args:
        node: root of the tree, or None for an empty tree.
        key: value to insert; must be comparable with the stored keys.

    Returns:
        The root of the tree: a new node if the tree was empty, otherwise
        the same *node* that was passed in.
    """
    # Empty tree: the new node becomes the root.
    if node is None:
        return newNode(key)

    current = node
    while True:
        if key == current.key:
            # Duplicate key: bump the counter, no new node is needed.
            current.count += 1
            break
        if key < current.key:
            if current.left is None:
                current.left = newNode(key)
                break
            current = current.left
        else:
            if current.right is None:
                current.right = newNode(key)
                break
            current = current.right

    # The root itself never changes once the tree is non-empty.
    return node

# Finds the node with the highest count in a binary search tree
def MaxCount(node):
    """Return ``(count, node)`` for the node with the largest ``count``.

    The tree is walked iteratively in post-order (left subtree, right
    subtree, then the node itself), so the depth of the tree is not limited
    by the interpreter's recursion limit.  On ties the node visited first
    in that order wins.

    Args:
        node: root of the tree, or None for an empty tree.

    Returns:
        A tuple ``(count, node)``; ``(0, None)`` when the tree is empty or
        no node has a count greater than zero.
    """
    best = (0, None)
    # Each stack entry is (node, children_done); a node is compared only
    # after both of its subtrees have been processed.
    stack = [(node, False)]
    while stack:
        current, children_done = stack.pop()
        if current is None:
            continue
        if children_done:
            # Strict comparison keeps the earliest node on equal counts.
            if current.count > best[0]:
                best = (current.count, current)
        else:
            stack.append((current, True))
            stack.append((current.right, False))
            # Pushed last so that the left subtree is handled first.
            stack.append((current.left, False))
    return best

def generateBST(a):
    """Build a tree by passing every element of *a*, in order, to ``insert``.

    Returns the root produced by the last ``insert`` call, or None when
    *a* is empty.
    """
    tree = None
    for value in a:
        tree = insert(tree, value)
    return tree

# Driver Code
if __name__ == '__main__':
    a = [1, 2, 3, 1, 1]
    root = generateBST(a)
    cnt, node = MaxCount(root)
    # "More than n/3" is a strict test.  Comparing 3 * cnt with len(a) keeps
    # it exact in integer arithmetic and is False for an empty list, where
    # node is None and must not be dereferenced.
    if 3 * cnt > len(a):
        print(node.key)  # Prints 1
    else:
        print(None)

A technique for the n/3 problem that does not use divide and conquer and runs in O(n) time, from https://www.geeksforgeeks.org/n3-repeated-number-array-o1-space/:

# Python 3 program to find if  
# any element appears more than 
# n/3. 
import sys 

def appearsNBy3(arr, n):
    """Return a value found more than ``n / 3`` times in ``arr[0..n-1]``.

    The first pass keeps two candidate values with a vote counter each; an
    element that matches neither candidate while both counters are non-zero
    decrements both.  The second pass counts the real occurrences of the
    two surviving candidates.

    Args:
        arr: indexable sequence of values.
        n: number of leading elements of *arr* to examine.

    Returns:
        The first candidate if it occurs more than ``n / 3`` times,
        otherwise the second candidate if it does, otherwise -1.
    """
    # Both candidate slots start out holding sys.maxsize as a placeholder.
    cand_a = cand_b = sys.maxsize
    votes_a = votes_b = 0

    # Pass 1: voting.
    for idx in range(n):
        if cand_a == arr[idx]:
            votes_a += 1
            continue
        if cand_b == arr[idx]:
            votes_b += 1
            continue
        if votes_a == 0:
            # Free slot: adopt the current element as the first candidate.
            votes_a += 1
            cand_a = arr[idx]
            continue
        if votes_b == 0:
            # Free slot: adopt the current element as the second candidate.
            votes_b += 1
            cand_b = arr[idx]
            continue
        # Differs from both candidates: cancel one vote from each.
        votes_a -= 1
        votes_b -= 1

    # Pass 2: count how often each surviving candidate really occurs.
    total_a = total_b = 0
    for idx in range(n):
        if arr[idx] == cand_a:
            total_a += 1
        elif arr[idx] == cand_b:
            total_b += 1

    for total, winner in ((total_a, cand_a), (total_b, cand_b)):
        if total > n / 3:
            return winner
    return -1

# Driver code: run the check on a five-element sample list.
arr = [1, 2, 3, 1, 1]
n = len(arr)
print(appearsNBy3(arr, n))
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!