I am working on a divide and conquer algorithm to determine whether more than 1/3 of the elements in a list are the same. For example: [1,2,3,4] No, all elements are unique. [1,1,2,4,5] Yes, 2 of them are the same.
Without sorting, is there a divide and conquer strategy? I am stuck on how to divide...
def is_valid(ids):
    """Return True if some value fills more than one third of ``ids``.

    An empty list has no such value, so the result is False.
    """
    if not ids:
        return False
    return bool(is_valid_recur(ids, 0, len(ids) - 1))


def is_valid_recur(ids, l, r):
    """Return the values occurring more than (r - l + 1) / 3 times in ids[l..r].

    ``l`` and ``r`` are inclusive indices. The range is split in half; a value
    that fills more than a third of the whole range must also fill more than a
    third of at least one half, so only the values returned for the halves
    need to be recounted over the full range. At most two values can qualify.
    Each level does a linear amount of counting, giving O(n log n) overall.
    """
    if l > r:
        return []
    if l == r:
        # A single element trivially fills its whole range.
        return [ids[l]]

    m = (l + r) // 2
    size = r - l + 1
    result = []
    for candidate in is_valid_recur(ids, l, m) + is_valid_recur(ids, m + 1, r):
        if candidate in result:
            # Both halves may nominate the same value; count it once.
            continue
        occurrences = sum(1 for k in range(l, r + 1) if ids[k] == candidate)
        # Integer form of occurrences > size / 3.
        if 3 * occurrences > size:
            result.append(candidate)
    return result
Thanks a lot!
Here's a rough draft I experimented with for fun. It looks like divide and conquer may reduce the number of candidate frequency checks, but I'm not sure (see the last example, where only 0 is checked against the full list).
If we divide the list into three parts, a valid candidate must have a frequency of at least 1/3 in at least one of the parts. This narrows the list of candidates we need to search for in the other parts. Let f(A, l, r)
represent the candidates that could have a frequency of 1/3 or more in their parent group. Then:
from math import ceil
def _confirm_candidates(label, own_candidates, own_part, other_parts,
                        lg_third, candidates):
    """Append to ``candidates`` each nominee that reaches ``lg_third`` occurrences.

    ``own_part`` is the slice that nominated ``own_candidates``;
    ``other_parts`` is an ordered list of ``(name, slice)`` pairs for the two
    remaining slices. The other slices are only counted (and the count
    printed) when the nominee's own slice does not already reach the threshold.
    """
    seen = []
    for e in own_candidates:
        if e in seen or e in candidates:
            continue
        seen.append(e)

        count = own_part.count(e)
        if count < lg_third:
            for other_name, other_part in other_parts:
                other_count = other_part.count(e)
                print("%s: counting %s in %s: %s" % (label, e, other_name, other_count))
                # Every occurrence counts towards the total. Dropping small
                # per-part counts would reject values whose occurrences are
                # concentrated in one part (e.g. 4 + 1 + 1 out of 18).
                count = count + other_count

        if count >= lg_third:
            candidates.append(e)


def f(A, l, r):
    """Return the values occurring at least ceil(length / 3) times in A[l..r].

    ``l`` and ``r`` are inclusive indices and ``length = r - l + 1``. Ranges of
    three or fewer elements are returned as-is (duplicates included), since
    every element there already meets the threshold. Larger ranges are split
    into three parts; a value that fills a third of the whole range must fill
    a third of at least one part, so only the values returned for the parts
    are recounted. Progress is printed as a debugging trace.
    """
    length = r - l + 1

    if length <= 3:
        candidates = A[l:r + 1]
        print("l, r, candidates: %s, %s, %s\n" % (l, r, candidates))
        return candidates

    third = length // 3
    lg_third = int(ceil(length / float(3)))
    # Only reported in the trace below; it is roughly a third of one part.
    sm_third = lg_third // 3

    # i and j are the inclusive end indices of the left and middle parts.
    if length % 3 == 1:
        i, j = l + third, l + 2 * third
    elif length % 3 == 2:
        i, j = l + third, l + 2 * third + 1
    else:
        i, j = l + third - 1, l + 2 * third - 1

    left_candidates = f(A, l, i)
    middle_candidates = f(A, i + 1, j)
    right_candidates = f(A, j + 1, r)
    print("length: %s, sm_third: %s, lg_third: %s" % (length, sm_third, lg_third))
    print("Candidate parts: %s, %s, %s" % (left_candidates, middle_candidates, right_candidates))

    left_part = A[l:i + 1]
    middle_part = A[i + 1:j + 1]
    right_part = A[j + 1:r + 1]

    candidates = []
    _confirm_candidates("Left", left_candidates, left_part,
                        [("middle", middle_part), ("right", right_part)],
                        lg_third, candidates)
    _confirm_candidates("Middle", middle_candidates, middle_part,
                        [("left", left_part), ("right", right_part)],
                        lg_third, candidates)
    _confirm_candidates("Right", right_candidates, right_part,
                        [("left", left_part), ("middle", middle_part)],
                        lg_third, candidates)

    print("l, r, candidates: %s, %s, %s\n" % (l, r, candidates))
    return candidates
# Sample inputs: exactly one assignment to A should be active at a time.
#A = [1, 1, 2, 4, 5]
#A = [1, 2, 3, 1, 2, 3, 1, 2, 3]
#A = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3]
A = [2, 2, 1, 3, 3, 1, 4, 4, 1]
#A = [x for x in range(1, 13)] + [0] * 6

# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(f(A, 0, len(A) - 1))
You can do a variation on quick-sort:
- choose a pivot
- partition the array into the elements smaller than the pivot and the elements larger than the pivot, counting how many elements are equal to the pivot
- find the most frequent element on each side
- return the most frequent element (and its frequency count) among the two sides and the pivot from the recursive call
- if you only need to check for frequency n/3, you only need to recurse into a sub-array when it is larger than n/3.
If you need only to check for frequency n/3, the time complexity is linear for the average case.
The time complexity to find the most frequent element is the same as quicksort.
You can use a Binary Search Tree (BST):
1. Create a BST that maintains a key count at each node.
2. Traverse the tree to find the maximum key count using divide and conquer.
3. Test if the max count > n/3.
With the data in a BST, divide and conquer is simple, since we just have to determine whether the left branch, the current node, or the right branch has the highest repeat count.
# A utility class representing one node of the counting BST
class newNode:
    """BST node that stores a key together with an occurrence counter."""

    # Constructor to create a new node
    def __init__(self, data):
        self.key = data     # value stored at this node
        self.count = 1      # counter starts at 1 when the node is created
        self.left = None    # left child, or None
        self.right = None   # right child, or None
# A utility function to insert a new node
# with given key in BST
def insert(node, key):
    """Insert ``key`` into the BST rooted at ``node`` and return the root.

    If the key is already present its node's ``count`` is incremented;
    otherwise a new ``newNode`` is attached as a leaf. Smaller keys go to the
    left subtree and larger keys to the right. The walk is iterative, so a
    degenerate (list-like) tree cannot exhaust the recursion limit.
    """
    # If the tree is empty, the new node becomes the root
    if node is None:
        return newNode(key)

    current = node
    while True:
        # If key already exists in BST, increment count and stop
        if key == current.key:
            current.count += 1
            break

        # Otherwise, descend into exactly one of the two subtrees
        if key < current.key:
            if current.left is None:
                current.left = newNode(key)
                break
            current = current.left
        else:
            if current.right is None:
                current.right = newNode(key)
                break
            current = current.right

    # return the (unchanged) root pointer
    return node
# Finds the node with the highest count in a binary search tree
def MaxCount(node):
    """Return ``(count, node)`` for the node with the largest ``count``.

    An empty subtree yields ``(0, None)``. The best result of the left
    subtree, the right subtree and the current node are compared; on a tie
    the left result is preferred, then the right, then the current node,
    because ``max`` keeps the first maximal item it encounters.
    """
    if node is None:
        return 0, None

    left = MaxCount(node.left)
    right = MaxCount(node.right)
    current = node.count, node

    # Compare on the count only; node objects themselves are not ordered.
    return max([left, right, current], key=lambda x: x[0])
def generateBST(a):
    """Build a counting BST from the iterable ``a`` and return its root.

    Elements are inserted one by one with ``insert``, starting from an empty
    tree. The result is None when ``a`` is empty.
    """
    root = None
    for x in a:
        root = insert(root, x)
    return root
# Driver Code
if __name__ == '__main__':
    a = [1, 2, 3, 1, 1]
    root = generateBST(a)
    cnt, node = MaxCount(root)
    # "More than a third" is a strict comparison. Multiplying keeps the test
    # in integers, so a list of all-distinct values is not reported.
    if 3 * cnt > len(a):
        print(node.key)  # Prints 1
A non-divide-and-conquer technique for the n/3 problem that runs in O(n) time, from https://www.geeksforgeeks.org/n3-repeated-number-array-o1-space/:
# Python 3 program to find if
# any element appears more than
# n/3.
import sys
def appearsNBy3(arr, n):
    """Return an element occurring more than n/3 times in arr[0..n-1], or -1.

    The first pass keeps two candidate values with counters: a matching
    element increments its candidate's counter, an element seen while a
    counter is zero takes over that candidate slot, and any other element
    decrements both counters. The second pass recounts the two surviving
    candidates and checks them against the n/3 threshold.

    If both candidates qualify, ``first`` is returned. -1 is the "not found"
    marker, so it is ambiguous for inputs that contain -1.
    """
    count1 = 0
    count2 = 0
    first = sys.maxsize
    second = sys.maxsize

    for i in range(0, n):
        # if this element matches the first candidate,
        # increment count1.
        if first == arr[i]:
            count1 += 1

        # if this element matches the second candidate,
        # increment count2.
        elif second == arr[i]:
            count2 += 1

        # a free candidate slot is taken over by the current element.
        elif count1 == 0:
            count1 += 1
            first = arr[i]

        elif count2 == 0:
            count2 += 1
            second = arr[i]

        # if current element is different from both candidates
        # and neither slot is free, decrement both the counts.
        else:
            count1 -= 1
            count2 -= 1

    count1 = 0
    count2 = 0

    # Again traverse the array
    # and find the actual counts.
    for i in range(0, n):
        if arr[i] == first:
            count1 += 1
        elif arr[i] == second:
            count2 += 1

    # Integer form of count > n / 3.
    if 3 * count1 > n:
        return first

    if 3 * count2 > n:
        return second

    return -1
# Driver code: run the check on a sample list in which the value 1
# fills three of the five positions, and print the returned value.
arr = [1, 2, 3, 1, 1 ]
n = len(arr)
print(appearsNBy3(arr, n))