I'm looking for a built-in Python data structure that can add a new element, remove an existing element, and choose a random element, all in better than O(n) time.
A tree seems like overkill. You can use a dict for all of your operations in O(1) unless I'm missing something. This is just a proof-of-concept; you can subclass dict or set and do a more complete implementation.
import random
from time import perf_counter
class SetFastRnd:
    """Set-like container with O(1) add, remove, membership and random pick.

    Elements are kept in a dense list so that ``random.choice`` and
    ``random.choices`` (which index their argument by integer position)
    work for any hashable elements, not only for the integers
    ``0..len-1``.  A dict maps each element to its position in that list
    so membership tests and removals stay constant time.

    Iteration order is unspecified: removing an element moves the last
    element into the vacated slot.
    """

    def __init__(self, it):
        """Build the container from the iterable *it*, dropping duplicates."""
        self.elems = {}    # element -> its index in self._items
        self._items = []   # dense storage that random.choice() indexes into
        for elem in it:
            self.add(elem)

    def __contains__(self, elem):
        return elem in self.elems

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)

    def __getitem__(self, index):
        """Return the element stored at position *index*.

        This is positional access (as required by ``random.choice``), not
        a lookup by element value.  Raises ``IndexError`` when *index* is
        out of range.
        """
        return self._items[index]

    def __repr__(self):
        return f"{type(self).__name__}({self._items!r})"

    def add(self, elem):
        """Insert *elem*; do nothing if it is already present."""
        if elem in self.elems:
            return
        self.elems[elem] = len(self._items)
        self._items.append(elem)

    def remove(self, elem):
        """Remove *elem*; raise ``KeyError`` if it is not present."""
        index = self.elems.pop(elem)
        last = self._items.pop()
        # Fill the hole with the former last element so the list stays
        # dense; nothing to do when the removed element *was* the last one.
        if index != len(self._items):
            self._items[index] = last
            self.elems[last] = index
if __name__ == "__main__":
    # Demo and rough timing: set-like operations plus random picks on
    # SetFastRnd, compared with drawing one random element from a plain set.
    # NOTE: n is large; building both containers needs a lot of memory.
    n = 100_000_000
    s = SetFastRnd(range(n))

    start_time = perf_counter()
    print(n // 2 in s)  # True
    s.remove(42)
    print(42 in s)  # False
    s.add(42)
    print(42 in s)  # True
    random.choice(s)  # O(1)
    random.choices(s, k=5)  # O(1)
    print("with dict:", perf_counter() - start_time)

    s = set(range(n))
    start_time = perf_counter()
    # random.sample() requires a sequence: passing a set was deprecated in
    # Python 3.9 and raises TypeError since 3.11, so materialise it first.
    # The conversion is the O(n) cost this comparison is meant to show.
    random.sample(list(s), 1)  # O(n)
    print("with set: ", perf_counter() - start_time)
Output:
True
False
True
with dict: 0.0043047999999998865
with set: 0.5654710000000005
Python does not have a built-in data structure which meets all 3 of your requirements.
That said, it's fairly trivial to implement a tree yourself.
Another option would be to combine a dictionary with a list to create what is effectively a set that also maintains a list of its items:
import random
class ListDict:
    """A set of hashable items that also supports a uniform random pick.

    ``items`` holds the members in a dense list and ``item_to_position``
    maps each member to its index in that list.
    """

    def __init__(self):
        self.items = []
        self.item_to_position = {}

    def add_item(self, item):
        """Add *item* unless it is already a member."""
        if item not in self.item_to_position:
            # The new member lands at the current end of the list.
            self.item_to_position[item] = len(self.items)
            self.items.append(item)

    def remove_item(self, item):
        """Remove *item*; raise ``KeyError`` if it is not a member."""
        hole = self.item_to_position.pop(item)
        tail = self.items.pop()
        if hole == len(self.items):
            # The removed item was the tail itself; nothing to relocate.
            return
        # Move the former tail into the vacated slot to keep the list dense.
        self.items[hole] = tail
        self.item_to_position[tail] = hole

    def choose_random_item(self):
        """Return one member chosen by ``random.choice``."""
        return random.choice(self.items)
Since the only operations done on the list are `.pop()` and `.append()`, they shouldn't take more than constant time (in most Python implementations, at least).