I needed to write a weighted version of random.choice (each element in the list has a different probability for being selected). This is what I came up with:
I didn't love the syntax of any of those. I really wanted to just specify what the items were and what the weighting of each was. I realize I could have used random.choices
but instead I quickly wrote the class below.
import random, string
from numpy import cumsum
class randomChoiceWithProportions:
'''
Accepts a dictionary of choices as keys and weights as values. Example if you want a unfair dice:
choiceWeightDic = {"1":0.16666666666666666, "2": 0.16666666666666666, "3": 0.16666666666666666
, "4": 0.16666666666666666, "5": .06666666666666666, "6": 0.26666666666666666}
dice = randomChoiceWithProportions(choiceWeightDic)
samples = []
for i in range(100000):
samples.append(dice.sample())
# Should be close to .26666
samples.count("6")/len(samples)
# Should be close to .16666
samples.count("1")/len(samples)
'''
def __init__(self, choiceWeightDic):
self.choiceWeightDic = choiceWeightDic
weightSum = sum(self.choiceWeightDic.values())
assert weightSum == 1, 'Weights sum to ' + str(weightSum) + ', not 1.'
self.valWeightDict = self._compute_valWeights()
def _compute_valWeights(self):
valWeights = list(cumsum(list(self.choiceWeightDic.values())))
valWeightDict = dict(zip(list(self.choiceWeightDic.keys()), valWeights))
return valWeightDict
def sample(self):
num = random.uniform(0,1)
for key, val in self.valWeightDict.items():
if val >= num:
return key
Since version 1.7.0, NumPy has a choice function that supports probability distributions.
from numpy.random import choice
draw = choice(list_of_candidates, number_of_items_to_pick,
p=probability_distribution)
Note that probability_distribution
is a sequence in the same order of list_of_candidates
. You can also use the keyword replace=False
to change the behavior so that drawn items are not replaced.
If you happen to have Python 3, and are afraid of installing numpy
or writing your own loops, you could do:
import itertools, bisect, random
def weighted_choice(choices):
weights = list(zip(*choices))[1]
return choices[bisect.bisect(list(itertools.accumulate(weights)),
random.uniform(0, sum(weights)))][0]
Because you can build anything out of a bag of plumbing adaptors! Although... I must admit that Ned's answer, while slightly longer, is easier to understand.
0.0 <= x < total
. from random import random
from bisect import bisect
def weighted_choice(choices):
values, weights = zip(*choices)
total = 0
cum_weights = []
for w in weights:
total += w
cum_weights.append(total)
x = random() * total
i = bisect(cum_weights, x)
return values[i]
>>> weighted_choice([("WHITE",90), ("RED",8), ("GREEN",2)])
'WHITE'
If you need to make more than one choice, split this into two functions, one to build the cumulative weights and another to bisect to a random point.
Here is another version of weighted_choice that uses numpy. Pass in the weights vector and it will return an array of 0's containing a 1 indicating which bin was chosen. The code defaults to just making a single draw but you can pass in the number of draws to be made and the counts per bin drawn will be returned.
If the weights vector does not sum to 1, it will be normalized so that it does.
import numpy as np
def weighted_choice(weights, n=1):
if np.sum(weights)!=1:
weights = weights/np.sum(weights)
draws = np.random.random_sample(size=n)
weights = np.cumsum(weights)
weights = np.insert(weights,0,0.0)
counts = np.histogram(draws, bins=weights)
return(counts[0])
def weighted_choice(choices):
total = sum(w for c, w in choices)
r = random.uniform(0, total)
upto = 0
for c, w in choices:
if upto + w >= r:
return c
upto += w
assert False, "Shouldn't get here"