Problem: Generate large binary strings (length 2000+). Do it quickly, as this generateRandom() function will be called 300,000 times in the algorithm.
To go from J.F. Sebastian's answer to a binary string (string with 0
and 1
characters in it):
>>> import random
>>> r = random.SystemRandom()
>>> bin(r.getrandbits(2000))[2:].zfill(2000)
'00010110001101100011000011001011011000010001010111000001111001001111010110010110011011110011101101100001111000011111010110100110011011110011111110010111111001110110011011011111011101000010101001001000100010001011011100011101100101010110001101101100100000001001010001101100110001100001101101010010001000010010010101100110100000001001001101000110001011111111111010011111111011001000100011001011011100010100010000010011100100011111011110111101010001001111011010101101010000110110010011000101111110000101001111100111111101101101110001100111011110011001000110110011101001010111100110000111000110011010000000111011001011100000100100111011110111101001110001110010010101100110000111001011100101111110100000100101011000110100111111001100111001000000111010000000101001111011111101111111101101011010010001101101101010011000100101000111000100100011010100111101111010001011110100100011111101101000111011100010000111000111110100001100111010001001010100001010111100001001010110010010100010011010010010011000000011010111110001100001100110101111011100010100100101000101000111110011111011011011111101011001001111010110000101100011111000111010100001000000101111011010011010011101011110000101011011110001111111101100000100000110111100011010110000111110110010100010010111111110010000000100001011110011100001001110110111100001101000011101011011101110001001001110011111111010001000100011010000110011000000000011111011011011101011010010001000101001011111110110000011000000000010101101111100011001010011100010010110111011001001011011010101111011101000001000011110010011111000011001110000100100111010000011100111110101110111010010101110010111100011111001101010010010000001010110011110001110010011011001110011000010010111111100101101100110111100001101100001100000101110000000011111010011000010000000011001101000001001000101001010100001110001011111110100110111000101011101111110010110001111111010100000100011011110011100010000000101001011100000010101101110110001110010111010000111101110100001100101000101010000110111011001001011'
>>> bin(r.getrandbits(2000))[2:].zfill(2000)
'11111011011010000011111101101101001110101011100110100011111011101101111100110001000110110100101010101000110010000101010100011111100111100010000001011011101100011101000001100101000101000010010000001111110101010011001110001001010011000011010100011111110111110010100000111011000000110000100000000110101101101111001101100000010010000100101001111100101010011101011010111110010001111111100101011110001101100111010010101110111000001000100101111011010001111001001010010000011100111101101101111101111010101100001000110011100110010110010101101001011000010101011111010010111000000100101100000100000010010000001000000001110010010100100111001011011111100111111001100000111111110011100001000111111110110001000010010110101100100001000001011110110100000111010101100111111010011111011111011000100101010111111000110111001001100011101101001000100110011001011101100010011010101111000011011111110001010110100100100001010100100100110101100111011110101001001111000010001101001010111111110011110111011111010001110011001000000100000101100001001100101000010011001101001110000101100000110000110101110011000010111110100100100100110010111110011101001000100111110011001101010000101100010011110100100110000111010111001000001101010101000001111001110010111000000101111101000110100101101000100000101100100110111101100010011110111011010111000111111100001110000100111001110001010101111000111100000011111111111110110100011000000111010100111011011100100010110100010110001111010110010100101111010111101110011010110100100010001001001110010110100111010010111001011101100000010011000110011011011010001100010000000010110011101101000111101000011101100001010001010001010111111110101100001110010000001010000000011000000000101111001001100100010110010000000101001010011110110111101111001110001001110111101011111111010101101011101010010111101000000010000101010100000101111010011010001001001101000001001011010110000000000111010001111001110100110000100011110100110110010011000111110100011000110001100001101101001010110001001001001101000011011101001010'
>>> bin(r.getrandbits(2000))[2:].zfill(2000)
'00100010111111011100001001000010100100101000100010101010100001011100101010100000000011111000001001011101010010010000101111111001101101100101011011001011110001001111101011101000101101000001000000110010011001110010101100101000011111001111111011011010001010010000011000000010011001101000100101000001001101001100010100000100100110000010110101011101011111010000010110011001101010010010000111001101101101001001100101111001101110111111111000011000001011011100111010011101001100011101011001110010111101011111010100101101010001011011001001010001111011100100101100100011111010010011110010011101100101001100111011011110011101010101010011001100000100110000001001111001111010011110110110001100110111011011111001110101000100100000011101001100111001100010001101010110000110010101010100011011101101010010100011111100001100100011010111000001011000101110100001111111001001111111101001000000011110110001010101001110110011000010101011010111111011010000011000000111110001110111111110110001110101011000010101001111110111101101010000111000011001111110101000001000011110000101011011001001000100010000110000110011100000100110100101011010011000110001010000101001110101111000110101001010010101000100011010101001110101101111010010101000011111101110110100101000110011010100010110110101010111001010110111011000000010001000110010011000011011110101101000111111101110100100011100000011110100000110111001001101110000011001101101011001000000101001110110011101111111010000000101111011100011010110111110000000001110101101100001001000001010000100101001111110110101000000101101111010010110000111101111011111100001111010001000100110111011010100110111011101011110001001001010100010101011001100100110101101111010011110001010011110000100100011101100101000000001011001010100101010011000011000100001011010010001010100001110101101100000010101011111010011000000000100100000000001011110101011101100101100010110101000111001010000001011000010001111101011111000011100101101110111110000011001000001101010001010000111001100101100111000111000001000000000'
With this benchmark:
import random
import time
def run(n):
r = random.SystemRandom()
for i in xrange(n):
if i%30000 == 0: print i
bin(r.getrandbits(2000))[2:].zfill(2000)
s = time.time()
run(300000)
e = time.time()
print "Took %.2fs" % (e-s,)
The result was Took 12.32s
Just getting the random bits without any string conversion (only r.getrandbits(2000)
) took 7.77s
, so if you could find a way to use the random bits as a long
then you'd save yourself some time.
Re-running the benchmark using os.urandom(250)
instead (without additional processing) took only 3.59s
, so that seems to be the fastest option.
Measure whether it is fast enough for your purposes, "randomness" might diminish the more you call it: os.urandom(250). It produces a binary string aka bytes.
To avoid "long int too large to convert to float" error don't use floats.
If you need an integer with k
random bits instead of a binary string:
import random
r = random.SystemRandom()
n = r.getrandbits(2000) # uses os.urandom() under the hood
To get a string of "0"s and "1"s:
k = 2000
binstr = "{:0{}b}".format(r.getrandbits(k), k)
Note: you can't use randint/randrange
for large numbers if getrandbits
is not used:
import random
class R(random.Random):
def random(self): # override random to suppress getrandbits usage
return random.random()
r = R()
r.randrange(2**2000) # -> OverflowError: long int too large to convert to float
b2a_bin() extension allows to create binary strings ("01") directly from bytestrings without creating an intermediate Python integer. It is 3-20 times faster than pure Python analogs:
def b2a_bin_bin(data):
return bin(int.from_bytes(data, 'big', signed=False)
)[2:].zfill(len(data)*8).encode('ascii', 'strict')
def b2a_bin_format(data):
n = int.from_bytes(data, 'big', signed=False)
return "{:0{}b}".format(n, len(data)*8).encode('ascii', 'strict')
Usage:
>>> import os
>>> from b2a_bin import b2a_bin
>>> b2a_bin.b2a_bin(b'\x0a')
b'00001010'
>>> b2a_bin(os.urandom(5))
b'1001111011000011111001110010000101111010'
Is random.randrange
really too slow? Let's see how slow it really is.
import random
word_size = 2048
word_max = 2 ** word_size
def random_bits(n):
"""
Return a string consisting of `n` zeroes and ones (chosen randomly).
"""
def words():
s, m, r = word_size, word_max, n % word_size
for _ in range(n // s):
yield bin(random.randrange(m))[2:].zfill(s)
yield bin(random.randrange(2 ** r))[2:].zfill(r)
return ''.join(words())
>>> from timeit import Timer
>>> Timer(lambda:random_bits(2000)).timeit(number=300000)
9.680696964263916
10 seconds doesn't seem an absurd amount of time for choosing 600 million random bits. So perhaps you can say more about your speed requirement. Is this really too slow?