Question
I was making a table of run-times for different operations in Python 2.7, and noticed something that I cannot explain: the run-time of repr(2**n) and of int('1'*n) is O(n^2). I had always assumed that converting between an integer and a string would be O(n), with n being the number of digits. The results show that an O(n) fit gives roughly 30% error, while an O(n^2) fit gives only about 5%.
Could anyone please explain this?
Here are the results that I get (the code is below):
Test Number-1 -- time to compute int('1'*n) (fit to O(n**2))
Spec_string: 1000<=n<=10000 by factors of 2
var_list ['n']
Function list: ('n**2', 'n', '1')
run times:
n = 1000 : 10.045052 microseconds
n = 2000 : 35.215855 microseconds
n = 4000 : 141.801834 microseconds
n = 8000 : 480.597973 microseconds
Coefficients as interpolated from data:
7.17731e-06*n**2
+0.00487043*n
-2.0574*1
(measuring time in microseconds)
Sum of squares of residuals: 0.00673433934709
RMS error = 4.1 percent
Test Number-2 -- time to compute repr(2**n) (fit to O(n**2))
Spec_string: 1000<=n<=10000 by factors of 2
var_list ['n']
Function list: ('n**2', 'n', '1')
run times:
n = 1000 : 1.739025 microseconds
n = 2000 : 6.217957 microseconds
n = 4000 : 29.226065 microseconds
n = 8000 : 102.524042 microseconds
Coefficients as interpolated from data:
1.72846e-06*n**2
-0.000434518*n
+0.433448*1
(measuring time in microseconds)
Sum of squares of residuals: 0.0139070140697
RMS error = 5.9 percent
Test Number-3 -- time to compute int('1'*n) (fit to O(n))
Spec_string: 1000<=n<=10000 by factors of 2
var_list ['n']
Function list: ('n', '1')
run times:
n = 1000 : 10.187864 microseconds
n = 2000 : 37.642002 microseconds
n = 4000 : 128.378153 microseconds
n = 8000 : 492.624998 microseconds
Coefficients as interpolated from data:
0.0380857*n
-28.5106*1
(measuring time in microseconds)
Sum of squares of residuals: 0.268768241745
RMS error = 26 percent
Test Number-4 -- time to compute repr(2**n) (fit to O(n))
Spec_string: 1000<=n<=10000 by factors of 2
var_list ['n']
Function list: ('n', '1')
run times:
n = 1000 : 1.750946 microseconds
n = 2000 : 6.271839 microseconds
n = 4000 : 30.361176 microseconds
n = 8000 : 102.671146 microseconds
Coefficients as interpolated from data:
0.0070098*n
-5.40096*1
(measuring time in microseconds)
Sum of squares of residuals: 0.467752717824
RMS error = 34 percent
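The same growth is visible without any fitting: each time n doubles in the tables above, the measured time goes up by roughly a factor of 3.5-4 rather than 2, which is what you would expect for O(n^2) rather than O(n). A minimal doubling-ratio check (just a sketch for illustration, separate from the test harness below) could look like this:

import timeit

# Time int('1'*n) for doubling n and print the ratio between successive
# timings: a ratio near 2 suggests O(n), a ratio near 4 suggests O(n**2).
prev = None
for n in (1000, 2000, 4000, 8000):
    t = timeit.Timer("int(s)", "s = '1' * %d" % n)
    per_call = min(t.repeat(repeat=3, number=1000)) / 1000.0   # seconds per call
    if prev is None:
        print "n = %5d : %8.2f microseconds" % (n, per_call * 1e6)
    else:
        print "n = %5d : %8.2f microseconds (ratio to previous: %.2f)" % (n, per_call * 1e6, per_call / prev)
    prev = per_call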
Below is the code. You can ignore the support functions in it (lg, sqrt, make_param_list, fit, fit2).
main.py:
from support import *
def test_number():
    print
    print "Test Number-1 -- time to compute int('1'*n) (fit to O(n**2))"
    spec_string = "1000<=n<=10000"
    growth_factor = 2
    print "Spec_string: ",spec_string,"by factors of",growth_factor
    var_list, param_list = make_param_list(spec_string,growth_factor)
    f_list = ("n**2","n","1")
    run_times = []
    trials = 1000
    for D in param_list:
        t = timeit.Timer("string.atoi(x)","import string;x='1'*%(n)s"%D)
        run_times.append(t.timeit(trials)*1e6/float(trials))
    fit(var_list,param_list,run_times,f_list)

    print
    print "Test Number-2 -- time to compute repr(2**n) (fit to O(n**2))"
    spec_string = "1000<=n<=10000"
    growth_factor = 2
    print "Spec_string: ",spec_string,"by factors of",growth_factor
    var_list, param_list = make_param_list(spec_string,growth_factor)
    f_list = ("n**2","n","1")
    run_times = []
    trials = 1000
    for D in param_list:
        t = timeit.Timer("repr(x)","x=2**%(n)s"%D)
        run_times.append(t.timeit(trials)*1e6/float(trials))
    fit(var_list,param_list,run_times,f_list)

    print
    print "Test Number-3 -- time to compute int('1'*n) (fit to O(n))"
    spec_string = "1000<=n<=10000"
    growth_factor = 2
    print "Spec_string: ",spec_string,"by factors of",growth_factor
    var_list, param_list = make_param_list(spec_string,growth_factor)
    f_list = ("n","1")
    run_times = []
    trials = 1000
    for D in param_list:
        t = timeit.Timer("string.atoi(x)","import string;x='1'*%(n)s"%D)
        run_times.append(t.timeit(trials)*1e6/float(trials))
    fit(var_list,param_list,run_times,f_list)

    print
    print "Test Number-4 -- time to compute repr(2**n) (fit to O(n))"
    spec_string = "1000<=n<=10000"
    growth_factor = 2
    print "Spec_string: ",spec_string,"by factors of",growth_factor
    var_list, param_list = make_param_list(spec_string,growth_factor)
    f_list = ("n","1")
    run_times = []
    trials = 1000
    for D in param_list:
        t = timeit.Timer("repr(x)","x=2**%(n)s"%D)
        run_times.append(t.timeit(trials)*1e6/float(trials))
    fit(var_list,param_list,run_times,f_list)

if __name__ == '__main__':
    test_number()
support.py:
import math
import string
import timeit
import scipy.optimize

def lg(x):
    return math.log(x)/math.log(2.0)

def sqrt(x):
    return math.sqrt(x)

def make_param_list(spec_string,growth_factor):
    """
    Generate a list of dictionaries
    given maximum and minimum values for each range.
    Each min and max value is a *string* that can be evaluated;
    each string may depend on earlier variable values.
    Values increment by a factor of growth_factor from min to max.
    Example:
        make_param_list("1<=n<=1000")
        make_param_list("1<=n<=1000;1<=m<=1000;min(n,m)<=k<=max(n,m)")
    """
    var_list = []
    spec_list = string.split(spec_string,";")
    D = {}
    D['lg'] = lg
    D['sqrt'] = sqrt
    D_list = [D]
    for spec in spec_list:
        spec_parts = string.split(spec,"<=")
        assert len(spec_parts)==3
        lower_spec = spec_parts[0]
        var_name = spec_parts[1]
        assert len(var_name)==1
        var_list.append(var_name)
        upper_spec = spec_parts[2]
        new_D_list = []
        for D in D_list:
            new_D = D.copy()
            val = eval(lower_spec,D)
            while val<=eval(upper_spec,D):
                new_D[var_name] = val
                new_D_list.append(new_D.copy())
                val *= growth_factor
        D_list = new_D_list
    return (var_list,D_list)

def fit(var_list,param_list,run_times,f_list):
    """
    Return matrix A needed for least-squares fit.
    Given:
        list of variable names
        list of sample dicts for various parameter sets
        list of corresponding run times
        list of functions to be considered for fit
            these are *strings*, e.g. "n","n**2","min(n,m)",etc.
    prints:
        coefficients for each function in f_list
    """
    print "var_list",var_list
    print "Function list:",f_list
    print "run times:",
    for i in range(len(param_list)):
        print
        for v in var_list:
            print v,"= %6s"%param_list[i][v],
        print ": %8f"%run_times[i],"microseconds",
    print
    rows = len(run_times)
    cols = len(f_list)
    A = [ [0 for j in range(cols)] for i in range(rows) ]
    for i in range(rows):
        D = param_list[i]
        for j in range(cols):
            A[i][j] = float(eval(f_list[j],D))
    b = run_times
    (x,resids,rank,s) = fit2(A,b)
    print "Coefficients as interpolated from data:"
    for j in range(cols):
        sign = ''
        if x[j]>0 and j>0:
            sign = "+"
        elif x[j]>0:
            sign = " "
        print "%s%g*%s"%(sign,x[j],f_list[j])
    print "(measuring time in microseconds)"
    print "Sum of squares of residuals:",resids
    print "RMS error = %0.2g percent"%(math.sqrt(resids/len(A))*100.0)

def fit2(A,b):
    """ Relative error minimizer """
    def f(x):
        assert len(x) == len(A[0])
        resids = []
        for i in range(len(A)):
            sum = 0.0
            for j in range(len(A[0])):
                sum += A[i][j]*x[j]
            relative_error = (sum-b[i])/b[i]
            resids.append(relative_error)
        return resids
    ans = scipy.optimize.leastsq(f,[0.0]*len(A[0]))
    # print "ans:",ans
    if len(A[0])==1:
        x = [ans[0]]
    else:
        x = ans[0]
    resids = sum([r*r for r in f(x)])
    return (x,resids,0,0)
UPDATE 1: Even more confusing!
I checked the CPython implementation of intobject, and it looks like it is supposed to be linear:
/* Convert an integer to a decimal string.  On many platforms, this
   will be significantly faster than the general arbitrary-base
   conversion machinery in _PyInt_Format, thanks to optimization
   opportunities offered by division by a compile-time constant. */
static PyObject *
int_to_decimal_string(PyIntObject *v) {
    char buf[sizeof(long)*CHAR_BIT/3+6], *p, *bufend;
    long n = v->ob_ival;
    unsigned long absn;
    p = bufend = buf + sizeof(buf);
    absn = n < 0 ? 0UL - n : n;
    do {
        *--p = '0' + (char)(absn % 10);
        absn /= 10;
    } while (absn);
    if (n < 0)
        *--p = '-';
    return PyString_FromStringAndSize(p, bufend - p);
}
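One quick thing to check here (a rough sketch, assuming a stock CPython 2.7 build) is whether 2**n is still an int at all at these sizes, since int_to_decimal_string above only handles a PyIntObject, i.e. a value that fits in a C long:

# Print which integer type 2**n actually has on this build (stock CPython 2.7
# assumed): type 'int' wraps a C long, anything larger is promoted to 'long'.
for n in (30, 62, 63, 1000, 8000):
    print "2**%-4d has type %s" % (n, type(2 ** n).__name__)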
Source: https://stackoverflow.com/questions/40822290/repr-and-int-take-quadratic-time-in-python