问题
I am using the attached code to integrate a version of Fitzhugh-Nagumo model :
from scipy.integrate import odeint
import numpy as np
import time
P = {'epsilon':0.1,
'a1':1.0,
'a2':1.0,
'b':2.0,
'c':0.2}
def fhn_rhs(V,t,P):
u,v = V[0],V[1]
u_t = u - u**3 - v
v_t = P['epsilon']*(u - P['b']*v - P['c'])
return np.stack((u_t,v_t))
def integrate(func,V0,t,args,step='RK4'):
start = time.clock()
P = args[0]
result=[V0]
for i,tmp in enumerate(t[1:]):
result.append(RK4step(func,result[i],tmp,P,(t[i+1]-t[i])))
print "Integration took ",time.clock() - start, " s"
return np.array(result)
def RK4step(rhs,V,t,P,dt):
k_1 = dt*rhs(V,t,P)
k_2 = dt*rhs((V+(1.0/2.0)*k_1),t,P)
k_3 = dt*rhs((V+(1.0/2.0)*k_2),t,P)
k_4 = dt*rhs((V+k_3),t,P)
return V+(1.0/6.0)*k_1+(1.0/3.0)*k_2+(1.0/3.0)*k_3+(1.0/6.0)*k_4
Comparing between my integrate
and scipy.integrate.odeint
gives the following:
In [8]: import cProfile
In [9]: %timeit integrate(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,))
10 loops, best of 3: 36.4 ms per loop
In [10]: %timeit odeint(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,))
100 loops, best of 3: 3.45 ms per loop
In [11]: cProfile.run('integrate(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,))')
45972 function calls in 0.098 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.098 0.098 <string>:1(<module>)
3996 0.011 0.000 0.078 0.000 fhn.py:20(fhn_rhs)
1 0.002 0.002 0.097 0.097 fhn.py:42(integrate)
999 0.016 0.000 0.094 0.000 fhn.py:52(RK4step)
1 0.000 0.000 0.000 0.000 function_base.py:9(linspace)
7994 0.011 0.000 0.021 0.000 numeric.py:484(asanyarray)
3997 0.029 0.000 0.067 0.000 shape_base.py:282(stack)
11991 0.008 0.000 0.008 0.000 shape_base.py:337(<genexpr>)
3997 0.002 0.000 0.002 0.000 {len}
999 0.001 0.000 0.001 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.arange}
7995 0.010 0.000 0.010 0.000 {numpy.core.multiarray.array}
3997 0.006 0.000 0.006 0.000 {numpy.core.multiarray.concatenate}
1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.result_type}
Any suggestions on how I can make my code run faster? Can I use numba
somehow to accelerate it?
回答1:
You are not comparing the same things. To see at what points odeint
actually evaluates the ODE function, put a print t
statement in (of course while not timing it). odeint
and generally methods with adaptive time steps produce a sparse list of integration samples and interpolate the desired output from them.
You would have to use an error estimator for the RK4 method and based on that replicate this adaptive scheme.
And of course, interpreted python code using vector objects will never be competitive with the compiled FORTRAN code of lsoda called from odeint
using simple arrays during its execution.
An example for using RK4 in an adaptive step size scheme with interpolation:
def RK4Step(f, x, y, h, k1):
k2=f(x+0.5*h, y+0.5*h*k1)
k3=f(x+0.5*h, y+0.5*h*k2)
k4=f(x+ h, y+ h*k3)
return (k1+2*(k2+k3)+k4)/6.0
def RK4TwoStep(f, x, y, h, k1):
step1 = RK4Step(f, x , y , 0.5*h, k1 )
x1, y1 = x+0.5*h, y+0.5*h*step1;
step2 = RK4Step(f, x1, y1, 0.5*h, f(x1, y1) )
return (step1+step2)/2
def RK4odeint(fin,times,y0, tol):
# numpy-ify the inputs
f = lambda t,y : np.array(fin(t,y))
y0 = np.array(y0)
# allocate output structure
yout = np.array([y0]*len(times));
# in consequence, yout[0] = y0;
# initialize integrator variables
h = times[1]-times[0];
hmax = abs(times[-1]-times[0]);
# last and current point of the numerical integration
ycurr = ylast = qcurr = qlast = y0;
tcurr = tlast = times[0];
fcurr = flast = f(tcurr, ycurr);
totalerr = 0.0
totalvar = 0.0
for i, t in enumerate(times[1:]):
# remember that t == t[i+1], result goes to yout[i+1]
while (t-tcurr)*h>0:
# advance the integration
k1, k2 = RK4Step(f,tcurr,ycurr,h, fcurr), RK4TwoStep(f,tcurr,ycurr,h, fcurr);
# RK4 is of fourth order, that is,
# k1 = (y(x+h)-y(x))/h + C*h^4
# k2 = (y(x+h)-y(x))/h + C*h^4/16
# Using the double step k2 gives
# C*h^4/16 = (k2-k1)/15 as local error density
# change h to match the global relative error density tol
# use |k2| as scale for the absolute error
# |k1-k2|/15*hfac^4 = tol*|k2|, h <- h*hfac
scale = max(abs(k2))
steperr = max(abs(k1-k2))/2
# compute the ideal step size factor and sanitize the result to prevent ridiculous changes
hfac = ( tol*scale / ( 1e-16+steperr) )**0.25
hfac = min(10, max(0.01, hfac) )
# repeat the step if there is a significant step size correction
if ( abs(h*hfac)<hmax and (0.6 > hfac or hfac > 3 )):
# recompute with new step size
h *= hfac;
k2 = RK4TwoStep(f, tcurr, ycurr, h, fcurr) ;
# update and cycle the integration points
ylast = ycurr; ycurr = ycurr + h*k2;
tlast = tcurr; tcurr += h;
flast = fcurr; fcurr = f(tcurr, ycurr);
# cubic Bezier control points
qlast = ylast + (tcurr-tlast)/3*flast;
qcurr = ycurr - (tcurr-tlast)/3*fcurr;
totalvar += h*scale;
totalerr = (1+h*scale)*totalerr + h*steperr;
reportstr = "internal step to t=%12.8f \t" % tcurr;
#now tlast <= t <= tcurr, can interpolate the value for yout[i+1] using the cubic Bezier formula
s = (t - tlast)/(tcurr - tlast);
yout[i+1] = (1-s)**2*((1-s)*ylast + 3*s*qlast) + s**2*(3*(1-s)*qcurr + s*ycurr)
return np.array(yout)
来源:https://stackoverflow.com/questions/40866387/python-speeding-up-my-runge-kutta-integration-code-challenge