Question
This code has multiple for-loops and the lists I read in have 999 points each. I want to iterate this up to 10,000 times. However, even iterating it only 2 times takes nearly 10 minutes.
Even though I'm posting this specific code, I think an answer to my question can help others run their codes with a lot of data more quickly.
Any of your advice is appreciated. Thanks a lot.
What this code does: Basically, I'm reading in arrays from a text file as lists. Each list (e.g. x1, y1, z1, etc.) has 999 elements. I operate on each element in a list based on the other elements (the two inner loops). The end result is a totally new list which I've called x2. The code is then supposed to repeat these operations "n # of times" (the outer loop).
My issue is that I can only repeat this for a few iterations before it just takes too long to execute.
import matplotlib.pyplot as plt
from astropy.table import Table
from astropy.io import ascii
import numpy as np
import argparse
import time

#for 200
start_time = time.time()

npoints = 999

n1, mass1, x1, y1, z1, vx1, vy1, vz1, fx_list, fy_list, fz_list = [], [], [], [], [], [], [], [], [], [], []
AngL_list = []
Etot0_list = []

G = 1
dt = .01

with open('homo_sph_N1000_R3_v1.dat') as f:
    for row in f.readlines():
        if not row.startswith("#"):
            spaces = row.split(' ')
            n1.append(float(spaces[0]))
            mass1.append(float(spaces[1]))
            x1.append(float(spaces[2]))
            y1.append(float(spaces[3]))
            z1.append(float(spaces[4]))
            vx1.append(float(spaces[5]))
            vy1.append(float(spaces[6]))
            vz1.append(float(spaces[7]))
for n in range(2):
    # changes the particle on which the forces are acting
    for xn in range(0, npoints):
        # changes the forces from other particles acting on the particle
        for step in range(0, npoints):
            # Here we find the acceleration for every particle
            fx = ((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ ( abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2.+(.2)**2 )**(3./2.))
            fy = ((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ ( abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))
            fz = ((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ ( abs((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))
            # Then store it in an array
            fx_list.append(fx)
            fy_list.append(fy)
            fz_list.append(fz)

    # Now I need to split that array up by npoints; each particle has npoints forces acting on it.
    fxx = np.array_split(fx_list, npoints)
    fyy = np.array_split(fy_list, npoints)
    fzz = np.array_split(fz_list, npoints)

    # since the force on a particle is the sum of all forces acting on it, I'm summing each array, e.g. [1,2,3] -> 6
    fxxx_list = []
    fyyy_list = []
    fzzz_list = []
    for xn in range(0, npoints):
        fxxx = np.sum(fxx[xn])
        fyyy = np.sum(fyy[xn])
        fzzz = np.sum(fzz[xn])
        # and save that in an array. Now I have the acceleration on each particle.
        fxxx_list.append(fxxx)
        fyyy_list.append(fyyy)
        fzzz_list.append(fzzz)
    # This is where I begin the integration
    vx2 = []
    vy2 = []
    vz2 = []
    for xn in range(0, npoints):
        vx11 = vx1[xn] + .5*(fxxx_list[xn]+fxxx_list[xn])*dt
        vy11 = vy1[xn] + .5*(fyyy_list[xn]+fyyy_list[xn])*dt
        vz11 = vz1[xn] + .5*(fzzz_list[xn]+fzzz_list[xn])*dt
        vx2.append(vx11)
        vy2.append(vy11)
        vz2.append(vz11)

    x2 = []
    y2 = []
    z2 = []
    for xn in range(0, npoints):
        x11 = (x1[xn] + vx2[xn]*dt) + (.5*fxxx_list[xn]*(dt**2))
        y11 = (y1[xn] + vy2[xn]*dt) + (.5*fyyy_list[xn]*(dt**2))
        z11 = (z1[xn] + vz2[xn]*dt) + (.5*fzzz_list[xn]*(dt**2))
        x2.append(x11)
        y2.append(y11)
        z2.append(z11)

    x1, y1, z1, vx1, vy1, vz1 = x2, y2, z2, vx2, vy2, vz2
    print x2, y2
    plt.scatter(x2, y2)
print("--- %s seconds ---" % (time.time() - start_time))
plt.show()
Answer 1:
It's only a small speed-up, but the code seems to be doing a lot of x**2 (x squared). In Python 3 it's generally slower to execute x**2 than x*x. Consider a simple test program:
import time

iteration_count = 99999999

# Do a lot of squaring with the ** operator
start1 = time.time()
sum = 0
for i in range(iteration_count):
    sum += i ** 2
end1 = time.time()

# Do a lot of squaring with i * i
start2 = time.time()
sum = 0
for i in range(iteration_count):
    sum += i * i
end2 = time.time()

print("x**2 => %f seconds" % (end1 - start1))
print("x*x  => %f seconds" % (end2 - start2))
Which gives me the results:
$ python3 ./squared.py
x**2 => 21.347830 seconds
x*x => 8.983334 seconds
I ran it a bunch of times; the result doesn't vary much.
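If you want to sanity-check this on your own machine without writing the loop by hand, here is a minimal sketch using the standard library's timeit module (the sample value and repetition count are just illustrative):

import timeit

# Compare x**2 against x*x on a representative float; "number" is how many times each statement runs.
t_pow = timeit.timeit("x ** 2", setup="x = 1.2345", number=10000000)
t_mul = timeit.timeit("x * x", setup="x = 1.2345", number=10000000)
print("x**2 => %f seconds" % t_pow)
print("x*x  => %f seconds" % t_mul)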
The code in the question is doing a lot of calculations to make fx, fy and fz (these seem to be the same for each? is this correct?). If there is any commonality in these computations, the common intermediate results should be pulled out and calculated only once.
For example, instead of:
fx=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fy=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fz=((G*mass1[xn]*mass1[step+1]*((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
The first part should be computed only once:
g_mass = G*mass1[xn]*mass1[step+1]
fx=((g_mass * ((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fy=((g_mass * ((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
fz=((g_mass * ((x1[step+1]**2.+y1[step+1]**2.+z1[step+1]**2.) ...
And similarly for any parts of these formulae with common components.
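To make that concrete, here is a rough sketch of what a fully factored inner-loop body could look like (names like r2_other and dr2 are purely illustrative; it keeps the question's formula exactly as written, which is why fx, fy and fz come out identical):

# Hoist everything the three expressions share into named intermediates.
g_mass = G * mass1[xn] * mass1[step+1]
r2_other = x1[step+1]*x1[step+1] + y1[step+1]*y1[step+1] + z1[step+1]*z1[step+1]
r2_self = x1[xn]*x1[xn] + y1[xn]*y1[xn] + z1[xn]*z1[xn]
dr2 = r2_other - r2_self                      # shared numerator term
denom = (abs(dr2)**2 + 0.2**2) ** 1.5         # shared softened denominator
fx = fy = fz = g_mass * dr2 / denom           # with the formula as written, all three are equal

Each pass through the inner loop then does the expensive squaring and exponentiation once instead of three times.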
Answer 2:
I think you should be able to get a big speed-up (maybe of order a factor of 1000) if you convert your inputs to numpy arrays, do operations on the numpy arrays, and preallocate numpy arrays to their required size rather than using lists and appending to them as you go.
For example, just taking the start of your example, you could do something like the following (although I can't guarantee it's doing exactly what you want; it's just guidance):
with open('homo_sph_N1000_R3_v1.dat') as f:
    for row in f.readlines():
        if not row.startswith("#"):
            spaces = row.split(' ')
            n1.append(float(spaces[0]))
            mass1.append(float(spaces[1]))
            x1.append(float(spaces[2]))
            y1.append(float(spaces[3]))
            z1.append(float(spaces[4]))
            vx1.append(float(spaces[5]))
            vy1.append(float(spaces[6]))
            vz1.append(float(spaces[7]))

# convert to numpy arrays
n1 = np.array(n1)
mass1 = np.array(mass1)
# KEEP DOING THIS FOR THE OTHER INPUTS

for n in range(2):
    # PREALLOCATE
    fx = np.zeros((npoints, npoints-1))
    fy = np.zeros((npoints, npoints-1))
    fz = np.zeros((npoints, npoints-1))

    # changes the particle on which the forces are acting
    for xn in range(0, npoints):
        # changes the forces from other particles acting on the particle
        # REMOVE THE INNER FOR LOOP AND JUST USE THE ARRAYS
        #for step in range(0,npoints):
        # Here we find the acceleration for every particle
        fx[xn] = ((G*mass1[xn]*mass1[1:]*((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ ( abs((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2.+(.2)**2 )**(3./2.))
        fy[xn] = ((G*mass1[xn]*mass1[1:]*((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ ( abs((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))
        fz[xn] = ((G*mass1[xn]*mass1[1:]*((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.)))/ ( abs((x1[1:]**2.+y1[1:]**2.+z1[1:]**2.)-(x1[xn]**2.+y1[xn]**2.+z1[xn]**2.))**2+(.2)**2 )**(3./2.))

    # Now I need to split that array up by npoints; each particle has npoints forces acting on it.
    fxx = np.array_split(fx, npoints)
    fyy = np.array_split(fy, npoints)
    fzz = np.array_split(fz, npoints)

    # since the force on a particle is the sum of all forces acting on it, I'm summing each array, e.g. [1,2,3] -> 6
    fxxx = np.sum(fxx[xn], axis=1)
    fyyy = np.sum(fyy[xn], axis=1)
    fzzz = np.sum(fzz[xn], axis=1)
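Going one step further, the whole pairwise computation can be expressed with broadcasting so that both loops disappear. Below is a rough, untested sketch under the same assumptions as above (np.loadtxt for reading the file, the 0.2 softening constant, and the fact that the question's formula makes fx, fy and fz identical); it is meant as guidance for the shape of the solution, not a drop-in replacement:

import numpy as np

G = 1.0
npoints = 999

# Read every column at once instead of appending to lists row by row.
data = np.loadtxt('homo_sph_N1000_R3_v1.dat', comments='#')
mass1 = data[:npoints, 1]
x1, y1, z1 = data[:npoints, 2], data[:npoints, 3], data[:npoints, 4]

# Squared radius of each particle, shape (npoints,).
r2 = x1*x1 + y1*y1 + z1*z1

# Pairwise differences r2[j] - r2[i] via broadcasting, shape (npoints, npoints).
dr2 = r2[np.newaxis, :] - r2[:, np.newaxis]

# Same softened denominator as the loop version; the diagonal stays finite because of the 0.2**2 term.
denom = (np.abs(dr2)**2 + 0.2**2) ** 1.5

# Pairwise interaction matrix: entry [i, j] is the contribution of particle j acting on particle i.
f_pair = G * mass1[:, np.newaxis] * mass1[np.newaxis, :] * dr2 / denom

# Summing over axis=1 replaces both the inner for loop and the array_split/np.sum bookkeeping.
fxxx = f_pair.sum(axis=1)

The same broadcasting pattern carries over to the velocity and position updates, which become single array expressions instead of per-particle loops.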
Source: https://stackoverflow.com/questions/54563163/how-to-speed-up-a-python-2-program-with-multiple-nested-for-loops