How to speed up the following code using numba?

问题

I am doing a molecular dynamics simulation. It consists of numerical integration, many for loops, manipulating large NumPy arrays. I have tried to use NumPy function and arrays wherever possible. But the code is still too slow. I thought of using numba jit as a speedup. But it always throws an error message.

Here is the code.

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020

@author: Sandipan
"""

import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys

# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp   = 0.851# #Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM     =3
N       =500
density =0.776
Rcutoff =3


#----------------------Function Definitions---------------------

#------------------Initialise Configuration--------

@jit(nopython=True)
def initialise_config(N,DIM,density):
    velocity = (np.random.randn(N,DIM)-0.5)


    # Set initial momentum to zero
    COM_V = np.sum(velocity)/N     #Center of mass velocity
    velocity = velocity - COM_V    # Fix any center-of-mass drift

    # Calculate initial kinetic energy
    k_energy=0
    for i in range (N):
        k_energy+=np.dot(velocity[i],velocity[i])
    vscale=np.sqrt(DIM*temp/k_energy)
    velocity*=vscale

    #Initialize with zeroes
    coords = np.zeros([N,DIM]);

    # Get the cooresponding box size
    L = (N/density)**(1.0/DIM)

    """ Find the lowest perfect cube greater than or equal to the number of
     particles"""
    nCube = 2

    while (nCube**3 < N):
        nCube = nCube + 1



    # Assign particle positions
    ip=-1
    x=0
    y=0
    z=0

    for i in range(0,nCube):
        for j in range(0,nCube):
            for k in range(0,nCube):
                if(ip<N):
                    x=(i+0.5)*(L/nCube)
                    y=(j+0.5)*(L/nCube)
                    z=(k+0.5)*(L/nCube)
                    coords[ip]=np.array([x,y,z])
                    ip=ip+1
                else:
                    break
    return coords,velocity,L


@jit(nopython=True)
def wrap(pos,L):
    '''Apply perodic boundary conditions.'''

    for i in range (len(pos)):
        for k in range(DIM):
                if (pos[i][k]>0.5):
                    pos[i][k]=pos[i][k]-1
                if (pos[i][k]<-0.5):
                    pos[i][k]=pos[i][k]+1


    return (pos)    






@jit(nopython=True)
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
    # Compute forces on positions using the Lennard-Jones potential
    # Uses double nested loop which is slow O(N^2) time unsuitable for large systems
    Sij = np.zeros(DIM) # Box scaled units
    Rij = np.zeros(DIM) # Real space units

    #Set all variables to zero
    ene_pot = np.zeros(N)
    acc = acc*0
    virial=0.0

    # Loop over all pairs of particles
    for i in range(N-1):
        for j in range(i+1,N): #i+1 to N ensures we do not double count
            Sij = pos[i]-pos[j] # Distance in box scaled units
            for l in range(DIM): # Periodic interactions
                if (np.abs(Sij[l])>0.5):
                    Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If distance is greater than 0.5  (scaled units) then subtract 0.5 to find periodic interaction distance.

            Rij   = L*Sij # Scale the box to the real units in this case reduced LJ units
            Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance

            if(Rsqij < Rcutoff**2):
                # Calculate LJ potential inside cutoff
                # We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
                rm2      = 1.0/Rsqij # 1/r^2
                rm6      = rm2**3
                forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
                phi      =4*(rm6**2-rm6)

                ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy

                ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy

                virial     = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
                acc[i]     = acc[i]+forcefact*Sij # Accumulate forces
                acc[j]     = acc[j]-forcefact*Sij # (Fji=-Fij)
    return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient


@jit(nopython=True)
def Calculate_Temperature(vel,L,DIM,N):

    ene_kin = 0.0

    for i in range(N):
        real_vel = L*vel[i]
        ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)

    ene_kin_aver = 1.0*ene_kin/N
    temperature = 2.0*ene_kin_aver/DIM

    return ene_kin_aver,temperature


# Main MD loop
@jit(nopython=True)
def main():

    # Vectors to store parameter values at each step

    ene_kin_aver = np.ones(NSteps)
    ene_pot_aver = np.ones(NSteps)
    temperature = np.ones(NSteps)
    virial = np.ones(NSteps)
    pressure = np.ones(NSteps)


    pos,vel,L = initialise_config(N,DIM,density)
    acc = (np.random.randn(N,DIM)-0.5)
    volume=L**3

    # Open file which we will save the outputs to
    if os.path.exists('energy2'):
        os.remove('energy2')
    f = open('traj.xyz', 'w')

    for k in range(NSteps):

        # Refold positions according to periodic boundary conditions
        pos=wrap(pos,L)

        # r(t+dt) modify positions according to velocity and acceleration
        pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Rescale velocities and take half step
        chi = np.sqrt(temp/temperature[k])
        vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2

        # Compute forces a(t+dt),ene_pot,virial
        acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3

        # Complete the velocity step 
        vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Calculate pressure
        pressure[k]= density*temperature[k] + virial[k]/volume

    # Print output to file every DumpFreq number of steps
        if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values

            f.write("%s\n" %(N)) # Write the number of particles to file
            # Write all of the quantities at this step to the file
            f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
            for n in range(N): # Write the positions to file
                f.write("X"+" ")
                for l in range(DIM):
                    f.write(str(pos[n][l]*L)+" ")
                f.write("\n")


        if (k%5==0):
           # print("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            sys.stdout.write("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            sys.stdout.flush()

    return ene_kin_aver, ene_pot_aver, temperature, pressure, pos

#------------------------------------------------------    
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()




# Plot all of the quantities
def plot():
    plt.figure(figsize=[7,12])
    plt.rc('xtick', labelsize=15) 
    plt.rc('ytick', labelsize=15)
    plt.subplot(4, 1, 1)
    plt.plot(ene_kin_aver,'k-')
    plt.ylabel(r"$E_{K}", fontsize=20)
    plt.subplot(4, 1, 2)
    plt.plot(ene_pot_aver,'k-')
    plt.ylabel(r"$E_{P}$", fontsize=20)
    plt.subplot(4, 1, 3)
    plt.plot(temperature,'k-')
    plt.ylabel(r"$T$", fontsize=20)
    plt.subplot(4, 1, 4)
    plt.plot(pressure,'k-')
    plt.ylabel(r"$P$", fontsize=20)
    plt.show()

plot()

The error I am getting is:


runfile('E:/Project/LJMD4.py', wdir='E:/Project')
Traceback (most recent call last):

  File "<ipython-input-8-aeebce887079>", line 1, in <module>
    runfile('E:/Project/LJMD4.py', wdir='E:/Project')

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "E:/Project/LJMD4.py", line 226, in <module>
    ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 351, in _compile_for_args
    error_rewrite(e, 'typing')

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 318, in error_rewrite
    reraise(type(e), e, None)

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\six.py", line 658, in reraise
    raise value.with_traceback(tb)

TypingError: cannot determine Numba type of <class 'builtin_function_or_method'>

When I searched on the internet, I found numba may not support some function I am using. But I am not using any Pandas or other data frame. I am just using pure python loop or NumPy which as far numba documentation suggests are well supported. I have tried removing numba from some functions and making nopython=0 but still, it sends different error messages. I can't figure out what is wrong with it. Without numba the code will not be feasible for actual use. Any further tips on speedup will be of great help. Thank you in advance.

回答1:

A few common errors

Use of unsupported functions

file operations, many string operation. These can be in a objmode block. In this example I commented these things out.
Wrong way of initializing arrays

Only tuples are supported, not lists (Numpy accepts both but the documentation there are only tuples mentioned)
Checking for division by zero and throwing an exception

This is the standard behavior of Python, but not Numpy. If you want to avoid this overhead (if/else on every division) turn on the Numpy default behaviour (error_model="numpy").
Use of globals

Globals are hard coded into the compiled code (as you would directly write them into the code). They cannot be changed without recompilation.
Wrong indexing of Numpy arrays

pos[i][k] instead of pos[i,k]. Numba may optimize this away, but this has a quite noticeable negative impact in Pure Python code.

Working version

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020

@author: Sandipan
"""

import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys

# All globals are compile time constants
# recompilation needed if you change this values
# Better way: hand a tuple of all needed vars to the functions
# params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)

# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp   = 0.851# #Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM     =3
N       =500
density =0.776
Rcutoff =3

params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)

#----------------------Function Definitions---------------------

#------------------Initialise Configuration--------

#error_model=True
#Do you really want to search for division by zeros (additional cost)?

@jit(nopython=True,error_model="numpy")
def initialise_config(N,DIM,density):
    velocity = (np.random.randn(N,DIM)-0.5)


    # Set initial momentum to zero
    COM_V = np.sum(velocity)/N     #Center of mass velocity
    velocity = velocity - COM_V    # Fix any center-of-mass drift

    # Calculate initial kinetic energy
    k_energy=0
    for i in range (N):
        k_energy+=np.dot(velocity[i],velocity[i])
    vscale=np.sqrt(DIM*temp/k_energy)
    velocity*=vscale

    #wrong array initialization (use tuple)
    #Initialize with zeroes
    coords = np.zeros((N,DIM))

    # Get the cooresponding box size
    L = (N/density)**(1.0/DIM)

    """ Find the lowest perfect cube greater than or equal to the number of
     particles"""
    nCube = 2

    while (nCube**3 < N):
        nCube = nCube + 1



    # Assign particle positions
    ip=-1
    x=0
    y=0
    z=0

    for i in range(0,nCube):
        for j in range(0,nCube):
            for k in range(0,nCube):
                if(ip<N):
                    x=(i+0.5)*(L/nCube)
                    y=(j+0.5)*(L/nCube)
                    z=(k+0.5)*(L/nCube)
                    coords[ip]=np.array([x,y,z])
                    ip=ip+1
                else:
                    break
    return coords,velocity,L


@jit(nopython=True)
def wrap(pos,L):
    '''Apply perodic boundary conditions.'''

    #correct array indexing
    for i in range (len(pos)):
        for k in range(DIM):
                if (pos[i,k]>0.5):
                    pos[i,k]=pos[i,k]-1
                if (pos[i,k]<-0.5):
                    pos[i,k]=pos[i,k]+1


    return (pos)    


@jit(nopython=True,error_model="numpy")
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
    # Compute forces on positions using the Lennard-Jones potential
    # Uses double nested loop which is slow O(N^2) time unsuitable for large systems
    Sij = np.zeros(DIM) # Box scaled units
    Rij = np.zeros(DIM) # Real space units

    #Set all variables to zero
    ene_pot = np.zeros(N)
    acc = acc*0
    virial=0.0

    # Loop over all pairs of particles
    for i in range(N-1):
        for j in range(i+1,N): #i+1 to N ensures we do not double count
            Sij = pos[i]-pos[j] # Distance in box scaled units
            for l in range(DIM): # Periodic interactions
                if (np.abs(Sij[l])>0.5):
                    Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If distance is greater than 0.5  (scaled units) then subtract 0.5 to find periodic interaction distance.

            Rij   = L*Sij # Scale the box to the real units in this case reduced LJ units
            Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance

            if(Rsqij < Rcutoff**2):
                # Calculate LJ potential inside cutoff
                # We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
                rm2      = 1.0/Rsqij # 1/r^2
                rm6      = rm2**3
                forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
                phi      =4*(rm6**2-rm6)

                ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy

                ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy

                virial     = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
                acc[i]     = acc[i]+forcefact*Sij # Accumulate forces
                acc[j]     = acc[j]-forcefact*Sij # (Fji=-Fij)
    #If you want to get get the best performance, sum directly in the loop intead of 
    #summing at the end np.sum(ene_pot)
    return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient


@jit(nopython=True,error_model="numpy")
def Calculate_Temperature(vel,L,DIM,N):

    ene_kin = 0.0

    for i in range(N):
        real_vel = L*vel[i]
        ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)

    ene_kin_aver = 1.0*ene_kin/N
    temperature = 2.0*ene_kin_aver/DIM

    return ene_kin_aver,temperature


# Main MD loop
@jit(nopython=True,error_model="numpy")
def main(params):
    NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff=params
    # Vectors to store parameter values at each step

    ene_kin_aver = np.ones(NSteps)
    ene_pot_aver = np.ones(NSteps)
    temperature = np.ones(NSteps)
    virial = np.ones(NSteps)
    pressure = np.ones(NSteps)


    pos,vel,L = initialise_config(N,DIM,density)
    acc = (np.random.randn(N,DIM)-0.5)
    volume=L**3

    # Open file which we will save the outputs to
    # Unsupported operations have to be in an objectmode block
    # or simply write the outputs at the end in a pure Python function
    """
    if os.path.exists('energy2'):
        os.remove('energy2')
    f = open('traj.xyz', 'w')
    """
    for k in range(NSteps):

        # Refold positions according to periodic boundary conditions
        pos=wrap(pos,L)

        # r(t+dt) modify positions according to velocity and acceleration
        pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Rescale velocities and take half step
        chi = np.sqrt(temp/temperature[k])
        vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2

        # Compute forces a(t+dt),ene_pot,virial
        acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3

        # Complete the velocity step 
        vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Calculate pressure
        pressure[k]= density*temperature[k] + virial[k]/volume

        # Print output to file every DumpFreq number of steps
        """
        if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values

            f.write("%s\n" %(N)) # Write the number of particles to file
            # Write all of the quantities at this step to the file
            f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
            for n in range(N): # Write the positions to file
                f.write("X"+" ")
                for l in range(DIM):
                    f.write(str(pos[n][l]*L)+" ")
                f.write("\n")

        #Simple prints without formating are supported
        if (k%5==0):
            #print("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            #sys.stdout.write("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            #sys.stdout.flush()
        """
    return ene_kin_aver, ene_pot_aver, temperature, pressure, pos

#------------------------------------------------------    
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main(params)




# Plot all of the quantities
def plot():
    plt.figure(figsize=[7,12])
    plt.rc('xtick', labelsize=15) 
    plt.rc('ytick', labelsize=15)
    plt.subplot(4, 1, 1)
    plt.plot(ene_kin_aver,'k-')
    plt.ylabel(r"$E_{K}", fontsize=20)
    plt.subplot(4, 1, 2)
    plt.plot(ene_pot_aver,'k-')
    plt.ylabel(r"$E_{P}$", fontsize=20)
    plt.subplot(4, 1, 3)
    plt.plot(temperature,'k-')
    plt.ylabel(r"$T$", fontsize=20)
    plt.subplot(4, 1, 4)
    plt.plot(pressure,'k-')
    plt.ylabel(r"$P$", fontsize=20)
    plt.show()

plot()

来源：https://stackoverflow.com/questions/60909472/how-to-speed-up-the-following-code-using-numba

标签

python-3.x

numpy

numba

scientific-computing