GEKKO and Scipy.optimize lead to different results in nonlinear parameter estimation

问题

I am learning how to use GEKKO for parameter estimation problems and as a first step, I am developing example problems that I have previously implemented using Scipy minimization routines. These have been done following the information available in APMonitor.com and the courses available within. The current problem is a batch reactor simulation of a methanol-to-hydrocarbons process obtained from: http://www.daetools.com/docs/tutorials-all.html#tutorial-che-opt-5

The model description can be followed in the code described further below, but the elementary steps considered are:

   A --> B   
   A + B --> C   
   C + B --> P   
   A --> C   
   A --> P   
   A + B --> P

where experimental data is available for concentrations of A, C, and P as a function of time. The goal of the model is to estimate the rate constants for the six elementary reactions (k1-k6). The difficulty I am encountering right now is that my GEKKO model and my Scipy.optimize-based model lead to different parameter estimates, despite using the same experimental data and initial guesses for the parameters. I have also compared this model to ones developed using gPROMS and Athena Visual Studio, with the Scipy model agreeing with the parameter estimates obtained with these closed-source programs. The estimated parameters for each program are shown below:

Scipy model (L-BFGS-B optimizer): [k1 k2 k3 k4 k5 k6] = [2.779, 0., 0.197, 3.042, 2.148, 0.541]
GEKKO model (IPOPT optimizer): [k1 k2 k3 k4 k5 k6] = [3.7766387559, 1.1826920269e-07, 0.21242442412, 4.130394645, 2.4232122905, 3.3140978171]

Interestingly, both models lead to the same objective function value of 0.0123 at the end of the optimization and look similar in the plots of species concentration vs. time. I have tried changing GEKKO's optimizer and tightening the tolerances to 1E-8, to no avail. My guess is that my GEKKO model isn't properly set up, but I cannot find the issue with it. Any help in pointing me to possible issues that may be leading to the model discrepancies would be appreciated. I attach the two scripts below:

Scipy model

import numpy as np
from scipy.integrate import solve_ivp
from scipy.optimize import minimize
import matplotlib.pyplot as plt

# Experimental data: sample times and the measured concentrations of
# species A, C and P at each sample time (17 points per series).
times = np.array([
    0.0, 0.071875, 0.14375, 0.215625, 0.2875, 0.359375,
    0.43125, 0.503125, 0.575, 0.646875, 0.71875, 0.790625,
    0.8625, 0.934375, 1.00625, 1.078125, 1.15,
])
A_obs = np.array([
    1.0, 0.552208, 0.300598, 0.196879, 0.101175, 0.065684,
    0.045096, 0.02888, 0.018433, 0.011509, 0.006215, 0.004278,
    0.002698, 0.001944, 0.001116, 0.000732, 0.000426,
])
C_obs = np.array([
    0.0, 0.187768, 0.262406, 0.350412, 0.32511, 0.367181,
    0.348264, 0.325085, 0.355673, 0.361805, 0.363117, 0.327266,
    0.330211, 0.385798, 0.358132, 0.380497, 0.383051,
])
P_obs = np.array([
    0.0, 0.117684, 0.175074, 0.236679, 0.234442, 0.270303,
    0.272637, 0.274075, 0.278981, 0.297151, 0.297797, 0.298722,
    0.326645, 0.303198, 0.277822, 0.284194, 0.301471,
])

def rxn(x, k):
    """Evaluate the six power-law reaction rates (r = k [A][B] form).

    Parameters
    ----------
    x : indexable
        Concentrations in the order [A, B, C, P].
    k : indexable
        Rate constants in the order [k1, k2, k3, k4, k5, k6].

    Returns
    -------
    list
        [r1, r2, r3, r4, r5, r6], the rate of each reaction.
    """
    # P is read for symmetry with the state layout but enters no rate law.
    conc_a, conc_b, conc_c, _conc_p = x[0], x[1], x[2], x[3]
    k1, k2, k3, k4, k5, k6 = (k[i] for i in range(6))

    return [
        k1 * conc_a,            # r1: first order in A
        k2 * conc_a * conc_b,   # r2: A with B
        k3 * conc_c * conc_b,   # r3: C with B
        k4 * conc_a,            # r4: first order in A
        k5 * conc_a,            # r5: first order in A
        k6 * conc_a * conc_b,   # r6: A with B
    ]

#mass balance diff eqs, function calls rxn function 

def mass_balances(t, x, *args):
    """Right-hand side of the species balances for the ODE integrator.

    Parameters
    ----------
    t : float
        Current time; not used by the balances themselves.
    x : indexable
        Concentrations in the order [A, B, C, P].
    *args
        The rate constants, collected into a tuple and handed to ``rxn``.

    Returns
    -------
    list
        [dA/dt, dB/dt, dC/dt, dP/dt].
    """
    r1, r2, r3, r4, r5, r6 = rxn(x, args)

    return [
        - r1 - r2 - r4 - r5 - r6,   # A
        r1 - r2 - r3 - r6,          # B
        r2 - r3 + r4,               # C
        r3 + r5 + r6,               # P
    ]
    
# Initial concentrations in the order [A, B, C, P]: only A is present at start.
IC = [1.0, 0, 0, 0]
# Starting values for the six rate constants k1..k6.
ki = [1] * 6

#Objective function definition

def obj_fun(k):
    """Return the sum of squared errors between model and data.

    The batch-reactor ODEs are integrated over the time span of the data
    with rate constants ``k`` and evaluated at the measurement times. The
    simulated A, C and P profiles (rows 0, 2 and 3 of the solution) are
    then compared with ``A_obs``, ``C_obs`` and ``P_obs``.

    Parameters
    ----------
    k : sequence of 6 floats
        Rate constants [k1, ..., k6].

    Returns
    -------
    float
        Sum over all species and sample times of (model - observed)**2.
    """
    # ``args`` must be a tuple. ``(k)`` is just ``k`` in parentheses and only
    # worked because the array happened to be star-unpacked; build the tuple
    # explicitly so each rate constant is one extra positional argument.
    sol = solve_ivp(mass_balances, [min(times), max(times)], IC,
                    args=tuple(k), t_eval=times)

    y_model = np.vstack((sol.y[0], sol.y[2], sol.y[3])).T
    obs = np.vstack((A_obs, C_obs, P_obs)).T

    return np.sum((y_model - obs) ** 2)

# Each of the six rate constants is bounded below by zero, unbounded above.
bnds = ((0, None),) * 6
model = minimize(obj_fun, ki, bounds=bnds, method='L-BFGS-B')
k_opt = model.x

print(k_opt.round(decimals=3))

# Re-simulate with the fitted constants. ``args`` has to be a tuple, so the
# parameter array is converted explicitly instead of being passed as ``(x)``.
y_calc = solve_ivp(mass_balances, [min(times), max(times)], IC,
                   args=tuple(k_opt), t_eval=times)

# Model curves for all four species, with the measured A, C and P overlaid.
plt.plot(y_calc.t, y_calc.y.T)
plt.plot(times, A_obs, 'bo')
plt.plot(times, C_obs, 'gx')
plt.plot(times, P_obs, 'rs')
# Without this call nothing is displayed when the file is run as a script.
plt.show()

GEKKO Model

import numpy as np
import matplotlib.pyplot as plt
from gekko import GEKKO

#Experimental data
# Sample times and the measured concentrations of A, C and P at those times
# (17 points per series).
times  = np.array([0.0, 0.071875, 0.143750, 0.215625, 0.287500, 0.359375, 0.431250,
                      0.503125, 0.575000, 0.646875, 0.718750, 0.790625, 0.862500,
                      0.934375, 1.006250, 1.078125, 1.150000])
A_obs = np.array([1.0, 0.552208, 0.300598, 0.196879, 0.101175, 0.065684, 0.045096,
                      0.028880, 0.018433, 0.011509, 0.006215, 0.004278, 0.002698,
                      0.001944, 0.001116, 0.000732, 0.000426])
C_obs = np.array([0.0, 0.187768, 0.262406, 0.350412, 0.325110, 0.367181, 0.348264,
                      0.325085, 0.355673, 0.361805, 0.363117, 0.327266, 0.330211,
                      0.385798, 0.358132, 0.380497, 0.383051])
P_obs = np.array([0.0, 0.117684, 0.175074, 0.236679, 0.234442, 0.270303, 0.272637,
                      0.274075, 0.278981, 0.297151, 0.297797, 0.298722, 0.326645,
                      0.303198, 0.277822, 0.284194, 0.301471])


# Build the model with a local solve (remote = False).
m = GEKKO(remote = False)

# The measurement times are the model's time grid; t is kept as an alias
# for the plotting calls at the bottom.
t = m.time = times


# Measured profiles, declared as controlled variables initialised with the
# data and bounded below by zero.
# NOTE(review): no equation below ties Am/Cm/Pm to the model states, and they
# are variables rather than fixed inputs -- confirm this is intended; a
# parameter may be the better fit for fixed measured data.
Am = m.CV(value=A_obs, lb = 0)
Cm = m.CV(value=C_obs, lb = 0)
Pm = m.CV(value=P_obs, lb = 0)

# Model states: concentrations of A, B, C and P, starting at 1, 0, 0, 0 and
# bounded below by zero.
A = m.Var(1, lb = 0)
B = m.Var(0, lb = 0)
C = m.Var(0, lb = 0)
P = m.Var(0, lb = 0)

# Turn on measurement feedback for the three measured profiles.
Am.FSTATUS = 1
Cm.FSTATUS = 1
Pm.FSTATUS = 1

# Rate constants k1..k6: one value each over the whole horizon, initial
# guess 1, bounded below by zero.
k1 = m.FV(1, lb = 0)
k2 = m.FV(1, lb = 0)
k3 = m.FV(1, lb = 0)
k4 = m.FV(1, lb = 0)
k5 = m.FV(1, lb = 0)
k6 = m.FV(1, lb = 0)

# STATUS = 1 lets the solver adjust each rate constant.
k1.STATUS = 1
k2.STATUS = 1
k3.STATUS = 1
k4.STATUS = 1
k5.STATUS = 1
k6.STATUS = 1

# Reaction rates r1..r6, held as non-negative intermediate variables.
r1 = m.Var(0, lb = 0)
r2 = m.Var(0, lb = 0)
r3 = m.Var(0, lb = 0)
r4 = m.Var(0, lb = 0)
r5 = m.Var(0, lb = 0)
r6 = m.Var(0, lb = 0)

# Power-law rate expressions defining r1..r6.
m.Equation(r1 == k1 * A)
m.Equation(r2 == k2 * A * B)
m.Equation(r3 == k3 * C * B)
m.Equation(r4 == k4 * A)
m.Equation(r5 == k5 * A)
m.Equation(r6 == k6 * A * B)


# Mass-balance differential equations for A, B, C and P, written in terms
# of the rate variables r1..r6 defined above.
m.Equation(A.dt() == - r1 - r2 - r4 - r5 - r6)
m.Equation(B.dt() ==  r1 - r2 - r3 - r6)
m.Equation(C.dt() ==  r2 - r3 + r4)
m.Equation(P.dt() ==  r3 + r5 + r6)

# Objective: squared mismatch between the states A, P, C and Am, Pm, Cm.
m.Obj((A-Am)**2+(P-Pm)**2+(C-Cm)**2)


# IMODE 5 selects GEKKO's dynamic estimation mode.
m.options.IMODE = 5
m.options.SOLVER = 3 #IPOPT optimizer
# Tightened solver tolerances.
m.options.RTOL = 1E-8
m.options.OTOL = 1E-8
m.solve()

# Collect the fitted rate constants (element 0 of each FV's value list).
k_opt = [k1.value[0],k2.value[0], k3.value[0], k4.value[0], k5.value[0], k6.value[0]]
print(k_opt)
# Model profiles (A, C, P, B) as lines, measurements as markers.
plt.plot(t,A)
plt.plot(t,C)
plt.plot(t,P)
plt.plot(t,B)
plt.plot(times,A_obs,'bo')
plt.plot(times,C_obs,'gx')
plt.plot(times,P_obs,'rs')

回答1:

Here are a few suggestions:

- Set m.options.NODES to 3 or higher (up to 6) to get better integration accuracy.
- Set Am, Cm, and Pm as parameters instead of variables. They are fixed inputs.
- Try different initial conditions. There may be multiple local minima.
- The objective function may be flat, so that different parameter values give the same objective function value. You can test the parameter confidence intervals to see whether the data gives narrow or wide joint confidence regions.

Here are the results with the modifications:

import numpy as np
import matplotlib.pyplot as plt
from gekko import GEKKO

#Experimental data
# Sample times and the measured concentrations of A, C and P at those times
# (17 points per series).
times  = np.array([0.0, 0.071875, 0.143750, 0.215625, 0.287500, 0.359375, 0.431250,
                      0.503125, 0.575000, 0.646875, 0.718750, 0.790625, 0.862500,
                      0.934375, 1.006250, 1.078125, 1.150000])
A_obs = np.array([1.0, 0.552208, 0.300598, 0.196879, 0.101175, 0.065684, 0.045096,
                      0.028880, 0.018433, 0.011509, 0.006215, 0.004278, 0.002698,
                      0.001944, 0.001116, 0.000732, 0.000426])
C_obs = np.array([0.0, 0.187768, 0.262406, 0.350412, 0.325110, 0.367181, 0.348264,
                      0.325085, 0.355673, 0.361805, 0.363117, 0.327266, 0.330211,
                      0.385798, 0.358132, 0.380497, 0.383051])
P_obs = np.array([0.0, 0.117684, 0.175074, 0.236679, 0.234442, 0.270303, 0.272637,
                      0.274075, 0.278981, 0.297151, 0.297797, 0.298722, 0.326645,
                      0.303198, 0.277822, 0.284194, 0.301471])

# Build the model with a local solve (remote=False).
m = GEKKO(remote=False)

# The measurement times are the model's time grid; t is kept as an alias
# for the plotting calls at the bottom.
t = m.time = times

# Measured profiles supplied as parameters (fixed inputs), not variables.
# NOTE(review): confirm that the installed GEKKO version's m.Param accepts
# the `lb` keyword.
Am = m.Param(value=A_obs, lb = 0)
Cm = m.Param(value=C_obs, lb = 0)
Pm = m.Param(value=P_obs, lb = 0)

# Model states: concentrations of A, B, C and P, starting at 1, 0, 0, 0 and
# bounded below by zero.
A = m.Var(1, lb = 0)
B = m.Var(0, lb = 0)
C = m.Var(0, lb = 0)
P = m.Var(0, lb = 0)

# Six rate constants created as an array of FVs (initial guess 1, lower
# bound 0); STATUS = 1 lets the solver adjust each one.
k = m.Array(m.FV,6,value=1,lb=0)  
for ki in k:
    ki.STATUS = 1
k1,k2,k3,k4,k5,k6 = k

# Reaction rates r1..r6, held as non-negative intermediate variables.
r1 = m.Var(0, lb = 0)
r2 = m.Var(0, lb = 0)
r3 = m.Var(0, lb = 0)
r4 = m.Var(0, lb = 0)
r5 = m.Var(0, lb = 0)
r6 = m.Var(0, lb = 0)

# Power-law rate expressions defining r1..r6.
m.Equation(r1 == k1 * A)
m.Equation(r2 == k2 * A * B)
m.Equation(r3 == k3 * C * B)
m.Equation(r4 == k4 * A)
m.Equation(r5 == k5 * A)
m.Equation(r6 == k6 * A * B)

# Mass-balance differential equations for A, B, C and P, written in terms
# of the rate variables r1..r6 defined above.
m.Equation(A.dt() == - r1 - r2 - r4 - r5 - r6)
m.Equation(B.dt() ==  r1 - r2 - r3 - r6)
m.Equation(C.dt() ==  r2 - r3 + r4)
m.Equation(P.dt() ==  r3 + r5 + r6)

# Squared mismatch against the data, registered as one objective term per
# measured species.
m.Minimize((A-Am)**2)
m.Minimize((P-Pm)**2)
m.Minimize((C-Cm)**2)

# IMODE 5 selects GEKKO's dynamic estimation mode.
m.options.IMODE = 5
m.options.SOLVER = 3 #IPOPT optimizer
# Tightened solver tolerances.
m.options.RTOL = 1E-8
m.options.OTOL = 1E-8
# More nodes per time step, for better integration accuracy.
m.options.NODES = 5
m.solve()

# Collect the fitted rate constants (element 0 of each FV's value list).
k_opt = []
for ki in k:
    k_opt.append(ki.value[0])
print(k_opt)

# Model profiles (A, C, P, B) as lines, measurements as markers.
plt.plot(t,A)
plt.plot(t,C)
plt.plot(t,P)
plt.plot(t,B)
plt.plot(times,A_obs,'bo')
plt.plot(times,C_obs,'gx')
plt.plot(times,P_obs,'rs')
plt.show()

来源：https://stackoverflow.com/questions/63350026/gekko-and-scipy-optimize-lead-to-different-results-in-nonlinear-parameter-estima

标签

python

gekko