问题
I am trying to find the best ARIMA configuration faster than I currently do.
To do so, I use an iterative method that compares all ARIMA combinations and selects the best one. For that I created the following functions:
def difference(dataset, interval=1):
    """Return the lag-``interval`` differences of ``dataset``.

    Element ``k`` of the result is ``dataset[k + interval] - dataset[k]``, so
    the result has ``len(dataset) - interval`` entries (none at all when the
    series is not longer than ``interval``).

    Args:
        dataset: Indexable sequence of numbers that supports ``len()``.
        interval: Lag between the two observations being subtracted; must be
            at least 1.

    Returns:
        A NumPy array holding the differenced values.

    Raises:
        ValueError: If ``interval`` is smaller than 1.
    """
    if interval < 1:
        # A zero lag produces only zeros and a negative lag silently wraps
        # around through negative indexing; neither is a real difference.
        raise ValueError("interval must be a positive integer")
    return np.array(
        [dataset[i] - dataset[i - interval] for i in range(interval, len(dataset))]
    )
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Undo a lag-``interval`` difference for a single value.

    Args:
        history: Sequence of earlier observations; it is indexed from the end.
        yhat: Value on the differenced scale.
        interval: Lag that was used when differencing.

    Returns:
        ``yhat`` plus the element ``interval`` positions from the end of
        ``history``.
    """
    return yhat + history[-interval]
# evaluate an ARIMA model for a given order (p,d,q) and return RMSE
def evaluate_arima_model(dataset, arima_order):
    """Walk-forward evaluate one ARIMA order and return its RMSE.

    The first half of ``dataset`` seeds the history. For every observation in
    the second half a model is refit on the differenced history, a one-step
    forecast is produced and converted back with ``inverse_difference``, and
    the true observation is then appended to the history.

    Args:
        dataset: Array-like exposing ``astype`` (e.g. a NumPy array).
        arima_order: The ``(p, d, q)`` order handed to ``ARIMA``.

    Returns:
        The square root of the mean squared error between the second half of
        ``dataset`` and the forecasts.
    """
    dataset = dataset.astype('float32')
    train_size = int(len(dataset) * 0.50)
    train, test = dataset[:train_size], dataset[train_size:]
    history = list(train)
    # The differencing lag comes from the module-level ``maxlength`` and does
    # not change between steps, so it is read once instead of per iteration.
    months_in_year = maxlength
    # make predictions
    predictions = []
    for observation in test:
        # difference data
        diff = difference(history, months_in_year)
        model = ARIMA(diff, order=arima_order)
        # NOTE(review): ``trend='nc'`` / ``disp=0`` look like the legacy
        # statsmodels ARIMA.fit arguments -- confirm the installed version.
        model_fit = model.fit(trend='nc', disp=0)
        yhat = model_fit.forecast()[0]
        yhat = inverse_difference(history, yhat, months_in_year)
        predictions.append(yhat)
        history.append(observation)
    # calculate out of sample error
    mse = mean_squared_error(test, predictions)
    return sqrt(mse)
Currently this method takes minutes to run, which is too slow for the API where I am going to use this logic.
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the one with the lowest RMSE.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``.
    The best score and order are stored in the module-level names
    ``best_score`` and ``best_cfg``; each successful score and the final
    winner are printed.

    Args:
        dataset: Array-like exposing ``astype`` (e.g. a NumPy array).
        p_values: Iterable of candidate ``p`` values.
        d_values: Iterable of candidate ``d`` values.
        q_values: Iterable of candidate ``q`` values.
    """
    global best_score, best_cfg
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                except Exception:
                    # Best effort: an order that cannot be evaluated is
                    # skipped. ``Exception`` (not a bare ``except``) keeps
                    # Ctrl-C and SystemExit working during a long search.
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order, rmse))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
# Run the sequential grid search: p and q cover 0..6, d covers 0..2.
warnings.filterwarnings("ignore")
p_values, d_values, q_values = range(7), range(3), range(7)
evaluate_models(data_train.values, p_values, d_values, q_values)
To accelerate the process I want to use multiprocessing to run the evaluate_arima_model
function over all the orders. But ProcessPoolExecutor doesn't seem to work: it doesn't print any result.
# (p, d, q) orders to evaluate; populated by fill_orders()
orders = []


def fill_orders(p_values, d_values, q_values):
    """Append every ``(p, d, q)`` combination to the module-level ``orders``.

    Combinations are appended with ``p`` varying slowest and ``q`` fastest.
    Each argument is consumed exactly once, so one-shot iterators are handled
    the same way as ranges or lists.

    Args:
        p_values: Iterable of candidate ``p`` values.
        d_values: Iterable of candidate ``d`` values.
        q_values: Iterable of candidate ``q`` values.
    """
    from itertools import product

    orders.extend(product(p_values, d_values, q_values))
# Populate the orders list with the full search grid (warnings silenced).
warnings.filterwarnings("ignore")
p_values = range(7)
d_values = range(3)
q_values = range(7)
fill_orders(p_values, d_values, q_values)
# Worker processes may re-import this module (spawn start method, the default
# on Windows and macOS); the guard keeps them from re-running the pool setup.
if __name__ == "__main__":
    # NOTE(review): ``dataset`` must exist at module level here; the
    # sequential version passes ``data_train.values`` -- confirm which array
    # is intended.
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # submit() forwards its extra positional arguments to the callable,
        # so ``dataset`` and ``order`` are passed separately. Passing the
        # tuple ``(dataset, order)`` would hand the worker ONE argument and
        # every task would fail with a TypeError.
        results = [
            executor.submit(evaluate_arima_model, dataset, order)
            for order in orders
        ]
        # Futures complete out of order, so remember which order each one is.
        order_of = dict(zip(results, orders))
        for f in concurrent.futures.as_completed(results):
            order = order_of[f]
            try:
                rmse = f.result()
            except Exception as exc:
                # Report the failure instead of hiding it: silently skipped
                # errors are why a broken run appears to "print nothing".
                print('ARIMA%s failed: %r' % (order, exc))
            else:
                print('ARIMA%s RMSE=%.3f' % (order, rmse))
回答1:
I wouldn't expect the second block of code you've shown to do anything. For this code:
# evaluate combinations of p, d and q values for an ARIMA model
# Module-level list that evaluate_models() appends (p, d, q) tuples to.
orders = []
def evaluate_models( p_values, d_values, q_values):
    """Append every (p, d, q) combination of the given values to ``orders``."""
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p,d,q)
                orders.append(order)
# NOTE: nothing in this snippet calls evaluate_models() before this point, so
# ``orders`` is still empty here and the comprehension below submits no tasks.
with concurrent.futures.ProcessPoolExecutor() as executor:
    # NOTE(review): ``(dataset, order)`` is handed to submit() as a single
    # positional argument, i.e. the worker receives one tuple, not two values.
    results = [executor.submit(evaluate_arima_model, (dataset, order)) for order in orders]
    for f in concurrent.futures.as_completed(results):
        # This first result() call sits outside the try block, so a failed
        # task raises here before the handler below is reached.
        print(f.result())
        try:
            f.result()
        except:
            continue
        else:
            print(f.result())
`orders` will always be empty because you declare it as an empty list and then never call `evaluate_models`, or anything else that could put objects into `orders`. Since `orders` is empty, no processes will be registered to run, `results` will also be empty, and so this code won't do anything. Did you mean to call `evaluate_models` before you do `with concurrent.futures....`?
来源:https://stackoverflow.com/questions/65066091/processpoolexecutor-dont-execute