问题
I am trying to train a LSTM and in my model I have an exponential learning rate decay and a dropout layer. In order to deactivate the dropout layer when testing and validating, I have put a placeholder for the dropout rate and given it a default value of 1.0 and when training i am setting it to 0.5. The dropou_rate placeholder value is passed to the tf.layers.dropout(). When I run this during the validation I get the following error.
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
shown below is the stack trace:
Traceback (most recent call last): File "/home/suleka/Documents/sales_prediction/SalesPrediction_LSTM_mv.py", line 329, in train_test() File "/home/suleka/Documents/sales_prediction/SalesPrediction_LSTM_mv.py", line 270, in train_test meanSquaredError = mean_squared_error(nonescaled_y, pred_vals) File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/metrics/regression.py", line 238, in mean_squared_error y_true, y_pred, multioutput) File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/metrics/regression.py", line 77, in _check_reg_targets y_pred = check_array(y_pred, ensure_2d=False) File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 453, in check_array _assert_all_finite(array) File "/home/suleka/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py", line 44, in _assert_all_finite " or a value too large for %r." % X.dtype) ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
When I put the learning rate as a value in tf.layers.dropout like:
dropout = tf.layers.dropout(last, rate=0.5, training=True)
The code works fine. I am not sure what is happening in the code.
Shown below is my complete code:
import tensorflow as tf
import matplotlib as mplt
mplt.use('agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt
import csv
np.random.seed(1)
tf.set_random_seed(1)
class RNNConfig():
input_size = 1
num_steps = 7#5
lstm_size = 64 #16
num_layers = 1
keep_prob = 0.8
batch_size = 16 #64
init_epoch = 15 # 5
max_epoch = 20 # 100 or 50
# test_ratio = 0.2
fileName = 'store2_1.csv'
graph = tf.Graph()
column_min_max = [[0,11000], [1,7]]
columns = ['Sales', 'DayOfWeek','SchoolHoliday', 'Promo']
features = len(columns)
hidden1_nodes = 64
hidden2_nodes = 8
config = RNNConfig()
def segmentation(data):
seq = [price for tup in data[config.columns].values for price in tup]
seq = np.array(seq)
# split into items of features
seq = [np.array(seq[i * config.features: (i + 1) * config.features])
for i in range(len(seq) // config.features)]
# split into groups of num_steps
X = np.array([seq[i: i + config.num_steps] for i in range(len(seq) - config.num_steps)])
y = np.array([seq[i + config.num_steps] for i in range(len(seq) - config.num_steps)])
# get only sales value
y = [[y[i][0]] for i in range(len(y))]
y = np.asarray(y)
return X, y
def scale(data):
for i in range (len(config.column_min_max)):
data[config.columns[i]] = (data[config.columns[i]] - config.column_min_max[i][0]) / ((config.column_min_max[i][1]) - (config.column_min_max[i][0]))
return data
def rescle(test_pred):
prediction = [(pred * (config.column_min_max[0][1] - config.column_min_max[0][0])) + config.column_min_max[0][0] for pred in test_pred]
return prediction
def pre_process():
store_data = pd.read_csv(config.fileName)
store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
#
# store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)
# ---for segmenting original data --------------------------------
# original_data = store_data.copy()
## train_size = int(len(store_data) * (1.0 - test_ratio))
validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
train_size = int(len(store_data) - (validation_len + test_len))
train_data = store_data[:train_size]
validation_data = store_data[(train_size - config.num_steps): validation_len + train_size]
test_data = store_data[((validation_len + train_size) - config.num_steps):]
original_val_data = validation_data.copy()
original_test_data = test_data.copy()
# -------------- processing train data---------------------------------------
scaled_train_data = scale(train_data)
train_X, train_y = segmentation(scaled_train_data)
# -------------- processing validation data---------------------------------------
scaled_validation_data = scale(validation_data)
val_X, val_y = segmentation(scaled_validation_data)
# -------------- processing test data---------------------------------------
scaled_test_data = scale(test_data)
test_X, test_y = segmentation(scaled_test_data)
# ----segmenting original validation data-----------------------------------------------
nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)
# ----segmenting original test data---------------------------------------------
nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y
def generate_batches(train_X, train_y, batch_size):
num_batches = int(len(train_X)) // batch_size
if batch_size * num_batches < len(train_X):
num_batches += 1
batch_indices = range(num_batches)
for j in batch_indices:
batch_X = train_X[j * batch_size: (j + 1) * batch_size]
batch_y = train_y[j * batch_size: (j + 1) * batch_size]
assert set(map(len, batch_X)) == {config.num_steps}
yield batch_X, batch_y
def mean_absolute_percentage_error(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
itemindex = np.where(y_true == 0)
y_true = np.delete(y_true, itemindex)
y_pred = np.delete(y_pred, itemindex)
return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
def RMSPE(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.sqrt(np.mean(np.square(((y_true - y_pred) / y_pred)), axis=0))
def plot(true_vals,pred_vals,name):
fig = plt.figure()
fig = plt.figure(dpi=100, figsize=(20, 7))
days = range(len(true_vals))
plt.plot(days, pred_vals, label='pred sales')
plt.plot(days, true_vals, label='truth sales')
plt.legend(loc='upper left', frameon=False)
plt.xlabel("day")
plt.ylabel("sales")
plt.grid(ls='--')
plt.savefig(name, format='png', bbox_inches='tight', transparent=False)
plt.close()
def write_results(true_vals,pred_vals,name):
with open(name, "w") as f:
writer = csv.writer(f)
writer.writerows(zip(true_vals, pred_vals))
def train_test():
train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()
# Add nodes to the graph
with config.graph.as_default():
tf.set_random_seed(1)
learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
inputs = tf.placeholder(tf.float32, [None, config.num_steps, config.features], name="inputs")
targets = tf.placeholder(tf.float32, [None, config.input_size], name="targets")
global_step = tf.Variable(0, trainable=False)
dropout_rate = tf.placeholder_with_default(1.0, shape=())
learning_rate = tf.train.exponential_decay(learning_rate=learning_rate, global_step=global_step, decay_rate=0.96, decay_steps=5, staircase=False)
cell = tf.contrib.rnn.LSTMCell(config.lstm_size, state_is_tuple=True, activation=tf.nn.relu)
val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
val = tf.transpose(val1, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")
# hidden layer
last = tf.layers.dense(last, units=config.hidden1_nodes, activation=tf.nn.relu)
last = tf.layers.dense(last, units=config.hidden2_nodes, activation=tf.nn.relu)
weight = tf.Variable(tf.truncated_normal([config.hidden2_nodes, config.input_size]))
bias = tf.Variable(tf.constant(0.1, shape=[config.input_size]))
dropout = tf.layers.dropout(last, rate=dropout_rate, training=True)
prediction = tf.matmul(dropout, weight) + bias
loss = tf.losses.mean_squared_error(targets,prediction)
optimizer = tf.train.AdamOptimizer(learning_rate)
minimize = optimizer.minimize(loss, global_step=global_step)
# correct_prediction = tf.sqrt(tf.losses.mean_squared_error(prediction, targets))
# --------------------training------------------------------------------------------
with tf.Session(graph=config.graph) as sess:
tf.set_random_seed(1)
tf.global_variables_initializer().run()
iteration = 1
for epoch_step in range(config.max_epoch):
for batch_X, batch_y in generate_batches(train_X, train_y, config.batch_size):
train_data_feed = {
inputs: batch_X,
targets: batch_y,
learning_rate: 0.01,
dropout_rate: 0.5
}
train_loss, _, value,gs = sess.run([loss, minimize, val1,global_step], train_data_feed)
if iteration % 5 == 0:
print("Epoch: {}/{}".format(epoch_step, config.max_epoch),
"Iteration: {}".format(iteration),
"Train loss: {:.6f}".format(train_loss))
iteration += 1
saver = tf.train.Saver()
saver.save(sess, "checkpoints_sales/sales_pred.ckpt")
# --------------------validation------------------------------------------------------
with tf.Session(graph=config.graph) as sess:
tf.set_random_seed(1)
saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))
test_data_feed = {
inputs: val_X,
dropout_rate: 1.0
}
test_pred = sess.run(prediction, test_data_feed)
# rmsse = sess.run(correct_prediction, test_data_feed)
pred_vals = rescle(test_pred)
pred_vals = np.array(pred_vals)
pred_vals = pred_vals.flatten()
pred_vals = pred_vals.tolist()
nonescaled_y = nonescaled_val_y.flatten()
nonescaled_y = nonescaled_y.tolist()
plot(nonescaled_y, pred_vals, "Sales Prediction VS Truth mv testSet.png")
write_results(nonescaled_y, pred_vals, "Sales Prediction batch mv results_all validationSet.csv")
meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
rootMeanSquaredError = sqrt(meanSquaredError)
print("RMSE:", rootMeanSquaredError)
mae = mean_absolute_error(nonescaled_y, pred_vals)
print("MAE:", mae)
mape = mean_absolute_percentage_error(nonescaled_y, pred_vals)
print("MAPE:", mape)
rmse_val = RMSPE(nonescaled_y, pred_vals)
print("RMSPE:", rmse_val)
# --------------------testing------------------------------------------------------
with tf.Session(graph=config.graph) as sess:
tf.set_random_seed(1)
saver.restore(sess, tf.train.latest_checkpoint('checkpoints_sales'))
test_data_feed = {
inputs: test_X,
dropout_rate: 1.0
}
test_pred = sess.run(prediction, test_data_feed)
# rmsse = sess.run(correct_prediction, test_data_feed)
pred_vals = rescle(test_pred)
pred_vals = np.array(pred_vals)
pred_vals = (np.round(pred_vals, 0)).astype(np.int32)
pred_vals = pred_vals.flatten()
pred_vals = pred_vals.tolist()
nonescaled_y = nonescaled_test_y.flatten()
nonescaled_y = nonescaled_y.tolist()
plot(nonescaled_y, pred_vals, "Sales Prediction VS Truth mv testSet.png")
write_results(nonescaled_y, pred_vals, "Sales Prediction batch mv results_all validationSet.csv")
meanSquaredError = mean_squared_error(nonescaled_y, pred_vals)
rootMeanSquaredError = sqrt(meanSquaredError)
print("RMSE:", rootMeanSquaredError)
mae = mean_absolute_error(nonescaled_y, pred_vals)
print("MAE:", mae)
mape = mean_absolute_percentage_error(nonescaled_y, pred_vals)
print("MAPE:", mape)
rmse_val = RMSPE(nonescaled_y, pred_vals)
print("RMSPE:", rmse_val)
if __name__ == '__main__':
train_test()
回答1:
When using tf.layers.dropout the rate argument tells how much of the data to drop when you give 1.0 all the output is gone, replace 1.0 with 0.0 and it should work. TensorFlow documentation: https://www.tensorflow.org/api_docs/python/tf/layers/dropout
回答2:
I am putting this because even though @Almog's answer was correct it didn't have the explanation I wanted. So for anyone confused like me:
If you use:
'tf.nn.dropout()'
to deactivate the dropout layer you should put
keep_prob= 1.0 not keep_prob=0.0
as keep_prob means 'The probability that each element is kept.' So keeping it as 1.0 makes sense to deactivate it.
If you are using
'tf.layers.dropout()'
you should put:
rate=0.0 not rate=1.0
as rate here means 'The dropout rate (should be between 0 and 1). E.g. "rate=0.1" would drop out 10% of input units'. So if I put rate=0.0 it means that none of the input units will be dropped.
来源:https://stackoverflow.com/questions/54069395/input-contains-nan-infinity-or-a-value-too-large-for-dtypefloat64-in-tensor