问题
I am just a novice in area of deep learning.
I made my first basic attempt with Keras Conv1D. Not sure what I did and whether I did it right. My input data is simply total sales by every week (total of 313 weeks), for stores across US and with a time step of 1.
Here is my code:
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def create_dataset(dataset, look_back=1):
dataX, dataY = [], []
for i in range(len(dataset)-look_back):
a = dataset[i:(i+look_back), 0]
dataX.append(a)
dataY.append(dataset[i + look_back, 0])
return numpy.array(dataX), numpy.array(dataY)
seed = 7
numpy.random.seed(seed)
dataframe = read_csv('D:/MIS793/Dataset/Academic Dataset External 2/Python scripts/totalsale _byweek.csv', usecols=[1], engine='python')
plt.plot(dataframe)
plt.show()
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1).astype('float32')
testX = testX.reshape(testX.shape[0], testX.shape[1], 1).astype('float32')
model = Sequential()
model.add(Conv1D(filters=10, kernel_size=1, padding='same', strides=1, activation='relu',input_shape=(1,1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
print(model.summary())
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=10, batch_size=100)
scores = model.evaluate(testX, testY, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
Not sure about few things here:
- Reshaping of trainX and testX.
- Value of kernel_size and input_shape
My idea here is it's just one vector of sales value. 10 filters, each of size 1 move from one value to another. Input shape is of the format time step, dimensions.
I only got accuracy of 10.91%! So my first question is whether I am feeding in the right parameters.
Thanks
ASC
回答1:
With model.metrics_names you can get the labels of your scores variable.
In your case it will be ['loss', 'mean_absolute_error'].
So what you are printing is not the accuracy, but the mae, multiplied by 100.
回答2:
I tried using accuracy instead of mae. However I got accuracy as 0%. Just wondering as this was about predicting numerical values, should I really use accuracy? Here is my latest code.
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def create_dataset(dataset, look_back=1):
dataX, dataY = [], []
for i in range(len(dataset)-look_back):
a = dataset[i:(i+look_back), 0]
dataX.append(a)
dataY.append(dataset[i + look_back, 0])
return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
dataframe = read_csv('D:/MIS793/Dataset/Academic Dataset External 2/Python scripts/totalsale _byweek.csv', usecols=[1], engine='python')
plt.plot(dataframe)
plt.show()
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1],1).astype('float32')
testX = testX.reshape(testX.shape[0], testX.shape[1],1).astype('float32')
model = Sequential()
model.add(Conv1D(filters=20, kernel_size=1, padding='same', strides=1, activation='relu',input_shape=(1,1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Conv1D(filters=10, kernel_size=1, padding='same', strides=1, activation='relu'))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=10, batch_size=100)
scores = model.evaluate(testX, testY, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
OR should I go with MAE?
If I go with MAE, my scores will look like below:
[0.12740663779013364, 0.31208728355111426]
First one is loss and second one is MAE. Isn't that a better metrics in this case?
The final line will be like this:
print("MAE: %.2f%%" % (scores[1]))
Thanks Anindya
来源:https://stackoverflow.com/questions/48049962/keras-conv1d-for-time-series