问题
I'm training a classifier to get a factor for an optimization. My data-set contains 800 samples as beginning (some are similar with just few modification).
I developed my model with TensorFlow using GoogleColab environment.
I have used a simple MLP for this problem, with 3 hidden layers each one has 256 nodes as first stage. I have also 64 classes 😬.
I have variable length inputs and I had fixed this problem with "-1" padding.
with my actual features I know that I will get bad accuracy, but I did not expect zero accuracy and the very big loss.
This was my data-set after omitting some features that I have noticed that influence negatively the accuracy :
0 1 2 4 5 6 8 9 11 13 15 17 19 21
805 6 10 11 1 3 1 6 64 2 1.0 64.0 64.0 64.0 -1.0
334 6 12 18 0 2 4 7 2 1 32.0 128.0 64.0 128.0 -1.0
781 7 10 11 1 3 1 6 2 2 2.0 64.0 32.0 32.0 64.0
[Edited] : And here is some lines of my labels:
0
0 108
1 30
2 30
3 16
4 62
5 126
6 22
7 30
8 48
And here is a set of my result :
epoch[0] step [0] train -- loss : 50751.734375, accuracy : 0.0
epoch[0] step [100] train -- loss : 27310.064453125, accuracy : 0.0
epoch[0] step [200] train -- loss : 58120.6015625, accuracy : 0.0
epoch[0] step [300] train -- loss : 31801.9453125, accuracy : 0.0
epoch[0] step [400] train -- loss : 54360.76171875, accuracy : 0.0
epoch[0] step [500] train -- loss : 59946.67578125, accuracy : 0.0
epoch[1] step [0] train -- loss : 40612.06640625, accuracy : 0.0
epoch[1] step [100] train -- loss : 43229.734375, accuracy : 0.0
epoch[1] step [200] train -- loss : 36951.84375, accuracy : 0.0
epoch[1] step [300] train -- loss : 45225.828125, accuracy : 0.0
epoch[1] step [400] train -- loss : 47055.1796875, accuracy : 0.0
epoch[1] step [500] train -- loss : 54023.23046875, accuracy : 0.0
I wonder why I get this big loss, and also that my loss does not converge too :(.
Those was the bizarre graph of accuracy and loss I got :
My code work perfectly, but I still doubt maybe my I did not write things well so I got this mess.
This is the important part of my code :
class MLP():
'''
This is the implementation of the Multi Layer Perceptron
'''
def __init__(self, x_train, y_train, n_classes, n_hiddens=3, activation=tf.nn.relu):
...
################################ Create the model ##############################
def multilayer_perceptron(self,X):
# Hidden fully connected layer with n_hidden_1 neurons
layer_1 = tf.layers.dense(inputs=X, units= self.n_hidden_1, use_bias=True, kernel_initializer=self._init, name= 'layer_1')
layer_1 = tf.layers.batch_normalization(layer_1,training=self.is_train)
layer_1 = self.activation(layer_1)
# Hidden fully connected layer with n_hidden_2 neurons
layer_2 = tf.layers.dense(inputs=layer_1, units= self.n_hidden_2, use_bias=True, kernel_initializer=self._init, name= 'layer_2')
layer_2 = tf.layers.batch_normalization(layer_2,training=self.is_train)
layer_2 = self.activation(layer_2)
# Hidden fully connected layer with n_hidden_3 neurons
layer_3 = tf.layers.dense(inputs=layer_2, units= self.n_hidden_3, use_bias=True, kernel_initializer=self._init, name= 'layer_3')
layer_3 = tf.layers.batch_normalization(layer_3, training=self.is_train)
layer_3 = self.activation(layer_3)
# Output fully connected layer with the output
out_layer = tf.layers.dense(inputs=layer_3, units= self.n_classes, use_bias=True, kernel_initializer=self._init, name= 'out_layer')
tf.summary.histogram('pre-activations', out_layer)
return layer_1, layer_2, layer_3, out_layer
And here where I calculate the loss and accuracy of training set :
def loss(self, X, Y):
_, _, _, self.predicted_out = self.multilayer_perceptron(X)
print("Predicted out", self.predicted_out)
with tf.name_scope('loss'):
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.predicted_out, labels=Y))
tf.summary.scalar('loss', loss)
with tf.name_scope('accuracy'):
predicted_class = tf.nn.softmax(self.predicted_out)
with tf.name_scope('correct_prediction'):
correct_prediction = tf.equal(tf.argmax(predicted_class, 1), tf.argmax(Y, 1))
with tf.name_scope('accuracy'):
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
tf.summary.scalar('accuracy', accuracy)
self.merged = tf.summary.merge_all()
return loss, accuracy
This the function of training :
def train(self):
self.train_writer = tf.summary.FileWriter('./Graph', self.sess.graph)
# training data
train_input = self.Normalize(self.x_train)
train_output = self.y_train.copy()
save_sess=self.sess
#costs history :
costs = []
costs_inter=[]
#for early stopping :
best_cost=1000000
stop = False
last_improvement=0
n_samples = train_input.shape[0] # size of the training set
#train the mini_batches model using the early stopping criteria
epoch = 0
while epoch < self.max_epochs and stop == False:
#train the model on the traning set by mini batches
#suffle then split the training set to mini-batches of size self.batch_size
seq =list(range(n_samples))
random.shuffle(seq)
mini_batches = [
seq[k:k+self.batch_size]
for k in range(0,n_samples, self.batch_size)
]
avg_cost = 0. # The average cost of mini_batches
step= 0
for sample in mini_batches:
batch_x = x_train.iloc[sample, :]
batch_y =train_output.iloc[sample, :]
feed_dict={self.X: batch_x,self.Y:batch_y, self.is_train:True}
self.train_summary, _, cost,acc=self.sess.run([self.merged, self.train_step, self.loss_, self.accuracy_], feed_dict=feed_dict)
avg_cost += cost *len(sample)/n_samples
print('epoch[{}] step [{}] train -- loss : {}, accuracy : {}'.format(epoch,step, cost, acc))
self.train_writer.add_summary(self.train_summary, global_step=step)
step += 100
#cost history since the last best cost
costs_inter.append(avg_cost)
epoch +=1
#Test the model
pred = tf.nn.softmax(self.predicted_out) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(self.Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
accuracy_test = self.sess.run([accuracy],feed_dict={self.X: x_test, self.Y: y_test,self.is_train:False})
#print("Accuracy:", accuracy.eval({self.X: x_test, self.Y: y_test}))
print("Accuracy_test : ", accuracy_test)
# Writes the summaries to disk
self.train_writer.flush()
# Flushes the summaries to disk and closes the SummaryWriter
self.train_writer.close()
return costs
Here where I call my method train, I was planing to apply cross validation to see if I can improve the model accuracy but I had not yet :
def cross_validation(self,batch_size, n_hidden_1 , n_hidden_2, n_hidden_3, learning_rate):
##### Other parameter
self.batch_size = batch_size
self.n_hidden_1 = n_hidden_1
self.n_hidden_2 = n_hidden_2
self.n_hidden_3 = n_hidden_3
self.learning_rate = learning_rate
self.require_improvement= 20
self.max_epochs = 80
self._init = tf.random_normal_initializer
self.optimizer=tf.train.AdamOptimizer
loss = 0
tf.reset_default_graph()
with tf.name_scope('input'):
self.X=tf.placeholder("float",shape=[None,self.x_train.shape[1]])
self.Y=tf.placeholder("float",shape=[None,self.y_train.shape[1]])
self.is_train = tf.placeholder(tf.bool, name="is_train")
self.loss_, self.accuracy_ = self.loss(self.X, self.Y)
self.train_step = self.optimizer(self.learning_rate).minimize(self.loss_)
# Initiate a tensor session
init = tf.global_variables_initializer()
self.sess = tf.Session()
self.sess.run(init)
#train the model
loss = self.train()
self.sess.close()
del self.sess
return loss
And finally this my main:
if __name__=='__main__':
tbc = tb.TensorBoardColab()
mlp = MLP(x_train, y_train, n_classes)
loss= mlp.cross_validation(batch_size, n_hidden_1 , n_hidden_2, n_hidden_3, learning_rate)
I had expected to get bad accuracy but not "0" and the very big loss without any convergence has shocked me. Can someone tell me where are the problems here?
回答1:
there are quite a few points you need to take care of
you should remove the tf summary file before the start of each training, as the global step will restart from 0 according to your code
your loss function is
softmax_cross_entropy_with_logits_v2
, to use this you may need to encode your label in onehot, and try to minimize logit layer close to that onehot label with internal softmax function in this function. If you want to keep current ground truth label, please check sparse_softmax_cross_entropy_with_logits. The usages are similar but some of them need to be onehot label. Check detailed explaination here
来源:https://stackoverflow.com/questions/55923207/my-classifier-has-very-big-loss-and-accuracy-is-always-0