Loss goes to NAN when training the custom YOLO model

瘦欲@ 提交于 2020-04-30 10:37:52

问题


I implemented a custom loss function and model for YOLO using Keras. I am using Tensorflow as backend.

import pickle
import tensorflow as tf
import numpy as np 
import matplotlib.pyplot as plt 
from keras.models import Sequential,load_model
from keras.layers import Dense,Conv2D,Activation,MaxPooling2D,Flatten
import keras as k
from keras import optimizers
import cv2

batch=12

sess= tf.Session()

#loss function
def yolo_loss(yTrue,yPred):
    coord=5
    noobj=0.5
    L_noobj=1
    L_obj=1
    if yTrue[6] == 1: 
       L_obj=0

    if yTrue[5] == 1:
       L_noobj=0

    w=coord*L_obj*(tf.square([tf.sqrt(yTrue[2])-tf.sqrt(yPred[2])])) 
    h=coord*L_obj*(tf.square([yTrue[3]-yPred[3]]))
    x=coord*L_obj*(tf.square([yTrue[0]-yPred[0]]))
    y=coord*L_obj*(tf.square([yTrue[1]-yPred[1]])) 
    no_obj=noobj*L_noobj*(tf.square([yTrue[6]-yPred[6]])) 
    obj=L_obj*(tf.square([yTrue[5]-yPred[5]])) 
    clss=L_obj*(tf.square([yTrue[4]-yPred[4]]))
    loss=w+h+x+y+no_obj+obj+clss
    return loss

def custom_loss(yTrue,yPred):
    loss=None
    for a in range(batch):
        loss_per_sample=0
        for b in range(4):
            for c in range(4):
                loss_per_sample += yolo_loss(yTrue[a,b,c,0:],yPred[a,b,c,0:])  
        if loss == None:
            loss=tf.stack(loss_per_sample)
        else:
            x=tf.stack(loss_per_sample)
            loss=tf.concat([loss,x],0)

    loss=tf.reshape(loss,[-1,1])      
    return loss 

#load data and labels 
x_train=pickle.load(open('data_image.pickle','rb'))
y_train=pickle.load(open('data_label.pickle','rb'))
test=pickle.load(open('test_image.pickle','rb'))


# model
model=Sequential()

model.add(Conv2D(16,(7,7),input_shape=x_train.shape[1:],padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(32,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(128,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(512,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(512,(3,3),padding="same"))
model.add(Activation("relu"))

model.add(Conv2D(1024,(3,3),padding="same"))
model.add(Activation("relu"))

model.add(Conv2D(7,(3,3),padding="same"))
model.add(Activation("relu"))

adam = optimizers.adam(lr=0.001)
model.compile(loss=custom_loss,optimizer=adam,metrics=["accuracy"]) 


model.fit(x_train,y_train,batch_size=batch,epochs=100)

model.save('yolo.model')

When I train the model Loss value goes to NAN.but after I remove the tf.sqrt() from the "W" and "h" in Custom loss function Loss is almost come to zero. But the problem is "W" and "h" value of the bounding box is always zero. I think there something in tf.sqrt() function. Please can someone tell me what is going on here.



回答1:


I think this some kind of a division by zero error I had this issue using Yolo with darkflow for player detection one thing I did to fix this was making a couple of adjustment to the batch size and learning rate.




回答2:


You are using relu in last layer, which is not expected. This may be causing dying gradients.

Also, do some checks before using sqrt function such as negative values.

model.add(Conv2D(7,(3,3),padding="same"))
model.add(Activation("relu"))

adam = optimizers.adam(lr=0.001)
model.compile(loss=custom_loss,optimizer=adam,metrics=["accuracy"]) 


来源:https://stackoverflow.com/questions/54103762/loss-goes-to-nan-when-training-the-custom-yolo-model

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!