Loss goes to NAN when training the custom YOLO model

问题

I implemented a custom loss function and model for YOLO using Keras. I am using Tensorflow as backend.

import pickle
import tensorflow as tf
import numpy as np 
import matplotlib.pyplot as plt 
from keras.models import Sequential,load_model
from keras.layers import Dense,Conv2D,Activation,MaxPooling2D,Flatten
import keras as k
from keras import optimizers
import cv2

batch=12

sess= tf.Session()

#loss function
def yolo_loss(yTrue,yPred):
    coord=5
    noobj=0.5
    L_noobj=1
    L_obj=1
    if yTrue[6] == 1: 
       L_obj=0

    if yTrue[5] == 1:
       L_noobj=0

    w=coord*L_obj*(tf.square([tf.sqrt(yTrue[2])-tf.sqrt(yPred[2])])) 
    h=coord*L_obj*(tf.square([yTrue[3]-yPred[3]]))
    x=coord*L_obj*(tf.square([yTrue[0]-yPred[0]]))
    y=coord*L_obj*(tf.square([yTrue[1]-yPred[1]])) 
    no_obj=noobj*L_noobj*(tf.square([yTrue[6]-yPred[6]])) 
    obj=L_obj*(tf.square([yTrue[5]-yPred[5]])) 
    clss=L_obj*(tf.square([yTrue[4]-yPred[4]]))
    loss=w+h+x+y+no_obj+obj+clss
    return loss

def custom_loss(yTrue,yPred):
    loss=None
    for a in range(batch):
        loss_per_sample=0
        for b in range(4):
            for c in range(4):
                loss_per_sample += yolo_loss(yTrue[a,b,c,0:],yPred[a,b,c,0:])  
        if loss == None:
            loss=tf.stack(loss_per_sample)
        else:
            x=tf.stack(loss_per_sample)
            loss=tf.concat([loss,x],0)

    loss=tf.reshape(loss,[-1,1])      
    return loss 

#load data and labels 
x_train=pickle.load(open('data_image.pickle','rb'))
y_train=pickle.load(open('data_label.pickle','rb'))
test=pickle.load(open('test_image.pickle','rb'))


# model
model=Sequential()

model.add(Conv2D(16,(7,7),input_shape=x_train.shape[1:],padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(32,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(128,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(512,(3,3),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(512,(3,3),padding="same"))
model.add(Activation("relu"))

model.add(Conv2D(1024,(3,3),padding="same"))
model.add(Activation("relu"))

model.add(Conv2D(7,(3,3),padding="same"))
model.add(Activation("relu"))

adam = optimizers.adam(lr=0.001)
model.compile(loss=custom_loss,optimizer=adam,metrics=["accuracy"]) 


model.fit(x_train,y_train,batch_size=batch,epochs=100)

model.save('yolo.model')

When I train the model Loss value goes to NAN.but after I remove the tf.sqrt() from the "W" and "h" in Custom loss function Loss is almost come to zero. But the problem is "W" and "h" value of the bounding box is always zero. I think there something in tf.sqrt() function. Please can someone tell me what is going on here.

回答1:

I think this some kind of a division by zero error I had this issue using Yolo with darkflow for player detection one thing I did to fix this was making a couple of adjustment to the batch size and learning rate.

回答2:

You are using relu in last layer, which is not expected. This may be causing dying gradients.

Also, do some checks before using sqrt function such as negative values.

model.add(Conv2D(7,(3,3),padding="same"))
model.add(Activation("relu"))

adam = optimizers.adam(lr=0.001)
model.compile(loss=custom_loss,optimizer=adam,metrics=["accuracy"])

来源：https://stackoverflow.com/questions/54103762/loss-goes-to-nan-when-training-the-custom-yolo-model

标签

python-3.x

tensorflow

keras

yolo