Question
I am trying to train a rather large LSTM on a large dataset and have 4 GPUs to distribute the load. If I train on just one of them (any of them; I've tried each), it works correctly, but after adding the multi_gpu_model code it crashes my entire system when I run it. Here is my multi-GPU code:
# (imports used by this snippet)
import math
from keras.models import Sequential
from keras.layers import Masking, LSTM, Dropout, Dense
from keras.optimizers import RMSprop
from keras.utils import multi_gpu_model

batch_size = 8

model = Sequential()
model.add(Masking(mask_value=0., input_shape=(len(inputData[0]), len(inputData[0][0]))))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(Dense(len(outputData[0][0]), activation='softmax'))

rms = RMSprop()
p_model = multi_gpu_model(model, gpus=4)
p_model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['categorical_accuracy'])

print("Fitting")
p_model.fit_generator(songBatchGenerator(songList, batch_size), epochs=250, verbose=1, shuffle=False, steps_per_epoch=math.ceil(len(songList) / batch_size))
pickleSave('kerasTrained.pickle', p_model)  # note: the original wrote parallel_model, which is undefined here
print("Saved")
Changing this to
batch_size = 8

model = Sequential()
model.add(Masking(mask_value=0., input_shape=(len(inputData[0]), len(inputData[0][0]))))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(Dense(len(outputData[0][0]), activation='softmax'))

rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['categorical_accuracy'])

print("Fitting")
model.fit_generator(songBatchGenerator(songList, batch_size), epochs=250, verbose=1, shuffle=False, steps_per_epoch=math.ceil(len(songList) / batch_size))
pickleSave('kerasTrained.pickle', model)  # there is no parallel model in this version
print("Saved")
works perfectly.
Three of the GPUs are Nvidia 1060 3 GB cards and one is a 6 GB card, and the system has about 4 GB of RAM (although I doubt that's the issue, since I'm using a generator).
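(The post doesn't include songBatchGenerator itself. For readers trying to reproduce this, here is a minimal sketch of what such a Keras-compatible generator could look like; the loadSong helper and the assumption that each song is already padded to a common length are illustrative, not from the original post:)

# Hypothetical sketch of the (unshown) songBatchGenerator: a Keras-style
# generator that yields (inputs, targets) batches forever, so only one
# batch of songs is held in system memory at a time.
import numpy as np

def songBatchGenerator(songList, batch_size):
    while True:  # Keras generators must loop indefinitely
        for start in range(0, len(songList), batch_size):
            batch = songList[start:start + batch_size]
            # loadSong is assumed to return (input_array, target_array),
            # already padded to a common length for the Masking layer
            xs, ys = zip(*(loadSong(song) for song in batch))
            yield np.stack(xs), np.stack(ys)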
Answer 1:
With multi_gpu_model, Keras runs the computation across all 4 GPUs, while the template model itself should be instantiated in CPU scope. You can try the code below. For more information, see the TensorFlow documentation: https://www.tensorflow.org/api_docs/python/tf/keras/utils/multi_gpu_model
import math
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Masking, LSTM, Dropout, Dense
from keras.optimizers import RMSprop
from keras.utils import multi_gpu_model

batch_size = 8  # defined at module level so it's visible to fit_generator below

def create_model():
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(len(inputData[0]), len(inputData[0][0]))))
    model.add(LSTM(256, return_sequences=True))
    model.add(Dropout(.2))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(.2))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(.2))
    model.add(Dense(len(outputData[0][0]), activation='softmax'))
    return model

# We'll store a copy of the model on *every* GPU and then combine
# the results from the gradient updates on the CPU.
# Initialize the template model in CPU scope:
with tf.device("/cpu:0"):
    model = create_model()

# Make the model parallel
p_model = multi_gpu_model(model, gpus=4)
rms = RMSprop()
p_model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['categorical_accuracy'])

print("Fitting")
p_model.fit_generator(songBatchGenerator(songList, batch_size), epochs=250, verbose=1, shuffle=False, steps_per_epoch=math.ceil(len(songList) / batch_size))
pickleSave('kerasTrained.pickle', model)  # save the CPU template model (the original's parallel_model was undefined)
print("Saved")
Source: https://stackoverflow.com/questions/54760473/keras-multi-gpu-model-causes-system-to-crash