I came across a strange issue when using Keras to implement a GAN model.
With a GAN we first build G and D, and then stack them into a new Sequential model (GAN) by calling add(G) followed by add(D).
When I call D.train_on_batch, Keras seems to backpropagate into G (via the GAN model), and I get an InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float.
If I remove the GAN model (the Sequential model that stacks G and then D), d_loss is computed correctly.
My environment is:
- Ubuntu 16.04
- keras 1.2.2
- tensorflow-gpu 1.0.0
- keras config:
{ "backend": "tensorflow", "image_dim_ordering": "tf", "epsilon": 1e-07, "floatx": "float32" }
I know that quite a few people have successfully implemented GANs with Keras, so I am wondering where I went wrong.
import numpy as np
import keras.layers as kl
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist
batch_size = 16
lr = 0.0001
def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise
# --------------------Generator Model--------------------
model = km.Sequential()
model.add(kl.Dense(input_dim=100, output_dim=1024))
model.add(kl.Activation('relu'))
model.add(kl.Dense(7*7*128))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,)))
model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2),
                             input_shape=(7, 7, 128), border_mode='same'))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2),
                             input_shape=(14, 14, 64), border_mode='same'))
G = model
G.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
# --------------------Discriminator Model--------------------
model = km.Sequential()
model.add(kl.Convolution2D( 64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1)))
model.add(kl.LeakyReLU(alpha=0.2))
model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2)))
model.add(kl.BatchNormalization())
model.add(kl.LeakyReLU(alpha=0.2))
model.add(kl.Flatten())
model.add(kl.Dense(1))
model.add(kl.Activation('sigmoid'))
D = model
D.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
# --------------------GAN Model--------------------
model = km.Sequential()
model.add(G)
D.trainable = False # Is this necessary?
model.add(D)
GAN = model
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]
X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)
fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')
combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))
D.trainable = True
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)
D.trainable = False
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)
Error Message:
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1022, in _do_call
return fn(*args)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1004, in _run_fn
status, run_metadata)
File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__
next(self.gen)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "./gen.py", line 84, in <module>
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 766, in train_on_batch
class_weight=class_weight)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1320, in train_on_batch
outputs = self.train_function(ins)
File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 1943, in __call__
feed_dict=feed_dict)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'dense_input_1', defined at:
File "./gen.py", line 20, in <module>
model.add(kl.Dense(input_dim=100, output_dim=1024))
File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 299, in add
layer.create_input_layer(batch_input_shape, input_dtype)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 397, in create_input_layer
dtype=input_dtype, name=name)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1198, in Input
input_tensor=tensor)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1116, in __init__
name=self.name)
File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 321, in placeholder
x = tf.placeholder(dtype, shape=shape, name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 1520, in placeholder
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2149, in _placeholder
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'dense_input_1' with dtype float
[[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
First, I would advise you to switch to the functional API (Model); these kinds of mixed models are handled much more easily with functional models.
To be honest, I'm not sure why your version fails; it seems that when the D model is connected to a new input (inside the GAN), it gets kind of "corrupted" and stays linked to that input. The way I found around the problem is to define the discriminator layers once and reuse them for both the discriminator and the GAN models. Here is the code:
import numpy as np
from keras.layers import *
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist
batch_size = 16
lr = 0.0001
def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise
# Changes the trainable attribute of every layer of `model`
# to the boolean argument `trainable`
def make_trainable(model, trainable):
    model.trainable = trainable
    for l in model.layers:
        l.trainable = trainable
# --------------------Generator Model--------------------
g_input = Input(shape=(100,))
g_hidden = Dense(1024, activation='relu')(g_input)
g_hidden = Dense(7*7*128, activation='relu')(g_hidden)
g_hidden = BatchNormalization()(g_hidden)
g_hidden = Reshape((7,7,128))(g_hidden)
g_hidden = Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2),
                           border_mode='same', activation='relu')(g_hidden)
g_hidden = BatchNormalization()(g_hidden)
g_output = Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2),
                           border_mode='same')(g_hidden)
G = km.Model(input=g_input,output=g_output)
G.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
G.summary()
# --------------------Discriminator Model--------------------
d_input = Input(shape=(28,28,1))
d_l1 = Convolution2D(64,5,5, subsample=(2,2))
d_hidden_1 = d_l1(d_input)
d_l2 = LeakyReLU(alpha=0.2)
d_hidden_2 = d_l2(d_hidden_1)
d_l3 = Convolution2D(128,5,5, subsample=(2,2))
d_hidden_3 = d_l3(d_hidden_2)
d_l4 = BatchNormalization()
d_hidden_4 = d_l4(d_hidden_3)
d_l5 = LeakyReLU(alpha=0.2)
d_hidden_5 = d_l5(d_hidden_4)
d_l6 = Flatten()
d_hidden_6 = d_l6(d_hidden_5)
d_l7 = Dense(1, activation='sigmoid')
d_output = d_l7(d_hidden_6)
D = km.Model(input=d_input,output=d_output)
D.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr,momentum=0.9, nesterov=True))
D.summary()
# --------------------GAN Model--------------------
make_trainable(D,False)
gan_input = Input(shape=(100,))
gan_hidden = G(gan_input)
gan_hidden = d_l1(gan_hidden)
gan_hidden = d_l2(gan_hidden)
gan_hidden = d_l3(gan_hidden)
gan_hidden = d_l4(gan_hidden)
gan_hidden = d_l5(gan_hidden)
gan_hidden = d_l6(gan_hidden)
gan_output = d_l7(gan_hidden)
GAN = km.Model(input=gan_input,output=gan_output)
GAN.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
GAN.summary()
# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]
X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)
print(type(X_batch),X_batch.shape)
print(type(Z1_batch),Z1_batch.shape)
fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')
combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))
print(type(combined_X_batch),combined_X_batch.dtype,combined_X_batch.shape)
print(type(combined_y_batch),combined_y_batch.dtype,combined_y_batch.shape)
make_trainable(D,True)
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)
make_trainable(D,False)
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)
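If you want to double-check that the frozen discriminator really stays fixed during the generator update, here is a minimal sanity check (a sketch added for illustration, reusing the d_l7, GAN, noise_gen and batch_size names from the script above):
import numpy as np
# Snapshot the final Dense layer of the discriminator (shared with the GAN),
# run one generator update, and confirm its weights did not move.
w_before = [w.copy() for w in d_l7.get_weights()]
GAN.train_on_batch(noise_gen(batch_size, 100), np.ones((batch_size, 1)))
w_after = d_l7.get_weights()
frozen = all(np.allclose(b, a) for b, a in zip(w_before, w_after))
print('d_l7 frozen during the GAN step:', frozen)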
Does that help?
After struggling with this for quite a long time, I finally figured out that it is the discriminator's BatchNormalization layer that causes the problem: if you just comment out the model.add(kl.BatchNormalization()) line in the discriminator, it works fine. My understanding is that the BatchNormalization layer re-attaches its moving-average update ops to whatever input it was last called on, so once D is stacked into the GAN those updates depend on the generator's input placeholder ('dense_input_1'), which D.train_on_batch then requires you to feed.
However, as @NassimBen showed, the functional API does not cause any problems.
import numpy as np
import keras.layers as kl
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist
batch_size = 16
lr = 0.0001
def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise
# --------------------Generator Model--------------------
model = km.Sequential()
model.add(kl.Dense(input_dim=100, output_dim=1024))
model.add(kl.Activation('relu'))
model.add(kl.Dense(7*7*128))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,)))
model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2),
                             input_shape=(7, 7, 128), border_mode='same'))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2),
                             input_shape=(14, 14, 64), border_mode='same'))
G = model
G.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
# --------------------Discriminator Model--------------------
model = km.Sequential()
model.add(kl.Convolution2D( 64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1)))
model.add(kl.LeakyReLU(alpha=0.2))
model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2)))
# model.add(kl.BatchNormalization())
model.add(kl.LeakyReLU(alpha=0.2))
model.add(kl.Flatten())
model.add(kl.Dense(1))
model.add(kl.Activation('sigmoid'))
D = model
D.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
# --------------------GAN Model--------------------
model = km.Sequential()
model.add(G)
D.trainable = False # Is this necessary?
model.add(D)
GAN = model
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]
X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)
fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')
combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))
D.trainable = True
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)
D.trainable = False
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)
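For what it's worth, here is a small diagnostic sketch of why the BatchNormalization layer drags the generator's placeholder into D.train_on_batch. It relies on my reading of the Keras 1.x internals (layer update ops being collected via D.updates), so treat it as an assumption rather than part of the fix. After building G, D (with the BatchNormalization layer left in) and GAN as in the original failing script, you can walk the TensorFlow graph upstream from each of D's update ops and see whether it reaches the 'dense_input_1' placeholder:
import tensorflow as tf
def depends_on(op, target_name, seen=None):
    # Walk the TF graph upstream from `op` and report whether any ancestor
    # op name contains `target_name` (here the generator's input placeholder).
    if seen is None:
        seen = set()
    if op.name in seen:
        return False
    seen.add(op.name)
    if target_name in op.name:
        return True
    return any(depends_on(t.op, target_name, seen) for t in op.inputs)
for update in getattr(D, 'updates', []):
    # Depending on the Keras version, updates are tensors/ops or (old, new) pairs.
    tensor = update if hasattr(update, 'op') else update[1]
    print(tensor.op.name, '-> depends on dense_input_1:',
          depends_on(tensor.op, 'dense_input_1'))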
Source: https://stackoverflow.com/questions/42422646/keras-train-partial-model-issue-about-gan-model