Keras train partial model issue (about GAN model)

问题

I came across a strange issue when using keras to implement GAN model.

With GAN we need to build up G and D first, and then add a new Sequential model (GAN) and add(G), add(D) sequentially afterwards.

Keras seems to backprop back to G (via GAN model) when I do D.train_on_batch, and I got an InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float.

If I remove the GAN model (the last stacked G then D sequential model), it computes d_loss correctly.

My environment is:

Ubuntu 16.04
keras 1.2.2
tensorflow-gpu 1.0.0
keras config: { "backend": "tensorflow", "image_dim_ordering": "tf", "epsilon": 1e-07, "floatx": "float32" }

I know that quite many people have succeeded in implementing GAN with keras, so I am wondering where I got wrong.

import numpy as np
import keras.layers as kl
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist

batch_size = 16
lr = 0.0001

def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise

# --------------------Generator Model--------------------

model = km.Sequential()

model.add(kl.Dense(input_dim=100, output_dim=1024))
model.add(kl.Activation('relu'))

model.add(kl.Dense(7*7*128))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,)))

model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2),
    input_shape=(7, 7, 128), border_mode='same'))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))

model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2),
    input_shape=(14, 14, 64), border_mode='same'))

G = model
G.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Discriminator Model--------------------

model = km.Sequential()

model.add(kl.Convolution2D( 64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1)))
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2)))
model.add(kl.BatchNormalization())
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Flatten())
model.add(kl.Dense(1))
model.add(kl.Activation('sigmoid'))

D = model
D.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------GAN Model--------------------

model = km.Sequential()
model.add(G)
D.trainable = False  # Is this necessary?
model.add(D)
GAN = model
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]

X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)

fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')

combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))

D.trainable = True
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)

D.trainable = False
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)

Error Message:

W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1022, in _do_call
    return fn(*args)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1004, in _run_fn
    status, run_metadata)
  File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__
    next(self.gen)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status
    pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
     [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "./gen.py", line 84, in <module>
    d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
  File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 766, in train_on_batch
    class_weight=class_weight)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1320, in train_on_batch
    outputs = self.train_function(ins)
  File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 1943, in __call__
    feed_dict=feed_dict)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 767, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 965, in _run
    feed_dict_string, options, run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run
    target_list, options, run_metadata)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
     [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'dense_input_1', defined at:
  File "./gen.py", line 20, in <module>
    model.add(kl.Dense(input_dim=100, output_dim=1024))
  File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 299, in add
    layer.create_input_layer(batch_input_shape, input_dtype)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 397, in create_input_layer
    dtype=input_dtype, name=name)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1198, in Input
    input_tensor=tensor)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1116, in __init__
    name=self.name)
  File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 321, in placeholder
    x = tf.placeholder(dtype, shape=shape, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 1520, in placeholder
    name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2149, in _placeholder
    name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'dense_input_1' with dtype float
     [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
     [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

回答1:

First, I would advise you to switch to the Functional API models. These kinds of mixed models are more easily handled by Functional models.

I have no idea why your solution didn't work to be honnest, it seems like when you link the D model to a new input, it gets kind of "corrupted" and gets linked to it. The way I have found around that problem, is to define the layers and use them for both the Discriminator and the GAN models. Here is the code :

import numpy as np
from keras.layers import *
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist

batch_size = 16
lr = 0.0001

def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise

# Changes the traiable argument for all the layers of model
# to the boolean argument "trainable"
def make_trainable(model, trainable):
    model.trainable = trainable
    for l in model.layers:
        l.trainable = trainable

# --------------------Generator Model--------------------

g_input = Input(shape=(100,))

g_hidden = Dense(1024, activation='relu')(g_input)
g_hidden = Dense(7*7*128, activation='relu')(g_hidden)
g_hidden = BatchNormalization()(g_hidden)
g_hidden = Reshape((7,7,128))(g_hidden)

g_hidden = Deconvolution2D(64,5,5, (None, 14, 14, 64), subsample=(2,2),
        border_mode='same', activation='relu')(g_hidden)
g_hidden = BatchNormalization()(g_hidden)
g_output = Deconvolution2D(1,5,5, (None, 28, 28, 1), subsample=(2,2),
        border_mode='same')(g_hidden)

G = km.Model(input=g_input,output=g_output)
G.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
G.summary()

# --------------------Discriminator Model--------------------

d_input = Input(shape=(28,28,1))

d_l1 = Convolution2D(64,5,5, subsample=(2,2))
d_hidden_1 = d_l1(d_input)
d_l2 = LeakyReLU(alpha=0.2)
d_hidden_2 = d_l2(d_hidden_1)

d_l3 = Convolution2D(128,5,5, subsample=(2,2))
d_hidden_3 = d_l3(d_hidden_2)
d_l4 = BatchNormalization()
d_hidden_4 = d_l4(d_hidden_3)
d_l5 = LeakyReLU(alpha=0.2)
d_hidden_5 = d_l5(d_hidden_4)

d_l6 = Flatten()
d_hidden_6 = d_l6(d_hidden_5)
d_l7 = Dense(1, activation='sigmoid')
d_output = d_l7(d_hidden_6)

D = km.Model(input=d_input,output=d_output)
D.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr,momentum=0.9, nesterov=True))
D.summary()

# --------------------GAN Model--------------------
make_trainable(D,False)

gan_input = Input(shape=(100,))
gan_hidden = G(gan_input)
gan_hidden = d_l1(gan_hidden)
gan_hidden = d_l2(gan_hidden)
gan_hidden = d_l3(gan_hidden)
gan_hidden = d_l4(gan_hidden)
gan_hidden = d_l5(gan_hidden)
gan_hidden = d_l6(gan_hidden)
gan_output = d_l7(gan_hidden)

GAN = km.Model(input=gan_input,output=gan_output)
GAN.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))
GAN.summary()

# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]

X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)

print(type(X_batch),X_batch.shape)
print(type(Z1_batch),Z1_batch.shape)

fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')

combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))
print(type(combined_X_batch),combined_X_batch.dtype,combined_X_batch.shape)
print(type(combined_y_batch),combined_y_batch.dtype,combined_y_batch.shape)
make_trainable(D,True)
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)

make_trainable(D,False)
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)

Does that help?

回答2:

After strived for quite a long time, I finally get it that it's the Discriminator's BatchNormalization layer that caused the problem.

If you just comment out the model.add(kl.BatchNormalization()) in the Discriminator. It'll work fine.

However, as @NassimBen shown, the functional API does not cause any problems.

import numpy as np
import keras.layers as kl
import keras.models as km
import keras.optimizers as ko
from keras.datasets import mnist

batch_size = 16
lr = 0.0001

def noise_gen(batch_size, z_dim):
    noise = np.zeros((batch_size, z_dim), dtype=np.float32)
    for i in range(batch_size):
        noise[i, :] = np.random.uniform(-1, 1, z_dim)
    return noise

# --------------------Generator Model--------------------

model = km.Sequential()

model.add(kl.Dense(input_dim=100, output_dim=1024))
model.add(kl.Activation('relu'))

model.add(kl.Dense(7*7*128))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,)))

model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2),
                             input_shape=(7, 7, 128), border_mode='same'))
model.add(kl.BatchNormalization())
model.add(kl.Activation('relu'))

model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2),
                             input_shape=(14, 14, 64), border_mode='same'))

G = model
G.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Discriminator Model--------------------

model = km.Sequential()

model.add(kl.Convolution2D( 64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1)))
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2)))
# model.add(kl.BatchNormalization())
model.add(kl.LeakyReLU(alpha=0.2))

model.add(kl.Flatten())
model.add(kl.Dense(1))
model.add(kl.Activation('sigmoid'))

D = model
D.compile(  loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------GAN Model--------------------

model = km.Sequential()
model.add(G)
D.trainable = False  # Is this necessary?
model.add(D)
GAN = model
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True))

# --------------------Main Code--------------------
(X, _), _ = mnist.load_data()
X = X / 255.
X = X[:, :, :, np.newaxis]

X_batch = X[0:batch_size, :]
Z1_batch = noise_gen(batch_size, 100)
Z2_batch = noise_gen(batch_size, 100)

fake_batch = G.predict(Z1_batch)
real_batch = X_batch
print('--------------------Fake Image Generated!--------------------')

combined_X_batch = np.concatenate((real_batch, fake_batch))
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape))

D.trainable = True
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
print('--------------------Discriminator trained!--------------------')
print(d_loss)

D.trainable = False
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1)))
print('--------------------GAN trained!--------------------')
print(g_loss)

来源：https://stackoverflow.com/questions/42422646/keras-train-partial-model-issue-about-gan-model

标签

tensorflow

machine-learning

keras

generative-adversarial-network