Question
I am learning tensorflow-probability, and this is a toy example of logistic regression on the Titanic dataset. My model does not seem to learn, and the loss is nan from the first epoch. I don't understand why.
Below are three different implementations; all return the same result.
The first uses a sigmoid activation, the second a DistributionLambda layer with a Bernoulli distribution, and the third a DistributionLambda layer with a Beta distribution.
Are there any corrections I should make to this code?
import seaborn as sns
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import Input, Concatenate
tfd = tfp.distributions
tfpl = tfp.layers

# Negative log-likelihood of the labels under the predicted distribution
def nll(y_true, y_pred):
    return -y_pred.log_prob(y_true)

titanic_df = sns.load_dataset('titanic')
titanic_df['sex'] = titanic_df['sex'].astype('category').cat.codes
titanic_df = titanic_df[['survived', 'pclass', 'sex', 'age']].astype(float)
survived = titanic_df['survived']
pclass = titanic_df['pclass']
sex = titanic_df['sex']
age = titanic_df['age']
num_samples = titanic_df.shape[0]
# Scale each layer's KL term by the number of training examples
kl_divergence_function = (lambda q, p, _: tfd.kl_divergence(q, p) / num_samples)
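To sanity-check what kl_divergence_function returns, here is a quick standalone snippet (q_toy and p_toy are just stand-ins for a weight posterior and its prior, not part of the model):
q_toy = tfd.Normal(loc=0.1, scale=1.0)   # stand-in for a weight posterior
p_toy = tfd.Normal(loc=0.0, scale=1.0)   # stand-in for the prior
# The lambda just divides the analytic KL divergence by the dataset size
print(float(kl_divergence_function(q_toy, p_toy, None)))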
#----------------------------------------------------------------------------------------------------
pclass_in = Input(shape=(1,))
sex_in = Input(shape=(1,))
age_in = Input(shape=(1,))
x = Concatenate()([pclass_in, sex_in, age_in])
y = tfp.layers.DenseFlipout(
    units=1,
    activation='sigmoid',
    kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
    bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
    kernel_divergence_fn=kl_divergence_function)(x)
# Model compilation.
model = tf.keras.Model(inputs=[pclass_in, sex_in, age_in], outputs=y)
# We use the binary_crossentropy loss since this toy example contains
# two labels. The Keras API will then automatically add the
# Kullback-Leibler divergence (contained in the individual layers of
# the model) to the cross-entropy loss, effectively calculating the
# (negated) Evidence Lower Bound (ELBO) loss; see the short check of
# model.losses after the training log below.
optimizer = tf.keras.optimizers.Adam(lr=0.01)
model.compile(optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
history = model.fit(x=[pclass, sex, age], y=survived, epochs=10, validation_split=0.2)
Epoch 1/10
23/23 [==============================] - 0s 10ms/step - loss: nan - accuracy: 0.6110 - val_loss: nan - val_accuracy: 0.6425
Epoch 2/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 3/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 4/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 5/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 6/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 7/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 8/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 9/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
Epoch 10/10
23/23 [==============================] - 0s 2ms/step - loss: nan - accuracy: 0.6096 - val_loss: nan - val_accuracy: 0.6425
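The comment above says Keras automatically adds the layer KL terms to the data loss; this is the quick check I used to confirm that the DenseFlipout layer actually registers a KL term (assuming the model from the first implementation is still in scope):
print(len(model.losses))   # one KL term per DenseFlipout layer (1 here)
print(model.losses)        # these terms get summed into the training loss by Keras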
#===============================================================================
#----------------------------------------------------------------------------------------------------
pclass_in = Input(shape=(1,))
sex_in = Input(shape=(1,))
age_in = Input(shape=(1,))
x = Concatenate()([pclass_in, sex_in, age_in])
x = tfp.layers.DenseFlipout(
    units=1,  # a single output, used as the Bernoulli probability
    activation='relu',
    kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
    bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
    kernel_divergence_fn=kl_divergence_function)(x)
y = tfpl.DistributionLambda(lambda t: tfd.Bernoulli(probs=t),
                            convert_to_tensor_fn=tfd.Distribution.sample)(x)
# Model compilation.
model = tf.keras.Model(inputs=[pclass_in, sex_in, age_in], outputs=y)
# Here the loss is the negative log-likelihood (nll) of the labels under the
# Bernoulli output distribution. The Keras API will then automatically add the
# Kullback-Leibler divergence (contained in the individual layers of the model)
# to the negative log-likelihood, effectively calculating the (negated)
# Evidence Lower Bound (ELBO) loss.
optimizer = tf.keras.optimizers.Adam(lr=0.01)
model.compile(optimizer, loss=nll, metrics=['accuracy'])
model.summary()
history = model.fit(x=[pclass, sex, age], y=survived, epochs=10, validation_split=0.2)
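To make the nll loss concrete for the Bernoulli head, here is a standalone snippet with made-up values (probs_toy and y_toy are just illustrative, not part of the model above):
probs_toy = tf.constant([[0.9], [0.2], [0.5]])   # pretend outputs of the probability head
dist_toy = tfd.Bernoulli(probs=probs_toy)
y_toy = tf.constant([[1.0], [0.0], [1.0]])
# nll(y_true, y_pred) = -log_prob(y_true); small when the observed label gets high probability
print(nll(y_toy, dist_toy))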
#================================================================================
#----------------------------------------------------------------------------------------------------
pclass_in = Input(shape=(1,))
sex_in = Input(shape=(1,))
age_in = Input(shape=(1,))
x = Concatenate()([pclass_in, sex_in, age_in])
x = tfp.layers.DenseFlipout(
    units=2,  # the Beta distribution takes two parameters
    activation='relu',
    kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
    bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
    kernel_divergence_fn=kl_divergence_function)(x)
y = tfpl.DistributionLambda(lambda t: tfd.Beta(t[..., :1], t[..., 1:]),
                            convert_to_tensor_fn=tfd.Distribution.sample)(x)
# Model compilation.
model = tf.keras.Model(inputs=[pclass_in, sex_in, age_in], outputs=y)
# Here the loss is the negative log-likelihood (nll) of the labels under the
# Beta output distribution. The Keras API will then automatically add the
# Kullback-Leibler divergence (contained in the individual layers of the model)
# to the negative log-likelihood, effectively calculating the (negated)
# Evidence Lower Bound (ELBO) loss.
optimizer = tf.keras.optimizers.Adam(lr=0.01)
model.compile(optimizer, loss=nll, metrics=['accuracy'])
model.summary()
history = model.fit(x=[pclass, sex, age], y=survived, epochs=10, validation_split=0.2)
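For reference, this is how I understand the slicing inside the DistributionLambda splits the two DenseFlipout outputs into the Beta parameters (standalone, with a made-up tensor t_toy):
t_toy = tf.constant([[2.0, 3.0]])   # pretend output of the 2-unit layer for one example
alpha = t_toy[..., :1]              # first column -> concentration1
beta = t_toy[..., 1:]               # second column -> concentration0
dist_toy = tfd.Beta(alpha, beta)
print(dist_toy.mean())              # alpha / (alpha + beta) = 0.4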
Source: https://stackoverflow.com/questions/65099103/toy-example-of-logistic-regression-with-tensorflow-probability-and-the-titanic-d