Context
Suppose we have some 1D data (e.g. time series), where all series have fixed length l:
# [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] index
example = [ 0, 1, 1, 0, 23, 22, 20, 14, 9, 2, 0, 0] # l = 12
and we want to perform semantic segmentation, with n classes:
# [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] index
labeled = [
[ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], # class 1
[ 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], # class 2
[ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0], # class 3
#[ ... ],
[ 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1], # class n
]
then the output for a single example has shape [n, l] (i.e. the data_format is not "channels_last") and the batched output has shape [b, n, l], where b is the number of examples in the batch.
These classes are independent, so my understanding is that sigmoid cross entropy is the applicable loss here, rather than softmax cross entropy.
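For reference, here is the per-element formula from the tf.nn.sigmoid_cross_entropy_with_logits docs, sketched in NumPy (my own illustration, not from the docs' code): each (class, position) entry gets its own independent loss, which is why independent classes are fine here.
import numpy as np

# Numerically stable per-element formula from the TF docs:
#   loss = max(x, 0) - x * z + log(1 + exp(-|x|))
# where x is a logit and z is the corresponding {0, 1} label.
def sigmoid_xent(logits, labels):
    return (np.maximum(logits, 0) - logits * labels
            + np.log1p(np.exp(-np.abs(logits))))

# one independent loss value per position
print(sigmoid_xent(np.array([-2.0, 0.5, 3.0]), np.array([0.0, 1.0, 1.0])))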
Question
I have a few small related questions regarding the expected format for, and use of, tf.nn.sigmoid_cross_entropy_with_logits:

1. Since the network outputs a tensor in the same shape as the batched labels, should I train the network under the assumption that it outputs logits, or take the Keras approach (see Keras's binary_crossentropy) and assume it outputs probabilities?
2. Given the 1D segmentation problem, should I call tf.nn.sigmoid_cross_entropy_with_logits on data_format='channels_first' (as shown above) or data_format='channels_last' (example.T), if I want the labels to be assigned individually per channel?
3. Should the loss operation passed to the optimizer be tf.nn.sigmoid_cross_entropy_with_logits(labels, logits), tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels, logits)), or tf.losses.sigmoid_cross_entropy?
Code
This Colab highlights my confusion and demonstrates that the data_format does in fact matter, but the documentation does not explicitly state which is expected.
Dummy data
import random
import numpy as np
import tensorflow as tf  # TF 1.x API (tf.Session, tf.losses)

c = 5   # number of channels (label classes)
p = 10  # number of positions ('pixels')

# data_format = 'channels_first', shape = [classes, pixels]
# 'logits' for 2 examples
pred_1 = np.array([[random.random() for v in range(p)] for n in range(c)]).astype(float)
pred_2 = np.array([[random.random() for v in range(p)] for n in range(c)]).astype(float)
# 'ground truth' for the above 2 examples
targ_1 = np.array([[0 if random.random() < 0.8 else 1 for v in range(p)] for n in range(c)]).astype(float)
targ_2 = np.array([[0 if random.random() < 0.8 else 1 for v in range(p)] for n in range(c)]).astype(float)
# batched form of the above examples
preds = np.array([pred_1, pred_2])
targs = np.array([targ_1, targ_2])
# data_format = 'channels_last', shape = [pixels, classes]
t_pred_1 = pred_1.T
t_pred_2 = pred_2.T
t_targ_1 = targ_1.T
t_targ_2 = targ_2.T
t_preds = np.array([t_pred_1, t_pred_2])
t_targs = np.array([t_targ_1, t_targ_2])
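A quick shape check (my addition, not in the Colab) to make the two layouts explicit:
print(preds.shape)    # (2, 5, 10): [batch, classes, pixels], 'channels_first'
print(t_preds.shape)  # (2, 10, 5): [batch, pixels, classes], 'channels_last'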
Losses
tf.nn
# calculate individual losses for 'channels_first'
loss_1 = tf.nn.sigmoid_cross_entropy_with_logits(labels=targ_1, logits=pred_1)
loss_2 = tf.nn.sigmoid_cross_entropy_with_logits(labels=targ_2, logits=pred_2)
# calculate batch loss for 'channels_first'
b_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targs, logits=preds)
# calculate individual losses for 'channels_last'
t_loss_1 = tf.nn.sigmoid_cross_entropy_with_logits(labels=t_targ_1, logits=t_pred_1)
t_loss_2 = tf.nn.sigmoid_cross_entropy_with_logits(labels=t_targ_2, logits=t_pred_2)
# calculate batch loss for 'channels_last'
t_b_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=t_targs, logits=t_preds)
# get actual tensors
with tf.Session() as sess:
# loss for 'channels_first'
l1 = sess.run(loss_1)
l2 = sess.run(loss_2)
# batch loss for 'channels_first'
bl = sess.run(b_loss)
# loss for 'channels_last'
t_l1 = sess.run(t_loss_1)
t_l2 = sess.run(t_loss_2)
# batch loss for 'channels_last'
t_bl = sess.run(t_b_loss)
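Note (my addition): tf.nn.sigmoid_cross_entropy_with_logits applies no reduction, so each loss above has exactly the same shape as its inputs:
print(l1.shape)    # (5, 10): one loss per (class, pixel)
print(bl.shape)    # (2, 5, 10)
print(t_bl.shape)  # (2, 10, 5)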
tf.reduce_mean(tf.nn)
# calculate individual losses for 'channels_first'
rm_loss_1 = tf.reduce_mean(loss_1)
rm_loss_2 = tf.reduce_mean(loss_2)
# calculate batch loss for 'channels_first'
rm_b_loss = tf.reduce_mean(b_loss)
# calculate individual losses for 'channels_last'
rm_t_loss_1 = tf.reduce_mean(t_loss_1)
rm_t_loss_2 = tf.reduce_mean(t_loss_2)
# calculate batch loss for 'channels_last'
rm_t_b_loss = tf.reduce_mean(t_b_loss)
# get actual tensors
with tf.Session() as sess:
# loss for 'channels_first'
rm_l1 = sess.run(rm_loss_1)
rm_l2 = sess.run(rm_loss_2)
# batch loss for 'channels_first'
rm_bl = sess.run(rm_b_loss)
# loss for 'channels_last'
rm_t_l1 = sess.run(rm_t_loss_1)
rm_t_l2 = sess.run(rm_t_loss_2)
# batch loss for 'channels_last'
rm_t_bl = sess.run(rm_t_b_loss)
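Note (my addition): tf.reduce_mean with no axis argument averages over every element, so each of these is a single scalar; transposing the inputs cannot change it, since the same set of elements is averaged.
print(rm_bl)    # a single scalar
print(rm_t_bl)  # the same value as rm_bl, up to floating-point error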
tf.losses
# calculate individual losses for 'channels_first'
tf_loss_1 = tf.losses.sigmoid_cross_entropy(multi_class_labels=targ_1, logits=pred_1)
tf_loss_2 = tf.losses.sigmoid_cross_entropy(multi_class_labels=targ_2, logits=pred_2)
# calculate batch loss for 'channels_first'
tf_b_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=targs, logits=preds)
# calculate individual losses for 'channels_last'
tf_t_loss_1 = tf.losses.sigmoid_cross_entropy(multi_class_labels=t_targ_1, logits=t_pred_1)
tf_t_loss_2 = tf.losses.sigmoid_cross_entropy(multi_class_labels=t_targ_2, logits=t_pred_2)
# calculate batch loss for 'channels_last'
tf_t_b_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=t_targs, logits=t_preds)
# get actual tensors
with tf.Session() as sess:
# loss for 'channels_first'
tf_l1 = sess.run(tf_loss_1)
tf_l2 = sess.run(tf_loss_2)
# batch loss for 'channels_first'
tf_bl = sess.run(tf_b_loss)
# loss for 'channels_last'
tf_t_l1 = sess.run(tf_t_loss_1)
tf_t_l2 = sess.run(tf_t_loss_2)
# batch loss for 'channels_last'
tf_t_bl = sess.run(tf_t_b_loss)
Test equivalency
data_format equivalency
# loss _should_(?) be the same for 'channels_first' and 'channels_last' data_format
# test example_1
e1 = (l1 == t_l1.T).all()
# test example 2
e2 = (l2 == t_l2.T).all()
# loss calculated for each example and then batched together should be the same
# as the loss calculated on the batched examples
ea = (np.array([l1, l2]) == bl).all()
t_ea = (np.array([t_l1, t_l2]) == t_bl).all()
# loss calculated on the batched examples for 'channels_first' should be the same
# as loss calculated on the batched examples for 'channels_last'
eb = (bl == np.transpose(t_bl, (0, 2, 1))).all()
e1, e2, ea, t_ea, eb
# (True, False, False, False, True) <- changes every time, so True is happenstance
Equivalency between tf.reduce_mean and tf.losses
l_e1 = tf_l1 == rm_l1
l_e2 = tf_l2 == rm_l2
l_eb = tf_bl == rm_bl
l_t_e1 = tf_t_l1 == rm_t_l1
l_t_e2 = tf_t_l2 == rm_t_l2
l_t_eb = tf_t_bl == rm_t_bl
l_e1, l_e2, l_eb, l_t_e1, l_t_e2, l_t_eb
# (False, False, False, False, False, False)
Both tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(...)) and tf.losses.sigmoid_cross_entropy(...) (with default arguments) compute the same thing: with the defaults weights=1.0 and reduction=Reduction.SUM_BY_NONZERO_WEIGHTS, tf.losses.sigmoid_cross_entropy averages the per-element losses. The problem is in your tests, where you use == to compare two floating-point numbers; use np.isclose instead to check whether they are equal within a small tolerance.
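A tiny illustration (my addition, not from the original answer) of why exact == comparison fails for floats:
a = 0.1 + 0.2
print(a == 0.3)            # False: binary floats carry rounding error
print(np.isclose(a, 0.3))  # True: equal within a small tolerance
Rewriting the tests with np.isclose: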
# loss _should_(?) be the same for 'channels_first' and 'channels_last' data_format
# test example_1
e1 = np.isclose(l1, t_l1.T).all()
# test example 2
e2 = np.isclose(l2, t_l2.T).all()
# loss calculated for each example and then batched together should be the same
# as the loss calculated on the batched examples
ea = np.isclose(np.array([l1, l2]), bl).all()
t_ea = np.isclose(np.array([t_l1, t_l2]), t_bl).all()
# loss calculated on the batched examples for 'channels_first' should be the same
# as loss calculated on the batched examples for 'channels_last'
eb = np.isclose(bl, np.transpose(t_bl, (0, 2, 1))).all()
e1, e2, ea, t_ea, eb
# (True, True, True, True, True)
And:
l_e1 = np.isclose(tf_l1, rm_l1)
l_e2 = np.isclose(tf_l2, rm_l2)
l_eb = np.isclose(tf_bl, rm_bl)
l_t_e1 = np.isclose(tf_t_l1, rm_t_l1)
l_t_e2 = np.isclose(tf_t_l2, rm_t_l2)
l_t_eb = np.isclose(tf_t_bl, rm_t_bl)
l_e1, l_e2, l_eb, l_t_e1, l_t_e2, l_t_eb
# (True, True, True, True, True, True)
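So for question 3: both reduced forms yield the same scalar, and a scalar is the conventional thing to hand to an optimizer. A minimal TF 1.x sketch (the placeholder names are hypothetical, reusing c and p from the dummy data above):
labels_ph = tf.placeholder(tf.float32, shape=[None, c, p])
logits_ph = tf.placeholder(tf.float32, shape=[None, c, p])

# Reduce the per-element losses to a scalar before minimizing;
# tf.losses.sigmoid_cross_entropy(labels_ph, logits_ph) would give
# the same scalar with its default reduction.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_ph, logits=logits_ph))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)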
Source: https://stackoverflow.com/questions/53612973/tensorflow-sigmoid-cross-entropy-with-logits-for-1d-data