问题
I want to create a custom training loop in TensorFlow 2 and use TensorBoard for visualization. Here is an example I've created based on the TensorFlow documentation:
import datetime
import os

import tensorflow as tf
# Select which GPU to use via CUDA_VISIBLE_DEVICES.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load MNIST and divide the image arrays by 255.0.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Training data is shuffled and batched; test data is only batched.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(60000)
    .batch(64)
)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(64)
def create_model():
    """Return a fresh Sequential model named 'Network'.

    The stack is Flatten(28x28) -> Dense(512, relu) -> Dropout(0.2)
    -> Dense(10, softmax).
    """
    layer_stack = [
        tf.keras.layers.Flatten(input_shape=(28, 28), name='Flatten_1'),
        tf.keras.layers.Dense(512, activation='relu', name='Dense_1'),
        tf.keras.layers.Dropout(0.2, name='Dropout_1'),
        tf.keras.layers.Dense(10, activation='softmax', name='Dense_2'),
    ]
    return tf.keras.models.Sequential(layer_stack, name='Network')
# Loss used by both train_step and test_step, and the optimizer handed to
# train_step by the training loop.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Metrics accumulated across batches: a mean of the per-batch loss and a
# sparse categorical accuracy, one pair for training and one for testing.
train_loss = tf.keras.metrics.Mean(name='train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
@tf.function
def train_step(model, optimizer, x_train, y_train):
    """Run one optimization step on a batch and update the training metrics.

    Args:
        model: Model called on the batch with ``training=True``.
        optimizer: Optimizer that applies the computed gradients.
        x_train: Batch of inputs passed to the model.
        y_train: Batch of labels passed to the loss and accuracy metric.
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        outputs = model(x_train, training=True)
        batch_loss = loss_object(y_train, outputs)

    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    # Fold this batch into the module-level training metrics.
    train_loss(batch_loss)
    train_accuracy(y_train, outputs)
@tf.function
def test_step(model, x_test, y_test):
    """Evaluate one batch and update the test metrics.

    Args:
        model: Model called on the batch in inference mode.
        x_test: Batch of inputs passed to the model.
        y_test: Batch of labels passed to the loss and accuracy metric.
    """
    # Pass training=False explicitly, mirroring train_step's training=True,
    # so layers such as Dropout run in inference mode regardless of any
    # ambient Keras default.
    predictions = model(x_test, training=False)
    loss = loss_object(y_test, predictions)

    # Fold this batch into the module-level test metrics.
    test_loss(loss)
    test_accuracy(y_test, predictions)
# Timestamp the run and use it in both log directory paths.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = '/NAS/Dataset/logs/gradient_tape/{}/train'.format(current_time)
test_log_dir = '/NAS/Dataset/logs/gradient_tape/{}/test'.format(current_time)

# One summary writer per log directory.
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

# Start from a newly built model.
model = create_model()
EPOCHS = 5

# Progress-line format; loop-invariant, so it is built once up front.
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(EPOCHS):
    # Batch variables use their own names so the module-level x_train /
    # y_train / x_test / y_test arrays are not overwritten by a single batch.
    for batch_inputs, batch_labels in train_dataset:
        train_step(model, optimizer, batch_inputs, batch_labels)
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    for batch_inputs, batch_labels in test_dataset:
        test_step(model, batch_inputs, batch_labels)
    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))

    # Reset metrics every epoch so each epoch's summaries cover only that epoch.
    # NOTE(review): newer Keras releases name this method reset_state();
    # confirm against the installed version.
    train_loss.reset_states()
    test_loss.reset_states()
    train_accuracy.reset_states()
    test_accuracy.reset_states()
I launch TensorBoard from the terminal with the following command:
tensorboard --logdir=.....
The code above produces summaries for the losses and metrics. My question is:
- How can I produce the graph of this process?
I've tried to use the commands recommended by TensorFlow, tf.summary.trace_on() and tf.summary.trace_export(), but I haven't managed to plot the graph. Maybe I am using them incorrectly. I would really appreciate any suggestions on how to do this.
回答1:
As answered here, I'm sure there's a better way, but a simple workaround is to just use the existing tensorboard callback logic:
# Reuse the Keras TensorBoard callback only for its graph-writing logic.
tb_callback = tf.keras.callbacks.TensorBoard(LOG_DIR)
# Writes the graph to tensorboard summaries using an internal file writer.
tb_callback.set_model(model)
来源:https://stackoverflow.com/questions/60639731/tensorboard-for-custom-training-loop-in-tensorflow-2