Question
I'm new to TensorFlow, and I'm sorry because this seems to be a very basic question, but unfortunately I can't find anything on Google; maybe I'm using the wrong keywords.
I have some expressions derived from placeholders (as far as I understand the logic of TensorFlow), and a few variables that need to be evaluated without recomputing the "placeholdered" expressions. Below is my quite ugly code (a manually constructed 3-layer neural network) where the evaluation happens in a loop.
The problem is that once I have computed the derived expressions (the ys and deltas), I want to evaluate all the weight updates in a single run, without incorrectly recomputing the ys and deltas, which I think is what happens at the moment. There are probably other errors in this code that prevent it from working correctly (a 1-layer version written in the same way works and gives its expected 92% accuracy), but it's hard to track them down until at least the computation stages are no longer messed up.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
#launch tensorflow session
import tensorflow as tf
sess = tf.InteractiveSession(config=tf.ConfigProto(
    intra_op_parallelism_threads=4))
def nonlin(x, deriv=False):  # I want my custom activation function
    if deriv:
        return tf.nn.sigmoid(x) * (1 - tf.nn.sigmoid(x))
    return tf.nn.sigmoid(x)
#placeholders
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
# Variables ----------------
#weights and biases
W1 = tf.Variable(tf.zeros([784,10])) # 784x10 matrix (because we have 784 input features and 10 outputs)
b1 = tf.Variable(tf.zeros([10]))
W2 = tf.Variable(tf.zeros([10,10]))
b2 = tf.Variable(tf.zeros([10]))
W3 = tf.Variable(tf.zeros([10,10]))
b3 = tf.Variable(tf.zeros([10]))
# ---------------------
sess.run(tf.global_variables_initializer())
# derived expressions -------------------------
# Forward pass
y1 = nonlin(tf.matmul(x,W1) + b1)
y2 = nonlin(tf.matmul(y1,W2) + b2)
y3 = nonlin(tf.matmul(y2,W3) + b3)
error3 = y_ - y3 # quadratic cost derivative
# Backward pass
delta3 = tf.multiply(error3,nonlin(y3, deriv=True)) #assign delta
error2 = tf.matmul(delta3,W3, transpose_b=True)
delta2 = tf.multiply(error2,nonlin(y2, deriv=True))
error1 = tf.matmul(delta2,W2, transpose_b=True)
delta1 = tf.multiply(error1,nonlin(y1, deriv=True))
learning_rate = 0.1
# And my ugly update step which is not working:------------------------
w1_assign = tf.assign(W1, tf.add(W1, tf.multiply(learning_rate, tf.reduce_mean(tf.matmul(tf.expand_dims(x,-1), tf.expand_dims(delta1,-1), transpose_b=True), 0)) ))
b1_assign = tf.assign(b1, tf.add(b1, tf.multiply(learning_rate, tf.reduce_mean(delta1, 0)) ))
w2_assign = tf.assign(W2, tf.add(W2, tf.multiply(learning_rate, tf.reduce_mean(tf.matmul(tf.expand_dims(y1,-1), tf.expand_dims(delta2,-1), transpose_b=True), 0)) ))
b2_assign = tf.assign(b2, tf.add(b2, tf.multiply(learning_rate, tf.reduce_mean(delta2, 0)) ))
w3_assign = tf.assign(W3, tf.add(W3, tf.multiply(learning_rate, tf.reduce_mean(tf.matmul(tf.expand_dims(y2,-1), tf.expand_dims(delta3,-1), transpose_b=True), 0)) ))
b3_assign = tf.assign(b3, tf.add(b3, tf.multiply(learning_rate, tf.reduce_mean(delta3, 0)) ))
# accuracy evaluation ----------------------
correct_prediction = tf.equal(tf.argmax(y3,1), tf.argmax(y_,1)) #a list of booleans.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Main loop:----------------------
for epoch in range(1000):
    batch = mnist.train.next_batch(1000)
    # apply the updates across the batch
    sess.run(w1_assign, feed_dict={x: batch[0], y_: batch[1]})
    sess.run(b1_assign, feed_dict={x: batch[0], y_: batch[1]})
    sess.run(w2_assign, feed_dict={x: batch[0], y_: batch[1]})
    sess.run(b2_assign, feed_dict={x: batch[0], y_: batch[1]})
    sess.run(w3_assign, feed_dict={x: batch[0], y_: batch[1]})
    sess.run(b3_assign, feed_dict={x: batch[0], y_: batch[1]})
    # accuracy computation
    print(str(accuracy.eval(feed_dict={x: batch[0], y_: batch[1]})) + " / epoch: " + str(epoch))  # evaluate
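To make the recomputation concern concrete, here is a minimal standalone toy (not part of the network above; the names a, shared, eval_count, v and w are made up, and it is meant to be run as a separate snippet). A counter attached through a control dependency shows how often the shared expression is actually executed: once per sess.run when the assigns are run separately, and only once when both assigns are fetched in a single run, which is the behaviour I would like for the ys and deltas above:

import tensorflow as tf

a = tf.placeholder(tf.float32)
eval_count = tf.Variable(0)                       # counts how often `shared` is evaluated
with tf.control_dependencies([tf.assign_add(eval_count, 1)]):
    shared = tf.identity(a * 2.0)                 # stands in for the shared ys/deltas

v = tf.Variable(0.0)
w = tf.Variable(0.0)
assign_v = tf.assign(v, shared + 1.0)
assign_w = tf.assign(w, shared + 2.0)

with tf.Session() as toy_sess:
    toy_sess.run(tf.global_variables_initializer())
    toy_sess.run(assign_v, feed_dict={a: 3.0})    # two separate runs
    toy_sess.run(assign_w, feed_dict={a: 3.0})
    print(toy_sess.run(eval_count))               # 2 -> `shared` was computed twice
    toy_sess.run([assign_v, assign_w], feed_dict={a: 3.0})
    print(toy_sess.run(eval_count))               # 3 -> `shared` was computed only once more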
UPDATE:
Based on this answer, it looks like if I pass the ops to sess.run in a list, all the intermediate tensors are evaluated only once, but the order in which the fetched ops run is unknown. I then tried my network with the following modification, which passes the ops in a list and adds extra variables to hold the new weights so they don't interfere with the original ones (sorry for the lengthy code, but I tried to make it immediately runnable for you):
def nonlin(x, deriv=False):
    if deriv:
        return tf.nn.sigmoid(x) * (1 - tf.nn.sigmoid(x))
    return tf.nn.sigmoid(x)
#We start building the computation graph by creating nodes for the input images and target output classes.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
#weights and biases
W1 = tf.Variable(tf.random_uniform([784,400])) # 784x400 matrix (784 input features, 400 hidden units)
b1 = tf.Variable(tf.random_uniform([400]))
W2 = tf.Variable(tf.random_uniform([400,30])) # 400x30 matrix (second hidden layer)
b2 = tf.Variable(tf.random_uniform([30]))
W3 = tf.Variable(tf.random_uniform([30,10])) # 30x10 matrix (because we have 10 outputs)
b3 = tf.Variable(tf.random_uniform([10]))
# temporary containers to avoid messing up computations
W1tmp = tf.Variable(tf.zeros([784,400])) # 784x400 matrix (because we have 784 input features and 400 hidden units)
b1tmp = tf.Variable(tf.zeros([400]))
W2tmp = tf.Variable(tf.zeros([400,30])) # 400x30 matrix as second layer
b2tmp = tf.Variable(tf.zeros([30]))
W3tmp = tf.Variable(tf.zeros([30,10])) # 30x10 matrix (because we have 10 outputs)
b3tmp = tf.Variable(tf.zeros([10]))
#Before Variables can be used within a session, they must be initialized using that session.
sess.run(tf.global_variables_initializer())
# multiplication across batch
# The tf.batch_matmul() op was removed in 3a88ec0. You can now use tf.matmul() to perform batch matrix multiplications (i.e. for tensors with rank > 2).
# Forward pass
y1 = nonlin(tf.matmul(x,W1) + b1)
y2 = nonlin(tf.matmul(y1,W2) + b2)
y3 = nonlin(tf.matmul(y2,W3) + b3)
error3 = y_ - y3 # quadratic cost derivative
# Backward pass
# error and y have same dimensions. It's only W that is unique
delta3 = tf.multiply(error3,nonlin(y3, deriv=True)) #assign delta
error2 = tf.matmul(delta3,W3, transpose_b=True)
delta2 = tf.multiply(error2,nonlin(y2, deriv=True))
error1 = tf.matmul(delta2,W2, transpose_b=True)
delta1 = tf.multiply(error1,nonlin(y1, deriv=True))
learning_rate = tf.constant(3.0)
# we first assign the deepest level to avoid extra evaluations
#with tf.control_dependencies([y1,y2,y3,delta1,delta2,delta3]):
w1_assign = tf.assign(W1tmp, tf.add(W1, tf.multiply(learning_rate, tf.reduce_mean(tf.matmul(tf.expand_dims(x,-1), tf.expand_dims(delta1,-1), transpose_b=True), 0)) ))
b1_assign = tf.assign(b1tmp, tf.add(b1, tf.multiply(learning_rate, tf.reduce_mean(delta1, 0)) ))
w2_assign = tf.assign(W2tmp, tf.add(W2, tf.multiply(learning_rate, tf.reduce_mean(tf.matmul(tf.expand_dims(y1,-1), tf.expand_dims(delta2,-1), transpose_b=True), 0)) ))
b2_assign = tf.assign(b2tmp, tf.add(b2, tf.multiply(learning_rate, tf.reduce_mean(delta2, 0)) ))
w3_assign = tf.assign(W3tmp, tf.add(W3, tf.multiply(learning_rate, tf.reduce_mean(tf.matmul(tf.expand_dims(y2,-1), tf.expand_dims(delta3,-1), transpose_b=True), 0)) ))
b3_assign = tf.assign(b3tmp, tf.add(b3, tf.multiply(learning_rate, tf.reduce_mean(delta3, 0)) ))
w1_ok = tf.assign(W1,W1tmp)
w2_ok = tf.assign(W2,W2tmp)
w3_ok = tf.assign(W3,W3tmp)
b1_ok = tf.assign(b1,b1tmp)
b2_ok = tf.assign(b2,b2tmp)
b3_ok = tf.assign(b3,b3tmp)
#accuracy evaluation
correct_prediction = tf.equal(tf.argmax(y3,1), tf.argmax(y_,1)) #a list of booleans.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# we can use only single batch, just to check that everything works
#batch = mnist.train.next_batch(1000)
for epoch in range(10000):
    batch = mnist.train.next_batch(1000)
    #train_step.run(feed_dict={x: batch[0], y_: batch[1]})
    # When you call sess.run([x, y, z]) once, TensorFlow executes each op that those
    # tensors depend on one time only (unless there's a tf.while_loop() in your graph).
    # If a tensor appears twice in the list (like mul in your example), TensorFlow will
    # execute it once and return two copies of the result. To run the assignment more
    # than once, you must either call sess.run() multiple times, or use tf.while_loop()
    # to put a loop in your graph.
    # write the new variable values to the temporary containers
    sess.run([w1_assign, w2_assign, w3_assign, b1_assign, b2_assign, b3_assign], feed_dict={x: batch[0], y_: batch[1]})
    # copy the container contents into the real variables in a separate run
    sess.run([w1_ok, w2_ok, w3_ok, b1_ok, b2_ok, b3_ok])  # , feed_dict={x: batch[0], y_: batch[1]})
    # accuracy computation
    print(str(accuracy.eval(feed_dict={x: batch[0], y_: batch[1]})) + " / epoch: " + str(epoch))  # evaluate
So the question is whether this is at least correct TensorFlow code. I found a network structure and learning rate that give some results, but the results still seem very poor (around 75%).
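For reference, this is how I understand the commented-out tf.control_dependencies line could be used to chain the two phases (write the temporary containers, then copy them back) into a single sess.run. It is only a standalone sketch with made-up names (x_in, v, v_tmp); I have not verified it on the full network:

import tensorflow as tf

x_in = tf.placeholder(tf.float32)
v = tf.Variable(0.0)        # "real" variable, analogous to W1
v_tmp = tf.Variable(0.0)    # staging variable, analogous to W1tmp

write_tmp = tf.assign(v_tmp, x_in * 2.0)   # phase 1: compute the new value into the staging variable
with tf.control_dependencies([write_tmp]):
    # phase 2: copy back; the read op is created inside the block,
    # so it reads v_tmp only after write_tmp has finished
    copy_back = tf.assign(v, v_tmp.read_value())

with tf.Session() as toy_sess:
    toy_sess.run(tf.global_variables_initializer())
    toy_sess.run(copy_back, feed_dict={x_in: 3.0})   # a single run executes both phases in order
    print(toy_sess.run(v))                            # 6.0

read_value() is used instead of passing v_tmp directly because, as far as I understand, only a read created inside the control_dependencies block is guaranteed to be ordered after write_tmp.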
Source: https://stackoverflow.com/questions/44737322/tensorflow-assign-multiple-variable-values-in-single-run-without-recomputation