Run parallel op with different inputs and same placeholder

问题

I need to calculate more than one accuracy at the same time, concurrently.

# Element-wise check: does the argmax of y along axis 1 equal the argmax of y_
# along axis 1? (y is presumably the model output and y_ the labels.)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
# Accuracy is the mean of those booleans after casting them to float32.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Evaluate the accuracy op, feeding the MNIST test images and labels, and print it.
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

This piece of code is the same as the MNIST example in the TensorFlow tutorial, but instead of having:

# Tutorial version: W and b are graph variables initialised to zeros,
# with shapes [784, 10] and [10].
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

I have two placeholders, because I have already calculated and stored their values.

# Asker's version: W and b are float32 placeholders with the same shapes
# ([784, 10] and [10]); their values have to be fed in at run time.
W = tf.placeholder(tf.float32, [784, 10])
b = tf.placeholder(tf.float32, [10])

I want to fill the network with the values I already have and then calculate the accuracy, and this has to happen for each network I have loaded.

So if I load 20 networks, I want to calculate the accuracy of each one in parallel. Is there a way to use a single session run to execute the same operation with different inputs?

回答1:

You have multiple options to make things happen in parallel:

Parallelize using multiple python threads / subprocesses. (See Python's "multiprocessing" library.)
Batch up the operations into single larger operations. (e.g. Similar to the image operations that operate on a batch of images simultaneously https://www.tensorflow.org/api_docs/python/image/resizing#resize_bilinear.)
Make a single graph that has the 20 network accuracy calculations.

I think the last one is the easiest, so I've included a bit of sample code below to get you started:

import tensorflow as tf

def construct_accuracy_calculation(i):
  """Create the placeholders and the accuracy tensor for network number `i`.

  Two float32 placeholders are created, named "<i>_W" (shape [784, 10]) and
  "<i>_b" (shape [10]). Returns the tuple `(W, b, accuracy)` so the caller
  can feed the placeholders and fetch the accuracy.
  """
  W = tf.placeholder(tf.float32, [784, 10], name=("%d_W" % i))
  b = tf.placeholder(tf.float32, [10], name=("%d_b" % i))
  # ...
  # (Elided in the answer: the code that defines `y` and `y_`, presumably
  # built from the W and b placeholders above.)
  predicted_class = tf.argmax(y, 1)
  expected_class = tf.argmax(y_, 1)
  is_correct = tf.equal(predicted_class, expected_class)
  # Mean of the per-row matches, cast to float32, is the accuracy.
  accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
  return (W, b, accuracy)


def main():
  """Compute the accuracy of every stored network in one `sess.run` call.

  For each index below NUM_NETWORKS, the stored `(W, b)` values are loaded
  with `load_network`, a fresh pair of placeholders plus an accuracy tensor
  is built with `construct_accuracy_calculation`, and the loaded values are
  registered in a single feed dict keyed by those placeholders. All accuracy
  tensors are then fetched together.

  Returns:
    The list produced by `sess.run`, one accuracy value per network, in
    network order.
  """
  accuracy_computations = []
  feed_dict = {}
  # `range` rather than the Python 2-only `xrange`, so the loop also runs on
  # Python 3 (where `xrange` raises NameError).
  for i in range(NUM_NETWORKS):
    (W, b) = load_network(i)
    (W_op, b_op, accuracy) = construct_accuracy_calculation(i)
    feed_dict[W_op] = W
    feed_dict[b_op] = b
    accuracy_computations.append(accuracy)

  # sess = ...
  # (Session creation is elided in the answer; `sess` must exist before here.)
  accuracy_values = sess.run(accuracy_computations, feed_dict=feed_dict)
  # Hand the results back instead of discarding them.
  return accuracy_values

if __name__ == "__main__":
  main()

回答2:

One approach to parallelizing TF computations is to execute run calls in parallel using threads (TF is incompatible with multiprocessing). It's a bit more complicated than other approaches because you have to handle parallelism yourself on the Python side.

Here's an example that runs the same matmul op in the same session from different Python threads, each with a different fed input; it runs about 4x faster with 4 threads than with 1 thread.

import os, sys, queue, threading, time
import tensorflow as tf
import numpy as np

def p(s):
    """Print `s` followed by a newline and flush, for use from several threads.

    The newline is appended to the text itself and `print` is told to add
    nothing, so text and newline are handed over as one string; otherwise
    results get intermixed in a notebook.
    """
    line = s+"\n"
    print(line, end="", flush=True)


# --- experiment parameters ---
num_threads = 4
data_size = 32  # number of data points to enqueue
work_per_thread = data_size/num_threads  # per-runner quota (true division)
timeout = 10  # grace period (seconds) for queue operations in the main thread

# Bounded queues, each sized to hold every data point at once.
input_queue = queue.Queue(data_size)
output_queue = queue.Queue(data_size)
dtype = np.float32

# A matrix-vector matmul is used as the workload since it's compute
# intensive and uses a single core (see issue #6752).
n = 16*1024
with tf.device("/cpu:0"):
    x = tf.placeholder(dtype)
    matrix = tf.Variable(tf.ones((n, n)))
    vector = tf.Variable(tf.ones((n, 1)))
    # Top-left element of the product plus the fed value.
    y = tf.matmul(matrix, vector)[0, 0] + x

# Turn off graph-rewriting optimizations (optimizer level L0), building the
# session configuration one layer at a time.
optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
graph_options = tf.GraphOptions(optimizer_options=optimizer_options)
session_config = tf.ConfigProto(graph_options=graph_options)
sess = tf.Session(config=session_config)
sess.run(tf.global_variables_initializer())

# Shared stop flag: runners poll it; the main thread sets it to True at the end.
done = False
def runner(runner_id):
    """Worker loop executed by each thread.

    Repeatedly takes a value from `input_queue`, evaluates `y` in the shared
    session with that value fed for `x`, and puts the result on
    `output_queue`. The runner returns silently once it has processed
    `work_per_thread` items; if the global `done` flag is seen first, it
    prints a "Stopping runner" message and returns.
    """
    p("Starting runner %s" % (runner_id,))
    completed = 0
    while True:
        if done:
            # Stopped from outside before the quota was reached.
            p("Stopping runner "+str(runner_id))
            return
        try:
            x_val = input_queue.get(timeout=1)
        except queue.Empty:
            # Nothing available within a second: go back and re-check `done`.
            continue
        p("Start computing %d on %d" %(x_val, runner_id))
        output_queue.put(sess.run(y, {x: x_val}))
        completed += 1
        if completed >= work_per_thread:
            # Quota met: leave without the "Stopping" message.
            return

print("Creating threads.")
# One (not yet started) thread per runner id.
threads = [
    threading.Thread(target=runner, args=(runner_id,))
    for runner_id in range(num_threads)
]

# Enqueue every input before any runner is started.
for item in range(data_size):
    input_queue.put(item, timeout=timeout)

# start threads
p("Launching runners.")
start_time = time.time()
for worker in threads:
    worker.start()

p("Reading results.")
for _ in range(data_size):
    try:
        result = output_queue.get(timeout=timeout)
    except queue.Empty:
        # No result arrived within the grace period: give up early.
        print("No results after %d, terminating computation."%(timeout,))
        break
    p("Main thread: obtained %.2f" % (result,))
else:
    # Only reached when all results were read without a timeout.
    p("Computed successfully.")
# Tell any runner that is still polling to stop.
done = True

p("Waiting for threads to finish.")
for worker in threads:
    worker.join()
print("Done in %.2f seconds" %(time.time() - start_time))

来源：https://stackoverflow.com/questions/41595966/run-parallel-op-with-different-inputs-and-same-placeholder

标签

concurrency

tensorflow