Computing the Hessian in TensorFlow is quite easy:
x = tf.Variable([1., 1., 1.], dtype=tf.float32, name="x")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + x[2]) ** 2
hess = tf.hessians(f, x)
EDIT: Here is a more fleshed out solution, essentially the same but for an arbitrary number of variables. Also I have added the option of using Python or TensorFlow loops for the Jacobian. Note the code assumes all variables are 1D tensors.
from itertools import combinations, count
import tensorflow as tf
def jacobian(y, x, tf_loop=False):
    """Build the Jacobian matrix d(y)/d(x) by stacking one gradient per element of y.

    Both y and x are assumed to be 1D tensors (see the note above the code).
    When y's size is statically known and tf_loop is False, the rows are
    created with a Python-level loop unrolled into the graph; otherwise a
    tf.while_loop accumulates them into a TensorArray at run time.
    """
    n_static = y.shape.num_elements()
    # Static size known and the Python-level loop was not overridden:
    # emit one gradient op per row directly.
    if n_static is not None and not tf_loop:
        grad_rows = [tf.gradients(y[k], x)[0] for k in range(n_static)]
        return tf.stack(grad_rows, axis=0)
    # Dynamic (or explicitly requested) path: gather rows inside a TF loop.
    n_dynamic = tf.size(y)
    acc = tf.TensorArray(dtype=y.dtype, size=n_dynamic, element_shape=x.shape)
    start = tf.constant(0, dtype=tf.int32)
    _, acc = tf.while_loop(
        lambda k, rows: k < n_dynamic,
        lambda k, rows: [k + 1, rows.write(k, tf.gradients(y[k], x)[0])],
        [start, acc])
    return acc.stack()
def hessian_multivar(ys, xs, tf_loop=False):
    """Assemble the full Hessian of ys with respect to a list of 1D variables xs.

    The result is one square matrix whose (i, j) sub-block holds the mixed
    second derivatives of ys with respect to xs[i] and xs[j].
    """
    n = len(xs)
    # Grid of Hessian sub-blocks, filled in below.
    blocks = [[None] * n for _ in range(n)]
    # Diagonal sub-blocks: second derivatives w.r.t. a single variable.
    for k, diag in enumerate(tf.hessians(ys, xs)):
        blocks[k][k] = diag
    # First-order derivatives, needed to build the off-diagonal sub-blocks.
    grads = tf.gradients(ys, xs)
    # Off-diagonal sub-blocks: Jacobians of one variable's gradient
    # with respect to another variable.
    for (a, (xa, ga)), (b, (xb, gb)) in combinations(enumerate(zip(xs, grads)), 2):
        # Mixed derivatives in both orders.
        blocks[a][b] = jacobian(ga, xb, tf_loop=tf_loop)
        blocks[b][a] = jacobian(gb, xa, tf_loop=tf_loop)
    # Stitch each block row horizontally, then stack the rows vertically.
    return tf.concat([tf.concat(row, axis=1) for row in blocks], axis=0)
# Demo: build and evaluate the full Hessian for three 1D variables
with tf.Graph().as_default():
    x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
    y = tf.Variable([1.], dtype=tf.float32, name="y")
    z = tf.Variable([1., 1.], dtype=tf.float32, name="z")
    # Example expression mixing all three variables
    f = (x[0] + x[1] ** 2 + x[0] * x[1] + y + x * y * z) ** 2
    hessian = hessian_multivar(f, [x, y, z])
    initializer = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(initializer)
        print(session.run(hessian))
Output:
[[26. 54. 30. 16. 4.]
[54. 90. 38. 6. 18.]
[30. 38. 16. 14. 14.]
[16. 6. 14. 2. 0.]
[ 4. 18. 14. 0. 2.]]
I'm not sure if there can be a "good" way of doing that with the current API. Obviously, you can compute the Hessian matrix elements by yourself... It is not very elegant and probably not the fastest solution either, but here is how it might be done in your example:
import tensorflow as tf
x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
y = tf.Variable([1.], dtype=tf.float32, name="y")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
# Diagonal blocks of the Hessian, one per variable
hx, hy = tf.hessians(f, [x, y])
# First-order derivatives, used to build the mixed (off-diagonal) blocks
gx, gy = tf.gradients(f, [x, y])
# The remaining Hessian entries are Jacobian matrices of the first-order
# derivatives, but TensorFlow does not implement a Jacobian op
# (https://github.com/tensorflow/tensorflow/issues/675)
# so they have to be assembled "by hand"
rows_xy = []
for i in range(x.shape.num_elements()):
    rows_xy.append(tf.gradients(gx[i], y)[0])
hxy = tf.concat(rows_xy, axis=0)
# Y holds a single element, so its mixed block is one gradient call
hyx, = tf.gradients(gy, x)
# Assemble the two block rows, then stack them into the full matrix
top = tf.concat([hx, tf.expand_dims(hxy, 1)], axis=1)
bottom = tf.concat([tf.expand_dims(hyx, 0), hy], axis=1)
hessian = tf.concat([top, bottom], axis=0)
# Evaluate the result
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    print(sess.run(hessian))
Output:
[[ 8. 20. 4.]
[20. 34. 6.]
[ 4. 6. 2.]]