Compute hessian with respect to several variables in tensorflow

前端 未结 1 1391
傲寒
傲寒 2020-12-18 12:51

Computing Hessian in tensorflow is quite easy:

x = tf.Variable([1., 1., 1.], dtype=tf.float32, name="x")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + x[2]) ** 2
h         


        
1条回答
  •  隐瞒了意图╮
    2020-12-18 13:23

    EDIT: Here is a more fleshed-out solution; it is essentially the same, but works for an arbitrary number of variables. I have also added the option of building the Jacobian with either a Python loop or a TensorFlow loop. Note that the code assumes all variables are 1D tensors.

    from itertools import combinations, count
    import tensorflow as tf
    
    def jacobian(y, x, tf_loop=False):
        """Build the Jacobian matrix of ``y`` with respect to ``x``.

        Row ``i`` of the result is the gradient of ``y[i]`` with respect to
        ``x``; the rows are stacked along a new leading axis.

        Args:
            y: Tensor indexed along its first axis (the surrounding code
                assumes 1-D tensors).
            x: Tensor to differentiate with respect to.
            tf_loop: If true, build the rows with ``tf.while_loop`` instead
                of a Python loop. A TF loop is always used when the number
                of elements of ``y`` is not statically known.

        Returns:
            A tensor holding one gradient (shaped like ``x``) per element
            of ``y``.
        """
        def gradient_row(i):
            # tf.gradients yields None when y[i] does not depend on x. That
            # derivative is identically zero, and None can neither be
            # stacked nor written to a TensorArray, so use explicit zeros.
            grad = tf.gradients(y[i], x)[0]
            return tf.zeros_like(x) if grad is None else grad

        num_rows = y.shape.num_elements()
        if tf_loop or num_rows is None:
            # Graph-level loop: required when the size of y is only known
            # at run time.
            y_size = tf.size(y)
            rows = tf.TensorArray(dtype=y.dtype, size=y_size, element_shape=x.shape)
            _, rows = tf.while_loop(
                lambda i, rows: i < y_size,
                lambda i, rows: [i + 1, rows.write(i, gradient_row(i))],
                [tf.constant(0, dtype=tf.int32), rows])
            return rows.stack()
        # Python-level loop: one gradient op per element of y.
        return tf.stack([gradient_row(i) for i in range(num_rows)], axis=0)
    
    def hessian_multivar(ys, xs, tf_loop=False):
        """Assemble the Hessian of ``ys`` with respect to all tensors in ``xs``.

        The result is built block by block: the diagonal blocks come from
        ``tf.hessians`` and every off-diagonal block is the Jacobian of one
        variable's first-order gradient with respect to another variable.

        Args:
            ys: Tensor to differentiate.
            xs: Sequence of tensors to differentiate with respect to.
            tf_loop: Forwarded to ``jacobian`` to select a TF-level loop.

        Returns:
            The blocks concatenated into a single matrix, with rows and
            columns ordered as in ``xs``.
        """
        n_vars = len(xs)
        # Square grid of blocks, filled in below.
        blocks = [[None for _ in range(n_vars)] for _ in range(n_vars)]
        # Diagonal blocks: Hessian with respect to each variable on its own.
        for idx, diagonal_block in enumerate(tf.hessians(ys, xs)):
            blocks[idx][idx] = diagonal_block
        # First-order gradients, one per variable.
        grads = tf.gradients(ys, xs)
        # Off-diagonal blocks: visit each unordered pair of variables once
        # and fill in both mirrored positions.
        indexed = enumerate(zip(xs, grads))
        for (row, (x_row, g_row)), (col, (x_col, g_col)) in combinations(indexed, 2):
            blocks[row][col] = jacobian(g_row, x_col, tf_loop=tf_loop)
            blocks[col][row] = jacobian(g_col, x_row, tf_loop=tf_loop)
        # Join each row of blocks side by side, then stack the rows.
        block_rows = [tf.concat(block_row, axis=1) for block_row in blocks]
        return tf.concat(block_rows, axis=0)
    
    # Demo: evaluate the combined Hessian for three variables
    with tf.Graph().as_default():
        x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
        y = tf.Variable([1.], dtype=tf.float32, name="y")
        z = tf.Variable([1., 1.], dtype=tf.float32, name="z")
        # Function under test: the square of a mixed expression in x, y and z
        inner = x[0] + x[1] ** 2 + x[0] * x[1] + y + x * y * z
        f = inner ** 2
        hessian = hessian_multivar(f, [x, y, z])
        init_op = tf.global_variables_initializer()
        with tf.Session() as session:
            # Variables must be initialized before the Hessian is evaluated
            session.run(init_op)
            hessian_value = session.run(hessian)
        print(hessian_value)
    

    Output:

    [[26. 54. 30. 16.  4.]
     [54. 90. 38.  6. 18.]
     [30. 38. 16. 14. 14.]
     [16.  6. 14.  2.  0.]
     [ 4. 18. 14.  0.  2.]]
    

    I'm not sure if there can be a "good" way of doing that with the current API. Obviously, you can compute the Hessian matrix elements by yourself... It is not very elegant and probably not the fastest solution either, but here is how it might be done in your example:

    import tensorflow as tf
    
    x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
    y = tf.Variable([1.], dtype=tf.float32, name="y")
    f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
    # Diagonal pieces of the Hessian: one for X, one for Y
    hx, hy = tf.hessians(f, [x, y])
    # First-order derivatives with respect to X and Y
    gx, gy = tf.gradients(f, [x, y])
    # The remaining (cross) pieces are Jacobians of the first-order
    # derivatives. TensorFlow does not provide these directly
    # (https://github.com/tensorflow/tensorflow/issues/675),
    # so they are assembled manually, one gradient per element of gx.
    hxy = tf.concat(
        [tf.gradients(gx[i], y)[0] for i in range(x.shape.num_elements())],
        axis=0)
    # Y has a single element, so one gradient call gives the whole piece
    hyx = tf.gradients(gy, x)[0]
    # Stitch the pieces together: [hx | hxy] on top, [hyx | hy] below
    top_rows = tf.concat([hx, tf.expand_dims(hxy, 1)], axis=1)
    bottom_rows = tf.concat([tf.expand_dims(hyx, 0), hy], axis=1)
    hessian = tf.concat([top_rows, bottom_rows], axis=0)
    # Evaluate and print the result
    init_op = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init_op)
        hessian_value = session.run(hessian)
    print(hessian_value)
    

    Output:

    [[ 8. 20.  4.]
     [20. 34.  6.]
     [ 4.  6.  2.]]
    

    0 讨论(0)
提交回复
热议问题