I am trying to implement a network in TensorFlow that learns to predict a homography (Wiki). My network will output a 4-dimensional vector, which will be used to translate 2 images.
Since you are only interested in optimizing translations, I came up with this function, which performs a translation and provides gradients with respect to the translation vectors, using @tf.custom_gradient:
import tensorflow as tf

@tf.custom_gradient
def my_img_translate(imgs, translates):
    # Interpolation model has to be fixed due to limitations of tf.custom_gradient
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(imgs, translates, interpolation=interpolation)
    def grad(img_translated_grads):
        translates_x = translates[:, 0]
        translates_y = translates[:, 1]
        translates_zero = tf.zeros_like(translates_x)
        # X gradients
        imgs_x_grad = (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2
        imgs_x_grad = tf.concat([(imgs[:, :, :1] - imgs[:, :, 1:2]),
                                 imgs_x_grad,
                                 (imgs[:, :, -2:-1] - imgs[:, :, -1:])], axis=2)
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, tf.stack([translates_x, translates_zero], axis=1),
            interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))
        # Y gradients
        imgs_y_grad = (imgs[:, :-2] - imgs[:, 2:]) / 2
        imgs_y_grad = tf.concat([(imgs[:, :1] - imgs[:, 1:2]),
                                 imgs_y_grad,
                                 (imgs[:, -2:-1] - imgs[:, -1:])], axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, tf.stack([translates_zero, translates_y], axis=1),
            interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))
        # Complete gradient
        translates_grad = tf.stack([translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad
    return imgs_translated, grad
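For reference, this is (roughly) the chain rule that the grad function above implements. Here L denotes the loss, I the input images, t = (t_x, t_y) the translation offsets, and y = translate(I, t) the translated images (these symbols are just names for the tensors in the code); the derivative of y with respect to t_x is approximated by a central finite difference of I, shifted by the current x offset:

$$
\frac{\partial L}{\partial t_x}
= \sum_{b,h,w,c} \frac{\partial L}{\partial y_{bhwc}}\,\frac{\partial y_{bhwc}}{\partial t_x},
\qquad
\frac{\partial y}{\partial t_x} \approx \mathrm{translate}\!\left(\tfrac{1}{2}\left(I_{x-1} - I_{x+1}\right),\,(t_x, 0)\right),
$$

and analogously for t_y, with the difference taken along the rows and the shift (0, t_y).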
Note that in this case I am not returning any gradient for the images, since these will not be optimized (but if you wanted that, you could in principle use the built-in gradient of the translation operation).
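If you did want a gradient for the images as well, one simple alternative to the built-in gradient is to use the fact that, for NEAREST interpolation with zero fill, the adjoint of translating by t is (up to rounding effects at the borders) translating by -t, so you can just shift the upstream gradient back. A minimal, untested sketch of the change:

        # Inside grad(img_translated_grads), instead of `return None, translates_grad`:
        imgs_grad = tf.contrib.image.translate(
            img_translated_grads, -translates, interpolation=interpolation)
        return imgs_grad, translates_grad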
I tested this for a simple use case of translating an image so its center had the highest value:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

@tf.custom_gradient
def my_img_translate(imgs, translates):
    # Interpolation model has to be fixed due to limitations of tf.custom_gradient
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(imgs, translates, interpolation=interpolation)
    def grad(img_translated_grads):
        translates_x = translates[:, 0]
        translates_y = translates[:, 1]
        translates_zero = tf.zeros_like(translates_x)
        # X gradients
        imgs_x_grad = (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2
        imgs_x_grad = tf.concat([(imgs[:, :, :1] - imgs[:, :, 1:2]),
                                 imgs_x_grad,
                                 (imgs[:, :, -2:-1] - imgs[:, :, -1:])], axis=2)
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, tf.stack([translates_x, translates_zero], axis=1),
            interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))
        # Y gradients
        imgs_y_grad = (imgs[:, :-2] - imgs[:, 2:]) / 2
        imgs_y_grad = tf.concat([(imgs[:, :1] - imgs[:, 1:2]),
                                 imgs_y_grad,
                                 (imgs[:, -2:-1] - imgs[:, -1:])], axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, tf.stack([translates_zero, translates_y], axis=1),
            interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))
        # Complete gradient
        translates_grad = tf.stack([translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad
    return imgs_translated, grad

# Test operations
imgs = tf.placeholder(tf.float32, [None, None, None, None])
translates = tf.Variable([0, 0], dtype=tf.float32)
# Use the same translation for every image in the batch
translates_tiled = tf.tile(translates[tf.newaxis], (tf.shape(imgs)[0], 1))
imgs_translated = my_img_translate(imgs, translates_tiled)
imgs_midpoint = imgs_translated[:, tf.shape(imgs_translated)[1] // 2, tf.shape(imgs_translated)[2] // 2]
# Minimizing this maximizes the squared value at the center of the translated images
loss = -tf.reduce_sum(tf.square(imgs_midpoint))
train_op = tf.train.GradientDescentOptimizer(10).minimize(loss)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    # Make test image
    xs, ys = np.meshgrid(np.linspace(-2, 1, 100), np.linspace(-1, 2, 75))
    test_img = (1 / (1 + np.square(xs))) * (1 / (1 + np.square(ys)))
    test_img /= np.max(test_img)
    test_img_batch = test_img[np.newaxis, :, :, np.newaxis]
    # Train
    sess.run(init_op)
    for _ in range(100):
        sess.run(train_op, feed_dict={imgs: test_img_batch})
    # Show result
    test_img_result = sess.run(imgs_translated, feed_dict={imgs: test_img_batch})[0, :, :, 0]
    plt.figure()
    plt.subplot(121)
    plt.imshow(test_img)
    plt.subplot(122)
    plt.imshow(test_img_result)
    plt.show()
Result (left: original test image, right: translated result):
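If you also want to see the offsets that were actually learned, you can print the variable before the session closes (i.e. inside the with block, after the training loop):

    # Inside the `with tf.Session() as sess:` block, after training:
    print(sess.run(translates))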