Below is an annotated walkthrough of the main Keras/TensorFlow code for Deformable Convolutional Networks (deformable convolution).
Original code: https://github.com/kastnerkyle/deform-conv
layers.py
from __future__ import absolute_import, division
import tensorflow as tf
from keras.layers import Conv2D
from keras.initializers import RandomNormal
from deform_conv.deform_conv import tf_batch_map_offsets
class ConvOffset2D(Conv2D):  # subclass of the ordinary 2D convolution
    """ConvOffset2D"""

    def __init__(self, filters, init_normal_stddev=0.01, **kwargs):
        """Init"""
        self.filters = filters
        super(ConvOffset2D, self).__init__(
            self.filters * 2, (3, 3), padding='same', use_bias=False,  # twice the channels, since offsets are predicted for both the x and y coordinates
            # TODO gradients are near zero if init is zeros
            kernel_initializer='zeros',
            # kernel_initializer=RandomNormal(0, init_normal_stddev),
            **kwargs
        )
    def call(self, x):
        # TODO offsets probably have no nonlinearity?
        x_shape = x.get_shape()  # input tensor shape = (b, h, w, c)
        offsets = super(ConvOffset2D, self).call(x)  # convolve the input to 2c channels, shape = (b, h, w, 2c)
        offsets = self._to_bc_h_w_2(offsets, x_shape)  # reshape offsets to (b*c, h, w, 2); the two channels are the x and y offsets
        x = self._to_bc_h_w(x, x_shape)  # reshape the input to (b*c, h, w)
        x_offset = tf_batch_map_offsets(x, offsets)  # sample the pixel values at the new, offset coordinates
        x_offset = self._to_b_h_w_c(x_offset, x_shape)  # restore the (b, h, w, c) layout
        return x_offset
    def compute_output_shape(self, input_shape):
        return input_shape

    @staticmethod
    def _to_bc_h_w_2(x, x_shape):
        """(b, h, w, 2c) -> (b*c, h, w, 2)"""
        x = tf.transpose(x, [0, 3, 1, 2])  # swap dimensions: (b, 2c, h, w)
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2]), 2))  # (b*c, h, w, 2)
        return x

    @staticmethod
    def _to_bc_h_w(x, x_shape):
        """(b, h, w, c) -> (b*c, h, w)"""
        x = tf.transpose(x, [0, 3, 1, 2])  # swap dimensions: (b, c, h, w)
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2])))  # (b*c, h, w)
        return x

    @staticmethod
    def _to_b_h_w_c(x, x_shape):
        """(b*c, h, w) -> (b, h, w, c)"""
        x = tf.reshape(
            x, (-1, int(x_shape[3]), int(x_shape[1]), int(x_shape[2]))
        )
        x = tf.transpose(x, [0, 2, 3, 1])
        return x
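The transpose-before-reshape in these helpers is what keeps each channel's spatial map intact while folding channels into the batch dimension. A minimal, self-contained round-trip check (a sketch assuming TF 2.x eager execution; the helper logic is re-implemented inline rather than imported from the repo):

import numpy as np
import tensorflow as tf

b, h, w, c = 2, 4, 4, 3
x = tf.constant(np.random.rand(b, h, w, c), dtype=tf.float32)

# (b, h, w, c) -> (b*c, h, w): each channel becomes its own "batch" entry
x_bc = tf.reshape(tf.transpose(x, [0, 3, 1, 2]), (-1, h, w))
print(x_bc.shape)  # (6, 4, 4)

# (b*c, h, w) -> (b, h, w, c): exact inverse of the above
x_back = tf.transpose(tf.reshape(x_bc, (-1, c, h, w)), [0, 2, 3, 1])
print(np.allclose(x.numpy(), x_back.numpy()))  # True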
deform_conv.py
from __future__ import absolute_import, division
import numpy as np
from scipy.ndimage.interpolation import map_coordinates as sp_map_coordinates
import tensorflow as tf
def tf_flatten(a):
    """Flatten tensor"""
    return tf.reshape(a, [-1])

def tf_repeat(a, repeats, axis=0):
    """TensorFlow version of np.repeat for 1D"""
    # https://github.com/tensorflow/tensorflow/issues/8521
    assert len(a.get_shape()) == 1
    a = tf.expand_dims(a, -1)
    a = tf.tile(a, [1, repeats])
    a = tf_flatten(a)
    return a

def tf_repeat_2d(a, repeats):
    """TensorFlow version of np.repeat for 2D"""
    assert len(a.get_shape()) == 2  # input must be 2-D
    a = tf.expand_dims(a, 0)  # add a new dimension in front of dim 0
    a = tf.tile(a, [repeats, 1, 1])  # repeat `repeats` times along dim 0
    return a
def tf_map_coordinates(input, coords, order=1):
    """TensorFlow version of scipy.ndimage.map_coordinates

    Note that coords is transposed and only 2D is supported

    Parameters
    ----------
    input : tf.Tensor. shape = (s, s)
    coords : tf.Tensor. shape = (n_points, 2)
    """
    assert order == 1

    coords_lt = tf.cast(tf.floor(coords), 'int32')
    coords_rb = tf.cast(tf.ceil(coords), 'int32')
    coords_lb = tf.stack([coords_lt[:, 0], coords_rb[:, 1]], axis=1)
    coords_rt = tf.stack([coords_rb[:, 0], coords_lt[:, 1]], axis=1)

    vals_lt = tf.gather_nd(input, coords_lt)
    vals_rb = tf.gather_nd(input, coords_rb)
    vals_lb = tf.gather_nd(input, coords_lb)
    vals_rt = tf.gather_nd(input, coords_rt)

    coords_offset_lt = coords - tf.cast(coords_lt, 'float32')
    vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[:, 0]
    vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[:, 0]
    mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[:, 1]
    return mapped_vals
def sp_batch_map_coordinates(inputs, coords):
    """Reference implementation for batch_map_coordinates"""
    coords = coords.clip(0, inputs.shape[1] - 1)
    mapped_vals = np.array([
        sp_map_coordinates(input, coord.T, mode='nearest', order=1)
        for input, coord in zip(inputs, coords)
    ])
    return mapped_vals
def tf_batch_map_coordinates(input, coords, order=1):
    """Batch version of tf_map_coordinates

    Only supports 2D feature maps

    Parameters
    ----------
    input : tf.Tensor. shape = (b, s, s)
    coords : tf.Tensor. shape = (b, n_points, 2)
    """
    input_shape = tf.shape(input)
    batch_size = input_shape[0]
    input_size = input_shape[1]
    n_coords = tf.shape(coords)[1]

    # Clip the coordinates to the valid range [0, input_size - 1] so that
    # sampling never reads outside the feature map
    coords = tf.clip_by_value(coords, 0, tf.cast(input_size, 'float32') - 1)
    coords_lt = tf.cast(tf.floor(coords), 'int32')  # bilinear interpolation: top-left corner, floor both coordinates
    coords_rb = tf.cast(tf.ceil(coords), 'int32')  # bottom-right corner: ceil both coordinates
    coords_lb = tf.stack([coords_lt[..., 0], coords_rb[..., 1]], axis=-1)  # bottom-left corner has the smallest x and largest y, so stack the top-left x with the bottom-right y
    coords_rt = tf.stack([coords_rb[..., 0], coords_lt[..., 1]], axis=-1)  # likewise: largest x, smallest y

    idx = tf_repeat(tf.range(batch_size), n_coords)  # batch index for every sampled point

    def _get_vals_by_coords(input, coords):
        indices = tf.stack([
            idx, tf_flatten(coords[..., 0]), tf_flatten(coords[..., 1])
        ], axis=-1)  # build (batch, x, y) indices
        vals = tf.gather_nd(input, indices)  # gather the input values at those indices; vals is 1-D
        vals = tf.reshape(vals, (batch_size, n_coords))  # back to 2-D
        return vals

    vals_lt = _get_vals_by_coords(input, coords_lt)  # sample the pixel values at the four corners
    vals_rb = _get_vals_by_coords(input, coords_rb)
    vals_lb = _get_vals_by_coords(input, coords_lb)
    vals_rt = _get_vals_by_coords(input, coords_rt)

    coords_offset_lt = coords - tf.cast(coords_lt, 'float32')  # fractional part of each coordinate relative to the top-left corner
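    # Standard bilinear interpolation: writing dx = coords_offset_lt[..., 0]
    # and dy = coords_offset_lt[..., 1], the three lines below compute
    #   v = v_lt*(1-dx)*(1-dy) + v_rt*dx*(1-dy) + v_lb*(1-dx)*dy + v_rb*dx*dy
    # as two 1-D interpolations along the first axis, then one along the second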
    vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[..., 0]
    vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[..., 0]
    mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[..., 1]
    return mapped_vals  # the interpolated pixel values at all offset coordinates
def sp_batch_map_offsets(input, offsets):
    """Reference implementation for tf_batch_map_offsets"""
    batch_size = input.shape[0]
    input_size = input.shape[1]

    offsets = offsets.reshape(batch_size, -1, 2)
    grid = np.stack(np.mgrid[:input_size, :input_size], -1).reshape(-1, 2)
    grid = np.repeat([grid], batch_size, axis=0)
    coords = offsets + grid
    coords = coords.clip(0, input_size - 1)

    mapped_vals = sp_batch_map_coordinates(input, coords)
    return mapped_vals
def tf_batch_map_offsets(input, offsets, order=1):
    """Batch map offsets into input

    Parameters
    ----------
    input : tf.Tensor. shape = (b, s, s)
    offsets: tf.Tensor. shape = (b, s, s, 2)
    """
    input_shape = tf.shape(input)  # (b*c, h, w), with h == w
    batch_size = input_shape[0]  # b*c
    input_size = input_shape[1]  # h == w

    offsets = tf.reshape(offsets, (batch_size, -1, 2))  # (b*c, h*w, 2)
    grid = tf.meshgrid(
        tf.range(input_size), tf.range(input_size), indexing='ij'
    )
    # tf.meshgrid broadcasts its 1-D inputs: for two inputs (a, b), one output
    # repeats a along the columns and the other repeats b along the rows.
    # With indexing='xy' the outputs use Cartesian coordinates; with
    # indexing='ij' they use matrix coordinates (the two axes are swapped).
    # For input_size = 3, range(3) = [0, 1, 2], this yields:
    # [[0 0 0]
    #  [1 1 1]
    #  [2 2 2]]
    # [[0 1 2]
    #  [0 1 2]
    #  [0 1 2]]
    grid = tf.stack(grid, axis=-1)
    # Stacking the two outputs gives a 2-channel tensor of shape (h, h, 2),
    # here (3, 3, 2), holding an (i, j) coordinate at every position:
    # [[[0 0]
    #   [0 1]
    #   [0 2]]
    #  [[1 0]
    #   [1 1]
    #   [1 2]]
    #  [[2 0]
    #   [2 1]
    #   [2 2]]]
    grid = tf.cast(grid, 'float32')
    grid = tf.reshape(grid, (-1, 2))  # (h*h, 2): flattened to 2-D, each row is one coordinate
    # [[0. 0.]
    #  [0. 1.]
    #  [0. 2.]
    #  [1. 0.]
    #  [1. 1.]
    #  [1. 2.]
    #  [2. 0.]
    #  [2. 1.]
    #  [2. 2.]]
    grid = tf_repeat_2d(grid, batch_size)  # repeat along dim 0, b*c times: shape (b*c, h*h, 2)
    coords = offsets + grid  # add the predicted offsets to every channel's grid coordinates
    # The coordinates are now fractional, so the values at them must be
    # obtained by bilinear interpolation
    mapped_vals = tf_batch_map_coordinates(input, coords)
    return mapped_vals
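As a quick sanity check of the whole sampling pipeline: with all-zero offsets, tf_batch_map_offsets should simply return the input pixels in row-major order. A minimal sketch, assuming the functions above are in scope and run eagerly (note: under TF 2.x, tf.ceil in tf_batch_map_coordinates must be spelled tf.math.ceil):

import numpy as np
import tensorflow as tf

b, s = 2, 4
imgs = tf.constant(np.random.rand(b, s, s), dtype=tf.float32)
offsets = tf.zeros((b, s, s, 2))  # zero offsets: plain grid sampling

out = tf_batch_map_offsets(imgs, offsets)  # shape (b, s*s)
ref = tf.reshape(imgs, (b, -1))
print(np.allclose(out.numpy(), ref.numpy()))  # True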
cnn.py
from __future__ import absolute_import, division
from keras.layers import Input, Conv2D, Activation, GlobalAvgPool2D, Dense, BatchNormalization
from deform_conv.layers import ConvOffset2D

def get_cnn():
    inputs = l = Input((28, 28, 1), name='input')

    # conv11
    l = Conv2D(32, (3, 3), padding='same', name='conv11')(l)
    l = Activation('relu', name='conv11_relu')(l)
    l = BatchNormalization(name='conv11_bn')(l)

    # conv12
    l = Conv2D(64, (3, 3), padding='same', strides=(2, 2), name='conv12')(l)
    l = Activation('relu', name='conv12_relu')(l)
    l = BatchNormalization(name='conv12_bn')(l)

    # conv21
    l = Conv2D(128, (3, 3), padding='same', name='conv21')(l)
    l = Activation('relu', name='conv21_relu')(l)
    l = BatchNormalization(name='conv21_bn')(l)

    # conv22
    l = Conv2D(128, (3, 3), padding='same', strides=(2, 2), name='conv22')(l)
    l = Activation('relu', name='conv22_relu')(l)
    l = BatchNormalization(name='conv22_bn')(l)

    # out
    l = GlobalAvgPool2D(name='avg_pool')(l)
    l = Dense(10, name='fc1')(l)
    outputs = l = Activation('softmax', name='out')(l)

    return inputs, outputs

def get_deform_cnn(trainable):
    inputs = l = Input((28, 28, 1), name='input')

    # conv11
    l = Conv2D(32, (3, 3), padding='same', name='conv11', trainable=trainable)(l)
    l = Activation('relu', name='conv11_relu')(l)
    l = BatchNormalization(name='conv11_bn')(l)

    # conv12: a ConvOffset2D layer resamples the features before each conv
    l_offset = ConvOffset2D(32, name='conv12_offset')(l)
    l = Conv2D(64, (3, 3), padding='same', strides=(2, 2), name='conv12', trainable=trainable)(l_offset)
    l = Activation('relu', name='conv12_relu')(l)
    l = BatchNormalization(name='conv12_bn')(l)

    # conv21
    l_offset = ConvOffset2D(64, name='conv21_offset')(l)
    l = Conv2D(128, (3, 3), padding='same', name='conv21', trainable=trainable)(l_offset)
    l = Activation('relu', name='conv21_relu')(l)
    l = BatchNormalization(name='conv21_bn')(l)

    # conv22
    l_offset = ConvOffset2D(128, name='conv22_offset')(l)
    l = Conv2D(128, (3, 3), padding='same', strides=(2, 2), name='conv22', trainable=trainable)(l_offset)
    l = Activation('relu', name='conv22_relu')(l)
    l = BatchNormalization(name='conv22_bn')(l)

    # out
    l = GlobalAvgPool2D(name='avg_pool')(l)
    l = Dense(10, name='fc1', trainable=trainable)(l)
    outputs = l = Activation('softmax', name='out')(l)

    return inputs, outputs
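These builders return the input and output tensors, so wiring them into a trainable Keras model looks like the sketch below (the optimizer and loss here are illustrative choices, not taken from the repo's training script):

from keras.models import Model
from keras.optimizers import Adam

inputs, outputs = get_deform_cnn(trainable=True)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=Adam(1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()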
Below, some of the TensorFlow functions used in this code are tested under TensorFlow 2.0 (eager execution):
import tensorflow as tf
import numpy as np
# x = [1, 2, 3]
# y = [4, 5, 6]
# X, Y = tf.meshgrid(x, y)
# print(X.numpy())
# print(Y.numpy())
# print(X.shape)
# print(Y.shape)
# x = tf.range(-2,2,1)
# y = tf.range(-2,2,1)
# z = tf.range(-2,2,1)
# X,Y,Z = tf.meshgrid(x,y,z)
# print(x.numpy())
# print(y.numpy())
# print(z.numpy())
# print(X.numpy())
# print(Y.numpy())
# print(Z.numpy())
# print(X.shape)
# print(Y.shape)
# print(Z.shape)
def tf_flatten(a):
    """Flatten tensor"""
    return tf.reshape(a, [-1])

def tf_repeat(a, repeats, axis=0):
    """TensorFlow version of np.repeat for 1D"""
    # https://github.com/tensorflow/tensorflow/issues/8521
    assert len(a.get_shape()) == 1
    a = tf.expand_dims(a, -1)
    a = tf.tile(a, [1, repeats])
    a = tf_flatten(a)
    return a

def tf_repeat_2d(a, repeats):
    """TensorFlow version of np.repeat for 2D"""
    assert len(a.get_shape()) == 2
    a = tf.expand_dims(a, 0)
    a = tf.tile(a, [repeats, 1, 1])
    return a
x = tf.range(3)
# y = tf.range(3)
# # z = tf.range(3)
# c,d=tf.meshgrid(x,y,indexing='ij')
# a = tf.meshgrid(x,y,indexing='ij')
# print(c.numpy())
# print(d.numpy())
# print(a[0].shape)
# a = tf.stack(a,-1)
# print(y.shape)
# print(a.numpy())
# print(a.shape)
# # print(b.numpy())
# a = tf.cast(a, 'float32')
# print(a.numpy())
# a = tf.reshape(a, (-1, 2))
# print(a.shape)
# print(a.numpy())
# a = tf_repeat_2d(a, 3)
# print(a.numpy())
# print(a.shape)
# offsets = np.array([[0.1 ,0.2],
# [-0.1 ,-0.2],
# [0., 2.],
# [1., 0.],
# [1., 1.],
# [1., 2.],
# [2. ,0.],
# [2., 1.],
# [2. ,2.]])
# coo = offsets+a
# print(coo.numpy())
p = tf_repeat(x, 3)
print(p.numpy())  # [0 0 0 1 1 1 2 2 2]
q = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2], dtype=np.int32)  # int32 so it stacks with p (tf.range yields int32)
# t = tf.range(9)
indices = tf.stack([
    p, q
], axis=-1)
print(indices.numpy())
k = tf.Variable([[1, 2, 3, 4, 5],
                 [6, 7, 8, 9, 10],
                 [11, 12, 13, 14, 15]])
val = tf.gather_nd(k, indices)
print(val.numpy())  # [ 1  2  3  6  7  8 11 12 13]
val = tf.reshape(val, (1, 9))
print(val.numpy())
Source: CSDN
Author: Dilatitana
Link: https://blog.csdn.net/SweetWind1996/article/details/104573635