This walkthrough of Deformable Convolutional Networks is split into 5 parts; this post is Part 5:
- [ ] Part 1: A quick tutorial on implementing affine transformations
- [ ] Part 2: Spatial Transformer Networks paper walkthrough
- [ ] Part 3: Implementing STN in TensorFlow
- [ ] Part 4: Deformable Convolutional Networks paper walkthrough
- [x] Part 5: Implementing Deformable ConvNets in TensorFlow
This post covers implementing Deformable ConvNets in TensorFlow.
Introduction to Deformable Convolution
The code released with the paper is written in MXNet; this post uses a simplified TensorFlow/Keras implementation.
Note the limitations of this TensorFlow version:
- The forward pass is very slow: for the example model below, a forward pass with deformable convolution layers takes about 240 ms, while the plain CNN takes less than 10 ms.
- Only a simple deformable convolution layer is implemented; there is no deformable RoI pooling layer.
- It is written in Keras; the original MXNet implementation is much faster.
Related resources: https://github.com/felixlaumon/deform-conv
[Figure: digits from the deformed (scaled/translated) MNIST dataset]
A First Look at Deformable Conv
Download the code:
git clone https://github.com/felixlaumon/deform-conv.git
Then change into the cloned directory, start Jupyter, and create a new notebook.
Imports:
from __future__ import division
# %env CUDA_VISIBLE_DEVICES=0
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras.models import Model
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam, SGD
from deform_conv.layers import ConvOffset2D
from deform_conv.callbacks import TensorBoard
from deform_conv.cnn import get_cnn, get_deform_cnn
from deform_conv.mnist import get_gen
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)
# Configure the training and test data
batch_size = 32
n_train = 60000
n_test = 10000
steps_per_epoch = int(np.ceil(n_train / batch_size))
validation_steps = int(np.ceil(n_test / batch_size))
# Regular MNIST training generator
train_gen = get_gen(
'train', batch_size=batch_size,
scale=(1.0, 1.0), translate=0.0,
shuffle=True
)
# Regular MNIST test generator
test_gen = get_gen(
'test', batch_size=batch_size,
scale=(1.0, 1.0), translate=0.0,
shuffle=False
)
# Deformed (scaled/translated) MNIST training generator
train_scaled_gen = get_gen(
'train', batch_size=batch_size,
scale=(1.0, 2.5), translate=0.2,
shuffle=True
)
# Deformed (scaled/translated) MNIST test generator
test_scaled_gen = get_gen(
'test', batch_size=batch_size,
scale=(1.0, 2.5), translate=0.2,
shuffle=False
)
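To get a feel for the deformed data, we can pull one batch out of the scaled generator and plot it. This is a small sketch of my own (not part of the original notebook); it assumes get_gen returns a Python generator yielding (images, one-hot labels) batches, which is how fit_generator consumes it below.
# Optional sketch: peek at a batch of the deformed (scaled/translated) MNIST data.
import matplotlib.pyplot as plt

X_batch, y_batch = next(train_scaled_gen)       # X_batch: (32, 28, 28, 1), y_batch: (32, 10)
fig, axes = plt.subplots(1, 6, figsize=(12, 2))
for ax, img, label in zip(axes, X_batch, y_batch):
    ax.imshow(img[..., 0], cmap='gray')
    ax.set_title(int(label.argmax()))           # recover the digit from the one-hot label
    ax.axis('off')
plt.show()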
The Regular CNN Model
Train the regular CNN model:
inputs, outputs = get_cnn()
model = Model(inputs=inputs, outputs=outputs)
model.summary()  # print the network architecture
optim = Adam(1e-3)
# optim = SGD(1e-3, momentum=0.99, nesterov=True)
loss = categorical_crossentropy
model.compile(optim, loss, metrics=['accuracy'])
model.fit_generator(
train_gen, steps_per_epoch=steps_per_epoch,
epochs=10, verbose=1,
validation_data=test_gen, validation_steps=validation_steps
)
model.save_weights('models/cnn.h5')
Model architecture:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) (None, 28, 28, 1) 0
_________________________________________________________________
conv11 (Conv2D) (None, 28, 28, 32) 320
_________________________________________________________________
conv11_relu (Activation) (None, 28, 28, 32) 0
_________________________________________________________________
conv11_bn (BatchNormalizatio (None, 28, 28, 32) 128
_________________________________________________________________
conv12 (Conv2D) (None, 14, 14, 64) 18496
_________________________________________________________________
conv12_relu (Activation) (None, 14, 14, 64) 0
_________________________________________________________________
conv12_bn (BatchNormalizatio (None, 14, 14, 64) 256
_________________________________________________________________
conv21 (Conv2D) (None, 14, 14, 128) 73856
_________________________________________________________________
conv21_relu (Activation) (None, 14, 14, 128) 0
_________________________________________________________________
conv21_bn (BatchNormalizatio (None, 14, 14, 128) 512
_________________________________________________________________
conv22 (Conv2D) (None, 7, 7, 128) 147584
_________________________________________________________________
conv22_relu (Activation) (None, 7, 7, 128) 0
_________________________________________________________________
conv22_bn (BatchNormalizatio (None, 7, 7, 128) 512
_________________________________________________________________
avg_pool (GlobalAveragePooli (None, 128) 0
_________________________________________________________________
fc1 (Dense) (None, 10) 1290
_________________________________________________________________
out (Activation) (None, 10) 0
=================================================================
Total params: 242,954
Trainable params: 242,250
Non-trainable params: 704
Evaluate the regular CNN on both the regular and the deformed MNIST test sets:
# ---
# Evaluate normal CNN
model.load_weights('models/cnn.h5', by_name=True)
val_loss, val_acc = model.evaluate_generator(
test_gen, steps=validation_steps
)
print('Test accuracy', val_acc)
# 0.9874
val_loss, val_acc = model.evaluate_generator(
test_scaled_gen, steps=validation_steps
)
print('Test accuracy with scaled images', val_acc)
# 0.5701
Test results:
Test accuracy 0.9884
Test accuracy with scaled images 0.577
The Deform-Conv Model with Deformable Layers
Train the model; note that this fine-tunes on top of the regular CNN trained above:
# ---
# Deformable CNN
inputs, outputs = get_deform_cnn(trainable=False)
model = Model(inputs=inputs, outputs=outputs)
model.load_weights('models/cnn.h5', by_name=True)
model.summary()
optim = Adam(5e-4)
# optim = SGD(1e-4, momentum=0.99, nesterov=True)
loss = categorical_crossentropy
model.compile(optim, loss, metrics=['accuracy'])
model.fit_generator(
train_scaled_gen, steps_per_epoch=steps_per_epoch,
epochs=20, verbose=1,
validation_data=test_scaled_gen, validation_steps=validation_steps
)
# Epoch 20/20
# 1875/1875 [==============================] - 442s 236ms/step - loss: 0.2554 - acc: 0.9203 - val_loss: 0.2030 - val_acc: 0.9357
model.save_weights('models/deform_cnn.h5')
Network architecture and training log:
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) (None, 28, 28, 1) 0
_________________________________________________________________
conv11 (Conv2D) (None, 28, 28, 32) 320
_________________________________________________________________
conv11_relu (Activation) (None, 28, 28, 32) 0
_________________________________________________________________
conv11_bn (BatchNormalizatio (None, 28, 28, 32) 128
_________________________________________________________________
conv12_offset (ConvOffset2D) (None, 28, 28, 32) 18432
_________________________________________________________________
conv12 (Conv2D) (None, 14, 14, 64) 18496
_________________________________________________________________
conv12_relu (Activation) (None, 14, 14, 64) 0
_________________________________________________________________
conv12_bn (BatchNormalizatio (None, 14, 14, 64) 256
_________________________________________________________________
conv21_offset (ConvOffset2D) (None, 14, 14, 64) 73728
_________________________________________________________________
conv21 (Conv2D) (None, 14, 14, 128) 73856
_________________________________________________________________
conv21_relu (Activation) (None, 14, 14, 128) 0
_________________________________________________________________
conv21_bn (BatchNormalizatio (None, 14, 14, 128) 512
_________________________________________________________________
conv22_offset (ConvOffset2D) (None, 14, 14, 128) 294912
_________________________________________________________________
conv22 (Conv2D) (None, 7, 7, 128) 147584
_________________________________________________________________
conv22_relu (Activation) (None, 7, 7, 128) 0
_________________________________________________________________
conv22_bn (BatchNormalizatio (None, 7, 7, 128) 512
_________________________________________________________________
avg_pool (GlobalAveragePooli (None, 128) 0
_________________________________________________________________
fc1 (Dense) (None, 10) 1290
_________________________________________________________________
out (Activation) (None, 10) 0
=================================================================
Total params: 630,026
Trainable params: 387,776
Non-trainable params: 242,250
Epoch 1/20
1875/1875 [==============================] - 397s 212ms/step - loss: 0.3851 - acc: 0.8873 - val_loss: 0.2935 - val_acc: 0.9102
Epoch 2/20
1875/1875 [==============================] - 281s 150ms/step - loss: 0.3454 - acc: 0.8971 - val_loss: 0.2775 - val_acc: 0.9123
Epoch 3/20
1875/1875 [==============================] - 316s 169ms/step - loss: 0.3299 - acc: 0.8994 - val_loss: 0.2838 - val_acc: 0.9127
Epoch 4/20
1875/1875 [==============================] - 348s 186ms/step - loss: 0.3299 - acc: 0.8994 - val_loss: 0.2839 - val_acc: 0.9120
Epoch 5/20
1875/1875 [==============================] - 372s 198ms/step - loss: 0.3198 - acc: 0.9014 - val_loss: 0.2781 - val_acc: 0.9149
Epoch 6/20
1875/1875 [==============================] - 378s 202ms/step - loss: 0.3057 - acc: 0.9040 - val_loss: 0.2475 - val_acc: 0.9243
Epoch 7/20
1875/1875 [==============================] - 468s 250ms/step - loss: 0.2942 - acc: 0.9076 - val_loss: 0.2487 - val_acc: 0.9234
Epoch 8/20
1875/1875 [==============================] - 469s 250ms/step - loss: 0.2917 - acc: 0.9085 - val_loss: 0.2448 - val_acc: 0.9211
Epoch 9/20
1875/1875 [==============================] - 442s 236ms/step - loss: 0.2936 - acc: 0.9075 - val_loss: 0.2383 - val_acc: 0.9248
Epoch 10/20
1875/1875 [==============================] - 431s 230ms/step - loss: 0.2928 - acc: 0.9079 - val_loss: 0.2516 - val_acc: 0.9208
Epoch 11/20
1875/1875 [==============================] - 458s 244ms/step - loss: 0.2886 - acc: 0.9089 - val_loss: 0.2347 - val_acc: 0.9262
Epoch 12/20
1875/1875 [==============================] - 434s 231ms/step - loss: 0.2830 - acc: 0.9099 - val_loss: 0.2342 - val_acc: 0.9253
Epoch 13/20
1875/1875 [==============================] - 453s 242ms/step - loss: 0.2745 - acc: 0.9127 - val_loss: 0.2308 - val_acc: 0.9257
Epoch 14/20
1875/1875 [==============================] - 449s 240ms/step - loss: 0.2795 - acc: 0.9124 - val_loss: 0.2279 - val_acc: 0.9287
Epoch 15/20
1875/1875 [==============================] - 458s 244ms/step - loss: 0.2709 - acc: 0.9139 - val_loss: 0.2338 - val_acc: 0.9288
Epoch 16/20
1875/1875 [==============================] - 422s 225ms/step - loss: 0.2767 - acc: 0.9116 - val_loss: 0.2145 - val_acc: 0.9286
Epoch 17/20
1875/1875 [==============================] - 364s 194ms/step - loss: 0.2663 - acc: 0.9160 - val_loss: 0.2259 - val_acc: 0.9302
Epoch 18/20
1875/1875 [==============================] - 366s 195ms/step - loss: 0.2665 - acc: 0.9162 - val_loss: 0.2118 - val_acc: 0.9325
Epoch 19/20
1875/1875 [==============================] - 403s 215ms/step - loss: 0.2634 - acc: 0.9168 - val_loss: 0.2204 - val_acc: 0.9309
Epoch 20/20
1875/1875 [==============================] - 442s 236ms/step - loss: 0.2554 - acc: 0.9203 - val_loss: 0.2030 - val_acc: 0.9357
Evaluate the deformable convolution model:
# --
# Evaluate deformable CNN
model.load_weights('models/deform_cnn.h5')
val_loss, val_acc = model.evaluate_generator(
test_scaled_gen, steps=validation_steps
)
print('Test accuracy of deformable convolution with scaled images', val_acc)
# 0.9255
val_loss, val_acc = model.evaluate_generator(
test_gen, steps=validation_steps
)
print('Test accuracy of deformable convolution with regular images', val_acc)
# 0.9727
Output:
Test accuracy of deformable convolution with scaled images 0.9323
Test accuracy of deformable convolution with regular images 0.9016
Comparison with the regular CNN:
| Model | Regular MNIST | Deformed MNIST |
| --- | --- | --- |
| Regular CNN | 0.9884 | 0.577 |
| Deformable CNN | 0.9016 | 0.9323 |
The accuracy on regular MNIST drops quite a bit. This is because the three newly added deformable layers were fine-tuned only on the deformed MNIST; an additional fine-tuning pass on regular MNIST (see the sketch below) would adapt the model to it.
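A minimal sketch of that extra fine-tuning step (my own addition, not from the original post; the learning rate and epoch count are arbitrary), reusing the objects defined above:
# Hypothetical extra step: also fine-tune the deformable model on regular MNIST,
# so it recovers accuracy on undeformed digits. Note that get_deform_cnn(trainable=False)
# keeps the base CNN layers frozen, so only the three offset layers adapt here.
model.load_weights('models/deform_cnn.h5')
model.compile(Adam(1e-4), categorical_crossentropy, metrics=['accuracy'])
model.fit_generator(
    train_gen, steps_per_epoch=steps_per_epoch,
    epochs=5, verbose=1,
    validation_data=test_gen, validation_steps=validation_steps
)
model.save_weights('models/deform_cnn_finetuned.h5')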
Analysis of the Deformable Convolution Model
Two models were used above: the regular CNN (get_cnn) and the deformable CNN (get_deform_cnn). Both come from the deform-conv/deform_conv/cnn.py file.
The code is as follows:
# Imports needed by get_cnn and get_deform_cnn
from keras.layers import Input, Conv2D, Activation, BatchNormalization, GlobalAvgPool2D, Dense
from deform_conv.layers import ConvOffset2D


def get_cnn():
    inputs = l = Input((28, 28, 1), name='input')

    # conv11
    l = Conv2D(32, (3, 3), padding='same', name='conv11')(l)
    l = Activation('relu', name='conv11_relu')(l)
    l = BatchNormalization(name='conv11_bn')(l)

    # conv12
    l = Conv2D(64, (3, 3), padding='same', strides=(2, 2), name='conv12')(l)
    l = Activation('relu', name='conv12_relu')(l)
    l = BatchNormalization(name='conv12_bn')(l)

    # conv21
    l = Conv2D(128, (3, 3), padding='same', name='conv21')(l)
    l = Activation('relu', name='conv21_relu')(l)
    l = BatchNormalization(name='conv21_bn')(l)

    # conv22
    l = Conv2D(128, (3, 3), padding='same', strides=(2, 2), name='conv22')(l)
    l = Activation('relu', name='conv22_relu')(l)
    l = BatchNormalization(name='conv22_bn')(l)

    # out
    l = GlobalAvgPool2D(name='avg_pool')(l)
    l = Dense(10, name='fc1')(l)
    outputs = l = Activation('softmax', name='out')(l)

    return inputs, outputs
def get_deform_cnn(trainable):
    inputs = l = Input((28, 28, 1), name='input')

    # conv11
    l = Conv2D(32, (3, 3), padding='same', name='conv11', trainable=trainable)(l)
    l = Activation('relu', name='conv11_relu')(l)
    l = BatchNormalization(name='conv11_bn')(l)

    # conv12
    l_offset = ConvOffset2D(32, name='conv12_offset')(l)
    l = Conv2D(64, (3, 3), padding='same', strides=(2, 2), name='conv12', trainable=trainable)(l_offset)
    l = Activation('relu', name='conv12_relu')(l)
    l = BatchNormalization(name='conv12_bn')(l)

    # conv21
    l_offset = ConvOffset2D(64, name='conv21_offset')(l)
    l = Conv2D(128, (3, 3), padding='same', name='conv21', trainable=trainable)(l_offset)
    l = Activation('relu', name='conv21_relu')(l)
    l = BatchNormalization(name='conv21_bn')(l)

    # conv22
    l_offset = ConvOffset2D(128, name='conv22_offset')(l)
    l = Conv2D(128, (3, 3), padding='same', strides=(2, 2), name='conv22', trainable=trainable)(l_offset)
    l = Activation('relu', name='conv22_relu')(l)
    l = BatchNormalization(name='conv22_bn')(l)

    # out
    l = GlobalAvgPool2D(name='avg_pool')(l)
    l = Dense(10, name='fc1', trainable=trainable)(l)
    outputs = l = Activation('softmax', name='out')(l)

    return inputs, outputs
The difference between the two models is clear: compared with get_cnn, get_deform_cnn adds three ConvOffset2D layers. All the other layers are identical, which is why deform_cnn can be fine-tuned starting from the cnn weights. A quick check of the layer names is sketched below.
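As a quick sanity check (my own sketch, not in the original post), we can diff the layer names of the two architectures; only the three *_offset layers are new, so every other layer is initialized by load_weights('models/cnn.h5', by_name=True):
# Sketch: layer names unique to the deformable model.
cnn_model = Model(*get_cnn())
deform_model = Model(*get_deform_cnn(trainable=False))

cnn_names = {layer.name for layer in cnn_model.layers}
deform_names = {layer.name for layer in deform_model.layers}
print(sorted(deform_names - cnn_names))
# ['conv12_offset', 'conv21_offset', 'conv22_offset']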
The ConvOffset2D Layer
Following the paper, the layer works in the following steps (the shape flow is sketched right after this list):
- The input feature map U has shape (b, h, w, c), exactly what a regular convolution would consume.
- For the deformable convolution, a regular convolution is first applied to U to produce a feature map of shape (b, h, w, 2c); these 2c channels are the sampling offsets of the deformable convolution (2 per channel, for the x and y directions).
- The offset feature map offsets is added to the original sampling positions x to obtain the actual sampling positions coords. Concretely:
  - reshape the offsets: (b, h, w, 2c) -> (b*c, h, w, 2)
  - reshape the input: (b, h, w, c) -> (b*c, h, w)
  - call tf_batch_map_offsets to sample at the offset positions, giving x_offset of shape (b*c, h, w)
  - reshape the sampled result to the final output: (b*c, h, w) -> (b, h, w, c)
Let's walk through it step by step.
The ConvOffset2D class
The key ConvOffset2D class is defined in deform_conv/layers.py:
class ConvOffset2D(Conv2D):
    """ConvOffset2D layer: learns 2D offsets and outputs the deformed
    feature map, sampled with bilinear interpolation.
    """

    def __init__(self, filters, init_normal_stddev=0.01, **kwargs):
        """Init

        Parameters
        ----------
        filters : int
            Number of channels of the input feature map
        init_normal_stddev : float
            Normal kernel initialization
        **kwargs:
            Passed to superclass. See Conv2D layer in Keras
        """
        self.filters = filters
        # Note the doubled channel count: the output feature map holds the x and y offsets
        super(ConvOffset2D, self).__init__(
            self.filters * 2, (3, 3), padding='same', use_bias=False,
            kernel_initializer=RandomNormal(0, init_normal_stddev),
            **kwargs
        )

    def call(self, x):
        """Return the deformed feature map"""
        x_shape = x.get_shape()
        # The convolution outputs a feature map with twice the channels,
        # i.e. the offsets, of shape (b, h, w, 2c)
        offsets = super(ConvOffset2D, self).call(x)
        # Reshape offsets to (b*c, h, w, 2): b*c offset maps of size (h, w)
        offsets = self._to_bc_h_w_2(offsets, x_shape)
        # Reshape the input x likewise to (b*c, h, w)
        x = self._to_bc_h_w(x, x_shape)
        # Bilinear sampling gives x_offset of shape (b*c, h, w)
        x_offset = tf_batch_map_offsets(x, offsets)
        # Reshape back to the original layout, i.e. x_offset: (b, h, w, c)
        x_offset = self._to_b_h_w_c(x_offset, x_shape)
        return x_offset

    def compute_output_shape(self, input_shape):
        """Output shape is the same as input shape

        Because this layer does only the deformation part
        """
        return input_shape

    @staticmethod
    def _to_bc_h_w_2(x, x_shape):
        """(b, h, w, 2c) -> (b*c, h, w, 2)"""
        x = tf.transpose(x, [0, 3, 1, 2])
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2]), 2))
        return x

    @staticmethod
    def _to_bc_h_w(x, x_shape):
        """(b, h, w, c) -> (b*c, h, w)"""
        x = tf.transpose(x, [0, 3, 1, 2])
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2])))
        return x

    @staticmethod
    def _to_b_h_w_c(x, x_shape):
        """(b*c, h, w) -> (b, h, w, c)"""
        x = tf.reshape(
            x, (-1, int(x_shape[3]), int(x_shape[1]), int(x_shape[2]))
        )
        x = tf.transpose(x, [0, 2, 3, 1])
        return x
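As a quick usage check (my own sketch, not from the repo; the layer name demo_offset is made up), the layer can be applied to a dummy Keras tensor, and the output shape matches the input shape because the layer only deforms the sampling:
# Sketch: ConvOffset2D preserves the shape of its input.
from keras.layers import Input
from deform_conv.layers import ConvOffset2D

inp = Input((28, 28, 32))                     # (h, w, c), batch dimension implicit
out = ConvOffset2D(32, name='demo_offset')(inp)
print(out.get_shape())                        # (?, 28, 28, 32), same as the input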
Now let's look at the tf_batch_map_offsets function in more detail.
The tf_batch_map_offsets function
The previous step produced the reshaped offsets of shape (b*c, h, w, 2) and the reshaped input of shape (b*c, h, w). This function then:
- adds the offsets to the original sampling grid of the feature map to get the actual sampling positions;
- calls tf_batch_map_coordinates to do the bilinear interpolation and obtain the output.
The surrounding reshapes are a chain of transpose/reshape tensor operations and take a bit of effort to follow.
def tf_batch_map_offsets(input, offsets, order=1):
    """Batch map offsets into input

    Parameters
    ---------
    input : tf.Tensor. shape = (b, s, s)
    offsets: tf.Tensor. shape = (b, s, s, 2)

    Returns
    -------
    tf.Tensor. shape = (b, s, s)
    """
    input_shape = tf.shape(input)
    batch_size = input_shape[0]
    input_size = input_shape[1]

    offsets = tf.reshape(offsets, (batch_size, -1, 2))
    grid = tf.meshgrid(
        tf.range(input_size), tf.range(input_size), indexing='ij'
    )
    grid = tf.stack(grid, axis=-1)
    grid = tf.cast(grid, 'float32')
    grid = tf.reshape(grid, (-1, 2))
    grid = tf_repeat_2d(grid, batch_size)
    coords = offsets + grid  # actual sampling coordinates
    mapped_vals = tf_batch_map_coordinates(input, coords)  # bilinear interpolation
    return mapped_vals
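To see what grid contains, here is a small sketch (my own) of the identity sampling grid for a 4x4 map; with all-zero offsets, coords equals this grid and the bilinear sampling simply returns the input unchanged:
# Sketch: the identity sampling grid built by tf.meshgrid for a 4x4 feature map.
import tensorflow as tf

s = 4
grid = tf.meshgrid(tf.range(s), tf.range(s), indexing='ij')
grid = tf.cast(tf.stack(grid, axis=-1), 'float32')   # (s, s, 2), grid[i, j] == [i, j]
grid = tf.reshape(grid, (-1, 2))                     # (s*s, 2)

with tf.Session() as sess:
    print(sess.run(grid)[:5])
# [[0. 0.]
#  [0. 1.]
#  [0. 2.]
#  [0. 3.]
#  [1. 0.]]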
The tf_batch_map_coordinates function performs the bilinear interpolation:
- get the 4 integer corner coordinates around each sampling position
- gather the pixel values at those corners and bilinearly interpolate them to obtain the sampled result
def tf_batch_map_coordinates(input, coords, order=1):
    """Batch version of tf_map_coordinates

    Only supports 2D feature maps

    Parameters
    ----------
    input : tf.Tensor. shape = (b, s, s)
    coords : tf.Tensor. shape = (b, n_points, 2)

    Returns
    -------
    tf.Tensor. shape = (b, s, s)
    """
    input_shape = tf.shape(input)
    batch_size = input_shape[0]
    input_size = input_shape[1]
    n_coords = tf.shape(coords)[1]

    # Make sure the positions after adding offsets do not fall outside the feature map
    coords = tf.clip_by_value(coords, 0, tf.cast(input_size, 'float32') - 1)

    # The four corner coordinates around each sampling position, used for bilinear interpolation
    coords_lt = tf.cast(tf.floor(coords), 'int32')
    coords_rb = tf.cast(tf.ceil(coords), 'int32')
    coords_lb = tf.stack([coords_lt[..., 0], coords_rb[..., 1]], axis=-1)
    coords_rt = tf.stack([coords_rb[..., 0], coords_lt[..., 1]], axis=-1)

    idx = tf_repeat(tf.range(batch_size), n_coords)

    # Gather the pixel values at the given integer coordinates
    def _get_vals_by_coords(input, coords):
        indices = tf.stack([
            idx, tf_flatten(coords[..., 0]), tf_flatten(coords[..., 1])
        ], axis=-1)
        vals = tf.gather_nd(input, indices)
        vals = tf.reshape(vals, (batch_size, n_coords))
        return vals

    # Pixel values at the four corners
    vals_lt = _get_vals_by_coords(input, coords_lt)
    vals_rb = _get_vals_by_coords(input, coords_rb)
    vals_lb = _get_vals_by_coords(input, coords_lb)
    vals_rt = _get_vals_by_coords(input, coords_rt)

    # Bilinear interpolation
    coords_offset_lt = coords - tf.cast(coords_lt, 'float32')
    vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[..., 0]
    vals_b = vals_lb + (vals_rb - vals_lb) * coords_offset_lt[..., 0]
    mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[..., 1]

    # Return the bilinearly interpolated sampled values
    return mapped_vals
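As a sanity check on the bilinear sampling (my own sketch; it assumes the two helpers live in deform_conv/deform_conv.py as in the repo layout), the result can be compared against scipy.ndimage.map_coordinates with order=1:
# Sketch: tf_batch_map_coordinates should agree with scipy's bilinear map_coordinates.
import numpy as np
import scipy.ndimage as ndi
import tensorflow as tf
from deform_conv.deform_conv import tf_batch_map_coordinates  # assumed module path

np.random.seed(0)
img = np.random.rand(1, 10, 10).astype('float32')             # (b, s, s)
coords = (np.random.rand(1, 50, 2) * 9).astype('float32')     # (b, n_points, 2), inside the map

with tf.Session() as sess:
    tf_vals = sess.run(tf_batch_map_coordinates(
        tf.constant(img), tf.constant(coords)))

sp_vals = ndi.map_coordinates(img[0], coords[0].T, order=1)
print(np.allclose(tf_vals[0], sp_vals, atol=1e-5))             # expect True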
Source: CSDN
Author: DFann
Link: https://blog.csdn.net/u011974639/article/details/79996353