代码文件结构
- bbox_transform.py # bounding box变换。
- generate_anchors.py # 生成anchor,根据几种尺度和比例生成的anchor。
- proposal_layer.py # 通过将估计的边界框变换应用于一组常规框(称为“锚点”)来输出对象检测候选区域。选出合适的ROIS。
- anchor_target_layer.py # 将anchor对应ground truth。生成anchor分类标签和边界框回归目标。为anchor找到训练所需的ground truth类别和坐标变换信息。
- proposal_target_layer_cascade.py # 将对象检测候选分配给ground truth目标。生成候选分类标签和边界框回归目标。为选择出的rois找到训练所需的ground truth类别和坐标变换信息
- rpn.py # RPN网络定义。
参考
详细的Faster R-CNN源码解析之RPN源码解析 https://blog.csdn.net/jiongnima/article/details/79781792
Faster R-CNN 入坑之源码阅读 https://www.jianshu.com/p/a223853f8402?tdsourcetag=s_pcqq_aiomsg
下面对RPN部分的代码逐个文件进行注释。
1 rpn.py
class _RPN(nn.Module):
    """Region proposal network (RPN) head.

    A 3x3 convolution over the backbone feature map feeds two 1x1
    convolutions: one producing background/foreground scores and one
    producing box-regression offsets for every anchor at every location.
    The proposal layer turns these into RoIs; in training mode the anchor
    target layer additionally supplies labels/targets for the two RPN losses.
    """

    def __init__(self, din):
        """Build the RPN layers.

        Args:
            din: depth (channel count) of the input feature map, e.g. 512.
        """
        super(_RPN, self).__init__()

        self.din = din  # depth of the input feature map, e.g. 512
        # Anchor scales / aspect ratios / feature stride come from the global
        # config (the original notes quote [8, 16, 32], [0.5, 1, 2], [16]).
        self.anchor_scales = cfg.ANCHOR_SCALES
        self.anchor_ratios = cfg.ANCHOR_RATIOS
        self.feat_stride = cfg.FEAT_STRIDE[0]

        # 3x3 conv (stride 1, padding 1) applied to the input feature map;
        # the ReLU is applied in forward().
        self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)

        num_anchors = len(self.anchor_scales) * len(self.anchor_ratios)

        # bg/fg classification scores: 2 values per anchor.
        self.nc_score_out = num_anchors * 2
        self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # Box offset predictions: 4 values per anchor.
        self.nc_bbox_out = num_anchors * 4
        self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)

        # Proposal layer, built from (feature stride, scales, ratios).
        self.RPN_proposal = _ProposalLayer(
            self.feat_stride, self.anchor_scales, self.anchor_ratios)

        # Anchor target layer, built from the same three settings.
        self.RPN_anchor_target = _AnchorTargetLayer(
            self.feat_stride, self.anchor_scales, self.anchor_ratios)

        self.rpn_loss_cls = 0  # classification loss
        self.rpn_loss_box = 0  # box-regression loss

    @staticmethod
    def reshape(x, d):
        """View a 4-D tensor as ``(N, d, C * H // d, W)``.

        Args:
            x: 4-D tensor of shape ``(N, C, H, W)``.
            d: size of the new second dimension.

        Returns:
            A view of ``x`` whose dims 1 and 2 are regrouped so that dim 1
            has size ``d``; dims 0 and 3 are unchanged.
        """
        n, c, h, w = x.size()
        d = int(d)
        # Exact integer arithmetic instead of a float round-trip.
        return x.view(n, d, (c * h) // d, w)

    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        """Run the RPN on a feature map.

        Args:
            base_feat: backbone feature map; dim 0 is the batch dimension.
            im_info: image information forwarded to the proposal and
                anchor-target layers.
            gt_boxes: ground-truth boxes; must not be None in training mode.
            num_boxes: forwarded to the anchor-target layer.

        Returns:
            Tuple ``(rois, rpn_loss_cls, rpn_loss_box)``. Both losses are 0
            when the module is not in training mode.
        """
        batch_size = base_feat.size(0)

        # Feature map after the 3x3 conv + ReLU.
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)

        # Raw bg/fg scores, regrouped so that dim 1 has size 2 and the
        # softmax over dim 1 yields a bg/fg probability pair per anchor.
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)
        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        # Back to the original channel layout (nc_score_out channels).
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # Predicted offsets (4 per anchor) relative to the anchor boxes.
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # The proposal layer reads different config sections for train/test.
        cfg_key = 'TRAIN' if self.training else 'TEST'

        # `.data` detaches the inputs: proposals are produced from the
        # probabilities, the offsets and the image info.
        rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
                                  im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # Generate training labels and build the RPN losses.
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # --- classification loss ---
            # permute() returns a non-contiguous tensor, and view() needs a
            # contiguous one, hence the contiguous() copy in between.
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            # Per-anchor labels from the anchor target layer.
            rpn_label = rpn_data[0].view(batch_size, -1)

            # Keep only anchors whose label is not -1.
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(
                rpn_cls_score.view(-1, 2), 0, rpn_keep)
            rpn_label = torch.index_select(
                rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)

            # --- box-regression loss ---
            (rpn_bbox_targets,
             rpn_bbox_inside_weights,
             rpn_bbox_outside_weights) = rpn_data[1:]

            # The inside/outside weights are passed straight to the smooth-L1
            # loss. NOTE(review): the original notes say they restrict the
            # loss to anchors inside the image - confirm against
            # _AnchorTargetLayer.
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            # Regression targets (four offsets per anchor).
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(
                rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                rpn_bbox_outside_weights, sigma=3, dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
2 generate_anchors.py
这一部分比较简单:以一个基准窗口为参考,枚举3种长宽比(0.5、1、2)和3种尺度(8、16、32),共生成3×3=9个anchor,并把它们按行堆叠成形状为(9, 4)的数组anchors返回。
详细注释如下:
# Reference: anchors produced by Shaoqing's MATLAB implementation.
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =   % the 9 anchors (MATLAB, 1-based pixel coordinates)
#
#    -83   -39   100    56
#   -175   -87   192   104
#   -359  -183   376   200
#    -55   -55    72    72
#   -119  -119   136   136
#   -247  -247   264   264
#    -35   -79    52    96
#    -79  -167    96   184
#   -167  -343   184   360
#
# generate_anchors() below works on the 0-based window (0, 0, 15, 15), so
# every coordinate it returns is one smaller than in the MATLAB table:
#
# array([[ -84.,  -40.,   99.,   55.],
#        [-176.,  -88.,  191.,  103.],
#        [-360., -184.,  375.,  199.],
#        [ -56.,  -56.,   71.,   71.],
#        [-120., -120.,  135.,  135.],
#        [-248., -248.,  263.,  263.],
#        [ -36.,  -80.,   51.,   95.],
#        [ -80., -168.,   95.,  183.],
#        [-168., -344.,  183.,  359.]])

# Python 2/3 compatibility shim. It is no longer used inside this module but
# is kept so that the module-level name stays available.
try:
    xrange  # Python 2
except NameError:
    xrange = range  # Python 3


def generate_anchors(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    """Generate anchor (reference) windows.

    Enumerates aspect ratios x scales with respect to the reference window
    ``(0, 0, base_size - 1, base_size - 1)``.

    Args:
        base_size: side length of the reference window.
        ratios: aspect ratios (height / width); any array-like.
        scales: multipliers applied to each ratio anchor; any array-like.
            The default equals ``2 ** np.arange(3, 6)``.

    Returns:
        Array of shape ``(len(ratios) * len(scales), 4)`` whose rows are
        ``(x1, y1, x2, y2)``; rows are grouped by ratio, then by scale.
    """
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # vstack stacks the per-ratio groups vertically (one anchor per row).
    return np.vstack([_scale_enum(ratio_anchor, scales)
                      for ratio_anchor in ratio_anchors])


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Build anchors from widths/heights around a common center.

    Given 1-D arrays of widths (``ws``) and heights (``hs``) around the center
    ``(x_ctr, y_ctr)``, return an array with one ``(x1, y1, x2, y2)`` row per
    (width, height) pair.
    """
    # np.newaxis is an alias of None: turn the vectors into columns so that
    # hstack places the four coordinates side by side.
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    return np.hstack((x_ctr - 0.5 * (ws - 1),
                      y_ctr - 0.5 * (hs - 1),
                      x_ctr + 0.5 * (ws - 1),
                      y_ctr + 0.5 * (hs - 1)))


def _ratio_enum(anchor, ratios):
    """Enumerate one anchor per aspect ratio with respect to ``anchor``."""
    ratios = np.asarray(ratios)  # accept lists/tuples as well as arrays
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios  # area divided by each ratio
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)


def _scale_enum(anchor, scales):
    """Enumerate one anchor per scale with respect to ``anchor``."""
    scales = np.asarray(scales)  # accept lists/tuples as well as arrays
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    return _mkanchors(ws, hs, x_ctr, y_ctr)


if __name__ == '__main__':
    import time
    t = time.time()
    a = generate_anchors()  # the resulting anchors
    print(time.time() - t)
    print(a)
    from IPython import embed
    embed()
3 proposal_layer.py
根据anchor得到候选区域,这里有NMS,在后面再介绍。详细注释如下:
class _ProposalLayer(nn.Module):
    """
    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").
    """

    def __init__(self, feat_stride, scales, ratios):
        """Store the feature stride and pre-compute the base anchors.

        Args:
            feat_stride: stride used to shift the base anchors over the
                feature-map grid in forward().
            scales: anchor scales passed to ``generate_anchors``.
            ratios: anchor aspect ratios passed to ``generate_anchors``.
        """
        super(_ProposalLayer, self).__init__()

        self._feat_stride = feat_stride
        # Base anchors as a float tensor, one (x1, y1, x2, y2) row each.
        self._anchors = torch.from_numpy(
            generate_anchors(scales=np.array(scales),
                             ratios=np.array(ratios))).float()
        # The number of rows is the number of anchors per location.
        self._num_anchors = self._anchors.size(0)

    def forward(self, input):
        """Turn RPN outputs into a fixed-size set of RoIs per image.

        Algorithm:

        for each (H, W) location i
          generate A anchor boxes centered on cell i
          apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS to the remaining proposals
        take post_nms_topN proposals after NMS

        Args:
            input: sequence ``(scores, bbox_deltas, im_info, cfg_key)`` where
                ``cfg_key`` selects the config section to read.

        Returns:
            Tensor of shape ``(batch_size, post_nms_topN, 5)``; each row is
            ``(batch_index, x1, y1, x2, y2)`` and unused rows are zero-padded
            (apart from the batch index).
        """
        # The first _num_anchors channels are bg probs, the rest fg probs;
        # only the fg probs are used as scores.
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        # Read for parity with the config, but min-size filtering is not
        # applied in this implementation (see _filter_boxes, which is unused).
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        # Shift the base anchors to every feature-map cell.
        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride    # [width]
        shift_y = np.arange(0, feat_height) * self._feat_stride   # [height]
        # Both become [height, width] grids.
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        # One (dx, dy, dx, dy) row per cell: [height * width, 4].
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(),
                       shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4), then share across
        # the batch.
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors.
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores.
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # 1. Convert anchors into proposals via bbox transformations.
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. Clip predicted boxes to the image boundaries.
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores
        proposals_keep = proposals
        # Sort every image's scores in descending order.
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # 4. (proposal, score) pairs ordered by score, highest first.
            # 5. Take the top pre_nms_topN (e.g. 6000).
            order_single = order[i]

            # Compare against this image's proposal count rather than the
            # element count of the whole batch.
            if 0 < pre_nms_topN < scores_single.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. Apply NMS (e.g. threshold = 0.7).
            # 7. Take post_nms_topN (e.g. 300).
            # 8. Return the top proposals (-> RoIs).
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]

            # Rows past num_proposal stay zero (padding at the end).
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

    def _filter_boxes(self, boxes, min_size):
        """Flag boxes whose width and height are both at least ``min_size``.

        Args:
            boxes: tensor indexed as ``[batch, box, (x1, y1, x2, y2)]``.
            min_size: per-image minimum side length, one value per batch
                entry.

        Returns:
            Mask with the same shape as ``boxes[:, :, 0]``.
        """
        ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
        hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
        # expand_as broadcasts the per-image threshold over all boxes.
        keep = ((ws >= min_size.view(-1, 1).expand_as(ws))
                & (hs >= min_size.view(-1, 1).expand_as(hs)))
        return keep
4 bbox_transform.py
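这个文件包含边界框相关的工具函数:回归目标的编码(bbox_transform、bbox_transform_batch)、由偏移量解码出预测框(bbox_transform_inv)、把框裁剪到图像边界内(clip_boxes、clip_boxes_batch)以及IoU的计算(bbox_overlaps、bbox_overlaps_batch)。注释如下: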
def _box_geometry(boxes):
    """Return (widths, heights, ctr_x, ctr_y) of boxes stored in the last dim.

    The last dimension holds (x1, y1, x2, y2, ...). Widths/heights use the
    inclusive-pixel "+1" convention and the center is ``x1 + 0.5 * width``.
    """
    widths = boxes[..., 2] - boxes[..., 0] + 1.0
    heights = boxes[..., 3] - boxes[..., 1] + 1.0
    ctr_x = boxes[..., 0] + 0.5 * widths
    ctr_y = boxes[..., 1] + 0.5 * heights
    return widths, heights, ctr_x, ctr_y


def bbox_transform(ex_rois, gt_rois):
    """Encode ground-truth boxes as regression targets relative to ex_rois.

    Both inputs are converted from corner form to (center, width, height)
    form before the four offsets are computed.

    Args:
        ex_rois: (N, >=4) tensor of reference boxes (e.g. anchors).
        gt_rois: (N, >=4) tensor of the matching ground-truth boxes.

    Returns:
        (N, 4) tensor with rows (dx, dy, dw, dh).
    """
    ex_widths, ex_heights, ex_ctr_x, ex_ctr_y = _box_geometry(ex_rois)
    gt_widths, gt_heights, gt_ctr_x, gt_ctr_y = _box_geometry(gt_rois)

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = torch.log(gt_widths / ex_widths)
    targets_dh = torch.log(gt_heights / ex_heights)

    # Stack along dim 1: one row of four offsets per box.
    return torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), 1)


def bbox_transform_batch(ex_rois, gt_rois):
    """Batched version of ``bbox_transform``.

    Args:
        ex_rois: (N, 4) boxes shared by the whole batch, or (b, N, 4).
        gt_rois: (b, N, >=4) ground-truth boxes.

    Returns:
        (b, N, 4) tensor with (dx, dy, dw, dh) in the last dimension.

    Raises:
        ValueError: if ``ex_rois`` is neither 2-D nor 3-D.
    """
    ndim = ex_rois.dim()
    if ndim not in (2, 3):
        raise ValueError('ex_roi input dimension is not correct.')

    ex_widths, ex_heights, ex_ctr_x, ex_ctr_y = _box_geometry(ex_rois)
    gt_widths, gt_heights, gt_ctr_x, gt_ctr_y = _box_geometry(gt_rois)

    if ndim == 2:
        # Share the same reference boxes across every batch entry.
        ex_widths = ex_widths.view(1, -1).expand_as(gt_widths)
        ex_heights = ex_heights.view(1, -1).expand_as(gt_heights)
        ex_ctr_x = ex_ctr_x.view(1, -1).expand_as(gt_ctr_x)
        ex_ctr_y = ex_ctr_y.view(1, -1).expand_as(gt_ctr_y)

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = torch.log(gt_widths / ex_widths)
    targets_dh = torch.log(gt_heights / ex_heights)

    return torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), 2)


def bbox_transform_inv(boxes, deltas, batch_size):
    """Apply predicted offsets to boxes (inverse of ``bbox_transform``).

    Args:
        boxes: (b, N, 4) reference boxes in corner form.
        deltas: (b, N, 4k) offsets laid out as repeating (dx, dy, dw, dh).
        batch_size: unused; kept for interface compatibility.

    Returns:
        Tensor shaped like ``deltas`` holding the predicted boxes in
        (x1, y1, x2, y2) corner form.
    """
    # Center/size form of the initial boxes.
    widths, heights, ctr_x, ctr_y = _box_geometry(boxes)

    dx = deltas[:, :, 0::4]
    dy = deltas[:, :, 1::4]
    dw = deltas[:, :, 2::4]
    dh = deltas[:, :, 3::4]

    # Center/size of the transformed boxes.
    pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2)
    pred_ctr_y = dy * heights.unsqueeze(2) + ctr_y.unsqueeze(2)
    pred_w = torch.exp(dw) * widths.unsqueeze(2)
    pred_h = torch.exp(dh) * heights.unsqueeze(2)

    # Back to top-left / bottom-right corner form.
    pred_boxes = deltas.clone()
    pred_boxes[:, :, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, :, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, :, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, :, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2

    return pred_boxes


def clip_boxes_batch(boxes, im_shape, batch_size):
    """Clip boxes to image boundaries, in place, with per-image bounds.

    Args:
        boxes: (b, N, >=4) tensor; columns 0..3 are (x1, y1, x2, y2).
        im_shape: (b, >=2) tensor; column 0 is read as the image height
            bound and column 1 as the image width bound.
        batch_size: number of images ``b``.

    Returns:
        The same ``boxes`` tensor, modified in place.
    """
    # Every negative entry becomes 0.
    boxes.clamp_(min=0)

    # Per-image upper bounds as (b, 1) columns so they broadcast over the N
    # boxes of each image (a bare (b,) vector would be matched against the
    # box dimension instead).
    max_x = (im_shape[:, 1] - 1).type_as(boxes).view(batch_size, 1)
    max_y = (im_shape[:, 0] - 1).type_as(boxes).view(batch_size, 1)

    boxes[:, :, 0] = torch.min(boxes[:, :, 0], max_x)
    boxes[:, :, 1] = torch.min(boxes[:, :, 1], max_y)
    boxes[:, :, 2] = torch.min(boxes[:, :, 2], max_x)
    boxes[:, :, 3] = torch.min(boxes[:, :, 3], max_y)

    return boxes


def clip_boxes(boxes, im_shape, batch_size):
    """Clamp all four corners of every box inside its image, in place.

    Args:
        boxes: (b, N, 4k) tensor with repeating (x1, y1, x2, y2) columns.
        im_shape: indexed as ``im_shape[i, 0]`` (height bound) and
            ``im_shape[i, 1]`` (width bound) for image ``i``.
        batch_size: number of images to process.

    Returns:
        The same ``boxes`` tensor, modified in place.
    """
    for i in range(batch_size):
        boxes[i, :, 0::4].clamp_(0, im_shape[i, 1] - 1)
        boxes[i, :, 1::4].clamp_(0, im_shape[i, 0] - 1)
        boxes[i, :, 2::4].clamp_(0, im_shape[i, 1] - 1)
        boxes[i, :, 3::4].clamp_(0, im_shape[i, 0] - 1)

    return boxes


def bbox_overlaps(anchors, gt_boxes):
    """Compute pairwise IoU (intersection area / union area).

    anchors: (N, 4) tensor of float
    gt_boxes: (K, 4) tensor of float

    overlaps: (N, K) tensor of overlap between anchors and gt_boxes
    """
    N = anchors.size(0)
    K = gt_boxes.size(0)

    gt_boxes_area = ((gt_boxes[:, 2] - gt_boxes[:, 0] + 1) *
                     (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)).view(1, K)

    anchors_area = ((anchors[:, 2] - anchors[:, 0] + 1) *
                    (anchors[:, 3] - anchors[:, 1] + 1)).view(N, 1)

    boxes = anchors.view(N, 1, 4).expand(N, K, 4)
    query_boxes = gt_boxes.view(1, K, 4).expand(N, K, 4)

    # Intersection width/height; negative means no overlap.
    iw = (torch.min(boxes[:, :, 2], query_boxes[:, :, 2]) -
          torch.max(boxes[:, :, 0], query_boxes[:, :, 0]) + 1).clamp(min=0)
    ih = (torch.min(boxes[:, :, 3], query_boxes[:, :, 3]) -
          torch.max(boxes[:, :, 1], query_boxes[:, :, 1]) + 1).clamp(min=0)

    inter = iw * ih
    ua = anchors_area + gt_boxes_area - inter
    return inter / ua


def bbox_overlaps_batch(anchors, gt_boxes):
    """Batched pairwise IoU between anchors and ground-truth boxes.

    anchors: (N, 4) tensor shared by the batch, or (b, N, 4), or (b, N, C)
        with C != 4, in which case columns 1..4 hold the box
    gt_boxes: (b, K, >=4) tensor; only the first four columns are used

    overlaps: (b, N, K) tensor. Columns of ground-truth boxes whose width
        and height are both 1 are set to 0; rows of anchors whose width and
        height are both 1 are set to -1.

    Raises ValueError if ``anchors`` is neither 2-D nor 3-D.
    """
    batch_size = gt_boxes.size(0)

    if anchors.dim() == 2:
        N = anchors.size(0)
        anchors = anchors.view(1, N, 4).expand(batch_size, N, 4).contiguous()
    elif anchors.dim() == 3:
        if anchors.size(2) == 4:
            anchors = anchors[:, :, :4].contiguous()
        else:
            # Skip the leading column.
            anchors = anchors[:, :, 1:5].contiguous()
    else:
        raise ValueError('anchors input dimension is not correct.')

    N = anchors.size(1)
    K = gt_boxes.size(1)
    gt_boxes = gt_boxes[:, :, :4].contiguous()

    gt_boxes_x = gt_boxes[:, :, 2] - gt_boxes[:, :, 0] + 1
    gt_boxes_y = gt_boxes[:, :, 3] - gt_boxes[:, :, 1] + 1
    gt_boxes_area = (gt_boxes_x * gt_boxes_y).view(batch_size, 1, K)

    anchors_boxes_x = anchors[:, :, 2] - anchors[:, :, 0] + 1
    anchors_boxes_y = anchors[:, :, 3] - anchors[:, :, 1] + 1
    anchors_area = (anchors_boxes_x * anchors_boxes_y).view(batch_size, N, 1)

    # Boxes whose width and height are both 1 are masked out below.
    gt_area_zero = (gt_boxes_x == 1) & (gt_boxes_y == 1)
    anchors_area_zero = (anchors_boxes_x == 1) & (anchors_boxes_y == 1)

    boxes = anchors.view(batch_size, N, 1, 4).expand(batch_size, N, K, 4)
    query_boxes = gt_boxes.view(batch_size, 1, K, 4).expand(
        batch_size, N, K, 4)

    iw = (torch.min(boxes[:, :, :, 2], query_boxes[:, :, :, 2]) -
          torch.max(boxes[:, :, :, 0], query_boxes[:, :, :, 0]) + 1
          ).clamp(min=0)
    ih = (torch.min(boxes[:, :, :, 3], query_boxes[:, :, :, 3]) -
          torch.max(boxes[:, :, :, 1], query_boxes[:, :, :, 1]) + 1
          ).clamp(min=0)

    inter = iw * ih
    ua = anchors_area + gt_boxes_area - inter
    overlaps = inter / ua

    # Mask the overlaps: ground-truth first, then anchors (anchors win).
    overlaps.masked_fill_(
        gt_area_zero.view(batch_size, 1, K).expand(batch_size, N, K), 0)
    overlaps.masked_fill_(
        anchors_area_zero.view(batch_size, N, 1).expand(batch_size, N, K), -1)

    return overlaps
5 anchor_target_layer.py
为anchor找到训练所需的ground truth类别和坐标变换信息
6 proposal_target_layer_cascade.py
为选出的ROIS找到训练所需的ground truth类别和坐标变换信息
【占坑,未完待续…】
ref:https://blog.csdn.net/weixin_43872578/article/details/87898070