本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记
---------------个人学习笔记---------------
----------------本文作者吴疆--------------
------点击此处链接至博客园原文------
与roi_data_layer/minibatch.py类似,该函数可能并未执行
"""Compute minibatch blobs for training a Fast R-CNN network."""
1.get_minibatch(roidb, num_classes)
对roidb[i]['info_boxes'](内容未知,每行18列的具体含义待确认)的第3、8列加上batch索引偏移后汇总为'info_boxes'字段,并增加'data'(图像数据blob)和'parameters'字段(相关参数,依次包含num_scale 图像缩放尺度数量、num_aspect 使用的纵横比数量、cfg.TRAIN.SCALES、cfg.TRAIN.SCALE_MAPPING、cfg.TRAIN.ASPECT_HEIGHTS、cfg.TRAIN.ASPECT_WIDTHS;后3个值在配置中均未定义,按理应该会报错,也有可能该函数并未被执行)。未见调用。
# Build the 'data', 'info_boxes' and 'parameters' blobs for one minibatch.
def get_minibatch(roidb, num_classes):
    """Given a roidb, construct a minibatch sampled from it.

    Args:
        roidb: sequence of per-image dicts. Each entry must provide an
            'info_boxes' array with 18 columns, plus whatever
            _get_image_blob reads from it.
        num_classes: not used by this function.

    Returns:
        dict with three entries:
            'data': the image blob returned by _get_image_blob(roidb).
            'info_boxes': every image's info_boxes rows stacked vertically,
                with columns 2 and 7 shifted by i * num_scale for image i.
            'parameters': 1-D float32 array laid out as
                [num_scale, num_aspect, SCALES..., SCALE_MAPPING...,
                 ASPECT_HEIGHTS..., ASPECT_WIDTHS...].

    Raises:
        AssertionError: if len(roidb) does not divide cfg.TRAIN.BATCH_SIZE.
    """
    num_images = len(roidb)
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)

    # Get the input image blob.
    im_blob = _get_image_blob(roidb)

    # Build the box information blob. The empty (0, 18) float32 seed fixes
    # the expected column count for every per-image array.
    # NOTE(review): the meaning of the 18 columns is not visible here; columns
    # 2 and 7 are presumably batch indices (they are offset per image) -
    # confirm against the code that fills roidb[i]['info_boxes'].
    num_scale = len(cfg.TRAIN.SCALES)
    stacked = [np.zeros((0, 18), dtype=np.float32)]
    for i, entry in enumerate(roidb):
        # Work on a copy: offsetting the roidb array in place would leak into
        # the caller's data and accumulate every time the entry is reused.
        info_boxes = entry['info_boxes'].copy()
        info_boxes[:, 2] += i * num_scale
        info_boxes[:, 7] += i * num_scale
        stacked.append(info_boxes)
    # Stack once instead of re-allocating the blob on every iteration.
    info_boxes_blob = np.vstack(stacked)

    # Build the parameter blob.
    # NOTE(review): cfg.TRAIN.SCALE_MAPPING, cfg.TRAIN.ASPECT_HEIGHTS and
    # cfg.TRAIN.ASPECT_WIDTHS must exist in the config; earlier annotations on
    # this code report they may be missing, which would raise here - confirm.
    num_aspect = len(cfg.TRAIN.ASPECTS)
    scales_end = 2 + num_scale
    mapping_end = scales_end + num_scale
    heights_end = mapping_end + num_aspect
    widths_end = heights_end + num_aspect

    parameters_blob = np.zeros(widths_end, dtype=np.float32)
    parameters_blob[0] = num_scale
    parameters_blob[1] = num_aspect
    parameters_blob[2:scales_end] = cfg.TRAIN.SCALES
    parameters_blob[scales_end:mapping_end] = cfg.TRAIN.SCALE_MAPPING
    parameters_blob[mapping_end:heights_end] = cfg.TRAIN.ASPECT_HEIGHTS
    parameters_blob[heights_end:widths_end] = cfg.TRAIN.ASPECT_WIDTHS

    blobs = {'data': im_blob,
             'info_boxes': info_boxes_blob,
             'parameters': parameters_blob}

    return blobs
2._get_image_blob(roidb)
对传入的roidb中的图像做减均值、缩放处理,将处理后的图像存入processed_ims列表,再把该列表作为参数传入im_list_to_blob(...)函数,得到并返回图像数据blob;该函数被get_minibatch(...)调用,构成blobs中的'data'字段
与roi_data_layer/minibatch.py中的同名函数(仅使用target_size单一尺度进行缩放)相比,此函数的区别在于缩放时使用了多尺度TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0),为何要使用多尺度?除get_minibatch(...)外未见其他调用
def _get_image_blob(roidb):
    """Build the input image blob from every roidb image at every base scale.

    For each roidb entry the image at entry['image'] is read with cv2,
    reversed along its second axis when entry['flipped'] is true, converted
    to float32 and has cfg.PIXEL_MEANS subtracted. The result is then resized
    once per factor in cfg.TRAIN.SCALES_BASE with bilinear interpolation.

    Args:
        roidb: sequence of dicts providing the 'image' and 'flipped' keys.

    Returns:
        Whatever im_list_to_blob returns for the list of resized images,
        ordered image by image and, within an image, scale by scale.
    """
    processed_ims = []
    for entry in roidb:
        image = cv2.imread(entry['image'])
        if entry['flipped']:
            image = image[:, ::-1, :]

        # astype(copy=True) gives a private float32 array, so the in-place
        # mean subtraction never touches the array returned by imread.
        centered = image.astype(np.float32, copy=True)
        centered -= cfg.PIXEL_MEANS

        # Image pyramid: one resized copy per base scale factor.
        processed_ims.extend(
            cv2.resize(centered, None, None, fx=factor, fy=factor,
                       interpolation=cv2.INTER_LINEAR)
            for factor in cfg.TRAIN.SCALES_BASE
        )

    # Pack the list of images into a single blob.
    return im_list_to_blob(processed_ims)
3._project_im_rois(im_rois, im_scale_factor)
对rois进行缩放,未见调用
def _project_im_rois(im_rois, im_scale_factor):
    """Map image RoIs into the rescaled training image.

    Returns the product ``im_rois * im_scale_factor``; the inputs are not
    modified.
    """
    return im_rois * im_scale_factor
4._get_bbox_regression_labels(bbox_target_data, num_classes)
将N*5的bbox_target_data扩充为N*(4*num_classes)的bbox_targets(仅对应类别的4列有非0的回归目标值,即网络接受的shape),并构造同为N*(4*num_classes)的bbox_loss_weights,返回bbox_targets和bbox_loss_weights,未见调用
# Expand compact N x 5 regression targets into the N x (4 * num_classes)
# layout, and build the matching N x (4 * num_classes) loss weights.
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact bounding-box regression targets to the 4-of-4*K form.

    Bounding-box regression targets are stored in a compact form in the
    roidb: column 0 is the class label and the remaining four columns are the
    targets. This function spreads each row's targets into the four columns
    belonging to its class, so only one class has non-zero targets per row.
    The loss weights are expanded the same way.

    Args:
        bbox_target_data (ndarray): N x 5 array, class label in column 0.
        num_classes (int): number of classes K.

    Returns:
        bbox_targets (ndarray): N x 4K float32 array of regression targets.
        bbox_loss_weights (ndarray): N x 4K float32 array, 1.0 in the four
            columns of each labelled row's class and 0.0 elsewhere.
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32)

    # Rows with label <= 0 (background) keep all-zero targets and weights.
    for ind in np.where(clss > 0)[0]:
        # The label is stored in a float array; slice bounds must be
        # integers, so convert before computing the column range.
        start = 4 * int(clss[ind])
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_loss_weights[ind, start:end] = 1.0
    return bbox_targets, bbox_loss_weights
5._vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob)
绘制roi矩形框,打印相关信息,未见调用
# Draw each RoI rectangle on its source image and print its labels.
def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob):
    """Visualize a mini-batch for debugging.

    For every row of rois_blob, shows the image it refers to with the RoI
    drawn as a red rectangle, and prints the row's class and subclass.

    Args:
        im_blob: 4-D image array indexed by image on axis 0; presumably
            channel-first with cfg.PIXEL_MEANS already subtracted - confirm.
        rois_blob: 2-D array, one RoI per row. Column 0 is the image index
            and columns 2 onward hold the box as x1, y1, x2, y2.
        labels_blob: class label per RoI, indexed like rois_blob.
        sublabels_blob: subclass label per RoI, indexed like rois_blob.
    """
    import matplotlib.pyplot as plt
    for i in range(rois_blob.shape[0]):
        rois = rois_blob[i, :]
        # The blob may be a float array; an array index must be an integer.
        im_ind = int(rois[0])
        # NOTE(review): coordinates are read from column 2 onward, so each row
        # needs at least 6 columns. Earlier annotations describe a 1 + 4
        # column layout, which would need rois[1:] instead - confirm.
        roi = rois[2:]
        # Move axis 0 of the selected image to the end for plotting.
        im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
        im += cfg.PIXEL_MEANS
        # Reverse the channel order (presumably BGR -> RGB for matplotlib).
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.uint8)
        cls = labels_blob[i]
        subcls = sublabels_blob[i]
        plt.imshow(im)
        # Single-argument call: valid on Python 2 and 3, and reproduces the
        # spacing the original comma-separated print statement emitted.
        print('class:  {}  subclass:  {}'.format(cls, subcls))
        plt.gca().add_patch(
            plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
                          roi[3] - roi[1], fill=False,
                          edgecolor='r', linewidth=3)
        )
        plt.show()