keras Mask Rcnn代码走读（七）-mask生成

获取了待检测图片的分类回归信息，我们将回归信息（即待检测目标的边框信息）单独提取出来，结合金字塔特征mrcnn_feature_maps，进行Mask生成工作（input_image_meta用于提取输入图片长宽，进行金字塔ROI处理

# Detections
# output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
# normalized coordinates
detections = DetectionLayer(config, name="mrcnn_detection")(
    [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

# Create masks for detections
detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps,
                                  input_image_meta,
                                  config.MASK_POOL_SIZE,
                                  config.NUM_CLASSES,
                                  train_bn=config.TRAIN_BN)

def build_fpn_mask_graph(rois, feature_maps, image_meta,
                         pool_size, num_classes, train_bn=True):
    """Builds the computation graph of the mask head of Feature Pyramid Network.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES]
    """
    # ROI Pooling
    # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
    x = PyramidROIAlign([pool_size, pool_size],
                        name="roi_align_mask")([rois, image_meta] + feature_maps)

    # Conv layers
    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv1")(x)
    x = KL.TimeDistributed(BatchNorm(),
                           name='mrcnn_mask_bn1')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv2")(x)
    x = KL.TimeDistributed(BatchNorm(),
                           name='mrcnn_mask_bn2')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv3")(x)
    x = KL.TimeDistributed(BatchNorm(),
                           name='mrcnn_mask_bn3')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
                           name="mrcnn_mask_conv4")(x)
    x = KL.TimeDistributed(BatchNorm(),
                           name='mrcnn_mask_bn4')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"),
                           name="mrcnn_mask_deconv")(x)
    x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"),
                           name="mrcnn_mask")(x)
    return x

模型输出Tensor：

# num_anchors,    每张图片上生成的锚框数量
# num_rois,       每张图片上由锚框筛选出的推荐区数量，
# #               由 POST_NMS_ROIS_TRAINING 或 POST_NMS_ROIS_INFERENCE 规定
# num_detections, 每张图片上最终检测输出框，
# #               由 DETECTION_MAX_INSTANCES 规定

# detections,     [batch, num_detections, (y1, x1, y2, x2, class_id, score)]
# mrcnn_class,    [batch, num_rois, NUM_CLASSES] classifier probabilities
# mrcnn_bbox,     [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
# mrcnn_mask,     [batch, num_detections, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES]
# rpn_rois,       [batch, num_rois, (y1, x1, y2, x2, class_id, score)]
# rpn_class,      [batch, num_anchors, 2]
# rpn_bbox        [batch, num_anchors, 4]

来源：CSDN

作者：AI剑客

链接：https://blog.csdn.net/qq_43258953/article/details/103482331

标签

rcnn

num

relu