def add_fpn_retinanet_outputs()

in detectron/modeling/retinanet_heads.py
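
This excerpt relies on the module-level imports of retinanet_heads.py (in
particular, cfg from detectron.core.config) and on get_retinanet_bias_init(),
which is defined earlier in the same file.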


def add_fpn_retinanet_outputs(model, blobs_in, dim_in, spatial_scales):
    """RetinaNet head. For classification and box regression, we can chose to
    have the same conv tower or a separate tower. "bl_feat_list" stores the list
    of feature blobs for bbox prediction. These blobs can be shared cls feature
    blobs if we share the tower or else are independent blobs.
    """
    dim_out = dim_in
    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    A = len(cfg.RETINANET.ASPECT_RATIOS) * cfg.RETINANET.SCALES_PER_OCTAVE
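    # e.g. with the default config (3 aspect ratios x 3 scales per octave),
    # A = 9 anchors per spatial location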

    # compute the bias init for the cls logits; this is chosen so that the
    # initial foreground probability is cfg.RETINANET.PRIOR_PROB (per the
    # RetinaNet paper)
    bias_init = get_retinanet_bias_init(model)

    assert len(blobs_in) == k_max - k_min + 1
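    # blobs_in holds one feature blob per pyramid level, ordered from the
    # coarsest level (k_max) down to the finest (k_min)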
    bbox_feat_list = []
    cls_pred_dim = (
        model.num_classes if cfg.RETINANET.SOFTMAX else (model.num_classes - 1)
    )
    # bbox regression predicts 4 offsets per anchor, or 4 per (non-background)
    # class per anchor when class-specific bbox regression is enabled
    bbox_regr_dim = (
        4 * (model.num_classes - 1) if cfg.RETINANET.CLASS_SPECIFIC_BBOX else 4
    )
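    # Worked example, assuming the COCO defaults (81 classes incl. background,
    # sigmoid loss, class-agnostic bbox, A = 9): the cls layer below outputs
    # cls_pred_dim * A = 80 * 9 = 720 channels per level, and the bbox layer
    # outputs bbox_regr_dim * A = 4 * 9 = 36 channels per level.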

    # ==========================================================================
    # classification tower with logits and prob prediction
    # ==========================================================================
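    # The finest level (k_min) creates the tower parameters with model.Conv;
    # every other level attaches ConvShared ops that reuse those parameters,
    # so a single tower is shared across all pyramid levels.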
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        # start of the classification tower conv stack
        for nconv in range(cfg.RETINANET.NUM_CONVS):
            suffix = 'n{}_fpn{}'.format(nconv, lvl)
            dim_out = dim_in  # the tower keeps the channel count unchanged
            if lvl == k_min:
                bl_out = model.Conv(
                    bl_in,
                    'retnet_cls_conv_' + suffix,
                    dim_in,
                    dim_out,
                    3,
                    stride=1,
                    pad=1,
                    weight_init=('GaussianFill', {
                        'std': 0.01
                    }),
                    bias_init=('ConstantFill', {
                        'value': 0.
                    })
                )
            else:
                bl_out = model.ConvShared(
                    bl_in,
                    'retnet_cls_conv_' + suffix,
                    dim_in,
                    dim_out,
                    3,
                    stride=1,
                    pad=1,
                    weight='retnet_cls_conv_n{}_fpn{}_w'.format(nconv, k_min),
                    bias='retnet_cls_conv_n{}_fpn{}_b'.format(nconv, k_min)
                )
            bl_in = model.Relu(bl_out, bl_out)  # in-place ReLU
            bl_feat = bl_in
        # cls tower conv stack ends; now add the logits layer
        if lvl == k_min:
            retnet_cls_pred = model.Conv(
                bl_feat,
                'retnet_cls_pred_fpn{}'.format(lvl),
                dim_in,
                cls_pred_dim * A,
                3,
                pad=1,
                stride=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=bias_init
            )
        else:
            retnet_cls_pred = model.ConvShared(
                bl_feat,
                'retnet_cls_pred_fpn{}'.format(lvl),
                dim_in,
                cls_pred_dim * A,
                3,
                pad=1,
                stride=1,
                weight='retnet_cls_pred_fpn{}_w'.format(k_min),
                bias='retnet_cls_pred_fpn{}_b'.format(k_min)
            )
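        # Probabilities are materialized only at inference; during training
        # the loss ops consume the raw logits directly.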
        if not model.train:
            if cfg.RETINANET.SOFTMAX:
                model.net.GroupSpatialSoftmax(
                    retnet_cls_pred,
                    'retnet_cls_prob_fpn{}'.format(lvl),
                    num_classes=cls_pred_dim
                )
            else:
                model.net.Sigmoid(
                    retnet_cls_pred, 'retnet_cls_prob_fpn{}'.format(lvl)
                )
        if cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
            # reuse the cls tower features for bbox prediction
            bbox_feat_list.append(bl_feat)

    # ==========================================================================
    # bbox regression tower, built only when it does not share features with
    # the classification tower
    # ==========================================================================
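    # The structure mirrors the cls tower above, but with its own parameters
    # (again created at k_min and reused by the other levels via ConvShared).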
    if not cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
        for lvl in range(k_min, k_max + 1):
            bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
            for nconv in range(cfg.RETINANET.NUM_CONVS):
                suffix = 'n{}_fpn{}'.format(nconv, lvl)
                dim_out = dim_in  # the tower keeps the channel count unchanged
                if lvl == k_min:
                    bl_out = model.Conv(
                        bl_in,
                        'retnet_bbox_conv_' + suffix,
                        dim_in,
                        dim_out,
                        3,
                        stride=1,
                        pad=1,
                        weight_init=('GaussianFill', {
                            'std': 0.01
                        }),
                        bias_init=('ConstantFill', {
                            'value': 0.
                        })
                    )
                else:
                    bl_out = model.ConvShared(
                        bl_in,
                        'retnet_bbox_conv_' + suffix,
                        dim_in,
                        dim_out,
                        3,
                        stride=1,
                        pad=1,
                        weight='retnet_bbox_conv_n{}_fpn{}_w'.format(
                            nconv, k_min
                        ),
                        bias='retnet_bbox_conv_n{}_fpn{}_b'.format(
                            nconv, k_min
                        )
                    )
                bl_in = model.Relu(bl_out, bl_out)  # in-place ReLU
                # At least one conv is needed here so the head can adapt to
                # the different octave scales and aspect ratios
                bl_feat = bl_in
            bbox_feat_list.append(bl_feat)
    # Add the prediction layer on top of the bbox features, which are either
    # shared with the cls tower or separate (see above)
    for i, lvl in enumerate(range(k_min, k_max + 1)):
        bbox_pred = 'retnet_bbox_pred_fpn{}'.format(lvl)
        bl_feat = bbox_feat_list[i]
        if lvl == k_min:
            model.Conv(
                bl_feat,
                bbox_pred,
                dim_in,
                bbox_regr_dim * A,
                3,
                pad=1,
                stride=1,
                weight_init=('GaussianFill', {
                    'std': 0.01
                }),
                bias_init=('ConstantFill', {
                    'value': 0.
                })
            )
        else:
            model.ConvShared(
                bl_feat,
                bbox_pred,
                dim_in,
                bbox_regr_dim * A,
                3,
                pad=1,
                stride=1,
                weight='retnet_bbox_pred_fpn{}_w'.format(k_min),
                bias='retnet_bbox_pred_fpn{}_b'.format(k_min)
            )
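
For context, a minimal sketch of how this head gets wired up, modeled on the
generic RetinaNet builder in detectron/modeling/model_builder.py. The wrapper
name below is hypothetical and the exact call sequence is an assumption based
on that file, not part of this excerpt:

def _sketch_build_retinanet(model, add_conv_body_func):
    # hypothetical wrapper: the FPN body returns one feature blob per level
    # (coarsest first), the channel count of those blobs, and their scales
    blobs, dim, spatial_scales = add_conv_body_func(model)
    add_fpn_retinanet_outputs(model, blobs, dim, spatial_scales)
    if model.train:
        # companion function defined in the same file; attaches the focal
        # loss and the bbox regression loss to the prediction blobs
        add_fpn_retinanet_losses(model)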