in detectron/modeling/FPN.py [0:0]
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper.

    The pyramid is assembled in three passes:

    1. A 1x1 conv on the coarsest backbone stage seeds the top-down pathway.
    2. Top-down + lateral modules are chained toward the finer stages, and a
       3x3 conv is then applied to every merged map to produce the outputs.
    3. Optionally, levels coarser than the backbone are appended, either by
       stride-2 subsampling (P6 of the FPN paper) or by extra stride-2 convs
       (the RetinaNet variant), depending on ``cfg.FPN.EXTRA_CONV_LEVELS``.

    Args:
        model: model helper used to emit ops; this function calls its
            ``Conv``, ``ConvGN``, ``MaxPool`` and ``Relu`` methods.
        fpn_level_info: backbone description exposing ``blobs``, ``dims`` and
            ``spatial_scales``, indexed in parallel with the coarsest
            backbone stage at index 0.

    Returns:
        A ``(blobs_fpn, fpn_dim, spatial_scales)`` tuple. ``blobs_fpn`` and
        ``spatial_scales`` are parallel lists ordered from the coarsest level
        to the finest one; ``fpn_dim`` is ``cfg.FPN.DIM``.
    """
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()

    # Number of backbone stages that receive an FPN level, counted from the
    # coarsest stage (usually the "conv5"-like one). E.g., if the backbone
    # level info defines 4 stages ("conv5", "conv4", ... "conv2") and
    # min_level=2, we end up with 4 - (2 - 2) = 4 stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = ['fpn_inner_{}'.format(s) for s in lateral_input_blobs]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})
    use_gn = cfg.FPN.USE_GN

    def _fpn_conv(blob_in, blob_out, dim_in, kernel, pad):
        """Emit a stride-1 conv to ``fpn_dim`` channels, with GroupNorm if enabled."""
        if use_gn:
            return model.ConvGN(
                blob_in,
                blob_out,  # note: ConvGN treats this as a prefix
                dim_in=dim_in,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=kernel,
                pad=pad,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        return model.Conv(
            blob_in,
            blob_out,
            dim_in=dim_in,
            dim_out=fpn_dim,
            kernel=kernel,
            pad=pad,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    # Coarsest backbone level: a lone 1x1 conv that seeds the recursion.
    seed_blob = _fpn_conv(
        lateral_input_blobs[0],
        output_blobs[0],
        fpn_dim_lateral[0],
        kernel=1,
        pad=0,
    )
    if use_gn:
        # The name was only a prefix for ConvGN; track the blob it returned.
        output_blobs[0] = seed_blob

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #

    # Every finer level merges the level above it (top-down) with its own
    # backbone stage (lateral).
    for finer in range(1, num_backbone_stages):
        add_topdown_lateral_module(
            model,
            output_blobs[finer - 1],     # top-down blob
            lateral_input_blobs[finer],  # lateral blob
            output_blobs[finer],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[finer]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs, one per merged map.
    blobs_fpn = []
    spatial_scales = []
    for stage in range(num_backbone_stages):
        blobs_fpn.append(
            _fpn_conv(
                output_blobs[stage],
                'fpn_{}'.format(fpn_level_info.blobs[stage]),
                fpn_dim,
                kernel=3,
                pad=1,
            )
        )
        spatial_scales.append(fpn_level_info.spatial_scales[stage])

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    first_extra_level = HIGHEST_BACKBONE_LVL + 1
    if cfg.FPN.EXTRA_CONV_LEVELS:
        # Coarser FPN levels introduced for RetinaNet: a chain of stride-2
        # 3x3 convs that starts from the coarsest *backbone* blob.
        if max_level > HIGHEST_BACKBONE_LVL:
            prev_blob = fpn_level_info.blobs[0]
            dim_in = fpn_level_info.dims[0]
            for level in range(first_extra_level, max_level + 1):
                if level == first_extra_level:
                    conv_input = prev_blob
                else:
                    # A ReLU is inserted between consecutive extra levels,
                    # but not before the first one.
                    conv_input = model.Relu(prev_blob, prev_blob + '_relu')
                prev_blob = model.Conv(
                    conv_input,
                    'fpn_' + str(level),
                    dim_in=dim_in,
                    dim_out=fpn_dim,
                    kernel=3,
                    pad=1,
                    stride=2,
                    weight_init=xavier_fill,
                    bias_init=const_fill(0.0)
                )
                dim_in = fpn_dim
                blobs_fpn.insert(0, prev_blob)
                spatial_scales.insert(0, spatial_scales[0] * 0.5)
    elif max_level == first_extra_level:
        # Original FPN P6 level implementation from the CVPR'17 FPN paper:
        # a kernel-1, stride-2 max pool simulates stride-2 subsampling of
        # the coarsest FPN output.
        p6_input = blobs_fpn[0]
        p6_name = p6_input + '_subsampled_2x'
        p6_blob = model.MaxPool(p6_input, p6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, p6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales