def _construct_network()

in slowfast/models/ptv_model_builder.py [0:0]


    def _construct_network(self, cfg):
        """
        Assemble the single pathway ResNet and, optionally, a detection head.

        Reads the architecture lookup tables and hyper-parameters from ``cfg``
        and then sets, in this order, ``self.detection_head`` (only when
        ``cfg.DETECTION.ENABLE`` is truthy), ``self.model`` and
        ``self.post_act``.

        Args:
            cfg (CfgNode): model building configs, details are in the
                comments of the config file.
        """
        # ---- Values looked up from the config and architecture tables ----
        norm_layer = get_norm(cfg)
        post_activation = get_head_act(cfg.MODEL.HEAD_ACT)
        pool1 = _POOL1[cfg.MODEL.ARCH]
        groups = cfg.RESNET.NUM_GROUPS
        dilations = cfg.RESNET.SPATIAL_DILATIONS
        strides = cfg.RESNET.SPATIAL_STRIDES
        temporal_kernels = _TEMPORAL_KERNEL_BASIS[cfg.MODEL.ARCH]

        # Only the first entry of the pool table is consulted below.
        first_pool = pool1[0]
        # True unless every entry of that kernel equals 1.
        needs_stage1_pool = first_pool[0] != 1 or len(set(first_pool)) > 1

        # One stride per stage; the same tuple is used for height and width.
        per_stage_stride = tuple(strides[idx][0] for idx in range(4))

        # Kernel sizes of conv_a in each of the four stages.
        if cfg.MODEL.ARCH != "i3d":
            # Temporal extent comes from the basis table (entries 1..4; entry
            # 0 is used for the stem), spatial extent is fixed to 1x1.
            conv_a_kernels = tuple(
                (temporal_kernels[idx][0][0], 1, 1) for idx in range(1, 5)
            )
        else:
            # "i3d" uses a hard-coded pattern instead of the basis table.
            conv_a_kernels = (
                (3, 1, 1),
                [(3, 1, 1), (1, 1, 1)],
                [(3, 1, 1), (1, 1, 1)],
                [(1, 1, 1), (3, 1, 1)],
            )

        # Temporal pooling size shared by the detection head and the basic
        # head: number of frames divided by the temporal size of first_pool.
        temporal_extent = cfg.DATA.NUM_FRAMES // first_pool[0]

        # ---- Optional detection head ----
        # NOTE(review): this head is gated on cfg.DETECTION.ENABLE while the
        # basic head below is gated on self.detection_mode -- presumably the
        # same flag; confirm against the class constructor.
        if cfg.DETECTION.ENABLE:
            roi_in_features = cfg.RESNET.WIDTH_PER_GROUP * 2 ** (4 + 1)
            roi_resolution = [cfg.DETECTION.ROI_XFORM_RESOLUTION] * 2
            roi_scale = 1.0 / float(cfg.DETECTION.SPATIAL_SCALE_FACTOR)
            self.detection_head = create_res_roi_pooling_head(
                in_features=roi_in_features,
                out_features=cfg.MODEL.NUM_CLASSES,
                pool=nn.AvgPool3d,
                output_size=(1, 1, 1),
                pool_kernel_size=(temporal_extent, 1, 1),
                dropout_rate=cfg.MODEL.DROPOUT_RATE,
                activation=None,
                output_with_global_average=False,
                pool_spatial=nn.MaxPool2d,
                resolution=roi_resolution,
                spatial_scale=roi_scale,
                sampling_ratio=0,
                roi=ROIAlign,
            )

        # ---- Backbone (plus basic head when not in detection mode) ----
        stage_dilations = tuple(
            (1, dilations[idx][0], dilations[idx][0]) for idx in range(4)
        )
        # Spatial size handed to the head pool: crop size divided by 32 and
        # then by the matching spatial entry of first_pool.
        crop_div32 = cfg.DATA.TRAIN_CROP_SIZE // 32
        head_kernel = (
            temporal_extent,
            crop_div32 // first_pool[1],
            crop_div32 // first_pool[2],
        )

        self.model = create_resnet(
            # Input clip configs.
            input_channel=cfg.DATA.INPUT_CHANNEL_NUM[0],
            # Model configs.
            model_depth=cfg.RESNET.DEPTH,
            model_num_class=cfg.MODEL.NUM_CLASSES,
            dropout_rate=cfg.MODEL.DROPOUT_RATE,
            # Normalization configs.
            norm=norm_layer,
            # Activation configs.
            activation=partial(nn.ReLU, inplace=cfg.RESNET.INPLACE_RELU),
            # Stem configs.
            stem_dim_out=cfg.RESNET.WIDTH_PER_GROUP,
            stem_conv_kernel_size=(temporal_kernels[0][0][0], 7, 7),
            stem_conv_stride=(1, 2, 2),
            stem_pool=nn.MaxPool3d,
            stem_pool_kernel_size=(1, 3, 3),
            stem_pool_stride=(1, 2, 2),
            # Stage configs.
            stage1_pool=nn.MaxPool3d if needs_stage1_pool else None,
            stage1_pool_kernel_size=first_pool,
            stage_conv_a_kernel_size=conv_a_kernels,
            stage_conv_b_kernel_size=((1, 3, 3),) * 4,
            stage_conv_b_num_groups=(groups,) * 4,
            stage_conv_b_dilation=stage_dilations,
            stage_spatial_h_stride=per_stage_stride,
            stage_spatial_w_stride=per_stage_stride,
            stage_temporal_stride=(1, 1, 1, 1),
            bottleneck=create_bottleneck_block,
            # Head configs: no basic head is attached in detection mode.
            head=None if self.detection_mode else create_res_basic_head,
            head_pool=nn.AvgPool3d,
            head_pool_kernel_size=head_kernel,
            head_activation=None,
            head_output_with_global_average=False,
        )

        self.post_act = post_activation