def create_resnet_with_roi_head()

in pytorchvideo/models/resnet.py [0:0]


def create_resnet_with_roi_head(
    *,
    # Input clip configs.
    input_channel: int = 3,
    # Model configs.
    model_depth: int = 50,
    model_num_class: int = 80,
    dropout_rate: float = 0.5,
    # Normalization configs.
    norm: Callable = nn.BatchNorm3d,
    # Activation configs.
    activation: Callable = nn.ReLU,
    # Stem configs.
    stem_dim_out: int = 64,
    stem_conv_kernel_size: Tuple[int] = (1, 7, 7),
    stem_conv_stride: Tuple[int] = (1, 2, 2),
    stem_pool: Callable = nn.MaxPool3d,
    stem_pool_kernel_size: Tuple[int] = (1, 3, 3),
    stem_pool_stride: Tuple[int] = (1, 2, 2),
    stem: Callable = create_res_basic_stem,
    # Stage configs.
    stage1_pool: Callable = None,
    stage1_pool_kernel_size: Tuple[int] = (2, 1, 1),
    stage_conv_a_kernel_size: Union[Tuple[int], Tuple[Tuple[int]]] = (
        (1, 1, 1),
        (1, 1, 1),
        (3, 1, 1),
        (3, 1, 1),
    ),
    stage_conv_b_kernel_size: Union[Tuple[int], Tuple[Tuple[int]]] = (
        (1, 3, 3),
        (1, 3, 3),
        (1, 3, 3),
        (1, 3, 3),
    ),
    stage_conv_b_num_groups: Tuple[int] = (1, 1, 1, 1),
    stage_conv_b_dilation: Union[Tuple[int], Tuple[Tuple[int]]] = (
        (1, 1, 1),
        (1, 1, 1),
        (1, 1, 1),
        (1, 2, 2),
    ),
    stage_spatial_h_stride: Tuple[int] = (1, 2, 2, 1),
    stage_spatial_w_stride: Tuple[int] = (1, 2, 2, 1),
    stage_temporal_stride: Tuple[int] = (1, 1, 1, 1),
    bottleneck: Union[Tuple[Callable], Callable] = create_bottleneck_block,
    # Head configs.
    head: Callable = create_res_roi_pooling_head,
    head_pool: Callable = nn.AvgPool3d,
    head_pool_kernel_size: Tuple[int] = (4, 1, 1),
    head_output_size: Tuple[int] = (1, 1, 1),
    head_activation: Callable = nn.Sigmoid,
    head_output_with_global_average: bool = False,
    head_spatial_resolution: Tuple[int] = (7, 7),
    head_spatial_scale: float = 1.0 / 16.0,
    head_sampling_ratio: int = 0,