in pytorchvideo/models/resnet.py [0:0]
def create_resnet_with_roi_head(
*,
# Input clip configs.
input_channel: int = 3,
# Model configs.
model_depth: int = 50,
model_num_class: int = 80,
dropout_rate: float = 0.5,
# Normalization configs.
norm: Callable = nn.BatchNorm3d,
# Activation configs.
activation: Callable = nn.ReLU,
# Stem configs.
stem_dim_out: int = 64,
stem_conv_kernel_size: Tuple[int] = (1, 7, 7),
stem_conv_stride: Tuple[int] = (1, 2, 2),
stem_pool: Callable = nn.MaxPool3d,
stem_pool_kernel_size: Tuple[int] = (1, 3, 3),
stem_pool_stride: Tuple[int] = (1, 2, 2),
stem: Callable = create_res_basic_stem,
# Stage configs.
stage1_pool: Callable = None,
stage1_pool_kernel_size: Tuple[int] = (2, 1, 1),
stage_conv_a_kernel_size: Union[Tuple[int], Tuple[Tuple[int]]] = (
(1, 1, 1),
(1, 1, 1),
(3, 1, 1),
(3, 1, 1),
),
stage_conv_b_kernel_size: Union[Tuple[int], Tuple[Tuple[int]]] = (
(1, 3, 3),
(1, 3, 3),
(1, 3, 3),
(1, 3, 3),
),
stage_conv_b_num_groups: Tuple[int] = (1, 1, 1, 1),
stage_conv_b_dilation: Union[Tuple[int], Tuple[Tuple[int]]] = (
(1, 1, 1),
(1, 1, 1),
(1, 1, 1),
(1, 2, 2),
),
stage_spatial_h_stride: Tuple[int] = (1, 2, 2, 1),
stage_spatial_w_stride: Tuple[int] = (1, 2, 2, 1),
stage_temporal_stride: Tuple[int] = (1, 1, 1, 1),
bottleneck: Union[Tuple[Callable], Callable] = create_bottleneck_block,
# Head configs.
head: Callable = create_res_roi_pooling_head,
head_pool: Callable = nn.AvgPool3d,
head_pool_kernel_size: Tuple[int] = (4, 1, 1),
head_output_size: Tuple[int] = (1, 1, 1),
head_activation: Callable = nn.Sigmoid,
head_output_with_global_average: bool = False,
head_spatial_resolution: Tuple[int] = (7, 7),
head_spatial_scale: float = 1.0 / 16.0,
head_sampling_ratio: int = 0,