pytorchvideo/models/head.py [51:123]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    dropout_rate: float = 0.5,
    # Activation configs.
    activation: Callable = None,
    # Output configs.
    output_with_global_average: bool = True,
) -> nn.Module:
    """
    Creates ResNet basic head. This layer performs an optional pooling operation
    followed by an optional dropout, a fully-connected projection, an activation layer
    and a global spatiotemporal averaging.

    ::


                                        Pooling
                                           ↓
                                        Dropout
                                           ↓
                                       Projection
                                           ↓
                                       Activation
                                           ↓
                                       Averaging

    Activation examples include: ReLU, Softmax, Sigmoid, and None.
    Pool3d examples include: AvgPool3d, MaxPool3d, AdaptiveAvgPool3d, and None.

    Args:

        in_features: input channel size of the resnet head.
        out_features: output channel size of the resnet head.

        pool (callable): a callable that constructs resnet head pooling layer,
            examples include: nn.AvgPool3d, nn.MaxPool3d, nn.AdaptiveAvgPool3d, and
            None (not applying pooling).
        pool_kernel_size (tuple): pooling kernel size(s) when not using adaptive
            pooling.
        pool_stride (tuple): pooling stride size(s) when not using adaptive pooling.
        pool_padding (tuple): pooling padding size(s) when not using adaptive
            pooling.
        output_size (tuple): spatial temporal output size when using adaptive
            pooling.

        activation (callable): a callable that constructs resnet head activation
            layer, examples include: nn.ReLU, nn.Softmax, nn.Sigmoid, and None (not
            applying activation).

        dropout_rate (float): dropout rate.

        output_with_global_average (bool): if True, perform global averaging on temporal
            and spatial dimensions and reshape output to batch_size x out_features.
    """

    if activation is None:
        activation_model = None
    elif activation == nn.Softmax:
        activation_model = activation(dim=1)
    else:
        activation_model = activation()

    if pool is None:
        pool_model = None
    elif pool == nn.AdaptiveAvgPool3d:
        pool_model = pool(output_size)
    else:
        pool_model = pool(
            kernel_size=pool_kernel_size, stride=pool_stride, padding=pool_padding
        )

    if output_with_global_average:
        output_pool = nn.AdaptiveAvgPool3d(1)
    else:
        output_pool = None
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


pytorchvideo/models/head.py [221:313]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    dropout_rate: float = 0.5,
    # Activation configs.
    activation: Callable = None,
    # Output configs.
    output_with_global_average: bool = True,
) -> nn.Module:
    """
    Creates ResNet RoI head. This layer performs an optional pooling operation
    followed by an RoI projection, an optional 2D spatial pool, an optional dropout,
    a fully-connected projection, an activation layer
    and a global spatiotemporal averaging.

                                        Pool3d
                                           ↓
                                       RoI Align
                                           ↓
                                        Pool2d
                                           ↓
                                        Dropout
                                           ↓
                                       Projection
                                           ↓
                                       Activation
                                           ↓
                                       Averaging

    Activation examples include: ReLU, Softmax, Sigmoid, and None.
    Pool3d examples include: AvgPool3d, MaxPool3d, AdaptiveAvgPool3d, and None.
    RoI examples include: detectron2.layers.ROIAlign, detectron2.layers.ROIAlignRotated,
        tochvision.ops.RoIAlign and None
    Pool2d examples include: MaxPool2e, AvgPool2d, and None.

    Args:
        Projection related configs:
            in_features: input channel size of the resnet head.
            out_features: output channel size of the resnet head.

        RoI layer related configs:
            resolution (tuple): h, w sizes of the RoI interpolation.
            spatial_scale (float): scale the input boxes by this number
            sampling_ratio (int): number of inputs samples to take for each output
                sample interpolation. 0 to take samples densely.
            roi (callable): a callable that constructs the roi interpolation layer,
                examples include detectron2.layers.ROIAlign,
                detectron2.layers.ROIAlignRotated, and None.

        Pooling related configs:
            pool (callable): a callable that constructs resnet head pooling layer,
                examples include: nn.AvgPool3d, nn.MaxPool3d, nn.AdaptiveAvgPool3d, and
                None (not applying pooling).
            pool_kernel_size (tuple): pooling kernel size(s) when not using adaptive
                pooling.
            pool_stride (tuple): pooling stride size(s) when not using adaptive pooling.
            pool_padding (tuple): pooling padding size(s) when not using adaptive
                pooling.
            output_size (tuple): spatial temporal output size when using adaptive
                pooling.
            pool_spatial (callable): a callable that constructs the 2d pooling layer which
                follows the RoI layer, examples include: nn.AvgPool2d, nn.MaxPool2d, and
                None (not applying spatial pooling).

        Activation related configs:
            activation (callable): a callable that constructs resnet head activation
                layer, examples include: nn.ReLU, nn.Softmax, nn.Sigmoid, and None (not
                applying activation).

        Dropout related configs:
            dropout_rate (float): dropout rate.

        Output related configs:
            output_with_global_average (bool): if True, perform global averaging on temporal
                and spatial dimensions and reshape output to batch_size x out_features.
    """
    if activation is None:
        activation_model = None
    elif activation == nn.Softmax:
        activation_model = activation(dim=1)
    else:
        activation_model = activation()

    if pool is None:
        pool_model = None
    elif pool == nn.AdaptiveAvgPool3d:
        pool_model = pool(output_size)
    else:
        pool_model = pool(
            kernel_size=pool_kernel_size, stride=pool_stride, padding=pool_padding
        )

    if output_with_global_average:
        output_pool = nn.AdaptiveAvgPool3d(1)
    else:
        output_pool = None
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -