def forward()

in pytorchvideo/models/head.py [0:0]


    def forward(self, x: torch.Tensor, bboxes: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x (torch.tensor): input tensor
            bboxes (torch.tensor): Accociated bounding boxes.
                The format is N*5 (Index, X_1,Y_1,X_2,Y_2) if using RoIAlign
                and N*6 (Index, x_ctr, y_ctr, width, height, angle_degrees) if
                using RoIAlignRotated.
        """
        # Performs 3d pooling.
        if self.pool is not None:
            x = self.pool(x)
        # Performs roi layer using bboxes
        if self.roi_layer is not None:
            temporal_dim = x.shape[-3]
            if temporal_dim != 1:
                raise Exception(
                    "Temporal dimension should be 1. Consider modifying the pool layer."
                )
            x = torch.squeeze(x, -3)
            x = self.roi_layer(x, bboxes)
            # Performs spatial 2d pooling.
            if self.pool_spatial is not None:
                x = self.pool_spatial(x)
            x = x.unsqueeze(-3)
        # Performs dropout.
        if self.dropout is not None:
            x = self.dropout(x)
        # Performs projection.
        if self.proj is not None:
            x = x.permute((0, 2, 3, 4, 1))
            x = self.proj(x)
            x = x.permute((0, 4, 1, 2, 3))
        # Performs activation.
        if self.activation is not None:
            x = self.activation(x)

        if self.output_pool is not None:
            # Performs global averaging.
            x = self.output_pool(x)
            x = x.view(x.shape[0], -1)
        return x