# forward_singlecrop — excerpted from models/base_model.py


    def forward_singlecrop(self, video, target_shape=None):
        """
        Run the full single-crop forward pass: backbone feature extraction,
        temporal aggregation, future prediction, and classification.

        Args:
            video (torch.Tensor, Bx#clipsxCxTxHxW)
            target_shape: The shape of the target. Some of these layers might
                be able to use this information.
        Returns:
            Tuple of (outputs, aux_losses):
                outputs (dict): named intermediate and final tensors —
                    'backbone', 'backbone_mean', 'temp_agg', 'past',
                    'future', 'future_agg', classifier logits (via
                    _apply_classifier), and optionally projected variants
                    and 'logits_regression'.
                aux_losses (dict): auxiliary losses accumulated from the
                    temporal aggregators and the future predictor.
        """
        outputs = {}
        aux_losses = {}
        batch_size = video.size(0)
        num_clips = video.size(1)
        # Fold the clips dimension into the batch for feature extraction, upto
        # temporal aggregation
        video = video.flatten(0, 1)
        # NOTE(review): assumes the backbone returns a 5D tensor
        # (B*num_clips, C, T', H', W') — confirm against the backbone impl.
        feats = self.backbone(video)
        outputs['backbone'] = feats
        # Spatial mean over the last two (H, W) dims -> (B*num_clips, C, T')
        feats = torch.mean(feats, [-1, -2])
        # store temporal mean as well (collapses the remaining T' dim)
        outputs['backbone_mean'] = torch.mean(feats, [-1])
        # Project the pooled backbone feature for contrastive-style losses,
        # but only when the feature dim actually matches what project_mlp
        # expects; otherwise projection happens later on aggregated feats.
        # If it's not sequential and can be applied here
        if len(self.project_mlp) > 0 and (outputs['backbone_mean'].size(-1) ==
                                          self.project_mlp[0].in_features):
            outputs['backbone_mean_projected'] = self.project_mlp(
                outputs['backbone_mean'])
        # Move the time dimension inside: B,C,T -> B,T,C
        feats = feats.permute((0, 2, 1))
        # Map the feats to intermediate dimension, that rest of the code
        # will operate on. Only if the original feature is not already
        if feats.shape[-1] != self.cfg.intermediate_featdim:
            # mapper_to_inter is only constructed when the configured backbone
            # dim differs from intermediate_featdim; a mismatch here means
            # model.backbone_dim was configured wrong.
            assert self.mapper_to_inter is not None, (
                f'The backbone feat does not match intermediate {feats.shape} '
                f'and {self.cfg.intermediate_featdim}. Please set '
                f'model.backbone_dim correctly.')
            feats = self.mapper_to_inter(feats)
        # Aggregate over time; the aggregator may also emit auxiliary losses.
        feats_agg, agg_losses = self.temporal_aggregator(feats)
        aux_losses.update(agg_losses)
        feats_agg = self.reset_temp_agg_feat_dim(feats_agg)
        outputs['temp_agg'] = feats_agg
        # For the contrastive loss, I need a projected version of this feature
        outputs['temp_agg_projected'] = self.project_mlp(feats_agg)
        # Now before future prediction, need to unfold the clips back out,
        # and concat on the temporal dimension
        if num_clips > 1:
            # The aggregator must have collapsed time (2D output, or 3D with a
            # singleton time dim) for the clip unfold below to be meaningful.
            assert (
                (feats_agg.ndim == 2)
                or (feats_agg.ndim == 3 and feats_agg.size(1) == 1)
            ), ('Should be using some temporal aggregation when using clips')
            feats_agg = feats_agg.reshape((batch_size, num_clips) +
                                          feats_agg.shape[1:])
            if feats_agg.ndim == 4:
                # Merge the clip dim with the singleton time dim -> (B, T, F)
                feats_agg = torch.flatten(feats_agg, 1, 2)
            # now feats_agg back to 3D (B, T, F)
        feats_past = feats_agg
        # Now the future prediction, also it might update the past features
        # like the GPT style models would
        (feats_past, feats_future, future_losses,
         endpoints) = self.future_predictor(feats_past, target_shape)
        aux_losses.update(future_losses)
        # endpoints first, so the explicit keys below win on any collision
        outputs.update(endpoints)
        outputs['future'] = feats_future
        outputs['past'] = feats_past
        # Apply a classifier on the past features, might be supervising that
        if self.cfg.classifier_on_past:
            feats_past_drop = self.dropout(feats_past)
            outputs.update(
                self._apply_classifier(feats_past_drop,
                                       outputs_prefix=PAST_LOGITS_PREFIX))
        # For the contrastive loss, I need a projected version of this feature
        # NOTE(review): this projects feats_agg (the aggregated *past*
        # feature), not feats_future, even though the key says 'future' —
        # looks suspicious; confirm against the loss code before changing.
        outputs['future_projected'] = self.project_mlp(feats_agg)
        # Aggregate again, if asked for
        feats_future_agg, future_agg_losses = (
            self.temporal_aggregator_after_future_pred(feats_future))
        aux_losses.update(future_agg_losses)
        outputs['future_agg'] = feats_future_agg
        # Dropout is applied once and shared by classifier + regression head.
        feats_future_agg_drop = self.dropout(feats_future_agg)
        outputs.update(self._apply_classifier(feats_future_agg_drop))
        if self.regression_head:
            outputs['logits_regression'] = self.regression_head(
                feats_future_agg_drop)
        return outputs, aux_losses