# models/base_model.py [0:0]
def forward(self, video, *args, **kwargs):
"""
Args: video (torch.Tensor)
Could be (B, #clips, C, T, H, W) or
(B, #clips, #crops, C, T, H, W)
Returns:
Final features
And any auxiliarly losses produced by the model
"""
if video.ndim == 6:
video_crops = [video]
elif video.ndim == 7 and video.size(2) == 1:
video_crops = [video.squeeze(2)]
elif video.ndim == 7:
video_crops = torch.unbind(video, dim=2)
else:
raise NotImplementedError('Unsupported size %s' % video.shape)
feats_losses = [
self.forward_singlecrop(el, *args, **kwargs) for el in video_crops
]
feats, losses = zip(*feats_losses)
# Convert to dict of lists
feats = {k: [dic[k] for dic in feats] for k in feats[0]}
losses = {k: [dic[k] for dic in losses] for k in losses[0]}
# Average over the crops
feats = {
k: torch.mean(torch.stack(el, dim=0), dim=0)
for k, el in feats.items()
}
losses = {
k: torch.mean(torch.stack(el, dim=0), dim=0)
for k, el in losses.items()
}
return feats, losses