in pytorchvideo/data/domsev.py [0:0]
def __getitem__(self, index) -> Dict[str, Any]:
"""
Samples the video clip associated with the given index.
Args:
index (int): index for the video clip.
Returns:
A video clip with the following format if transform is None.
.. code-block:: text
{
'video_id': <str>,
'video': <video_tensor>,
'audio': <audio_tensor>,
'labels': <labels_tensor>,
'start_time': <float>,
'stop_time': <float>
}
"""
clip = self._clips[index]
# Keep only the labels that overlap the clip boundaries, and unpack them
# so there is one label per frame in the clip.
labels_in_video = self._labels_per_video[clip.video_id]
labels_in_clip = []
for label_data in labels_in_video:
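# _get_overlap_for_time_range_pair returns the overlapping (start, stop)
# window between the clip's time range and the label's time range, or
# None when the two ranges do not overlap.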
overlap_period = _get_overlap_for_time_range_pair(
clip.start_time,
clip.stop_time,
label_data.start_time,
label_data.stop_time,
)
if overlap_period is not None:
overlap_start_time, overlap_stop_time = overlap_period
# Convert the overlapping period between clip and label to
# 0-indexed start and stop frame indexes, so we can unpack 1
# label per frame.
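# (_seconds_to_frame_index is expected to map a timestamp to a frame
# index at self._frames_per_second, i.e. roughly floor(seconds * fps).)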
overlap_start_frame = _seconds_to_frame_index(
overlap_start_time, self._frames_per_second
)
overlap_stop_frame = _seconds_to_frame_index(
overlap_stop_time, self._frames_per_second
)
# Append 1 label per frame
for _ in range(overlap_start_frame, overlap_stop_frame):
labels_in_clip.append(label_data)
# Convert the list of LabelData objects to a tensor of just the label IDs
label_ids = [label_data.label_id for label_data in labels_in_clip]
label_ids_tensor = torch.tensor(label_ids)
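# get_clip() decodes the requested time range and returns a dict holding the
# 'video' and 'audio' tensors described in the docstring; it is merged into
# clip_data via ** below.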
clip_data = {
"video_id": clip.video_id,
**self._videos[clip.video_id].get_clip(clip.start_time, clip.stop_time),
"labels": label_ids_tensor,
"start_time": clip.start_time,
"stop_time": clip.stop_time,
}
if self._transform:
clip_data = self._transform(clip_data)
return clip_data
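# Illustrative usage sketch (not part of the source file): `dataset` is assumed
# to be an already-constructed instance of this dataset class, with `transform`
# left as None so the raw dict described in the docstring is returned.
#
#     clip = dataset[0]                      # calls __getitem__(0)
#     clip["video"]                          # video tensor for the sampled clip
#     clip["labels"]                         # one label id per frame overlapped by a label
#     clip["start_time"], clip["stop_time"]  # clip boundaries in seconds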