in datasets/AVideoDataset.py [0:0]
def _construct_loader(self):
    """
    Construct the video loader.
    """
    # Get list of paths
    os.makedirs(self.path_to_data_dir, exist_ok=True)
    path_to_file = os.path.join(
        self.path_to_data_dir, f"{self.ds_name}_{self.mode}.txt"
    )
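    # Build the list of video paths once and cache it as a text file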
    if not os.path.exists(path_to_file):
        files = list(sorted(glob.glob(os.path.join(self.data_prefix, '*', '*'))))
        with open(path_to_file, 'w') as f:
            for item in files:
                f.write("%s\n" % item)

    # Get list of indices and labels
    self._path_to_videos = []
    self._labels = []
    self._spatial_temporal_idx = []
    self._vid_indices = []
    with open(path_to_file, "r") as f:
        for clip_idx, path in enumerate(f.read().splitlines()):
            for idx in range(self._num_clips):
                self._path_to_videos.append(
                    os.path.join(self.data_prefix, path)
                )
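                # The class label comes from the parent directory name of the video path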
                class_name = path.split('/')[-2]
                label = self.class_to_idx[class_name]
                self._labels.append(int(label))
                self._spatial_temporal_idx.append(idx)
                self._vid_indices.append(clip_idx)
                self._video_meta[clip_idx * self._num_clips + idx] = {}
    assert (
        len(self._path_to_videos) > 0
    ), "Failed to load {} split {} from {}".format(
        self.ds_name, self._split_idx, path_to_file
    )
    print(
        "Constructing {} dataloader (size: {}) from {}".format(
            self.ds_name, len(self._path_to_videos), path_to_file
        )
    )

    # Create / Load valid indices (has audio)
    if self.ds_name in ['kinetics', 'kinetics600']:
        if self.mode == 'train':
            vid_valid_file = f'{self.path_to_data_dir}/{self.ds_name}_valid.pkl'
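            # Cache audio-validity results so the filtering pass over all videos only runs once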
            if os.path.exists(vid_valid_file):
                with open(vid_valid_file, 'rb') as handle:
                    self.valid_indices = pickle.load(handle)
            else:
                self.valid_indices = filter_videos(self._path_to_videos, decode_audio=self.decode_audio)
                with open(vid_valid_file, 'wb') as handle:
                    pickle.dump(
                        self.valid_indices,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL
                    )
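            # Optionally subsample a fixed number of valid training videos (num_data_samples)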
            if self.num_data_samples is not None:
                rand_indices = np.random.choice(range(len(self.valid_indices)), self.num_data_samples, replace=False)
                self.valid_indices = np.array(self.valid_indices)[rand_indices]
        else:
            # self.valid_indices = [i for i in range(0, len(self._path_to_videos))]
            vid_valid_file = f'{self.path_to_data_dir}/{self.ds_name}_valid_{self.mode}_{self.decode_audio}.pkl'
            if os.path.exists(vid_valid_file):
                with open(vid_valid_file, 'rb') as handle:
                    self.valid_indices = pickle.load(handle)
            else:
                self.valid_indices = filter_videos(self._path_to_videos, decode_audio=self.decode_audio)
                with open(vid_valid_file, 'wb') as handle:
                    pickle.dump(
                        self.valid_indices,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL
                    )
        print(f"Total number of videos: {len(self._path_to_videos)}, Valid videos: {len(self.valid_indices)}", flush=True)
    else:  # ucf101 and hmdb51
        train = self.mode == 'train'
        if self.ds_name == 'ucf101':
            self.valid_indices = select_fold_ucf101(self.data_prefix, self._path_to_videos, self.ucf101_annotation_path, self.fold, train)
        elif self.ds_name == 'hmdb51':
            self.valid_indices = select_fold_hmdb51(self._path_to_videos, self.hmdb51_annotation_path, self.fold, train)
        else:
            raise NotImplementedError(f"Unsupported dataset: {self.ds_name}")
        print(f"Total number of videos: {len(self._path_to_videos)}, Valid videos: {len(self.valid_indices)}", flush=True)

    # Wrap the lists in multiprocessing Manager proxies so they can be shared across worker processes
    self._path_to_videos = self.manager.list(self._path_to_videos)
    self.valid_indices = self.manager.list(self.valid_indices)