in affordance_seg/dataset.py [0:0]
def populate_dset(self, data_dir, K=2000):
    """Populate the dataset with K episodes sampled evenly across scenes.

    Episode files are ``.npz`` archives in *data_dir* whose basenames encode
    the scene as the first underscore-separated token. Files are grouped by
    scene, shuffled within each scene, and then drawn round-robin across
    scenes so every scene contributes roughly equally to the K selected
    episodes. Selected episodes are loaded in parallel, each frame is added
    to the dataset, and the resulting entries are split into train/val sets.

    Args:
        data_dir: Directory containing episode ``.npz`` files.
        K: Number of episodes to select (default 2000).

    Raises:
        ValueError: If fewer than K episode files are found in *data_dir*.

    Side effects:
        Sets ``self.train_data`` and ``self.val_data``; extends
        ``self.entries`` via ``self.add_episode``. Prints progress to stdout.
    """
    # glob.glob already returns a list; no need to wrap it.
    episodes = glob.glob(f'{data_dir}/*.npz')
    N = len(episodes)
    # Raise instead of assert: asserts are stripped under `python -O`,
    # which would silently accept an undersized dataset.
    if N < K:
        raise ValueError(f'Not enough episodes collected (# episodes={N}, K={K})')
    # Group episode files by scene. Assumes basenames look like
    # '<scene>_<episode_id>_<suffix>' with no '_' inside the scene name
    # — TODO(review): confirm against the data-collection naming scheme.
    episode_by_scene = collections.defaultdict(list)
    for fl in episodes:
        scene, episode_id, _ = os.path.basename(fl).split('_')
        episode_by_scene[scene].append(fl)
    # Shuffle within each scene so the round-robin draw below picks a
    # random subset per scene.
    for scene in episode_by_scene:
        np.random.shuffle(episode_by_scene[scene])
    # Round-robin over scenes to re-populate the list of episodes,
    # balancing scene coverage; exhausted scenes are skipped. Terminates
    # because N >= K was verified above.
    episodes = []
    for scene in itertools.cycle(episode_by_scene.keys()):
        if not episode_by_scene[scene]:
            continue
        episodes.append(episode_by_scene[scene].pop())
        if len(episodes) == K:
            break
    print(f'Populated dataset with {K} episodes out of {N} episodes')
    # Load all selected episodes in parallel; each yields
    # (frames, masks, poses, info) — see self.load_episode.
    episode_data = Parallel(n_jobs=16, verbose=5)(
        delayed(self.load_episode)(ep) for ep in episodes)
    for frames, masks, poses, info in tqdm.tqdm(episode_data, total=len(episode_data)):
        self.add_episode(frames, masks, poses, info)
    # Split accumulated entries into train and validation subsets.
    self.train_data, self.val_data = self.split_entries(self.entries)
    print(f'Populated with {len(self.entries)} entries')