def populate_dset()

in affordance_seg/dataset.py [0:0]


    def populate_dset(self, data_dir, K=2000):
        """Select K scene-balanced episodes from data_dir, load them, and build train/val splits."""
        episodes = list(glob.glob(f'{data_dir}/*.npz'))
        N = len(episodes)
        assert N >= K, f'Not enough episodes collected (# episodes={N}, K={K})'

        # group episodes by scene (the scene id is the first '_'-delimited token of the filename)
        episode_by_scene = collections.defaultdict(list)
        for fl in episodes:
            scene, episode_id, _ = os.path.basename(fl).split('_')
            episode_by_scene[scene].append(fl)

        # shuffle within each scene so the round-robin below draws a random subset
        for scene in episode_by_scene:
            np.random.shuffle(episode_by_scene[scene])

        # round robin over scenes to re-populate the list of episodes
        episodes = []
        for scene in itertools.cycle(episode_by_scene.keys()):
            if not episode_by_scene[scene]:
                continue

            episodes.append(episode_by_scene[scene].pop())
            if len(episodes) == K:
                break

        print(f'Populated dataset with {K} episodes out of {N} episodes')

        # load all episodes and add each frame to the dataset
        episode_data = Parallel(n_jobs=16, verbose=5)(delayed(self.load_episode)(ep) for ep in episodes)
        for frames, masks, poses, info in tqdm.tqdm(episode_data, total=len(episode_data)):
            self.add_episode(frames, masks, poses, info)

        # split data into train and val set
        self.train_data, self.val_data = self.split_entries(self.entries)

        print(f'Populated with {len(self.entries)} entries')
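
The core of populate_dset() is the scene-balanced selection: episodes are grouped per scene, shuffled within each scene, then drawn round-robin until K are picked, so no single scene dominates the resulting dataset. Below is a minimal, self-contained sketch of that pattern; the scene names, filenames, and K value are invented for illustration, and the method itself additionally relies on module-level imports of glob, os, collections, itertools, numpy, joblib (Parallel, delayed), and tqdm.

    import collections
    import itertools

    import numpy as np

    # Toy episode files; names follow the <scene>_<episode_id>_<suffix>.npz
    # pattern that populate_dset() parses (scene names here are made up).
    episodes = [f'{scene}_{i:03d}_rgbd.npz'
                for scene in ('sceneA', 'sceneB', 'sceneC') for i in range(4)]

    # group by scene and shuffle within each scene
    by_scene = collections.defaultdict(list)
    for fl in episodes:
        by_scene[fl.split('_')[0]].append(fl)
    for scene in by_scene:
        np.random.shuffle(by_scene[scene])

    # round robin over scenes until K episodes are selected
    K = 7
    selected = []
    for scene in itertools.cycle(by_scene.keys()):
        if not by_scene[scene]:
            continue
        selected.append(by_scene[scene].pop())
        if len(selected) == K:
            break

    print(selected)  # alternates sceneA, sceneB, sceneC, sceneA, ...

Because each per-scene list is shuffled before drawing, the selection is random within a scene, while the round-robin keeps per-scene counts within one of each other as long as every scene still has episodes left.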