`__init__` method excerpted from data/create_rgb_dataset.py:


    def __init__(self, split, gpu_id, opts, vectorize=False, seed=0) -> None:
        """Build a Habitat RGB(-D, optionally semantic) dataset wrapper.

        Creates (or reuses) a cached episode file containing one episode per
        scene, loads it, and spins up either a single ``habitat.Env`` or a
        ``habitat.VectorEnv`` with ``num_parallel_envs`` workers.

        Args:
            split: one of ``"train"``, ``"val"``, ``"test"``; selects the
                data path from ``opts``.
            gpu_id: GPU id handed to the simulator config.
            opts: options namespace; fields read here include ``W``,
                ``use_semantics``, ``*_data_path``, ``dataset``,
                ``images_before_reset``, ``config``, ``scenes_dir``,
                ``normalize_image``.
            vectorize: if True, run environments in parallel via
                ``habitat.VectorEnv``.
            seed: seed for the episode-shuffling RNG.

        Raises:
            ValueError: if ``split`` is not a recognized split name.
        """
        self.vectorize = vectorize

        print("gpu_id", gpu_id)
        resolution = opts.W
        # Sensor suite: always RGB + depth, optionally semantics.
        if opts.use_semantics:
            sensors = ["RGB_SENSOR", "DEPTH_SENSOR", "SEMANTIC_SENSOR"]
        else:
            sensors = ["RGB_SENSOR", "DEPTH_SENSOR"]
        if split == "train":
            data_path = opts.train_data_path
        elif split == "val":
            data_path = opts.val_data_path
        elif split == "test":
            data_path = opts.test_data_path
        else:
            # ValueError is more precise than a bare Exception and is still
            # caught by any caller handling Exception.
            raise ValueError("Invalid split: %r" % (split,))
        unique_dataset_name = opts.dataset

        self.num_parallel_envs = 5

        self.images_before_reset = opts.images_before_reset
        config = make_config(
            opts.config,
            gpu_id,
            split,
            data_path,
            sensors,
            resolution,
            opts.scenes_dir,
        )
        data_dir = os.path.join(
            "data/scene_episodes/", unique_dataset_name + "_" + split
        )
        self.dataset_name = config.DATASET.TYPE
        print(data_dir)
        # exist_ok avoids the exists()/makedirs() race when several worker
        # processes start simultaneously (this code runs multi-process; see
        # the re-check before the write below).
        os.makedirs(data_dir, exist_ok=True)
        # NOTE: from here on, data_path refers to the cached episode file,
        # not the raw split path chosen above.
        data_path = os.path.join(data_dir, "dataset_one_ep_per_scene.json.gz")
        # Creates a dataset where each episode is a random spawn point in each scene.
        print("One ep per scene", flush=True)
        if not os.path.exists(data_path):
            print("Creating dataset...", flush=True)
            dataset = make_dataset(config.DATASET.TYPE, config=config.DATASET)
            # Keep only the first episode encountered for each scene.
            scene_episodes = {}
            for episode in tqdm.tqdm(dataset.episodes):
                if episode.scene_id not in scene_episodes:
                    scene_episodes[episode.scene_id] = episode

            dataset.episodes = list(scene_episodes.values())
            if not os.path.exists(data_path):
                # Multiproc do check again before write.
                # (renamed from `json` to avoid shadowing the stdlib module)
                json_bytes = dataset.to_json().encode("utf-8")
                with gzip.GzipFile(data_path, "w") as fout:
                    fout.write(json_bytes)
            print("Finished dataset...", flush=True)

        # Load in data and update the location to the proper location (else
        # get a weird, uninformative, error -- Affine2Dtransform())
        dataset = mp3d_dataset.PointNavDatasetV1()
        with gzip.open(data_path, "rt") as f:
            dataset.from_json(f.read())

            # Rewrite the absolute scene paths baked into the cached
            # episodes so they point at this machine's scenes_dir.
            for episode in dataset.episodes:
                episode.scene_id = episode.scene_id.replace(
                    '/checkpoint/erikwijmans/data/mp3d/',
                    opts.scenes_dir + '/mp3d/',
                )

        config.TASK.SENSORS = ["POINTGOAL_SENSOR"]

        config.freeze()

        self.rng = np.random.RandomState(seed)

        # Now look at vector environments
        if self.vectorize:
            configs, datasets = _load_datasets(
                (
                    opts.config,
                    gpu_id,
                    split,
                    data_path,
                    sensors,
                    resolution,
                    opts.scenes_dir,
                ),
                dataset,
                data_path,
                opts.scenes_dir + '/mp3d/',
                num_workers=self.num_parallel_envs,
            )
            num_envs = len(configs)

            env_fn_args = tuple(zip(configs, datasets, range(num_envs)))
            envs = habitat.VectorEnv(
                env_fn_args=env_fn_args,
                multiprocessing_start_method="forkserver",
            )

            self.env = envs
            # Reserve 90% of the parallel envs for training, rest for val.
            self.num_train_envs = int(0.9 * (self.num_parallel_envs))
            self.num_val_envs = self.num_parallel_envs - self.num_train_envs
        else:
            self.env = habitat.Env(config=config, dataset=dataset)
            self.rng.shuffle(self.env.episodes)
            # (single assignment; the original set this twice redundantly)
            self.env_sim = self.env.sim

        self.num_samples = 0

        # Set up intrinsic parameters. 4x4 pinhole projection built from the
        # horizontal FOV only (square sensor assumed -- same focal term on
        # both axes).
        self.hfov = config.SIMULATOR.DEPTH_SENSOR.HFOV * np.pi / 180.0
        self.W = resolution
        self.K = np.array(
            [
                [1.0 / np.tan(self.hfov / 2.0), 0.0, 0.0, 0.0],
                [0.0, 1.0 / np.tan(self.hfov / 2.0), 0.0, 0.0],
                [0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 1.0],
            ],
            dtype=np.float32,
        )

        self.invK = np.linalg.inv(self.K)

        self.config = config
        self.opts = opts

        if self.opts.normalize_image:
            # Using same normalization as BigGan
            self.transform = transforms.Compose(
                [
                    transforms.ToTensor(),
                    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
                ]
            )
        else:
            self.transform = transforms.ToTensor()