def get_vector_sample()

in data/create_rgb_dataset.py

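Samples num_views observations around a randomly sampled navigable pose in environment index: for each view it returns the RGB image, the depth map, and the camera matrices, plus per-pixel semantic labels when the scene provides them. How each view's pose is perturbed relative to the start pose is controlled by opts.image_type.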

    def get_vector_sample(self, index, num_views, isTrain=True):
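        # Reset the environment every images_before_reset samples so that a
        # fresh scene/episode is used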
        if self.num_samples % self.images_before_reset == 0:
            self.env.reset()

        # Map the dataset index into the train or validation environment range
        if isTrain:
            index = index % self.num_train_envs
        else:
            index = (index % self.num_val_envs) + self.num_train_envs

        depths = []
        rgbs = []
        cameras = []
        semantics = []

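        # Sample a random navigable start position in this environment and a
        # random orientation, encoded as a quaternion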
        orig_location = np.array(self.env.sample_navigable_point(index))
        rand_angle = self.rng.uniform(0, 2 * np.pi)

        orig_rotation = [0, np.sin(rand_angle / 2), 0, np.cos(rand_angle / 2)]
        obs = self.env.get_observations_at(
            index, position=orig_location, rotation=orig_rotation
        )
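        # Each view perturbs the start pose according to opts.image_type:
        #   "translation"      - jitter the x-coordinate of the position only
        #   "outpaint"         - jitter the rotation only
        #   "fixedRT_baseline" - reuse the fixed pose stored on the generator
        #   otherwise          - jitter both position and rotation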
        for i in range(0, num_views):
            rand_location = orig_location.copy()
            rand_rotation = orig_rotation.copy()
            if self.opts.image_type == "translation":
                rand_location[0] = (
                    orig_location[0] + self.rng.rand() * 0.2 - 0.1
                )
            elif self.opts.image_type == "outpaint":
                rand_rotation = jitter_quaternions(
                    quaternion.from_float_array(orig_rotation),
                    self.rng,
                    angle=10,
                )
                rand_rotation = quaternion.as_float_array(
                    rand_rotation
                ).tolist()
            elif self.opts.image_type == "fixedRT_baseline":
                rand_location = self.rand_location
                rotation = self.rand_rotation
            else:
                rand_location[0] = (
                    orig_location[0] + self.rng.rand() * 0.32 - 0.15
                )
                rand_rotation = jitter_quaternions(
                    quaternion.from_float_array(orig_rotation),
                    self.rng,
                    angle=10,
                )
                rand_rotation = quaternion.as_float_array(
                    rand_rotation
                ).tolist()

            obs = self.env.get_observations_at(
                index, position=rand_location, rotation=rand_rotation
            )

            depths += [torch.Tensor(obs["depth"][..., 0]).unsqueeze(0)]
            rgbs += [self.transform(obs["rgb"].astype(np.float32) / 256.0)]

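            # Convert per-pixel instance IDs to category labels using the
            # scene's semantic annotations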
            if "semantic" in obs.keys():
                instance_semantic = torch.Tensor(
                    obs["semantic"].astype(np.int32)
                ).unsqueeze(0)
                class_semantic = torch.zeros(instance_semantic.size()).long()

                id_to_label = {
                    int(obj.id.split("_")[-1]): obj.category.index()
                    for obj in self.env.sim.semantic_annotations().objects
                }

                for id_scene in id_to_label.keys():
                    class_semantic[instance_semantic == id_scene] = id_to_label[
                        id_scene
                    ]

                semantics += [class_semantic]

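            # Read back the agent pose and build the camera extrinsics (P and
            # its inverse Pinv) together with the intrinsics (K, Kinv)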
            position, rotation = self.env.get_agent_state(index)
            rotation = quaternion.as_rotation_matrix(
                quaternion.from_float_array(rotation)
            )
            P, Pinv = get_camera_matrices(position=position, rotation=rotation)
            cameras += [{"P": P, "Pinv": Pinv, "K": self.K, "Kinv": self.invK}]

        self.num_samples += 1
        if len(semantics) > 0:
            return {
                "images": rgbs,
                "depths": depths,
                "cameras": cameras,
                "semantics": semantics,
            }

        return {"images": rgbs, "depths": depths, "cameras": cameras}