in data/create_rgb_dataset.py
def get_singleenv_sample(self, num_views) -> Dict[str, np.ndarray]:
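    # Render num_views observations (RGB, depth, camera, and optionally
    # semantics) from the current Habitat environment, resetting to a new
    # episode every images_before_reset samples.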
    if self.num_samples % self.images_before_reset == 0:
        old_env = self.env._current_episode_index
        self.env.reset()
        print(
            "RESETTING %d to %d \n"
            % (old_env, self.env._current_episode_index),
            flush=True,
        )
    depths = []
    rgbs = []
    cameras = []
    semantics = []
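    # Sample a base pose: a random navigable point plus a random yaw about the
    # vertical (Y) axis, stored as an [x, y, z, w] quaternion.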
    rand_location = self.env_sim.sample_navigable_point()
    if self.opts.image_type == "fixedRT_baseline":
        rand_angle = self.angle_rng.uniform(0, 2 * np.pi)
    else:
        rand_angle = self.rng.uniform(0, 2 * np.pi)
    rand_rotation = [0, np.sin(rand_angle / 2), 0, np.cos(rand_angle / 2)]
    obs = self.env_sim.get_observations_at(
        position=rand_location,
        rotation=rand_rotation,
        keep_agent_at_new_pose=True,
    )
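    # Perturb the base pose for each view according to opts.image_type:
    #   "translation"      - jitter the x coordinate only
    #   "outpaint"         - jitter the rotation only (jitter_quaternions, angle=10)
    #   "fixedRT_baseline" - reuse the fixed pose stored on self
    #   anything else      - jitter both the x coordinate and the rotation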
    for i in range(0, num_views):
        position = rand_location.copy()
        rotation = rand_rotation.copy()
        if self.opts.image_type == "translation":
            position[0] = position[0] + self.rng.rand() * 0.2 - 0.1
        elif self.opts.image_type == "outpaint":
            rotation = quaternion.as_float_array(
                jitter_quaternions(
                    quaternion.from_float_array(rand_rotation),
                    self.rng,
                    angle=10,
                )
            ).tolist()
        elif self.opts.image_type == "fixedRT_baseline":
            rand_location = self.rand_location
            rotation = self.rand_rotation
        else:
            position[0] = position[0] + self.rng.rand() * 0.3 - 0.15
            rotation = quaternion.as_float_array(
                jitter_quaternions(
                    quaternion.from_float_array(rand_rotation),
                    self.rng,
                    angle=10,
                )
            ).tolist()
        obs = self.env_sim.get_observations_at(
            position=position,
            rotation=rotation,
            keep_agent_at_new_pose=True,
        )
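        # Store the depth map as a 1 x H x W tensor and the RGB image scaled
        # to [0, 1) before applying the dataset transform.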
        depths += [torch.Tensor(obs["depth"][..., 0]).unsqueeze(0)]
        rgbs += [self.transform(obs["rgb"].astype(np.float32) / 256.0)]
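        # If a semantic sensor is present, convert per-pixel instance ids into
        # category labels via the scene's semantic annotations.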
if "semantic" in obs.keys():
instance_semantic = torch.Tensor(
obs["semantic"].astype(np.int32)
).unsqueeze(0)
class_semantic = torch.zeros(instance_semantic.size()).long()
id_to_label = {
int(obj.id.split("_")[-1]): obj.category.index()
for obj in self.env.sim.semantic_annotations().objects
}
for id_scene in np.unique(instance_semantic.numpy()):
class_semantic[instance_semantic == id_scene] = id_to_label[
id_scene
]
semantics += [class_semantic]
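        # Build the camera for this view from the depth sensor's pose:
        # extrinsics (P, Pinv) plus the precomputed intrinsics (K, Kinv).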
        agent_state = self.env_sim.get_agent_state().sensor_states["depth"]
        rotation = quaternion.as_rotation_matrix(agent_state.rotation)
        position = agent_state.position
        P, Pinv = get_camera_matrices(position=position, rotation=rotation)
        cameras += [{"P": P, "Pinv": Pinv, "K": self.K, "Kinv": self.invK}]
    self.num_samples += 1
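    # Only include "semantics" in the output when the semantic sensor produced
    # data for this sample.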
    if len(semantics) > 0:
        return {
            "images": rgbs,
            "depths": depths,
            "cameras": cameras,
            "semantics": semantics,
        }
    return {"images": rgbs, "depths": depths, "cameras": cameras}