data/create_rgb_dataset.py
def get_vector_sample(self, index, num_views, isTrain=True):
    # Periodically reset the environment so new episodes/scenes are sampled.
    if self.num_samples % self.images_before_reset == 0:
        self.env.reset()

    # Choose an environment index from the train or validation split
    if isTrain:
        index = index % self.num_train_envs
    else:
        index = (index % self.num_val_envs) + self.num_train_envs

    depths = []
    rgbs = []
    cameras = []
    semantics = []
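    # Sample a random navigable point and a random heading (a quaternion
    # rotating about the vertical axis) as the reference pose for this sample.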
    orig_location = np.array(self.env.sample_navigable_point(index))
    rand_angle = self.rng.uniform(0, 2 * np.pi)
    orig_rotation = [0, np.sin(rand_angle / 2), 0, np.cos(rand_angle / 2)]
    obs = self.env.get_observations_at(
        index, position=orig_location, rotation=orig_rotation
    )
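    # Render num_views observations; each view perturbs the reference pose
    # according to opts.image_type.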
    for i in range(0, num_views):
        rand_location = orig_location.copy()
        rand_rotation = orig_rotation.copy()

        if self.opts.image_type == "translation":
            # Jitter only the x coordinate, within [-0.1, 0.1).
            rand_location[[0]] = (
                orig_location[[0]] + self.rng.rand() * 0.2 - 0.1
            )
        elif self.opts.image_type == "outpaint":
            # Keep the position and jitter only the orientation.
            rand_rotation = jitter_quaternions(
                quaternion.from_float_array(orig_rotation),
                self.rng,
                angle=10,
            )
            rand_rotation = quaternion.as_float_array(
                rand_rotation
            ).tolist()
elif self.opts.image_type == "fixedRT_baseline":
rand_location = self.rand_location
rotation = self.rand_rotation
        else:
            # Default: jitter both the x coordinate and the orientation.
            rand_location[0] = (
                orig_location[0] + self.rng.rand() * 0.32 - 0.15
            )
            rand_rotation = jitter_quaternions(
                quaternion.from_float_array(orig_rotation),
                self.rng,
                angle=10,
            )
            rand_rotation = quaternion.as_float_array(
                rand_rotation
            ).tolist()
        obs = self.env.get_observations_at(
            index, position=rand_location, rotation=rand_rotation
        )
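        # Depth arrives as HxWx1; store it as a 1xHxW tensor. RGB is scaled
        # into [0, 1) before the dataset transform is applied.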
        depths += [torch.Tensor(obs["depth"][..., 0]).unsqueeze(0)]
        rgbs += [self.transform(obs["rgb"].astype(np.float32) / 256.0)]
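        # If a semantic sensor is attached, convert per-pixel instance ids to
        # category labels using the scene's semantic annotations.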
if "semantic" in obs.keys():
instance_semantic = torch.Tensor(
obs["semantic"].astype(np.int32)
).unsqueeze(0)
class_semantic = torch.zeros(instance_semantic.size()).long()
id_to_label = {
int(obj.id.split("_")[-1]): obj.category.index()
for obj in self.env.sim.semantic_annotations().objects
}
for id_scene in id_to_label.keys():
class_semantic[instance_semantic == id_scene] = id_to_label[
id_scene
]
semantics += [class_semantic]
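        # Record the agent pose used for this view and build the camera
        # matrices (P and its inverse) alongside the shared intrinsics.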
        position, rotation = self.env.get_agent_state(index)
        rotation = quaternion.as_rotation_matrix(
            quaternion.from_float_array(rotation)
        )
        P, Pinv = get_camera_matrices(position=position, rotation=rotation)
        cameras += [{"P": P, "Pinv": Pinv, "K": self.K, "Kinv": self.invK}]
    self.num_samples += 1

    if len(semantics) > 0:
        return {
            "images": rgbs,
            "depths": depths,
            "cameras": cameras,
            "semantics": semantics,
        }
    return {"images": rgbs, "depths": depths, "cameras": cameras}