in pytorch3d/datasets/r2n2/r2n2.py [0:0]
def __getitem__(self, model_idx, view_idxs: Optional[List[int]] = None) -> Dict:
    """
    Read a model by the given index.

    Args:
        model_idx: The idx of the model to be retrieved in the dataset. A tuple
            ``(model_idx, view_idxs)`` is also accepted; its second element then
            replaces the ``view_idxs`` argument.
        view_idxs: List of indices of the views to be returned. A single int is
            wrapped into a one-element list, and a tensor is accepted as well.
            Each index needs to be contained in the loaded split (always between
            0 and 23, inclusive). If an invalid index is supplied, a warning is
            issued, view_idxs is ignored and all the loaded views are returned.

    Returns:
        dictionary with following keys:
        - verts: FloatTensor of shape (V, 3).
        - faces: faces.verts_idx, LongTensor of shape (F, 3).
        - textures: textures as returned by ``self._load_mesh``.
        - synset_id (str): synset id.
        - model_id (str): model id.
        - label (str): synset label.
        - images: FloatTensor of shape (V, H, W, C), where V is number of views
            returned. Returns a batch of the renderings of the models from the
            R2N2 dataset. None when ``self.return_images`` is False.
        - R: Rotation matrix of shape (V, 3, 3), where V is number of views returned.
        - T: Translation matrix of shape (V, 3), where V is number of views returned.
        - K: Intrinsic matrix of shape (V, 4, 4), where V is number of views returned.
        - voxels: One voxel grid per returned view, stacked along a new leading
            dimension; each grid has shape (D, D, D), where D is the number of
            voxels along each dimension.
        R, T and K are only present when ``self.return_images`` is True, and
        voxels only when ``self.return_voxels`` is True.

    Raises:
        TypeError: if view_idxs is neither an int, a list nor a tensor.
        FileNotFoundError: if voxels are requested but the model's binvox file
            does not exist.
        RuntimeError: if voxels are requested but no per-view camera extrinsics
            were computed (they are only computed while loading renderings).
    """
    if isinstance(model_idx, tuple):
        model_idx, view_idxs = model_idx
    if view_idxs is not None:
        if isinstance(view_idxs, int):
            view_idxs = [view_idxs]
        if not isinstance(view_idxs, list) and not torch.is_tensor(view_idxs):
            raise TypeError(
                "view_idxs is of type %s but it needs to be a list."
                % type(view_idxs)
            )

    # Default to every view loaded for this model; narrow to the requested
    # views only when all of them are available.
    model_views = self.views_per_model_list[model_idx]
    if view_idxs is not None:
        if any(idx not in model_views for idx in view_idxs):
            msg = """At least one of the indices in view_idxs is not available.
Specified view of the model needs to be contained in the
loaded split. If return_all_views is set to False, only one
random view is loaded. Try accessing the specified view(s)
after loading the dataset with self.return_all_views set to True.
Now returning all view(s) in the loaded dataset."""
            warnings.warn(msg)
        else:
            model_views = view_idxs

    model = self._get_item_ids(model_idx)
    model_path = path.join(
        self.shapenet_dir, model["synset_id"], model["model_id"], "model.obj"
    )
    verts, faces, textures = self._load_mesh(model_path)
    model["verts"] = verts
    model["faces"] = faces
    model["textures"] = textures
    model["label"] = self.synset_dict[model["synset_id"]]

    model["images"] = None
    images, Rs, Ts, voxel_RTs = [], [], [], []
    # Retrieve R2N2's renderings if required.
    if self.return_images:
        rendering_path = path.join(
            self.r2n2_dir,
            self.views_rel_path,
            model["synset_id"],
            model["model_id"],
            "rendering",
        )
        # Read metadata file to obtain params for calibration matrices.
        with open(path.join(rendering_path, "rendering_metadata.txt"), "r") as f:
            metadata_lines = f.readlines()
        for i in model_views:
            # Read image. The context manager releases the file handle as soon
            # as the pixel data has been copied into the array.
            image_path = path.join(rendering_path, "%02d.png" % i)
            with Image.open(image_path) as raw_img:
                image = torch.from_numpy(np.array(raw_img) / 255.0)[..., :3]
            images.append(image.to(dtype=torch.float32))

            # Get camera calibration. Each metadata line must hold exactly five
            # values; yaw and fov are parsed but not used below.
            azim, elev, _yaw, dist_ratio, _fov = [
                float(v) for v in metadata_lines[i].strip().split(" ")
            ]
            dist = dist_ratio * MAX_CAMERA_DISTANCE
            # Extrinsic matrix before transformation to PyTorch3D world space.
            RT = compute_extrinsic_matrix(azim, elev, dist)
            R, T = self._compute_camera_calibration(RT)
            Rs.append(R)
            Ts.append(T)
            voxel_RTs.append(RT)

        # Intrinsic matrix extracted from the Blender with slight modification to work with
        # PyTorch3D world space. Taken from meshrcnn codebase:
        # https://github.com/facebookresearch/meshrcnn/blob/main/shapenet/utils/coords.py
        K = torch.tensor(
            [
                [2.1875, 0.0, 0.0, 0.0],
                [0.0, 2.1875, 0.0, 0.0],
                [0.0, 0.0, -1.002002, -0.2002002],
                [0.0, 0.0, 1.0, 0.0],
            ]
        )
        model["images"] = torch.stack(images)
        model["R"] = torch.stack(Rs)
        model["T"] = torch.stack(Ts)
        model["K"] = K.expand(len(model_views), 4, 4)

    # Read voxels if required.
    voxel_path = path.join(
        self.r2n2_dir,
        self.voxels_rel_path,
        model["synset_id"],
        model["model_id"],
        "model.binvox",
    )
    if self.return_voxels:
        if not path.isfile(voxel_path):
            msg = "Voxel file not found for model %s from category %s."
            raise FileNotFoundError(msg % (model["model_id"], model["synset_id"]))
        if not voxel_RTs:
            # Voxelization projects with the per-view extrinsics gathered while
            # loading renderings. Without them torch.stack would fail on an
            # empty list with an opaque message, so fail early and clearly
            # (same exception type as torch.stack raises).
            raise RuntimeError(
                "Voxels were requested but no camera extrinsics are available; "
                "they are only computed when self.return_images is True and "
                "at least one view is loaded."
            )

        with open(voxel_path, "rb") as f:
            # Read voxel coordinates as a tensor of shape (N, 3).
            voxel_coords = read_binvox_coords(f)
        # Align voxels to the same coordinate system as mesh verts.
        voxel_coords = align_bbox(voxel_coords, model["verts"])
        # For each view: compute the projection matrix, then convert voxel
        # coordinates of shape (N, 3) to voxels of shape (D, D, D).
        voxels_list = [
            voxelize(voxel_coords, BLENDER_INTRINSIC.mm(RT), VOXEL_SIZE)
            for RT in voxel_RTs
        ]
        model["voxels"] = torch.stack(voxels_list)

    return model