{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e8a43380",
"metadata": {},
"outputs": [],
"source": [
"from pprint import pprint\n",
"from PIL import Image\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"from metropolis import Metropolis\n",
"\n",
"# Loading the dataset\n",
"met = Metropolis(\n",
" \"train\", # Name of the split we want to load (i.e. 'train', 'test', 'val')\n",
" \"metropolis/\" # Path to the dataset's root folder\n",
")"
]
},
{
"cell_type": "markdown",
"id": "93d9bc36",
"metadata": {},
"source": [
"# Navigating the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ef79a13",
"metadata": {},
"outputs": [],
"source": [
"# Tables are available as attributes of `met`\n",
"pprint(met.instance[:3])\n",
"pprint(met.scene[:3])"
]
},
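{
"cell_type": "code",
"execution_count": null,
"id": "a3f81c20",
"metadata": {},
"outputs": [],
"source": [
"# A quick sketch: the tables are sliced like sequences above, so we assume\n",
"# they also support `len()`\n",
"print(f\"{len(met.scene)} scenes, {len(met.instance)} instances\")"
]
},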
{
"cell_type": "code",
"execution_count": null,
"id": "21a1c360",
"metadata": {},
"outputs": [],
"source": [
"# We can quickly look up table entries by token using the `get()` method\n",
"sample = met.get(\n",
" \"sample\", # Table name\n",
" \"tr1thGb4-HK8yPOzSZFHQQ\" # Token\n",
")\n",
"\n",
"pprint(sample)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f88d940",
"metadata": {},
"outputs": [],
"source": [
"# Tables are cross-referenced using tokens, e.g. to get the scene a sample belongs to\n",
"scene = met.get(\"scene\", sample[\"scene_token\"])\n",
"pprint(scene)\n",
"\n",
"# The `Metropolis` class decorates the tables with some useful reverse indices,\n",
"# e.g. all sample_data belonging to a sample are referenced in `sample[\"data\"][{sensor_channel}]`\n",
"camera_left = met.get(\"sample_data\", sample[\"data\"][\"CAM_LEFT\"])\n",
"pprint(camera_left)"
]
},
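{
"cell_type": "code",
"execution_count": null,
"id": "b9240d17",
"metadata": {},
"outputs": [],
"source": [
"# Since `sample[\"data\"]` maps sensor channel names to sample_data tokens,\n",
"# the channels recorded for this sample can be listed directly\n",
"pprint(list(sample[\"data\"].keys()))"
]
},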
{
"cell_type": "code",
"execution_count": null,
"id": "9d01bf38",
"metadata": {},
"outputs": [],
"source": [
"# Samples belonging to the same scene are organized in a doubly linked list\n",
"# and can be easily traversed with a while loop\n",
"next_sample_token = scene[\"first_sample_token\"]\n",
"\n",
"while next_sample_token:\n",
" next_sample = met.get(\"sample\", next_sample_token)\n",
" next_sample_token = next_sample[\"next_sample\"]\n",
" \n",
" print(f\"token: {next_sample['token']}, #2d annotations: {len(next_sample['anns_2d'])}\")"
]
},
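{
"cell_type": "code",
"execution_count": null,
"id": "5c7e2d4a",
"metadata": {},
"outputs": [],
"source": [
"# Convenience sketch (not a devkit API): collect all samples of a scene in\n",
"# order by following the same linked-list fields used above\n",
"scene_samples = []\n",
"sample_token = scene[\"first_sample_token\"]\n",
"\n",
"while sample_token:\n",
" s = met.get(\"sample\", sample_token)\n",
" scene_samples.append(s)\n",
" sample_token = s[\"next_sample\"]\n",
"\n",
"print(f\"scene contains {len(scene_samples)} samples\")"
]
},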
{
"cell_type": "code",
"execution_count": null,
"id": "d2a7abbf",
"metadata": {},
"outputs": [],
"source": [
"# Samples are also decorated with lists containing all annotations belonging to them\n",
"# `sample[\"anns\"]` contains 3D annotation tokens, `sample[\"anns_2d\"]` 2D annotation tokens\n",
"annotations = [met.get(\"sample_annotation\", ann_token) for ann_token in sample[\"anns\"]]\n",
"\n",
"pprint(annotations[0])\n",
"\n",
"# Annotations belonging to a sample can also be retrieved in a more structured way\n",
"# using the `get_sample_data()` method; more on this later\n",
"_, annotations_3d, _, _ = met.get_sample_data(sample[\"data\"][\"CAM_LEFT\"])\n",
"\n",
"pprint(annotations_3d[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fbd35890",
"metadata": {},
"outputs": [],
"source": [
"# Similar to samples and scenes, annotations belonging to the same instance are\n",
"# organized in a doubly linked list\n",
"instance = met.get(\"instance\", annotations[0][\"instance_token\"])\n",
"next_annotation_token = instance[\"first_annotation_token\"]\n",
"\n",
"while next_annotation_token:\n",
" next_annotation = met.get(\"sample_annotation_2d\", next_annotation_token)\n",
" next_annotation_token = next_annotation[\"next_sample_annotation\"]\n",
" \n",
" print(f\"token: {next_annotation['token']}, box: {next_annotation['bounding_box']}\")"
]
},
{
"cell_type": "markdown",
"id": "914f95ec",
"metadata": {},
"source": [
"# Accessing the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f606a3fd",
"metadata": {},
"outputs": [],
"source": [
"CHANNEL = \"CAM_FRONT\"\n",
"\n",
"# `get_sample_data()` collects all relevant information belonging to a sample_data record\n",
"# and returns it in a structured format\n",
"(\n",
" raw_data_path, # Path to the raw data file (e.g. an image)\n",
" boxes, # List of visible 3D boxes\n",
" boxes_2d, # List of visible 2D boxes\n",
" intrinsics # For images only, camera intrinsic parameters\n",
") = met.get_sample_data(\n",
" sample[\"data\"][CHANNEL], # sample_data_token\n",
" get_all_visible_boxes=True, # Return all annotations vs. only those annotated on this sample\n",
")\n",
"\n",
"print(f\"path: {raw_data_path}, #2d boxes: {len(boxes_2d)}, #3d boxes: {len(boxes)}\")\n",
"\n",
"# The raw data path can also be retrieved directly\n",
"raw_data_path = met.get_sample_data_path(sample[\"data\"][CHANNEL])\n",
"\n",
"Image.open(raw_data_path)"
]
},
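{
"cell_type": "code",
"execution_count": null,
"id": "8d1f6b3e",
"metadata": {},
"outputs": [],
"source": [
"# The raw image can also be shown through matplotlib, which makes it easy to\n",
"# combine with overlays later; the figure styling is just an illustrative choice\n",
"img = Image.open(raw_data_path)\n",
"\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot()\n",
"ax.imshow(img)\n",
"ax.set_title(f\"{CHANNEL}: {len(boxes_2d)} 2D boxes, {len(boxes)} 3D boxes\")\n",
"\n",
"ax.figure.set_size_inches(10, 10)\n",
"plt.show()"
]
},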
{
"cell_type": "code",
"execution_count": null,
"id": "963bc1a7",
"metadata": {},
"outputs": [],
"source": [
"# Project a point cloud into an image\n",
"(\n",
" points, # 3xN numpy array with the projected points (points[2, :] == 1.)\n",
" depths, # N numpy array with the corresponding depth values\n",
" _\n",
") = met.map_pointcloud_to_image(\n",
" sample[\"data\"][\"MVS\"], # sample_data_token of the point cloud\n",
" sample[\"data\"][\"CAM_FRONT\"], # sample_data_token of the image\n",
")\n",
"\n",
"pprint(points[:2, :])\n",
"pprint(depths)"
]
},
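{
"cell_type": "code",
"execution_count": null,
"id": "4a9c0e7b",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: draw the projection by hand to see what the returned\n",
"# arrays contain (`render_pointcloud_in_image()`, used below, does this for you)\n",
"img = Image.open(met.get_sample_data_path(sample[\"data\"][\"CAM_FRONT\"]))\n",
"\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot()\n",
"ax.imshow(img)\n",
"ax.scatter(points[0, :], points[1, :], c=depths, s=1) # color points by depth\n",
"\n",
"ax.figure.set_size_inches(10, 10)\n",
"plt.show()"
]
},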
{
"cell_type": "code",
"execution_count": null,
"id": "3e31ab4d",
"metadata": {},
"outputs": [],
"source": [
"# Point clouds are stored as .npz files and can be opened directly with numpy\n",
"points = np.load(met.get_sample_data_path(sample[\"data\"][\"MVS\"]))\n",
"pprint(met.get_sample_data_path(sample[\"data\"][\"MVS\"]))\n",
"\n",
"pprint(points[\"points\"])"
]
},
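{
"cell_type": "code",
"execution_count": null,
"id": "d2b85f91",
"metadata": {},
"outputs": [],
"source": [
"# `np.load` on an .npz file returns an archive object, so its contents can\n",
"# be inspected before indexing into a specific array\n",
"for key in points.files:\n",
" print(key, points[key].shape, points[key].dtype)"
]
},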
{
"cell_type": "code",
"execution_count": null,
"id": "ebf37dac",
"metadata": {},
"outputs": [],
"source": [
"# Machine-generated panoptic masks can be retrieved for any image with\n",
"(\n",
" meta, # Panoptic meta-data (see FORMAT.md for details)\n",
" mask # Segmentation mask as numpy array of integers\n",
") = met.get_panoptic_mask(\n",
" sample[\"data\"][\"CAM_FRONT\"] # sample_data_token of the image\n",
")\n",
"\n",
"plt.imshow(mask)\n",
"plt.show()\n",
"\n",
"pprint(meta)"
]
},
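{
"cell_type": "code",
"execution_count": null,
"id": "7e3a1c58",
"metadata": {},
"outputs": [],
"source": [
"# The mask is a plain integer array, so per-segment pixel counts come straight\n",
"# from numpy; the ids are presumably cross-referenced through `meta` (see FORMAT.md)\n",
"ids, counts = np.unique(mask, return_counts=True)\n",
"\n",
"for segment_id, num_pixels in zip(ids, counts):\n",
" print(f\"segment {segment_id}: {num_pixels} pixels\")"
]
},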
{
"cell_type": "markdown",
"id": "d1989da6",
"metadata": {},
"source": [
"# Visualizing the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "741a7c25",
"metadata": {},
"outputs": [],
"source": [
"# Rendering images with point clouds\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot()\n",
"\n",
"met.render_pointcloud_in_image(\n",
" sample[\"token\"], # sample_token\n",
" pointsensor_channel=\"LIDAR_MX2\", # Name of the point cloud channel to visualize\n",
" camera_channel=\"CAM_FRONT\", # Name of the camera channel to visualize\n",
" ax=ax\n",
")\n",
"\n",
"ax.figure.set_size_inches(10, 10)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "865dc318",
"metadata": {},
"outputs": [],
"source": [
"# Rendering images with annotations\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot()\n",
"\n",
"met.render_sample_data(\n",
" sample[\"data\"][\"CAM_FRONT\"], # sample_data_token of the image to visualize\n",
" show_3d_boxes=True, # True to render 3D boxes, False to render 2D boxes\n",
" show_all_visible_3d_boxes=True, # Render all visible 3D boxes vs. only those annotated on this sample\n",
" ax=ax\n",
")\n",
"\n",
"ax.figure.set_size_inches(10, 10)\n",
"plt.show()"
]
},
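{
"cell_type": "code",
"execution_count": null,
"id": "f0c4299d",
"metadata": {},
"outputs": [],
"source": [
"# As documented above, the same call with show_3d_boxes=False renders the\n",
"# 2D boxes instead\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot()\n",
"\n",
"met.render_sample_data(\n",
" sample[\"data\"][\"CAM_FRONT\"], # sample_data_token of the image to visualize\n",
" show_3d_boxes=False, # Render 2D boxes instead of 3D boxes\n",
" ax=ax\n",
")\n",
"\n",
"ax.figure.set_size_inches(10, 10)\n",
"plt.show()"
]
},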
{
"cell_type": "code",
"execution_count": null,
"id": "2fa5868c",
"metadata": {},
"outputs": [],
"source": [
"# Rendering point clouds with annotations\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot()\n",
"\n",
"met.render_sample_data(\n",
" sample[\"data\"][\"LIDAR_MX2\"], # sample_data_token of the point cloud to visualize\n",
" show_all_visible_3d_boxes=True, # Render all visible 3D boxes vs. only those annotated on this sample\n",
" ax=ax\n",
")\n",
"\n",
"ax.figure.set_size_inches(10, 10)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90f9addc",
"metadata": {},
"outputs": [],
"source": [
"# Rendering images with panoptic masks\n",
"met.render_panoptic(\n",
" sample[\"data\"][\"CAM_FRONT\"], # sample_data_token of the image to visualize\n",
" out_path=None,\n",
")"
]
},
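{
"cell_type": "code",
"execution_count": null,
"id": "6b8d3a72",
"metadata": {},
"outputs": [],
"source": [
"# Presumably `out_path` writes the rendering to disk; the file name below is\n",
"# hypothetical\n",
"met.render_panoptic(\n",
" sample[\"data\"][\"CAM_FRONT\"],\n",
" out_path=\"panoptic_cam_front.png\",\n",
")"
]
},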
{
"cell_type": "code",
"execution_count": null,
"id": "2babb9d9",
"metadata": {},
"outputs": [],
"source": [
"# Rendering an aerial view of the point clouds\n",
"# **NOTE**: this requires the GDAL library\n",
"fig = plt.figure()\n",
"ax = fig.add_subplot()\n",
"\n",
"met.render_aerial_view(\n",
" sample[\"data\"][\"MVS\"], # sample_data_token of the point cloud to visualize\n",
" ax=ax\n",
")\n",
"\n",
"ax.figure.set_size_inches(10, 10)\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}