demo.ipynb

{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "e8a43380", "metadata": {}, "outputs": [], "source": [ "from pprint import pprint\n", "from PIL import Image\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "from metropolis import Metropolis\n", "\n", "# Loading the dataset\n", "met = Metropolis(\n", " \"train\", # Name of the split we want to load (i.e. 'train', 'test', 'val')\n", " \"metropolis/\" # Path to the dataset's root folder\n", ")" ] }, { "cell_type": "markdown", "id": "93d9bc36", "metadata": {}, "source": [ "# Navigating the dataset" ] }, { "cell_type": "code", "execution_count": null, "id": "1ef79a13", "metadata": {}, "outputs": [], "source": [ "# Tables are available as attributes of `met`\n", "pprint(met.instance[:3])\n", "pprint(met.scene[:3])" ] }, { "cell_type": "code", "execution_count": null, "id": "21a1c360", "metadata": {}, "outputs": [], "source": [ "# We can quickly get table entries given their token using the `get()` function\n", "sample = met.get(\n", " \"sample\", # Table name\n", " \"tr1thGb4-HK8yPOzSZFHQQ\" # Token\n", ")\n", "\n", "pprint(sample)" ] }, { "cell_type": "code", "execution_count": null, "id": "4f88d940", "metadata": {}, "outputs": [], "source": [ "# Tables are cross-referenced using tokens, e.g. to get the scene a sample belongs to\n", "scene = met.get(\"scene\", sample[\"scene_token\"])\n", "pprint(scene)\n", "\n", "# The `Metropolis` class decorates the tables with some useful reverse-indices\n", "# e.g. all sample_data belonging to a sample are referenced in `sample[\"data\"][{sensor_channel}]`\n", "camera_left = met.get(\"sample_data\", sample[\"data\"][\"CAM_LEFT\"])\n", "pprint(camera_left)" ] }, { "cell_type": "code", "execution_count": null, "id": "9d01bf38", "metadata": {}, "outputs": [], "source": [ "# Samples belonging to the same scene are organized in a double linked list\n", "# and can be easily traversed in a while loop\n", "next_sample_token = scene[\"first_sample_token\"]\n", "\n", "while next_sample_token:\n", " next_sample = met.get(\"sample\", next_sample_token)\n", " next_sample_token = next_sample[\"next_sample\"]\n", " \n", " print(f\"token: {next_sample['token']}, #2d annotations: {len(next_sample['anns_2d'])}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "d2a7abbf", "metadata": {}, "outputs": [], "source": [ "# Samples are also decorated with lists containing all annotations belonging to them\n", "# `sample[\"anns\"]` contains 3D annotation tokens, `sample[\"anns_2d\"]` 2D annotation tokens\n", "annotations = [met.get(\"sample_annotation\", ann_token) for ann_token in sample[\"anns\"]]\n", "\n", "pprint(annotations[0])\n", "\n", "# Annotations belonging to a sample can also be retrieved in a more structured way\n", "# using the `get_sample_data()` method, more on this later\n", "_, annotations_3d, _, _ = met.get_sample_data(sample[\"data\"][\"CAM_LEFT\"])\n", "\n", "pprint(annotations_3d[0])" ] }, { "cell_type": "code", "execution_count": null, "id": "fbd35890", "metadata": {}, "outputs": [], "source": [ "# Similar to samples and scenes, annotations belonging to the same instance are\n", "# organized in a double linked list\n", "instance = met.get(\"instance\", annotations[0][\"instance_token\"])\n", "next_annotation_token = instance[\"first_annotation_token\"]\n", "\n", "while next_annotation_token:\n", " next_annotation = met.get(\"sample_annotation_2d\", next_annotation_token)\n", " next_annotation_token = next_annotation[\"next_sample_annotation\"]\n", " \n", " 
print(f\"token: {next_annotation['token']}, box: {next_annotation['bounding_box']}\")" ] }, { "cell_type": "markdown", "id": "914f95ec", "metadata": {}, "source": [ "# Accessing the data" ] }, { "cell_type": "code", "execution_count": null, "id": "f606a3fd", "metadata": {}, "outputs": [], "source": [ "CHANNEL = \"CAM_FRONT\"\n", "\n", "# `get_sample_data()` collects all relevant information belonging to a sample data\n", "# and returns it in a structured format\n", "(\n", " raw_data_path, # Path to the raw data file (e.g. an image)\n", " boxes, # List of visible 3D boxes\n", " boxes_2d, # List of visible 2D boxes\n", " intrinsics # For images only, camera intrinsic parameters\n", ") = met.get_sample_data(\n", " sample[\"data\"][CHANNEL], # sample_data_token\n", " get_all_visible_boxes=True, # Return all annotations vs. only those annotated on this sample\n", ")\n", "\n", "print(f\"path: {raw_data_path}, #2d boxes: {len(boxes_2d)}, #3d boxes: {len(boxes)}\")\n", "\n", "# The raw data path can also be retrieved directly\n", "raw_data_path = met.get_sample_data_path(sample[\"data\"][CHANNEL])\n", "\n", "Image.open(raw_data_path)" ] }, { "cell_type": "code", "execution_count": null, "id": "963bc1a7", "metadata": {}, "outputs": [], "source": [ "# Project a point cloud into an image\n", "(\n", " points, # 3xN numpy array with the project points (points[2, :] == 1.)\n", " depths, # N numpy array with the corresponding depth values\n", " _\n", ") = met.map_pointcloud_to_image(\n", " sample[\"data\"][\"MVS\"], # sample_data_token of the point cloud\n", " sample[\"data\"][\"CAM_FRONT\"], # sample_data_token of the image\n", ")\n", "\n", "pprint(points[:2, :])\n", "pprint(depths)" ] }, { "cell_type": "code", "execution_count": null, "id": "3e31ab4d", "metadata": {}, "outputs": [], "source": [ "# Point clouds are stored as npz files, and can be opened directly with numpy\n", "points = np.load(met.get_sample_data_path(sample[\"data\"][\"MVS\"]))\n", "pprint(met.get_sample_data_path(sample[\"data\"][\"MVS\"]))\n", "\n", "pprint(points[\"points\"])" ] }, { "cell_type": "code", "execution_count": null, "id": "ebf37dac", "metadata": {}, "outputs": [], "source": [ "# Machine-generated panoptic masks can be retrieved for any image with\n", "(\n", " meta, # Panoptic meta-data (see FORMAT.md for details)\n", " mask # Segmentation mask as numpy array of integers\n", ") = met.get_panoptic_mask(\n", " sample[\"data\"][\"CAM_FRONT\"] # sample_data_token of the image\n", ")\n", "\n", "plt.imshow(mask)\n", "plt.show()\n", "\n", "pprint(meta)" ] }, { "cell_type": "markdown", "id": "d1989da6", "metadata": {}, "source": [ "# Visualizing the data" ] }, { "cell_type": "code", "execution_count": null, "id": "741a7c25", "metadata": {}, "outputs": [], "source": [ "# Rendering images with point clouds\n", "fig = plt.figure()\n", "ax = fig.add_subplot()\n", "\n", "met.render_pointcloud_in_image(\n", " sample[\"token\"], # sample_token\n", " pointsensor_channel=\"LIDAR_MX2\", # Name of the point cloud channel to visualize\n", " camera_channel=\"CAM_FRONT\", # Name of the camera channel to visualize\n", " ax=ax\n", ")\n", "\n", "ax.figure.set_size_inches(10, 10)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "865dc318", "metadata": {}, "outputs": [], "source": [ "# Rendering images with annotations\n", "fig = plt.figure()\n", "ax = fig.add_subplot()\n", "\n", "met.render_sample_data(\n", " sample[\"data\"][\"CAM_FRONT\"], # sample_data_token of the image to visualize\n", " show_3d_boxes=True, # True 
```python
# Machine-generated panoptic masks can be retrieved for any image with
(
    meta,  # Panoptic meta-data (see FORMAT.md for details)
    mask   # Segmentation mask as numpy array of integers
) = met.get_panoptic_mask(
    sample["data"]["CAM_FRONT"]  # sample_data_token of the image
)

plt.imshow(mask)
plt.show()

pprint(meta)
```

# Visualizing the data

```python
# Rendering images with point clouds
fig = plt.figure()
ax = fig.add_subplot()

met.render_pointcloud_in_image(
    sample["token"],                  # sample_token
    pointsensor_channel="LIDAR_MX2",  # Name of the point cloud channel to visualize
    camera_channel="CAM_FRONT",       # Name of the camera channel to visualize
    ax=ax
)

ax.figure.set_size_inches(10, 10)
plt.show()
```

```python
# Rendering images with annotations
fig = plt.figure()
ax = fig.add_subplot()

met.render_sample_data(
    sample["data"]["CAM_FRONT"],     # sample_data_token of the image to visualize
    show_3d_boxes=True,              # True to render 3D boxes, False to render 2D boxes
    show_all_visible_3d_boxes=True,  # Render all visible 3D boxes vs. only those annotated on this sample
    ax=ax
)

ax.figure.set_size_inches(10, 10)
plt.show()
```

```python
# Rendering point clouds with annotations
fig = plt.figure()
ax = fig.add_subplot()

met.render_sample_data(
    sample["data"]["LIDAR_MX2"],     # sample_data_token of the point cloud to visualize
    show_all_visible_3d_boxes=True,  # Render all visible 3D boxes vs. only those annotated on this sample
    ax=ax
)

ax.figure.set_size_inches(10, 10)
plt.show()
```

```python
# Rendering images with panoptic masks
met.render_panoptic(
    sample["data"]["CAM_FRONT"],  # sample_data_token of the image to visualize
    out_path=None,
)
```

```python
# Rendering an aerial view of the point clouds
# **NOTE**: this requires the GDAL library
fig = plt.figure()
ax = fig.add_subplot()

met.render_aerial_view(
    sample["data"]["MVS"],  # sample_data_token of the point cloud to visualize
    ax=ax
)

ax.figure.set_size_inches(10, 10)
plt.show()
```
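The rendering calls above draw to a matplotlib axis; `render_panoptic()` additionally exposes an `out_path` argument. As a hedged sketch, assuming `out_path` accepts a file path to write the rendering to (the signature above suggests this, but it is not confirmed here), panoptic renderings for a whole scene could be exported by reusing the linked-list traversal from the navigation section:

```python
import os

# Assumption: passing a file path as `out_path` saves the rendering to disk
out_dir = "panoptic_renders"
os.makedirs(out_dir, exist_ok=True)

next_sample_token = scene["first_sample_token"]
while next_sample_token:
    s = met.get("sample", next_sample_token)
    met.render_panoptic(
        s["data"]["CAM_FRONT"],
        out_path=os.path.join(out_dir, f"{s['token']}.png"),  # assumed semantics
    )
    next_sample_token = s["next_sample"]
```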