diff --git a/tutorial/tutorial.ipynb b/tutorial/tutorial.ipynb index dc7bf09..ed1da6b 100644 --- a/tutorial/tutorial.ipynb +++ b/tutorial/tutorial.ipynb @@ -1,796 +1,825 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "-pVhOfzLx9us" + }, + "source": [ + "# Waymo Open Dataset Tutorial\n", + "\n", + "- Website: https://waymo.com/open\n", + "- GitHub: https://github.com/waymo-research/waymo-open-dataset\n", + "\n", + "This tutorial demonstrates how to use the Waymo Open Dataset with two frames of data. Visit the [Waymo Open Dataset Website](https://waymo.com/open) to download the full dataset.\n", + "\n", + "To use, open this notebook in [Colab](https://colab.research.google.com).\n", + "\n", + "Uncheck the box \"Reset all runtimes before running\" if you run this colab directly from the remote kernel. Alternatively, you can make a copy before trying to run it by following \"File > Save copy in Drive ...\".\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WWdJUWiZdMte" + }, + "source": [ + "## Install waymo_open_dataset package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "name": "Waymo Open Dataset Tutorial.ipynb", - "provenance": [], - "private_outputs": true, - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "-pVhOfzLx9us" - }, - "source": [ - "#Waymo Open Dataset Tutorial\n", - "\n", - "- Website: https://waymo.com/open\n", - "- GitHub: https://github.com/waymo-research/waymo-open-dataset\n", - "\n", - "This tutorial demonstrates how to use the Waymo Open Dataset with two frames of data. Visit the [Waymo Open Dataset Website](https://waymo.com/open) to download the full dataset.\n", - "\n", - "To use, open this notebook in [Colab](https://colab.research.google.com).\n", - "\n", - "Uncheck the box \"Reset all runtimes before running\" if you run this colab directly from the remote kernel. Alternatively, you can make a copy before trying to run it by following \"File > Save copy in Drive ...\".\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WWdJUWiZdMte" - }, - "source": [ - "## Install waymo_open_dataset package" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "zxX_JIZrdKoR", - "colab": {} - }, - "source": [ - "!rm -rf waymo-od > /dev/null\n", - "!git clone https://github.com/waymo-research/waymo-open-dataset.git waymo-od\n", - "!cd waymo-od && git branch -a\n", - "!cd waymo-od && git checkout remotes/origin/master\n", - "!pip3 install --upgrade pip" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "sLIUOJzSyjj6", - "colab_type": "code", - "colab": {} - }, - "source": [ - "!pip3 install waymo-open-dataset-tf-2-1-0==1.2.0" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "hDQ1DPqwdfNW", - "colab": {} - }, - "source": [ - "import os\n", - "import tensorflow.compat.v1 as tf\n", - "import math\n", - "import numpy as np\n", - "import itertools\n", - "\n", - "tf.enable_eager_execution()\n", - "\n", - "from waymo_open_dataset.utils import range_image_utils\n", - "from waymo_open_dataset.utils import transform_utils\n", - "from waymo_open_dataset.utils import frame_utils\n", - "from waymo_open_dataset import dataset_pb2 as open_dataset" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ibor0U9XBlX6" - }, - "source": [ - "## Read one frame\n", - "\n", - "Each file in the dataset is a sequence of frames ordered by frame start timestamps. We have extracted two frames from the dataset to demonstrate the dataset format." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "29uZtYLJBx2r", - "colab": {} - }, - "source": [ - "FILENAME = '/content/waymo-od/tutorial/frames'\n", - "dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')\n", - "for data in dataset:\n", - " frame = open_dataset.Frame()\n", - " frame.ParseFromString(bytearray(data.numpy()))\n", - " break" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "wHK95_JBUXUx", - "colab": {} - }, - "source": [ - "(range_images, camera_projections,\n", - " _, range_image_top_pose) = frame_utils.parse_range_image_and_camera_projection(\n", - " frame)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "eorb6V2qznV_" - }, - "source": [ - "###Examine frame context\n", - "\n", - "Refer to [dataset.proto](https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/dataset.proto) for the data format. The context contains shared information among all frames in the scene." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "ZotEevt7S0fE", - "colab": {} - }, - "source": [ - "print(frame.context)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_YcpB9SHkW4v" - }, - "source": [ - "## Visualize Camera Images and Camera Labels" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "18KfxT8RkMv0", - "colab": {} - }, - "source": [ - "import matplotlib.pyplot as plt\n", - "import matplotlib.patches as patches\n", - "\n", - "def show_camera_image(camera_image, camera_labels, layout, cmap=None):\n", - " \"\"\"Show a camera image and the given camera labels.\"\"\"\n", - "\n", - " ax = plt.subplot(*layout)\n", - "\n", - " # Draw the camera labels.\n", - " for camera_labels in frame.camera_labels:\n", - " # Ignore camera labels that do not correspond to this camera.\n", - " if camera_labels.name != camera_image.name:\n", - " continue\n", - "\n", - " # Iterate over the individual labels.\n", - " for label in camera_labels.labels:\n", - " # Draw the object bounding box.\n", - " ax.add_patch(patches.Rectangle(\n", - " xy=(label.box.center_x - 0.5 * label.box.length,\n", - " label.box.center_y - 0.5 * label.box.width),\n", - " width=label.box.length,\n", - " height=label.box.width,\n", - " linewidth=1,\n", - " edgecolor='red',\n", - " facecolor='none'))\n", - "\n", - " # Show the camera image.\n", - " plt.imshow(tf.image.decode_jpeg(camera_image.image), cmap=cmap)\n", - " plt.title(open_dataset.CameraName.Name.Name(camera_image.name))\n", - " plt.grid(False)\n", - " plt.axis('off')\n", - "\n", - "plt.figure(figsize=(25, 20))\n", - "\n", - "for index, image in enumerate(frame.images):\n", - " show_camera_image(image, frame.camera_labels, [3, 3, index+1])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "JPc-xBE6VMHi" - }, - "source": [ - "##Visualize Range Images" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "mwZ4xcsHVO1V", - "colab": {} - }, - "source": [ - "plt.figure(figsize=(64, 20))\n", - "def plot_range_image_helper(data, name, layout, vmin = 0, vmax=1, cmap='gray'):\n", - " \"\"\"Plots range image.\n", - "\n", - " Args:\n", - " data: range image data\n", - " name: the image title\n", - " layout: plt layout\n", - " vmin: minimum value of the passed data\n", - " vmax: maximum value of the passed data\n", - " cmap: color map\n", - " \"\"\"\n", - " plt.subplot(*layout)\n", - " plt.imshow(data, cmap=cmap, vmin=vmin, vmax=vmax)\n", - " plt.title(name)\n", - " plt.grid(False)\n", - " plt.axis('off')\n", - "\n", - "def get_range_image(laser_name, return_index):\n", - " \"\"\"Returns range image given a laser name and its return index.\"\"\"\n", - " return range_images[laser_name][return_index]\n", - "\n", - "def show_range_image(range_image, layout_index_start = 1):\n", - " \"\"\"Shows range image.\n", - "\n", - " Args:\n", - " range_image: the range image data from a given lidar of type MatrixFloat.\n", - " layout_index_start: layout offset\n", - " \"\"\"\n", - " range_image_tensor = tf.convert_to_tensor(range_image.data)\n", - " range_image_tensor = tf.reshape(range_image_tensor, range_image.shape.dims)\n", - " lidar_image_mask = tf.greater_equal(range_image_tensor, 0)\n", - " range_image_tensor = tf.where(lidar_image_mask, range_image_tensor,\n", - " tf.ones_like(range_image_tensor) * 1e10)\n", - " range_image_range = range_image_tensor[...,0] \n", - " range_image_intensity = range_image_tensor[...,1]\n", - " range_image_elongation = range_image_tensor[...,2]\n", - " plot_range_image_helper(range_image_range.numpy(), 'range',\n", - " [8, 1, layout_index_start], vmax=75, cmap='gray')\n", - " plot_range_image_helper(range_image_intensity.numpy(), 'intensity',\n", - " [8, 1, layout_index_start + 1], vmax=1.5, cmap='gray')\n", - " plot_range_image_helper(range_image_elongation.numpy(), 'elongation',\n", - " [8, 1, layout_index_start + 2], vmax=1.5, cmap='gray')\n", - "frame.lasers.sort(key=lambda laser: laser.name)\n", - "show_range_image(get_range_image(open_dataset.LaserName.TOP, 0), 1)\n", - "show_range_image(get_range_image(open_dataset.LaserName.TOP, 1), 4)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "17Lwd2nVpex7" - }, - "source": [ - "##Point Cloud Conversion and Visualization" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "XIEDW1pfpmd-", - "colab": {} - }, - "source": [ - "points, cp_points = frame_utils.convert_range_image_to_point_cloud(\n", - " frame,\n", - " range_images,\n", - " camera_projections,\n", - " range_image_top_pose)\n", - "points_ri2, cp_points_ri2 = frame_utils.convert_range_image_to_point_cloud(\n", - " frame,\n", - " range_images,\n", - " camera_projections,\n", - " range_image_top_pose,\n", - " ri_index=1)\n", - "\n", - "# 3d points in vehicle frame.\n", - "points_all = np.concatenate(points, axis=0)\n", - "points_all_ri2 = np.concatenate(points_ri2, axis=0)\n", - "# camera projection corresponding to each point.\n", - "cp_points_all = np.concatenate(cp_points, axis=0)\n", - "cp_points_all_ri2 = np.concatenate(cp_points_ri2, axis=0)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "5MAKwTRWz3af" - }, - "source": [ - "###Examine number of points in each lidar sensor." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "E5_e4BCAGfYX" - }, - "source": [ - "First return." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "bpsAJp2CqKrE", - "colab": {} - }, - "source": [ - "print(points_all.shape)\n", - "print(cp_points_all.shape)\n", - "print(points_all[0:2])\n", - "for i in range(5):\n", - " print(points[i].shape)\n", - " print(cp_points[i].shape)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "n3cdlVYFGiE_" - }, - "source": [ - "Second return." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "QX6aj2EDGlRo", - "colab": {} - }, - "source": [ - "print(points_all_ri2.shape)\n", - "print(cp_points_all_ri2.shape)\n", - "print(points_all_ri2[0:2])\n", - "for i in range(5):\n", - " print(points_ri2[i].shape)\n", - " print(cp_points_ri2[i].shape)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "PCj3SDbuq9Nr" - }, - "source": [ - "###Show point cloud\n", - "3D point clouds are rendered using an internal tool, which is unfortunately not publicly available yet. Here is an example of what they look like." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "K8VFnGnOq6cO", - "colab": {} - }, - "source": [ - "from IPython.display import Image, display\n", - "display(Image('/content/waymo-od/tutorial/3d_point_cloud.png'))" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HiGOSt4mr0xA" - }, - "source": [ - "##Visualize Camera Projection" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "yRMUN-hur9wO", - "colab": {} - }, - "source": [ - "images = sorted(frame.images, key=lambda i:i.name)\n", - "cp_points_all_concat = np.concatenate([cp_points_all, points_all], axis=-1)\n", - "cp_points_all_concat_tensor = tf.constant(cp_points_all_concat)\n", - "\n", - "# The distance between lidar points and vehicle frame origin.\n", - "points_all_tensor = tf.norm(points_all, axis=-1, keepdims=True)\n", - "cp_points_all_tensor = tf.constant(cp_points_all, dtype=tf.int32)\n", - "\n", - "mask = tf.equal(cp_points_all_tensor[..., 0], images[0].name)\n", - "\n", - "cp_points_all_tensor = tf.cast(tf.gather_nd(\n", - " cp_points_all_tensor, tf.where(mask)), dtype=tf.float32)\n", - "points_all_tensor = tf.gather_nd(points_all_tensor, tf.where(mask))\n", - "\n", - "projected_points_all_from_raw_data = tf.concat(\n", - " [cp_points_all_tensor[..., 1:3], points_all_tensor], axis=-1).numpy()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "Invsxz0xsXNA", - "colab": {} - }, - "source": [ - "def rgba(r):\n", - " \"\"\"Generates a color based on range.\n", - "\n", - " Args:\n", - " r: the range value of a given point.\n", - " Returns:\n", - " The color for a given range\n", - " \"\"\"\n", - " c = plt.get_cmap('jet')((r % 20.0) / 20.0)\n", - " c = list(c)\n", - " c[-1] = 0.5 # alpha\n", - " return c\n", - "\n", - "def plot_image(camera_image):\n", - " \"\"\"Plot a cmaera image.\"\"\"\n", - " plt.figure(figsize=(20, 12))\n", - " plt.imshow(tf.image.decode_jpeg(camera_image.image))\n", - " plt.grid(\"off\")\n", - "\n", - "def plot_points_on_image(projected_points, camera_image, rgba_func,\n", - " point_size=5.0):\n", - " \"\"\"Plots points on a camera image.\n", - "\n", - " Args:\n", - " projected_points: [N, 3] numpy array. The inner dims are\n", - " [camera_x, camera_y, range].\n", - " camera_image: jpeg encoded camera image.\n", - " rgba_func: a function that generates a color from a range value.\n", - " point_size: the point size.\n", - "\n", - " \"\"\"\n", - " plot_image(camera_image)\n", - "\n", - " xs = []\n", - " ys = []\n", - " colors = []\n", - "\n", - " for point in projected_points:\n", - " xs.append(point[0]) # width, col\n", - " ys.append(point[1]) # height, row\n", - " colors.append(rgba_func(point[2]))\n", - "\n", - " plt.scatter(xs, ys, c=colors, s=point_size, edgecolors=\"none\")" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "fx7mUQM2saI-", - "colab": {} - }, - "source": [ - "plot_points_on_image(projected_points_all_from_raw_data,\n", - " images[0], rgba, point_size=5.0)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6rPeRqI54sBh" - }, - "source": [ - "## Install from source code\n", - "\n", - "The remaining part of this colab covers details of installing the repo form source code which provides a richer API." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "C8b-R3v1-6Eg" - }, - "source": [ - "### Install dependencies" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "HhD5nAIV-5Hj", - "colab": {} - }, - "source": [ - "!sudo apt install build-essential\n", - "!sudo apt-get install --assume-yes pkg-config zip g++ zlib1g-dev unzip python3 python3-pip\n", - "!wget https://github.com/bazelbuild/bazel/releases/download/0.28.0/bazel-0.28.0-installer-linux-x86_64.sh\n", - "!sudo bash ./bazel-0.28.0-installer-linux-x86_64.sh" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "etZQJlyu_N-_" - }, - "source": [ - "###Build and test (this can take 10 mins)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "EJmk8o-S_Flm" - }, - "source": [ - "Configure .bazelrc. This works with/without Tensorflow. This colab machine has Tensorflow installed." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "KscXNRZo_Rtg", - "colab": {} - }, - "source": [ - "!cd waymo-od && ./configure.sh && cat .bazelrc && bazel clean" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "56xb_Ckj_8ZL", - "colab": {} - }, - "source": [ - "!cd waymo-od && bazel build ... --show_progress_rate_limit=10.0" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "P8pO1b1EALPA" - }, - "source": [ - "### Metrics computation\n", - "The core metrics computation library is written in C++, so it can be extended to other programming languages. It can compute detection metrics (mAP) and tracking metrics (MOTA). See more information about the metrics on the [website](https://waymo.com/open/next/).\n", - "\n", - "We provide command line tools and TensorFlow ops to call the detection metrics library to compute detection metrics. We will provide a similar wrapper for tracking metrics library in the future. You are welcome to contribute your wrappers." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "UIBufmNJBdGR" - }, - "source": [ - "#### Command line detection metrics computation\n", - "\n", - "The command takes a pair of files for prediction and ground truth. Read the comment in waymo_open_dataset/metrics/tools/compute_detection_metrics_main.cc for details of the data format." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "bDGzTcNgBVG-", - "colab": {} - }, - "source": [ - "!cd waymo-od && bazel-bin/waymo_open_dataset/metrics/tools/compute_detection_metrics_main waymo_open_dataset/metrics/tools/fake_predictions.bin waymo_open_dataset/metrics/tools/fake_ground_truths.bin" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LJn7Z9AjBii-" - }, - "source": [ - "#### TensorFlow custom op\n", - "\n", - "A TensorFlow op is defined at metrics/ops/metrics_ops.cc. We provide a python wrapper of the op at metrics/ops/py_metrics_ops.py, and a tf.metrics-like implementation of the op at metrics/python/detection_metrics.py. This library requires TensorFlow to be installed." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WB_BdDAYCQ62" - }, - "source": [ - "Install TensorFlow and NumPy." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "iXQqouc9CVWe", - "colab": {} - }, - "source": [ - "!pip3 install numpy tensorflow" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "AURmNltwCYFx" - }, - "source": [ - "Reconfigure .bazelrc such that you can compile the TensorFlow ops" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "7jr2qkwYChDp", - "colab": {} - }, - "source": [ - "!cd waymo-od && ./configure.sh && cat .bazelrc" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WkVOvmP4EOmD" - }, - "source": [ - "Run the op and tf.metrics wrapper unit tests which can be referenced as example usage of the libraries." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "4ArMxYB1E9j-", - "colab": {} - }, - "source": [ - "!cd waymo-od && bazel test waymo_open_dataset/metrics/ops/... && bazel test waymo_open_dataset/metrics/python/..." - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "923OIUNSFCFw" - }, - "source": [ - "Run all tests in the repo." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "ytEO4tE3FHKo", - "colab": {} - }, - "source": [ - "!cd waymo-od && bazel test ..." - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_FpcGDRnAElo" - }, - "source": [ - "### Build local PIP package" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "ZwnPUOgVAHec", - "colab": {} - }, - "source": [ - "!cd waymo-od && export PYTHON_VERSION=3 && ./pip_pkg_scripts/build.sh" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vWcqyWarUbCv", - "colab_type": "text" - }, - "source": [ - "You can install the locally compiled package or access any c++ binary compiled from this." - ] - } - ] + "base_uri": "https://localhost:8080/" + }, + "id": "zxX_JIZrdKoR", + "outputId": "faf06267-31c2-48f3-de6d-41ef79cae056" + }, + "outputs": [], + "source": [ + "!rm -rf waymo-od > /dev/null\n", + "!git clone https://github.com/waymo-research/waymo-open-dataset.git waymo-od\n", + "!cd waymo-od && git branch -a\n", + "!cd waymo-od && git checkout remotes/origin/master" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6HZpuBOY5cwo", + "outputId": "684648e2-274b-44e1-df76-4f0579cb9ad5" + }, + "outputs": [], + "source": [ + "!pip3 install --upgrade pip\n", + "!pip3 install numpy==1.21.6\n", + "!pip3 install waymo-open-dataset-tf-2-6-0==1.4.9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hDQ1DPqwdfNW" + }, + "outputs": [], + "source": [ + "import google.protobuf\n", + "import os\n", + "import math\n", + "import numpy as np\n", + "import itertools\n", + "import tensorflow as tf\n", + "\n", + "from waymo_open_dataset.utils import range_image_utils\n", + "from waymo_open_dataset.utils import transform_utils\n", + "from waymo_open_dataset.utils import frame_utils\n", + "from waymo_open_dataset import dataset_pb2 as open_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F86_jdCkAkNx" + }, + "outputs": [], + "source": [ + "# Run this if using TF 1.x\n", + "# tf.enable_eager_execution()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ibor0U9XBlX6" + }, + "source": [ + "## Read one frame\n", + "\n", + "Each file in the dataset is a sequence of frames ordered by frame start timestamps. We have extracted two frames from the dataset to demonstrate the dataset format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "29uZtYLJBx2r" + }, + "outputs": [], + "source": [ + "FILENAME = '/content/waymo-od/tutorial/frames'\n", + "dataset = tf.data.TFRecordDataset(FILENAME, compression_type='')\n", + "for data in dataset:\n", + " frame = open_dataset.Frame()\n", + " frame.ParseFromString(bytearray(data.numpy()))\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wHK95_JBUXUx" + }, + "outputs": [], + "source": [ + "(range_images, camera_projections,\n", + " _, range_image_top_pose) = frame_utils.parse_range_image_and_camera_projection(\n", + " frame)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eorb6V2qznV_" + }, + "source": [ + "### Examine frame context\n", + "\n", + "Refer to [dataset.proto](https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/dataset.proto) for the data format. The context contains shared information among all frames in the scene." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZotEevt7S0fE", + "outputId": "e1d62e96-1ee2-4e65-9016-873e96e412c2" + }, + "outputs": [], + "source": [ + "print(frame.context)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_YcpB9SHkW4v" + }, + "source": [ + "## Visualize Camera Images and Camera Labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 645 + }, + "id": "18KfxT8RkMv0", + "outputId": "e16ef932-7801-4c67-c8fd-b6c6c910c16b" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as patches\n", + "\n", + "def show_camera_image(camera_image, camera_labels, layout, cmap=None):\n", + " \"\"\"Show a camera image and the given camera labels.\"\"\"\n", + "\n", + " ax = plt.subplot(*layout)\n", + "\n", + " # Draw the camera labels.\n", + " for camera_labels in frame.camera_labels:\n", + " # Ignore camera labels that do not correspond to this camera.\n", + " if camera_labels.name != camera_image.name:\n", + " continue\n", + "\n", + " # Iterate over the individual labels.\n", + " for label in camera_labels.labels:\n", + " # Draw the object bounding box.\n", + " ax.add_patch(patches.Rectangle(\n", + " xy=(label.box.center_x - 0.5 * label.box.length,\n", + " label.box.center_y - 0.5 * label.box.width),\n", + " width=label.box.length,\n", + " height=label.box.width,\n", + " linewidth=1,\n", + " edgecolor='red',\n", + " facecolor='none'))\n", + "\n", + " # Show the camera image.\n", + " plt.imshow(tf.image.decode_jpeg(camera_image.image), cmap=cmap)\n", + " plt.title(open_dataset.CameraName.Name.Name(camera_image.name))\n", + " plt.grid(False)\n", + " plt.axis('off')\n", + "\n", + "plt.figure(figsize=(25, 20))\n", + "\n", + "for index, image in enumerate(frame.images):\n", + " show_camera_image(image, frame.camera_labels, [3, 3, index+1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JPc-xBE6VMHi" + }, + "source": [ + "## Visualize Range Images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 308 + }, + "id": "mwZ4xcsHVO1V", + "outputId": "81a7f97a-ebee-490f-becb-325790df2f05" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(64, 20))\n", + "def plot_range_image_helper(data, name, layout, vmin = 0, vmax=1, cmap='gray'):\n", + " \"\"\"Plots range image.\n", + "\n", + " Args:\n", + " data: range image data\n", + " name: the image title\n", + " layout: plt layout\n", + " vmin: minimum value of the passed data\n", + " vmax: maximum value of the passed data\n", + " cmap: color map\n", + " \"\"\"\n", + " plt.subplot(*layout)\n", + " plt.imshow(data, cmap=cmap, vmin=vmin, vmax=vmax)\n", + " plt.title(name)\n", + " plt.grid(False)\n", + " plt.axis('off')\n", + "\n", + "def get_range_image(laser_name, return_index):\n", + " \"\"\"Returns range image given a laser name and its return index.\"\"\"\n", + " return range_images[laser_name][return_index]\n", + "\n", + "def show_range_image(range_image, layout_index_start = 1):\n", + " \"\"\"Shows range image.\n", + "\n", + " Args:\n", + " range_image: the range image data from a given lidar of type MatrixFloat.\n", + " layout_index_start: layout offset\n", + " \"\"\"\n", + " range_image_tensor = tf.convert_to_tensor(range_image.data)\n", + " range_image_tensor = tf.reshape(range_image_tensor, range_image.shape.dims)\n", + " lidar_image_mask = tf.greater_equal(range_image_tensor, 0)\n", + " range_image_tensor = tf.where(lidar_image_mask, range_image_tensor,\n", + " tf.ones_like(range_image_tensor) * 1e10)\n", + " range_image_range = range_image_tensor[...,0] \n", + " range_image_intensity = range_image_tensor[...,1]\n", + " range_image_elongation = range_image_tensor[...,2]\n", + " plot_range_image_helper(range_image_range.numpy(), 'range',\n", + " [8, 1, layout_index_start], vmax=75, cmap='gray')\n", + " plot_range_image_helper(range_image_intensity.numpy(), 'intensity',\n", + " [8, 1, layout_index_start + 1], vmax=1.5, cmap='gray')\n", + " plot_range_image_helper(range_image_elongation.numpy(), 'elongation',\n", + " [8, 1, layout_index_start + 2], vmax=1.5, cmap='gray')\n", + "frame.lasers.sort(key=lambda laser: laser.name)\n", + "show_range_image(get_range_image(open_dataset.LaserName.TOP, 0), 1)\n", + "show_range_image(get_range_image(open_dataset.LaserName.TOP, 1), 4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "17Lwd2nVpex7" + }, + "source": [ + "## Point Cloud Conversion and Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XIEDW1pfpmd-" + }, + "outputs": [], + "source": [ + "points, cp_points = frame_utils.convert_range_image_to_point_cloud(\n", + " frame,\n", + " range_images,\n", + " camera_projections,\n", + " range_image_top_pose)\n", + "points_ri2, cp_points_ri2 = frame_utils.convert_range_image_to_point_cloud(\n", + " frame,\n", + " range_images,\n", + " camera_projections,\n", + " range_image_top_pose,\n", + " ri_index=1)\n", + "\n", + "# 3d points in vehicle frame.\n", + "points_all = np.concatenate(points, axis=0)\n", + "points_all_ri2 = np.concatenate(points_ri2, axis=0)\n", + "# camera projection corresponding to each point.\n", + "cp_points_all = np.concatenate(cp_points, axis=0)\n", + "cp_points_all_ri2 = np.concatenate(cp_points_ri2, axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5MAKwTRWz3af" + }, + "source": [ + "### Examine number of points in each lidar sensor." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E5_e4BCAGfYX" + }, + "source": [ + "First return." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bpsAJp2CqKrE", + "outputId": "39190430-cb59-4e47-e21c-a8841af652bb" + }, + "outputs": [], + "source": [ + "print(points_all.shape)\n", + "print(cp_points_all.shape)\n", + "print(points_all[0:2])\n", + "for i in range(5):\n", + " print(points[i].shape)\n", + " print(cp_points[i].shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n3cdlVYFGiE_" + }, + "source": [ + "Second return." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QX6aj2EDGlRo", + "outputId": "a575afd7-3732-4e92-a5df-0c6dc802a071" + }, + "outputs": [], + "source": [ + "print(points_all_ri2.shape)\n", + "print(cp_points_all_ri2.shape)\n", + "print(points_all_ri2[0:2])\n", + "for i in range(5):\n", + " print(points_ri2[i].shape)\n", + " print(cp_points_ri2[i].shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PCj3SDbuq9Nr" + }, + "source": [ + "### Show point cloud\n", + "3D point clouds are rendered using an internal tool, which is unfortunately not publicly available yet. Here is an example of what they look like." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 445 + }, + "id": "K8VFnGnOq6cO", + "outputId": "e2eb39e5-b122-4ba9-91b5-fa5168b9de0b" + }, + "outputs": [], + "source": [ + "from IPython.display import Image, display\n", + "display(Image('/content/waymo-od/tutorial/3d_point_cloud.png'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HiGOSt4mr0xA" + }, + "source": [ + "## Visualize Camera Projection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yRMUN-hur9wO" + }, + "outputs": [], + "source": [ + "images = sorted(frame.images, key=lambda i:i.name)\n", + "cp_points_all_concat = np.concatenate([cp_points_all, points_all], axis=-1)\n", + "cp_points_all_concat_tensor = tf.constant(cp_points_all_concat)\n", + "\n", + "# The distance between lidar points and vehicle frame origin.\n", + "points_all_tensor = tf.norm(points_all, axis=-1, keepdims=True)\n", + "cp_points_all_tensor = tf.constant(cp_points_all, dtype=tf.int32)\n", + "\n", + "mask = tf.equal(cp_points_all_tensor[..., 0], images[0].name)\n", + "\n", + "cp_points_all_tensor = tf.cast(tf.gather_nd(\n", + " cp_points_all_tensor, tf.where(mask)), dtype=tf.float32)\n", + "points_all_tensor = tf.gather_nd(points_all_tensor, tf.where(mask))\n", + "\n", + "projected_points_all_from_raw_data = tf.concat(\n", + " [cp_points_all_tensor[..., 1:3], points_all_tensor], axis=-1).numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Invsxz0xsXNA" + }, + "outputs": [], + "source": [ + "def rgba(r):\n", + " \"\"\"Generates a color based on range.\n", + "\n", + " Args:\n", + " r: the range value of a given point.\n", + " Returns:\n", + " The color for a given range\n", + " \"\"\"\n", + " c = plt.get_cmap('jet')((r % 20.0) / 20.0)\n", + " c = list(c)\n", + " c[-1] = 0.5 # alpha\n", + " return c\n", + "\n", + "def plot_image(camera_image):\n", + " \"\"\"Plot a cmaera image.\"\"\"\n", + " plt.figure(figsize=(20, 12))\n", + " plt.imshow(tf.image.decode_jpeg(camera_image.image))\n", + " plt.grid(\"off\")\n", + "\n", + "def plot_points_on_image(projected_points, camera_image, rgba_func,\n", + " point_size=5.0):\n", + " \"\"\"Plots points on a camera image.\n", + "\n", + " Args:\n", + " projected_points: [N, 3] numpy array. The inner dims are\n", + " [camera_x, camera_y, range].\n", + " camera_image: jpeg encoded camera image.\n", + " rgba_func: a function that generates a color from a range value.\n", + " point_size: the point size.\n", + "\n", + " \"\"\"\n", + " plot_image(camera_image)\n", + "\n", + " xs = []\n", + " ys = []\n", + " colors = []\n", + "\n", + " for point in projected_points:\n", + " xs.append(point[0]) # width, col\n", + " ys.append(point[1]) # height, row\n", + " colors.append(rgba_func(point[2]))\n", + "\n", + " plt.scatter(xs, ys, c=colors, s=point_size, edgecolors=\"none\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 703 + }, + "id": "fx7mUQM2saI-", + "outputId": "84b298ab-0eb1-4ed5-d650-f03e12c3e8ba" + }, + "outputs": [], + "source": [ + "plot_points_on_image(projected_points_all_from_raw_data,\n", + " images[0], rgba, point_size=5.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6rPeRqI54sBh" + }, + "source": [ + "## Install from source code\n", + "\n", + "The remaining part of this colab covers details of installing the repo form source code which provides a richer API." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C8b-R3v1-6Eg" + }, + "source": [ + "### Install dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HhD5nAIV-5Hj", + "outputId": "ab611288-e05d-4182-96b5-cd0ec8cfa35c" + }, + "outputs": [], + "source": [ + "# Bazel v0.28.0 -> v3.1.0\n", + "!sudo apt install build-essential\n", + "!sudo apt-get install --assume-yes pkg-config zip g++ zlib1g-dev unzip python3 python3-pip\n", + "!wget https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-installer-linux-x86_64.sh\n", + "!sudo bash ./bazel-3.1.0-installer-linux-x86_64.sh" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "etZQJlyu_N-_" + }, + "source": [ + "### Build and test (this can take 10 mins)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EJmk8o-S_Flm" + }, + "source": [ + "Configure .bazelrc. This works with/without Tensorflow. This colab machine has Tensorflow installed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KscXNRZo_Rtg", + "outputId": "9e2035ca-dbf7-4f73-bea9-f12bf2070954" + }, + "outputs": [], + "source": [ + "!cd waymo-od && ./configure.sh && cat .bazelrc && bazel clean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "56xb_Ckj_8ZL", + "outputId": "fac74093-b2e1-4532-fe52-81415216e266" + }, + "outputs": [], + "source": [ + "!cd waymo-od && bazel build ... --show_progress_rate_limit=10.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P8pO1b1EALPA" + }, + "source": [ + "### Metrics computation\n", + "The core metrics computation library is written in C++, so it can be extended to other programming languages. It can compute detection metrics (mAP) and tracking metrics (MOTA). See more information about the metrics on the [website](https://waymo.com/open/next/).\n", + "\n", + "We provide command line tools and TensorFlow ops to call the detection metrics library to compute detection metrics. We will provide a similar wrapper for tracking metrics library in the future. You are welcome to contribute your wrappers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UIBufmNJBdGR" + }, + "source": [ + "#### Command line detection metrics computation\n", + "\n", + "The command takes a pair of files for prediction and ground truth. Read the comment in waymo_open_dataset/metrics/tools/compute_detection_metrics_main.cc for details of the data format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bDGzTcNgBVG-", + "outputId": "f3fbfb6d-8d61-40c7-87a1-fd6199ed368b" + }, + "outputs": [], + "source": [ + "!cd waymo-od && bazel-bin/waymo_open_dataset/metrics/tools/compute_detection_metrics_main waymo_open_dataset/metrics/tools/fake_predictions.bin waymo_open_dataset/metrics/tools/fake_ground_truths.bin" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LJn7Z9AjBii-" + }, + "source": [ + "#### TensorFlow custom op\n", + "\n", + "A TensorFlow op is defined at metrics/ops/metrics_ops.cc. We provide a python wrapper of the op at metrics/ops/py_metrics_ops.py, and a tf.metrics-like implementation of the op at metrics/python/detection_metrics.py. This library requires TensorFlow to be installed." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WB_BdDAYCQ62" + }, + "source": [ + "Install TensorFlow and NumPy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iXQqouc9CVWe", + "outputId": "2267cad4-0856-456b-81ea-86e68c27a190" + }, + "outputs": [], + "source": [ + "!pip3 install numpy tensorflow" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AURmNltwCYFx" + }, + "source": [ + "Reconfigure .bazelrc such that you can compile the TensorFlow ops" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7jr2qkwYChDp", + "outputId": "a7930fa7-ff80-44e6-da47-fe308f69233a" + }, + "outputs": [], + "source": [ + "!cd waymo-od && ./configure.sh && cat .bazelrc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WkVOvmP4EOmD" + }, + "source": [ + "Run the op and tf.metrics wrapper unit tests which can be referenced as example usage of the libraries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4ArMxYB1E9j-", + "outputId": "2f8be41e-55f1-4b52-8bf1-a06933ea35e2" + }, + "outputs": [], + "source": [ + "!cd waymo-od && bazel test waymo_open_dataset/metrics/ops/... && bazel test waymo_open_dataset/metrics/python/..." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "923OIUNSFCFw" + }, + "source": [ + "Run all tests in the repo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ytEO4tE3FHKo", + "outputId": "fa3377f1-d4c6-4d93-d6a4-69b84ddc0f7e" + }, + "outputs": [], + "source": [ + "!cd waymo-od && bazel test ..." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_FpcGDRnAElo" + }, + "source": [ + "### Build local PIP package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZwnPUOgVAHec", + "outputId": "72803b89-dd10-40d4-bb2f-6287d115b433" + }, + "outputs": [], + "source": [ + "!cd waymo-od && export PYTHON_VERSION=3 && ./pip_pkg_scripts/build.sh" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vWcqyWarUbCv" + }, + "source": [ + "You can install the locally compiled package or access any c++ binary compiled from this." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Copy of Waymo Open Dataset Tutorial.ipynb", + "provenance": [], + "toc_visible": true + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 }