From ab3ea252953ea23a9280d08534d5060479d6edfe Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Wed, 9 Apr 2025 10:57:15 -0700
Subject: [PATCH 01/14] Skeleton of code for tutorial

---
 examples/file_like.py | 92 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 examples/file_like.py

diff --git a/examples/file_like.py b/examples/file_like.py
new file mode 100644
index 00000000..1b2ed1ee
--- /dev/null
+++ b/examples/file_like.py
@@ -0,0 +1,92 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+===================================================================
+Streaming data through file-like support
+===================================================================
+
+In this example, we will describe the feature with references to its docs."""
+
+# %%
+# First, a bit of boilerplate: TODO.
+
+
+import torch
+import requests
+from time import perf_counter_ns
+
+def get_url_content(url):
+    response = requests.get(url, headers={"User-Agent": ""})
+    if response.status_code != 200:
+        raise RuntimeError(f"Failed to download video. {response.status_code = }.")
+    return response.content
+
+
+def bench(f, average_over=20, warmup=2):
+    for _ in range(warmup):
+        f()
+
+    times = []
+    for _ in range(average_over):
+        start = perf_counter_ns()
+        f()
+        end = perf_counter_ns()
+        times.append(end - start)
+
+    times = torch.tensor(times) * 1e-6  # ns to ms
+    std = times.std().item()
+    med = times.median().item()
+    print(f"{med = :.2f}ms +- {std:.2f}")
+
+
+from torchcodec.decoders import VideoDecoder
+
+nasa_url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4.mp4"
+
+pre_downloaded_raw_video_bytes = get_url_content(nasa_url)
+decoder = VideoDecoder(pre_downloaded_raw_video_bytes)
+
+print(f"Video size in MB: {len(pre_downloaded_raw_video_bytes) / 1024 / 1024}")
+print(decoder.metadata)
+print()
+
+def decode_from_existing_download():
+    decoder = VideoDecoder(pre_downloaded_raw_video_bytes, seek_mode="approximate")
+    return decoder[0]
+
+def download_before_decode():
+    raw_video_bytes = get_url_content(nasa_url)
+    decoder = VideoDecoder(raw_video_bytes, seek_mode="approximate")
+    return decoder[0]
+
+def direct_url_to_ffmpeg():
+    decoder = VideoDecoder(nasa_url, seek_mode="approximate")
+    return decoder[0]
+
+print("Decode from existing download")
+bench(decode_from_existing_download)
+print()
+
+print("Download before decode: ")
+bench(download_before_decode)
+print()
+
+print("Direct url to FFmpeg: ")
+bench(direct_url_to_ffmpeg)
+print()
+
+import fsspec
+# Note: we also need: aiohttp
+
+def stream_while_decode():
+    with fsspec.open(nasa_url, client_kwargs={'trust_env': True}) as file:
+        decoder = VideoDecoder(file, seek_mode="approximate")
+        return decoder[0]
+
+print("Stream while decode: ")
+bench(stream_while_decode)
+print()

From dbbf62e0ad4b2b364eeaafb96a7748a694d8165c Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Fri, 11 Apr 2025 19:49:08 -0700
Subject: [PATCH 02/14] Current draft of file like tutorial

---
 docs/requirements.txt |   2 +
 examples/file_like.py | 173 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 166 insertions(+), 9 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 64fa264e..d5fa5091 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -6,4 +6,6 @@ sphinx-tabs
 matplotlib
 torchvision
 ipython
+fsspec
+aiohttp
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
diff --git a/examples/file_like.py b/examples/file_like.py
index 1b2ed1ee..091ac680 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -9,16 +9,22 @@
 Streaming data through file-like support
 ===================================================================
 
-In this example, we will describe the feature with references to its docs."""
+In this example, we will show how to decode streaming data. That is, when files
+do not reside locally, we will show how to only download the data segments that
+are needed to decode the frames you care about. We accomplish this capability
+with Python
+`file-like objects <https://docs.python.org/3/glossary.html#term-file-like-object>`_."""
 
 # %%
-# First, a bit of boilerplate: TODO.
+# First, a bit of boilerplate. We define two functions: one to download content
+# from a given URL, and another to time the execution of a given function.
 
 
 import torch
 import requests
 from time import perf_counter_ns
 
+
 def get_url_content(url):
     response = requests.get(url, headers={"User-Agent": ""})
     if response.status_code != 200:
@@ -42,6 +48,19 @@ def bench(f, average_over=20, warmup=2):
     med = times.median().item()
     print(f"{med = :.2f}ms +- {std:.2f}")
 
+# %%
+# Performance: downloading first versus streaming
+# -----------------------------------------------
+#
+# We are going to investigate the cost of having to download an entire video
+# before decoding any frames versus being able to stream the video's data
+# while decoding. To demonstrate an extreme case, we're going to always decode
+# just the first frame of the video, while we vary how we get that video's
+# data.
+#
+# The video we're going to use in this tutorial is publicly available on the
+# internet. We perform an initial download of it so that we can understand
+# its size and content:
 
 from torchcodec.decoders import VideoDecoder
 
@@ -54,39 +73,175 @@ def bench(f, average_over=20, warmup=2):
 print(decoder.metadata)
 print()
 
+# %%
+# We can see that the video is about 253 MB, has the resolution 1920x1080, is
+# about 30 frames per second and is almost 3 and a half minutes long. As we
+# only want to decode the first frame, we would clearly benefit from not having
+# to download the entire video!
+#
+# Let's first test three scenarios:
+#
+#   1. Decode from the *existing* video we just downloaded. This is our baseline
+#      performance, as we've reduced the downloading cost to 0.
+#   2. Download the entire video before decoding. This is the worst case
+#      that we want to avoid.
+#   3. Provide the URL directly to the :class:`~torchcodec.decoders.VideoDecoder` class, which will pass
+#      the URL on to FFmpeg. Then FFmpeg will decide how much of the video to
+#      download before decoding.
+#
+# Note that in our scenarios, we are always setting the ``seek_mode`` parameter of
+# the :class:`~torchcodec.decoders.VideoDecoder` class to ``"approximate"``. We do
+# this to avoid scanning the entire video during initialization, which would
+# require downloading the entire video even if we only want to decode the first
+# frame. See :ref:`sphx_glr_generated_examples_approximate_mode.py` for more.
+
 def decode_from_existing_download():
-    decoder = VideoDecoder(pre_downloaded_raw_video_bytes, seek_mode="approximate")
+    decoder = VideoDecoder(
+        source=pre_downloaded_raw_video_bytes,
+        seek_mode="approximate",
+    )
     return decoder[0]
 
+
 def download_before_decode():
     raw_video_bytes = get_url_content(nasa_url)
-    decoder = VideoDecoder(raw_video_bytes, seek_mode="approximate")
+    decoder = VideoDecoder(
+        source=raw_video_bytes,
+        seek_mode="approximate",
+    )
     return decoder[0]
 
+
 def direct_url_to_ffmpeg():
-    decoder = VideoDecoder(nasa_url, seek_mode="approximate")
+    decoder = VideoDecoder(
+        source=nasa_url,
+        seek_mode="approximate",
+    )
     return decoder[0]
 
-print("Decode from existing download")
+
+print("Decode from existing download:")
 bench(decode_from_existing_download)
 print()
 
-print("Download before decode: ")
+print("Download before decode:")
 bench(download_before_decode)
 print()
 
-print("Direct url to FFmpeg: ")
+print("Direct url to FFmpeg:")
 bench(direct_url_to_ffmpeg)
 print()
 
+# %%
+# Decoding the already downloaded video is clearly the fastest. Having to
+# download the entire video each time we want to decode just the first frame
+# is over 4x slower than decoding an existing video. Providing a direct URL
+# is much better, as its about 2.5x faster than downloding the video first.
+#
+# We can do better, and the way how is to use a file-like object which
+# implements its own read and seek methods that only download data from a URL as
+# needed. Rather than implementing our own, we can use such objects from the
+# `fsspec <https://github.com/fsspec/filesystem_spec>`_ module that provides
+# `Filesystem interfaces for Python <https://filesystem-spec.readthedocs.io/en/latest/?badge=latest>`_.
+
 import fsspec
-# Note: we also need: aiohttp
 
 def stream_while_decode():
+    # The `client_kwargs` are passed down to the aiohttp module's client
+    # session; we need to indicate that we need to trust the environment
+    # settings for proxy configuration. Depending on your environment, you may
+    # not need this setting.
     with fsspec.open(nasa_url, client_kwargs={'trust_env': True}) as file:
         decoder = VideoDecoder(file, seek_mode="approximate")
         return decoder[0]
 
+
 print("Stream while decode: ")
 bench(stream_while_decode)
 print()
+
+# %%
+# Streaming the data through a file-like object is about 4.3x faster than
+# downloading the video first. And not only is it about 1.7x faster than
+# providing a direct URL, it's more general. :class:`~torchcodec.decoders.VideoDecoder` supports
+# direct URLs because the underlying FFmpeg functions support them. But the
+# kinds of protocols supported are determined by what that version of FFmpeg
+# supports. A file-like object can adapt any kind of resource, including ones
+# that are specific to your own infrastructure and are unknown to FFmpeg.
+
+
+# %%
+# How it works
+# ------------
+#
+
+from pathlib import Path
+import tempfile
+
+temp_dir = tempfile.mkdtemp()
+nasa_video_path = Path(temp_dir) / "nasa_video.mp4"
+with open(nasa_video_path, "wb") as f:
+    f.write(pre_downloaded_raw_video_bytes)
+
+
+class FileOpCounter:
+    def __init__(self, file):
+        self._file = file
+        self.num_reads = 0  # read() calls forwarded to the wrapped file
+        self.num_seeks = 0  # seek() calls forwarded to the wrapped file
+
+    def read(self, size: int) -> bytes:
+        self.num_reads += 1
+        return self._file.read(size)
+
+    def seek(self, offset: int, whence: int) -> int:
+        self.num_seeks += 1
+        return self._file.seek(offset, whence)
+
+
+file_op_counter = FileOpCounter(open(nasa_video_path, "rb"))
+counter_decoder = VideoDecoder(file_op_counter, seek_mode="approximate")
+
+print("Decoder initialization required "
+      f"{file_op_counter.num_reads} reads and "
+      f"{file_op_counter.num_seeks} seeks.")
+
+init_reads = file_op_counter.num_reads
+init_seeks = file_op_counter.num_seeks
+
+first_frame = counter_decoder[0]
+
+print("Decoding the first frame required "
+      f"{file_op_counter.num_reads - init_reads} additional reads and "
+      f"{file_op_counter.num_seeks - init_seeks} additional seeks.")
+print()
+
+# %%
+# Performance: local file path versus local file-like object
+# ----------------------------------------------------------
+#
+
+
+def decode_from_existing_file_path():
+    decoder = VideoDecoder(nasa_video_path, seek_mode="approximate")
+    return decoder[0]
+
+
+def decode_from_existing_open_file_object():
+    with open(nasa_video_path, "rb") as f:
+        decoder = VideoDecoder(f, seek_mode="approximate")
+        return decoder[0]
+
+
+print("Decode from existing file path:")
+bench(decode_from_existing_file_path)
+print()
+
+print("Decode from existing open file object:")
+bench(decode_from_existing_open_file_object)
+print()
+
+# %%
+import shutil
+shutil.rmtree(temp_dir)
+# %%

From 95523c9c8712d3844d771bc2e6c7c6109fb1cdce Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Fri, 11 Apr 2025 19:52:46 -0700
Subject: [PATCH 03/14] Lint

---
 examples/file_like.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/file_like.py b/examples/file_like.py
index 091ac680..3d88df6f 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -48,6 +48,7 @@ def bench(f, average_over=20, warmup=2):
     med = times.median().item()
     print(f"{med = :.2f}ms +- {std:.2f}")
 
+
 # %%
 # Performance: downloading first versus streaming
 # -----------------------------------------------
@@ -95,6 +96,7 @@ def bench(f, average_over=20, warmup=2):
 # require downloading the entire video even if we only want to decode the first
 # frame. See :ref:`sphx_glr_generated_examples_approximate_mode.py` for more.
 
+
 def decode_from_existing_download():
     decoder = VideoDecoder(
         source=pre_downloaded_raw_video_bytes,
@@ -146,6 +148,7 @@ def direct_url_to_ffmpeg():
 
 import fsspec
 
+
 def stream_while_decode():
     # The `client_kwargs` are passed down to the aiohttp module's client
     # session; we need to indicate that we need to trust the environment

From 8d442473878bc794ee34eed5591cef96ffefc7bb Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Fri, 11 Apr 2025 19:54:39 -0700
Subject: [PATCH 04/14] Section title change

---
 examples/file_like.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index 3d88df6f..89237ce9 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -50,8 +50,8 @@ def bench(f, average_over=20, warmup=2):
 
 
 # %%
-# Performance: downloading first versus streaming
-# -----------------------------------------------
+# Performance: downloading first vs. streaming
+# --------------------------------------------
 #
 # We are going to investigate the cost of having to download an entire video
 # before decoding any frames versus being able to stream the video's data
@@ -138,7 +138,7 @@ def direct_url_to_ffmpeg():
 # Decoding the already downloaded video is clearly the fastest. Having to
 # download the entire video each time we want to decode just the first frame
 # is over 4x slower than decoding an existing video. Providing a direct URL
-# is much better, as its about 2.5x faster than downloding the video first.
+# is much better, as its about 2.5x faster than downloading the video first.
 #
 # We can do better, and the way how is to use a file-like object which
 # implements its own read and seek methods that only download data from a URL as

From 87edc8c9d087da8ff6e59daf494a02ea3f7a1bbb Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Fri, 11 Apr 2025 20:07:28 -0700
Subject: [PATCH 05/14] Remove trailing prints

---
 examples/file_like.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index 89237ce9..be1a230b 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -72,7 +72,6 @@ def bench(f, average_over=20, warmup=2):
 
 print(f"Video size in MB: {len(pre_downloaded_raw_video_bytes) / 1024 / 1024}")
 print(decoder.metadata)
-print()
 
 # %%
 # We can see that the video is about 253 MB, has the resolution 1920x1080, is
@@ -132,7 +131,6 @@ def direct_url_to_ffmpeg():
 
 print("Direct url to FFmpeg:")
 bench(direct_url_to_ffmpeg)
-print()
 
 # %%
 # Decoding the already downloaded video is clearly the fastest. Having to
@@ -161,7 +159,6 @@ def stream_while_decode():
 
 print("Stream while decode: ")
 bench(stream_while_decode)
-print()
 
 # %%
 # Streaming the data through a file-like object is about 4.3x faster than
@@ -217,7 +214,6 @@ def seek(self, offset: int, whence: int) -> bytes:
 print("Decoding the first frame required "
       f"{file_op_counter.num_reads - init_reads} additional reads and "
       f"{file_op_counter.num_seeks - init_seeks} additional seeks.")
-print()
 
 # %%
 # Performance: local file path versus local file-like object
@@ -242,7 +238,6 @@ def decode_from_existing_open_file_object():
 
 print("Decode from existing open file object:")
 bench(decode_from_existing_open_file_object)
-print()
 
 # %%
 import shutil

From 055e9ca6b38139c63d3467e917a8d93fbd871102 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Fri, 11 Apr 2025 20:08:21 -0700
Subject: [PATCH 06/14] versus -> vs.

---
 examples/file_like.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index be1a230b..6543362e 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -216,7 +216,7 @@ def seek(self, offset: int, whence: int) -> bytes:
       f"{file_op_counter.num_seeks - init_seeks} additional seeks.")
 
 # %%
-# Performance: local file path versus local file-like object
+# Performance: local file path vs. local file-like object
 # ----------------------------------------------------------
 #
 

From c340d608fbb059abc58ef16063d5808eee64194d Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Mon, 21 Apr 2025 07:01:36 -0700
Subject: [PATCH 07/14] Apply comments

---
 examples/file_like.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index 6543362e..ccae211e 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -13,7 +13,10 @@
 do not reside locally, we will show how to only download the data segments that
 are needed to decode the frames you care about. We accomplish this capability
 with Python
-`file-like objects <https://docs.python.org/3/glossary.html#term-file-like-object>`_."""
+`file-like objects <https://docs.python.org/3/glossary.html#term-file-like-object>`_.
+Our example uses a video file, so we use the :class:`~torchcodec.decoders.VideoDecoder`
+class to decode it. But all of the lessons here also apply to audio files and the
+:class:`~torchcodec.decoders.AudioDecoder` class."""
 
 # %%
 # First, a bit of boilerplate. We define two functions: one to download content
@@ -70,7 +73,7 @@ def bench(f, average_over=20, warmup=2):
 pre_downloaded_raw_video_bytes = get_url_content(nasa_url)
 decoder = VideoDecoder(pre_downloaded_raw_video_bytes)
 
-print(f"Video size in MB: {len(pre_downloaded_raw_video_bytes) / 1024 / 1024}")
+print(f"Video size in MB: {len(pre_downloaded_raw_video_bytes) // 1024 // 1024}")
 print(decoder.metadata)
 
 # %%
@@ -143,6 +146,9 @@ def direct_url_to_ffmpeg():
 # needed. Rather than implementing our own, we can use such objects from the
 # `fsspec <https://github.com/fsspec/filesystem_spec>`_ module that provides
 # `Filesystem interfaces for Python <https://filesystem-spec.readthedocs.io/en/latest/?badge=latest>`_.
+# Note that using these capabilities from the `fsspec` library also requires the
+# `aiohttp <https://docs.aiohttp.org/en/stable/>`_ module. You can install both with
+# `pip install fsspec aiohttp`.
 
 import fsspec
 
@@ -152,8 +158,8 @@ def stream_while_decode():
     # session; we need to indicate that we need to trust the environment
     # settings for proxy configuration. Depending on your environment, you may
     # not need this setting.
-    with fsspec.open(nasa_url, client_kwargs={'trust_env': True}) as file:
-        decoder = VideoDecoder(file, seek_mode="approximate")
+    with fsspec.open(nasa_url, client_kwargs={'trust_env': True}) as file_like:
+        decoder = VideoDecoder(file_like, seek_mode="approximate")
         return decoder[0]
 
 
From 91128accc33aea27a0fe0926fdfd83d9af3482b1 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Mon, 21 Apr 2025 08:56:38 -0700
Subject: [PATCH 08/14] First full draft

---
 examples/file_like.py | 59 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index ccae211e..50068637 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -16,7 +16,7 @@
 `file-like objects <https://docs.python.org/3/glossary.html#term-file-like-object>`_.
 Our example uses a video file, so we use the :class:`~torchcodec.decoders.VideoDecoder`
 class to decode it. But all of the lessons here also apply to audio files and the
-:class:`~torchcodec.decoders.AudioDecoder` class."""
+:class:`~torchcodec.decoders.AudioDecoder` class as well."""
 
 # %%
 # First, a bit of boilerplate. We define two functions: one to download content
@@ -146,9 +146,9 @@ def direct_url_to_ffmpeg():
 # needed. Rather than implementing our own, we can use such objects from the
 # `fsspec <https://github.com/fsspec/filesystem_spec>`_ module that provides
 # `Filesystem interfaces for Python <https://filesystem-spec.readthedocs.io/en/latest/?badge=latest>`_.
-# Note that using these capabilities from the `fsspec` library also requires the
+# Note that using these capabilities from the fsspec` library also requires the
 # `aiohttp <https://docs.aiohttp.org/en/stable/>`_ module. You can install both with
-# `pip install fsspec aiohttp`.
+# ``pip install fsspec aiohttp``.
 
 import fsspec
 
@@ -179,17 +179,32 @@ def stream_while_decode():
 # %%
 # How it works
 # ------------
+# In Python, a `file-like object <https://docs.python.org/3/glossary.html#term-file-like-object>`_
+# is any object that exposes special methods for reading, writing and seeking.
+# While such methods are obviously file oriented, it's not required that
+# a file-like object is backed by an actual file. As far as Python is concerned,
+# if an object acts like a file, it's a file. This is a powerful concept, as
+# it enables libraries that read or write data to assume a file-like interface.
+# Other libraries that present novel resources can then be easily used by
+# providing a file-like wrapper for their resource.
 #
+# For our case, we only need the read and seek methods for decoding. The exact
+# method signature needed is in the example below. Rather than wrap a novel
+# resource, we demonstrate this capability by wrapping an actual file while
+# counting how often each method is called.
 
 from pathlib import Path
 import tempfile
 
+# Create a local file to interact with.
 temp_dir = tempfile.mkdtemp()
 nasa_video_path = Path(temp_dir) / "nasa_video.mp4"
 with open(nasa_video_path, "wb") as f:
     f.write(pre_downloaded_raw_video_bytes)
 
 
+# A file-like class that is backed by an actual file, but it intercepts reads
+# and seeks to maintain counts.
 class FileOpCounter:
     def __init__(self, file):
         self._file = file
@@ -205,6 +220,9 @@ def seek(self, offset: int, whence: int) -> bytes:
         return self._file.seek(offset, whence)
 
 
+# Let's now get a file-like object from our class defined above, providing it a
+# reference to the file we created. We pass our file-like object to the decoder
+# rather than the file itself.
 file_op_counter = FileOpCounter(open(nasa_video_path, "rb"))
 counter_decoder = VideoDecoder(file_op_counter, seek_mode="approximate")
 
@@ -221,10 +239,35 @@ def seek(self, offset: int, whence: int) -> bytes:
       f"{file_op_counter.num_reads - init_reads} additional reads and "
       f"{file_op_counter.num_seeks - init_seeks} additional seeks.")
 
+# %%
+# While we defined a simple class primarily for demonstration, it's actually
+# useful for diagnosing how much reading and seeking are required for different
+# decoding operations. We've also introduced a mystery that we should answer:
+# why does *initializing* the decoder take more reads and seeks than decoding
+# the first frame? The answer is that in our decoder implementation, we're
+# actually calling a special
+# `FFmpeg function <https://ffmpeg.org/doxygen/6.1/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb>`_
+# that decodes the first few frames to return more robust metadata.
+#
+# It's also worth noting that the Python file-like interface is only half of
+# the story. FFmpeg also has its own mechanism for directing reads and seeks
+# during decoding to user-define functions. TorchCodec does the work of
+# connecting the Python methods you define to FFmpeg. All you have to do is
+# define your methods in Python, and TorchCodec handles the rest.
+
 # %%
 # Performance: local file path vs. local file-like object
-# ----------------------------------------------------------
+# -------------------------------------------------------
+#
+# Since we have a local file defined, let's do a bonus performance test. We now
+# have two means of providing a local file to TorchCodec:
 #
+#   1. Through a path, where TorchCodec will then do the work of opening the
+#      local file at that path.
+#   2. Through a file-like object, where you open the file yourself and provide
+#      the file-like object to TorchCodec.
+#
+# An obvious question is: which is faster? The code below tests that question.
 
 
 def decode_from_existing_file_path():
@@ -246,6 +289,14 @@ def decode_from_existing_open_file_object():
 bench(decode_from_existing_open_file_object)
 
 # %%
+# Thankfully, the answer is both means of decoding from a local file take about
+# the same amount of time. This result means that in your own code, you can use
+# whichever method is more convenient. What this result implies is that the
+# cost of actually reading and copying data dominates the cost of calling Python
+# methods while decoding.
+
+# %%
+# Finally, let's clean up the local resources we created.
 import shutil
 shutil.rmtree(temp_dir)
 # %%

From daa33b6c2bba0d1cdb796ec1dba50efe2c782b43 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Tue, 22 Apr 2025 06:53:05 -0700
Subject: [PATCH 09/14] Minor edits

---
 examples/file_like.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index 50068637..34662c4a 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -146,9 +146,9 @@ def direct_url_to_ffmpeg():
 # needed. Rather than implementing our own, we can use such objects from the
 # `fsspec <https://github.com/fsspec/filesystem_spec>`_ module that provides
 # `Filesystem interfaces for Python <https://filesystem-spec.readthedocs.io/en/latest/?badge=latest>`_.
-# Note that using these capabilities from the fsspec` library also requires the
+# Note that using these capabilities from the fsspec library also requires the
 # `aiohttp <https://docs.aiohttp.org/en/stable/>`_ module. You can install both with
-# ``pip install fsspec aiohttp``.
+# ``pip install fsspec aiohttp``.
 
 import fsspec
 
@@ -251,21 +251,22 @@ def seek(self, offset: int, whence: int) -> bytes:
 #
 # It's also worth noting that the Python file-like interface is only half of
 # the story. FFmpeg also has its own mechanism for directing reads and seeks
-# during decoding to user-define functions. TorchCodec does the work of
+# during decoding to user-defined functions. The
+# :class:`~torchcodec.decoders.VideoDecoder` object does the work of
 # connecting the Python methods you define to FFmpeg. All you have to do is
-# define your methods in Python, and TorchCodec handles the rest.
+# define your methods in Python, and we do the rest.
 
 # %%
 # Performance: local file path vs. local file-like object
 # -------------------------------------------------------
 #
 # Since we have a local file defined, let's do a bonus performance test. We now
-# have two means of providing a local file to TorchCodec:
+# have two means of providing a local file to :class:`~torchcodec.decoders.VideoDecoder`:
 #
-#   1. Through a path, where TorchCodec will then do the work of opening the
-#      local file at that path.
-#   2. Through a file-like object, where you open the file yourself and provide
-#      the file-like object to TorchCodec.
+#   1. Through a *path*, where the :class:`~torchcodec.decoders.VideoDecoder`
+#      object will then do the work of opening the local file at that path.
+#   2. Through a *file-like object*, where you open the file yourself and provide
+#      the file-like object to :class:`~torchcodec.decoders.VideoDecoder`.
 #
 # An obvious question is: which is faster? The code below tests that question.
 
@@ -276,8 +277,8 @@ def decode_from_existing_file_path():
 
 
 def decode_from_existing_open_file_object():
-    with open(nasa_video_path, "rb") as f:
-        decoder = VideoDecoder(f, seek_mode="approximate")
+    with open(nasa_video_path, "rb") as file:
+        decoder = VideoDecoder(file, seek_mode="approximate")
         return decoder[0]
 
 
From bd4ca6613da19835d74b3cfbd602e3aa1ce38559 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Tue, 22 Apr 2025 07:01:49 -0700
Subject: [PATCH 10/14] Reference tutorial in doc strings

---
 src/torchcodec/decoders/_audio_decoder.py | 5 +++--
 src/torchcodec/decoders/_video_decoder.py | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/torchcodec/decoders/_audio_decoder.py b/src/torchcodec/decoders/_audio_decoder.py
index 4b73e94c..0fcab700 100644
--- a/src/torchcodec/decoders/_audio_decoder.py
+++ b/src/torchcodec/decoders/_audio_decoder.py
@@ -32,8 +32,9 @@ class AudioDecoder:
             - If ``Pathlib.path``: a path to a local video or audio file.
             - If ``bytes`` object or ``torch.Tensor``: the raw encoded audio data.
             - If file-like object: we read video data from the object on demand. The object must
-              expose the methods ``read(self, size: int) -> bytes`` and
-              ``seek(self, offset: int, whence: int) -> bytes``. Read more in TODO_FILE_LIKE_TUTORIAL.
+              expose the methods ``read(self, size: int) -> bytes`` and
+              ``seek(self, offset: int, whence: int) -> int``. Read more in:
+              :ref:`sphx_glr_generated_examples_file_like.py`.
         stream_index (int, optional): Specifies which stream in the file to decode samples from.
             Note that this index is absolute across all media types. If left unspecified, then
             the :term:`best stream` is used.
diff --git a/src/torchcodec/decoders/_video_decoder.py b/src/torchcodec/decoders/_video_decoder.py
index 884bf275..b672cc09 100644
--- a/src/torchcodec/decoders/_video_decoder.py
+++ b/src/torchcodec/decoders/_video_decoder.py
@@ -28,8 +28,9 @@ class VideoDecoder:
             - If ``Pathlib.path``: a path to a local video file.
             - If ``bytes`` object or ``torch.Tensor``: the raw encoded video data.
             - If file-like object: we read video data from the object on demand. The object must
-              expose the methods ``read(self, size: int) -> bytes`` and
-              ``seek(self, offset: int, whence: int) -> bytes``. Read more in TODO_FILE_LIKE_TUTORIAL.
+              expose the methods ``read(self, size: int) -> bytes`` and
+              ``seek(self, offset: int, whence: int) -> int``. Read more in:
+              :ref:`sphx_glr_generated_examples_file_like.py`.
         stream_index (int, optional): Specifies which stream in the video to decode frames from.
             Note that this index is absolute across all media types. If left unspecified, then
             the :term:`best stream` is used.

From f510124ae4cc10388a57154333c955c25978b9e0 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Tue, 22 Apr 2025 07:40:57 -0700
Subject: [PATCH 11/14] Reduce number of default benchmark iterations to 10

---
 examples/file_like.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index 34662c4a..04ba8c41 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -35,7 +35,7 @@ def get_url_content(url):
     return response.content
 
 
-def bench(f, average_over=20, warmup=2):
+def bench(f, average_over=10, warmup=2):
     for _ in range(warmup):
         f()
 

From 534da283925f1eada3209b9dd58718a5745a39b1 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Tue, 22 Apr 2025 08:41:37 -0700
Subject: [PATCH 12/14] empty


From c4fa4f5e86e6388c8b594cbe0ee60e8cbbf53c00 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Tue, 22 Apr 2025 09:28:43 -0700
Subject: [PATCH 13/14] empty


From cb6c373cbd2eea417f163a9407f118e7ecc405f2 Mon Sep 17 00:00:00 2001
From: Scott Schneider <scott.a.s@gmail.com>
Date: Tue, 22 Apr 2025 10:09:06 -0700
Subject: [PATCH 14/14] Make perf discussion more general

---
 examples/file_like.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/file_like.py b/examples/file_like.py
index 04ba8c41..a327f4c8 100644
--- a/examples/file_like.py
+++ b/examples/file_like.py
@@ -138,8 +138,8 @@ def direct_url_to_ffmpeg():
 # %%
 # Decoding the already downloaded video is clearly the fastest. Having to
 # download the entire video each time we want to decode just the first frame
-# is over 4x slower than decoding an existing video. Providing a direct URL
-# is much better, as its about 2.5x faster than downloading the video first.
+# is many times slower than decoding an existing video. Providing a direct URL
+# is much better, but we're still probably downloading more than we need to.
 #
 # We can do better, and the way how is to use a file-like object which
 # implements its own read and seek methods that only download data from a URL as
@@ -167,8 +167,8 @@ def stream_while_decode():
 bench(stream_while_decode)
 
 # %%
-# Streaming the data through a file-like object is about 4.3x faster than
-# downloading the video first. And not only is it about 1.7x faster than
+# Streaming the data through a file-like object is much faster than
+# downloading the video first. And not only is it also faster than
 # providing a direct URL, it's more general. :class:`~torchcodec.decoders.VideoDecoder` supports
 # direct URLs because the underlying FFmpeg functions support them. But the
 # kinds of protocols supported are determined by what that version of FFmpeg