4
4
5
5
from torchcodec import FrameBatch
6
6
from torchcodec .samplers ._common import (
7
+ _FRAMEBATCH_RETURN_DOCS ,
7
8
_POLICY_FUNCTION_TYPE ,
8
9
_POLICY_FUNCTIONS ,
9
10
_reshape_4d_framebatch_into_5d ,
@@ -156,7 +157,7 @@ def _generic_time_based_sampler(
156
157
# None means "beginning", which may not always be 0
157
158
sampling_range_start : Optional [float ],
158
159
sampling_range_end : Optional [float ], # interval is [start, end).
159
- policy : str = "repeat_last" ,
160
+ policy : Literal [ "repeat_last" , "wrap" , "error" ] = "repeat_last" ,
160
161
) -> FrameBatch :
161
162
# Note: *everywhere*, sampling_range_end denotes the upper bound of where a
162
163
# clip can start. This is an *open* upper bound, i.e. we will make sure no
@@ -226,8 +227,9 @@ def clips_at_random_timestamps(
226
227
# None means "beginning", which may not always be 0
227
228
sampling_range_start : Optional [float ] = None ,
228
229
sampling_range_end : Optional [float ] = None , # interval is [start, end).
229
- policy : str = "repeat_last" ,
230
+ policy : Literal [ "repeat_last" , "wrap" , "error" ] = "repeat_last" ,
230
231
) -> FrameBatch :
232
+ # See docstring below
231
233
return _generic_time_based_sampler (
232
234
kind = "random" ,
233
235
decoder = decoder ,
@@ -250,8 +252,9 @@ def clips_at_regular_timestamps(
250
252
# None means "beginning", which may not always be 0
251
253
sampling_range_start : Optional [float ] = None ,
252
254
sampling_range_end : Optional [float ] = None , # interval is [start, end).
253
- policy : str = "repeat_last" ,
255
+ policy : Literal [ "repeat_last" , "wrap" , "error" ] = "repeat_last" ,
254
256
) -> FrameBatch :
257
+ # See docstring below
255
258
return _generic_time_based_sampler (
256
259
kind = "regular" ,
257
260
decoder = decoder ,
@@ -263,3 +266,82 @@ def clips_at_regular_timestamps(
263
266
sampling_range_end = sampling_range_end ,
264
267
policy = policy ,
265
268
)
269
+
270
+
271
+ _COMMON_DOCS = """
272
+ {maybe_note}
273
+
274
+ Args:
275
+ decoder (VideoDecoder): The :class:`~torchcodec.decoders.VideoDecoder`
276
+ instance to sample clips from.
277
+ {num_clips_or_seconds_between_clip_starts}
278
+ num_frames_per_clip (int, optional): The number of frames per clip. Default: 1.
279
+ seconds_between_frames (float or None, optional): The time (in seconds)
280
+ between each frame within a clip. More accurately, this defines the
281
+ time between the *frame sampling points*, i.e. the timestamps at
282
+ which we sample the frames. Because frames span intervals in time,
283
+ the resulting start of frames within a clip may not be exactly
284
+ spaced by ``seconds_between_frames`` - but on average, they will be.
285
+ Default is None, which is set to the average frame duration
286
+ (``1/average_fps``).
287
+ sampling_range_start (float or None, optional): The start of the
288
+ sampling range, which defines the first timestamp (in seconds) that
289
+ a clip may *start* at. Default: None, which corresponds to the start
290
+ of the video. (Note: some videos start at negative values, which is
291
+ why the default is not 0).
292
+ sampling_range_end (float or None, optional): The end of the sampling
293
+ range, which defines the last timestamp (in seconds) that a clip may
294
+ *start* at. This value is exclusive, i.e. a clip may only start within
295
+ [``sampling_range_start``, ``sampling_range_end``). If None
296
+ (default), the value is set automatically such that the clips never
297
+ span beyond the end of the video, i.e. it is set to
298
+ ``end_video_seconds - (num_frames_per_clip - 1) *
299
+ seconds_between_frames``. When a clip spans beyond the end of the
300
+ video, the ``policy`` parameter defines how to construct such a clip.
301
+ policy (str, optional): Defines how to construct clips that span beyond
302
+ the end of the video. This is best described with an example:
303
+ assuming the last valid (seekable) timestamp in a video is 10.9, and
304
+ a clip was sampled to start at timestamp 10.5, with
305
+ ``num_frames_per_clip=5`` and ``seconds_between_frames=0.2``, the
306
+ sampling timestamps of the frames in the clip are supposed to be
307
+ [10.5, 10.7, 10.9, 11.1, 11.3]. But 11.1 and 11.3 are invalid
308
+ timestamps, so the ``policy`` parameter defines how to replace those
309
+ frames with valid sampling timestamps:
310
+
311
+ - "repeat_last": repeats the last valid frame of the clip. We would
312
+ get frames sampled at timestamps [10.5, 10.7, 10.9, 10.9, 10.9].
313
+ - "wrap": wraps around to the beginning of the clip. We would get
314
+ frames sampled at timestamps [10.5, 10.7, 10.9, 10.5, 10.7].
315
+ - "error": raises an error.
316
+
317
+ Default is "repeat_last". Note that when ``sampling_range_end=None``
318
+ (default), this policy parameter is unlikely to be relevant.
319
+
320
+ {return_docs}
321
+ """
322
+
323
+
324
+ _NUM_CLIPS_DOCS = """
325
+ num_clips (int, optional): The number of clips to return. Default: 1.
326
+ """
327
+ clips_at_random_timestamps .__doc__ = f"""Sample :term:`clips` at random timestamps.
328
+ { _COMMON_DOCS .format (maybe_note = "" , num_clips_or_seconds_between_clip_starts = _NUM_CLIPS_DOCS , return_docs = _FRAMEBATCH_RETURN_DOCS )}
329
+ """
330
+
331
+
332
+ _SECONDS_BETWEEN_CLIP_STARTS = """
333
+ seconds_between_clip_starts (float): The space (in seconds) between each
334
+ clip start.
335
+ """
336
+
337
+ _NOTE_DOCS = """
338
+ .. note::
339
+ For consistency with existing sampling APIs (such as torchvision), this
340
+ sampler takes a ``seconds_between_clip_starts`` parameter instead of
341
+ ``num_clips``. If you find that supporting ``num_clips`` would be
342
+ useful, please let us know by `opening a feature request
343
+ <https://github.com/pytorch/torchcodec/issues?q=is:open+is:issue>`_.
344
+ """
345
+ clips_at_regular_timestamps .__doc__ = f"""Sample :term:`clips` at regular (equally-spaced) timestamps.
346
+ { _COMMON_DOCS .format (maybe_note = _NOTE_DOCS , num_clips_or_seconds_between_clip_starts = _SECONDS_BETWEEN_CLIP_STARTS , return_docs = _FRAMEBATCH_RETURN_DOCS )}
347
+ """
0 commit comments