Encoder: properly validate sample rate parameter (#624)

NicolasHug · web-flow · commit 3280b90b3c33 · 2025-04-08T11:16:12.000+01:00
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -1,8 +1,40 @@
+#include <sstream>
+
 #include "src/torchcodec/_core/Encoder.h"
 #include "torch/types.h"
 
 namespace facebook::torchcodec {
 
+namespace {
+
+// Fails a TORCH_CHECK unless sampleRate is listed by avCodec; a null
+// supported_samplerates list means there is nothing to validate against.
+void validateSampleRate(const AVCodec& avCodec, int sampleRate) {
+  const int* const rates = avCodec.supported_samplerates;
+  if (rates == nullptr) {
+    return;
+  }
+  // The list is walked until its terminating 0 entry.
+  for (const int* rate = rates; *rate != 0; ++rate) {
+    if (*rate == sampleRate) {
+      return;
+    }
+  }
+  // No match: build "r0, r1, ..." for the error message.
+  std::stringstream supportedRates;
+  for (const int* rate = rates; *rate != 0; ++rate) {
+    supportedRates << (rate == rates ? "" : ", ") << *rate;
+  }
+  TORCH_CHECK(
+      false,
+      "invalid sample rate=",
+      sampleRate,
+      ". Supported sample rate values are: ",
+      supportedRates.str());
+}
+
+} // namespace
+
 AudioEncoder::~AudioEncoder() {}
 
 // TODO-ENCODING: disable ffmpeg logs by default
@@ -12,7 +44,7 @@ AudioEncoder::AudioEncoder(
     int sampleRate,
     std::string_view fileName,
     std::optional<int64_t> bit_rate)
-    : wf_(wf), sampleRate_(sampleRate) {
+    : wf_(wf) {
   TORCH_CHECK(
       wf_.dtype() == torch::kFloat32,
       "waveform must have float32 dtype, got ",
@@ -57,7 +89,8 @@ AudioEncoder::AudioEncoder(
   // well when "-b:a" isn't specified.
   avCodecContext_->bit_rate = bit_rate.value_or(0);
 
-  avCodecContext_->sample_rate = sampleRate_;
+  validateSampleRate(*avCodec, sampleRate);
+  avCodecContext_->sample_rate = sampleRate;
 
   // Note: This is the format of the **input** waveform. This doesn't determine
   // the output.
diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h
@@ -14,6 +14,10 @@ class AudioEncoder {
   // supported.
   AudioEncoder(
       const torch::Tensor wf,
+      // The *output* sample rate. We can't really decide for the user what it
+      // should be. Particularly, the sample rate of the input waveform should
+      // match this, and that's up to the user. If sample rates don't match,
+      // encoding will still work but audio will be distorted.
       int sampleRate,
       std::string_view fileName,
       std::optional<int64_t> bit_rate = std::nullopt);
@@ -30,13 +34,5 @@ class AudioEncoder {
   int streamIndex_;
 
   const torch::Tensor wf_;
-  // The *output* sample rate. We can't really decide for the user what it
-  // should be. Particularly, the sample rate of the input waveform should match
-  // this, and that's up to the user. If sample rates don't match, encoding will
-  // still work but audio will be distorted.
-  // We technically could let the user also specify the input sample rate, and
-  // resample the waveform internally to match them, but that's not in scope for
-  // an initial version (if at all).
-  int sampleRate_;
 };
 } // namespace facebook::torchcodec
diff --git a/test/test_ops.py b/test/test_ops.py
@@ -1107,9 +1107,7 @@ def test_bad_input(self, tmp_path):
                 wf=torch.rand(10, 10), sample_rate=10, filename="./file.bad_extension"
             )
 
-        # TODO-ENCODING: raise more informative error message when sample rate
-        # isn't supported
-        with pytest.raises(RuntimeError, match="Invalid argument"):
+        with pytest.raises(RuntimeError, match="invalid sample rate=10"):
             create_audio_encoder(
                 wf=self.decode(NASA_AUDIO_MP3),
                 sample_rate=10,