@@ -1345,20 +1345,29 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
1345
1345
static_cast <AVSampleFormat>(srcAVFrame->format );
1346
1346
AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP;
1347
1347
1348
+ StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
1348
1349
int sourceSampleRate = srcAVFrame->sample_rate ;
1349
1350
int desiredSampleRate =
1350
- streamInfos_[activeStreamIndex_].audioStreamOptions .sampleRate .value_or (
1351
- sourceSampleRate);
1351
+ streamInfo.audioStreamOptions .sampleRate .value_or (sourceSampleRate);
1352
1352
1353
1353
bool mustConvert =
1354
1354
(sourceSampleFormat != desiredSampleFormat ||
1355
1355
sourceSampleRate != desiredSampleRate);
1356
1356
1357
1357
UniqueAVFrame convertedAVFrame;
1358
1358
if (mustConvert) {
1359
+ if (!streamInfo.swrContext ) {
1360
+ streamInfo.swrContext .reset (createSwrContext (
1361
+ streamInfo.codecContext ,
1362
+ sourceSampleFormat,
1363
+ desiredSampleFormat,
1364
+ sourceSampleRate,
1365
+ desiredSampleRate));
1366
+ }
1367
+
1359
1368
convertedAVFrame = convertAudioAVFrameSampleFormatAndSampleRate (
1369
+ streamInfo.swrContext ,
1360
1370
srcAVFrame,
1361
- sourceSampleFormat,
1362
1371
desiredSampleFormat,
1363
1372
sourceSampleRate,
1364
1373
desiredSampleRate);
@@ -1393,77 +1402,6 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
1393
1402
}
1394
1403
}
1395
1404
1396
- UniqueAVFrame SingleStreamDecoder::convertAudioAVFrameSampleFormatAndSampleRate (
1397
- const UniqueAVFrame& srcAVFrame,
1398
- AVSampleFormat sourceSampleFormat,
1399
- AVSampleFormat desiredSampleFormat,
1400
- int sourceSampleRate,
1401
- int desiredSampleRate) {
1402
- auto & streamInfo = streamInfos_[activeStreamIndex_];
1403
-
1404
- if (!streamInfo.swrContext ) {
1405
- createSwrContext (
1406
- streamInfo,
1407
- sourceSampleFormat,
1408
- desiredSampleFormat,
1409
- sourceSampleRate,
1410
- desiredSampleRate);
1411
- }
1412
-
1413
- UniqueAVFrame convertedAVFrame (av_frame_alloc ());
1414
- TORCH_CHECK (
1415
- convertedAVFrame,
1416
- " Could not allocate frame for sample format conversion." );
1417
-
1418
- setChannelLayout (convertedAVFrame, srcAVFrame);
1419
- convertedAVFrame->format = static_cast <int >(desiredSampleFormat);
1420
- convertedAVFrame->sample_rate = desiredSampleRate;
1421
- if (sourceSampleRate != desiredSampleRate) {
1422
- // Note that this is an upper bound on the number of output samples.
1423
- // `swr_convert()` will likely not fill convertedAVFrame with that many
1424
- // samples if sample rate conversion is needed. It will buffer the last few
1425
- // ones because those require future samples. That's also why we reset
1426
- // nb_samples after the call to `swr_convert()`.
1427
- // We could also use `swr_get_out_samples()` to determine the number of
1428
- // output samples, but empirically `av_rescale_rnd()` seems to provide a
1429
- // tighter bound.
1430
- convertedAVFrame->nb_samples = av_rescale_rnd (
1431
- swr_get_delay (streamInfo.swrContext .get (), sourceSampleRate) +
1432
- srcAVFrame->nb_samples ,
1433
- desiredSampleRate,
1434
- sourceSampleRate,
1435
- AV_ROUND_UP);
1436
- } else {
1437
- convertedAVFrame->nb_samples = srcAVFrame->nb_samples ;
1438
- }
1439
-
1440
- auto status = av_frame_get_buffer (convertedAVFrame.get (), 0 );
1441
- TORCH_CHECK (
1442
- status == AVSUCCESS,
1443
- " Could not allocate frame buffers for sample format conversion: " ,
1444
- getFFMPEGErrorStringFromErrorCode (status));
1445
-
1446
- auto numConvertedSamples = swr_convert (
1447
- streamInfo.swrContext .get (),
1448
- convertedAVFrame->data ,
1449
- convertedAVFrame->nb_samples ,
1450
- static_cast <const uint8_t **>(
1451
- const_cast <const uint8_t **>(srcAVFrame->data )),
1452
- srcAVFrame->nb_samples );
1453
- // numConvertedSamples can be 0 if we're downsampling by a great factor and
1454
- // the first frame doesn't contain a lot of samples. It should be handled
1455
- // properly by the caller.
1456
- TORCH_CHECK (
1457
- numConvertedSamples >= 0 ,
1458
- " Error in swr_convert: " ,
1459
- getFFMPEGErrorStringFromErrorCode (numConvertedSamples));
1460
-
1461
- // See comment above about nb_samples
1462
- convertedAVFrame->nb_samples = numConvertedSamples;
1463
-
1464
- return convertedAVFrame;
1465
- }
1466
-
1467
1405
std::optional<torch::Tensor> SingleStreamDecoder::maybeFlushSwrBuffers () {
1468
1406
// When sample rate conversion is involved, swresample buffers some of the
1469
1407
// samples in-between calls to swr_convert (see the libswresample docs).
@@ -1735,30 +1673,6 @@ void SingleStreamDecoder::createSwsContext(
1735
1673
streamInfo.swsContext .reset (swsContext);
1736
1674
}
1737
1675
1738
- void SingleStreamDecoder::createSwrContext (
1739
- StreamInfo& streamInfo,
1740
- AVSampleFormat sourceSampleFormat,
1741
- AVSampleFormat desiredSampleFormat,
1742
- int sourceSampleRate,
1743
- int desiredSampleRate) {
1744
- auto swrContext = allocateSwrContext (
1745
- streamInfo.codecContext ,
1746
- sourceSampleFormat,
1747
- desiredSampleFormat,
1748
- sourceSampleRate,
1749
- desiredSampleRate);
1750
-
1751
- auto status = swr_init (swrContext);
1752
- TORCH_CHECK (
1753
- status == AVSUCCESS,
1754
- " Couldn't initialize SwrContext: " ,
1755
- getFFMPEGErrorStringFromErrorCode (status),
1756
- " . If the error says 'Invalid argument', it's likely that you are using "
1757
- " a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
1758
- " valid scenarios. Try to upgrade FFmpeg?" );
1759
- streamInfo.swrContext .reset (swrContext);
1760
- }
1761
-
1762
1676
// --------------------------------------------------------------------------
1763
1677
// PTS <-> INDEX CONVERSIONS
1764
1678
// --------------------------------------------------------------------------
0 commit comments