Skip to content

Commit 59e9efd

Browse files
committed
py files
0 parents  commit 59e9efd

File tree

11 files changed

+3987
-0
lines changed

11 files changed

+3987
-0
lines changed

audio_io.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import av
2+
# import torchaudio
3+
import numpy as np
4+
from fractions import Fraction
5+
6+
7+
# def load_audio_torchaudio(fn):
8+
# data, sr = torchaudio.load(fn)
9+
# return data, sr
10+
11+
12+
def open_audio_av(path):
    """Open ``path`` with PyAV and pin its codecs to a single thread.

    Every video stream, then every audio stream, gets its codec context set
    to ``ThreadType.NONE`` with a thread count of 1.

    Args:
        path: Location handed straight to ``av.open``.

    Returns:
        The opened container; the caller is responsible for closing it.
    """
    container = av.open(path)
    # Video streams are configured first, then audio streams.
    for kind in ("video", "audio"):
        for stream in getattr(container.streams, kind):
            ctx = stream.codec_context
            ctx.thread_type = av.codec.context.ThreadType.NONE
            ctx.thread_count = 1
    return container
21+
22+
23+
def load_audio_av(path=None, container=None, rate=None, start_time=None, duration=None, layout="mono"):
    """Decode a clip from the first audio stream and scale it by the int16 maximum.

    Args:
        path: File to open with ``av.open`` when ``container`` is not supplied.
        container: An already opened PyAV container. When given, it is used
            as-is and left open for the caller.
        rate: Output sample rate. Defaults to the rate of the audio stream.
        start_time: Clip start in seconds. Defaults to the stream start time.
        duration: Clip length in seconds. Defaults to, and is clamped to,
            what remains of the stream after ``start_time``.
        layout: Channel layout passed to the resampler.

    Returns:
        A ``(audio, rate)`` tuple. ``audio`` is a float array with samples
        along axis 1, obtained by dividing the signed 16-bit samples by the
        int16 maximum; ``rate`` is the output sample rate.

    Raises:
        ValueError: If neither the stream nor the container reports a
            duration, or if no audio frame could be decoded for the clip.
    """
    owns_container = container is None
    if owns_container:
        container = av.open(path)
    try:
        return _load_audio_from_container(container, rate, start_time, duration, layout)
    finally:
        # Only close what was opened here; a caller-supplied container stays open.
        if owns_container:
            container.close()


def _load_audio_from_container(container, rate, start_time, duration, layout):
    """Decode, trim and scale one clip from an already opened container."""
    audio_stream = container.streams.audio[0]

    # Parse metadata
    _ss = audio_stream.start_time * audio_stream.time_base if audio_stream.start_time is not None else 0.
    if audio_stream.duration is not None:
        _dur = audio_stream.duration * audio_stream.time_base
    elif container.duration is not None:
        # Some formats only report a container-level duration, which is
        # expressed in av.time_base units.
        _dur = Fraction(container.duration, av.time_base)
    else:
        raise ValueError("Unable to determine the audio duration from stream or container metadata.")
    _ff = _ss + _dur
    _rate = audio_stream.rate

    if rate is None:
        rate = _rate
    if start_time is None:
        start_time = _ss
    if duration is None:
        duration = _ff - start_time
    duration = min(duration, _ff - start_time)
    end_time = start_time + duration

    resampler = av.audio.resampler.AudioResampler(format="s16p", layout=layout, rate=rate)

    # Read data
    chunks = []
    container.seek(int(start_time * av.time_base))
    for frame in container.decode(audio=0):
        chunk_start_time = frame.pts * frame.time_base
        chunk_end_time = chunk_start_time + Fraction(frame.samples, frame.rate)
        if chunk_end_time < start_time:  # Skip until start time
            continue
        if chunk_start_time > end_time:  # Exit if clip has been extracted
            break

        try:
            # NOTE(review): pts is cleared before resampling, presumably so the
            # resampler does not reject timestamps after the seek - confirm.
            frame.pts = None
            resampled = resampler.resample(frame)
            if not resampled:
                # The resampler may buffer input and return nothing for this frame.
                continue
            # Keep every frame the resampler returns, not just the first one.
            samples = np.concatenate([out.to_ndarray() for out in resampled], 1)
            chunks.append((chunk_start_time, samples))
        except AttributeError:
            break

    if not chunks:
        raise ValueError("No audio frames were decoded for the requested time range.")

    # Trim for frame accuracy
    audio = np.concatenate([af[1] for af in chunks], 1)
    ss = int((start_time - chunks[0][0]) * rate)
    t = int(duration * rate)
    if ss < 0:
        # The first decoded chunk starts after the requested start: left-pad with silence.
        audio = np.pad(audio, ((0, 0), (-ss, 0)), 'constant', constant_values=0)
        ss = 0
    audio = audio[:, ss: ss + t]

    # Normalize to [-1, 1]
    audio = audio / np.iinfo(audio.dtype).max

    return audio, rate
78+
79+
80+
def audio_info_av(inpt, audio=None, format=None):
    """Collect basic metadata about one audio stream.

    Args:
        inpt: Either a path string, which is opened with ``av.open`` and
            closed again before returning, or an already opened container,
            which is left open.
        audio: Index of the audio stream to inspect. ``None`` selects the
            first audio stream.
        format: Container format hint forwarded to ``av.open`` when ``inpt``
            is a path.

    Returns:
        A dict with the keys ``channels``, ``fps``, ``start_time``,
        ``duration``, ``chunks`` and ``chunk_size``. ``start_time`` and
        ``duration`` are the stream values multiplied by the stream time
        base; ``start_time`` falls back to ``0.`` and ``duration`` to
        ``None`` when the stream does not report them. If the path cannot be
        opened, the tuple ``(None, None)`` is returned instead.
    """
    opened_here = isinstance(inpt, str)
    container = inpt
    if opened_here:
        try:
            container = av.open(inpt, format=format)
        except av.AVError:
            return None, None

    try:
        # The default of None would otherwise be used as a list index and fail.
        audio_stream = container.streams.audio[0 if audio is None else audio]
        time_base = audio_stream.time_base
        raw_duration = audio_stream.duration
        raw_start = audio_stream.start_time
        # Streams may not report a duration or start time; avoid None * Fraction.
        duration = raw_duration * time_base if raw_duration is not None else None
        start_time = raw_start * time_base if raw_start is not None else 0.
        meta = {'channels': audio_stream.channels,
                'fps': audio_stream.rate,
                'start_time': start_time,
                'duration': duration,
                'chunks': audio_stream.frames,
                'chunk_size': audio_stream.frame_size}
        return meta
    finally:
        # Only close containers opened by this function.
        if opened_here:
            container.close()

0 commit comments

Comments
 (0)