-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy pathpreprocess_resample.py
144 lines (120 loc) · 4.34 KB
/
preprocess_resample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from __future__ import annotations
import warnings
from logging import getLogger
from pathlib import Path
from typing import Iterable
import librosa
import soundfile
from joblib import Parallel, delayed
from tqdm_joblib import tqdm_joblib
from .preprocess_utils import check_hubert_min_duration
LOG = getLogger(__name__)
# input_dir and output_dir exist.
# write code to convert input dir audio files to output dir audio files,
# without changing folder structure. Use joblib to parallelize.
# Converting audio files includes:
# - resampling to specified sampling rate
# - trim silence
# - adjust volume in a smart way
# - save as 16-bit wav file
def _get_unique_filename(path: Path, existing_paths: Iterable[Path]) -> Path:
    """Return ``path`` or a numbered variant of it that is not already taken.

    Args:
        path: The preferred path.
        existing_paths: Paths that are already in use. Any iterable is
            accepted, including one-shot iterators such as generators.

    Returns:
        ``path`` itself when it is not in ``existing_paths``; otherwise the
        first of ``<stem>_1<suffix>``, ``<stem>_2<suffix>``, ... (in the same
        parent directory) that is not in ``existing_paths``.
    """
    # Materialize once: repeated ``in`` tests would exhaust a one-shot iterator
    # after the first check (making taken names look free) and would rescan a
    # list for every candidate. Sets are reused as-is to avoid a copy.
    if isinstance(existing_paths, (set, frozenset)):
        taken = existing_paths
    else:
        taken = set(existing_paths)

    if path not in taken:
        return path
    i = 1
    while True:
        new_path = path.parent / f"{path.stem}_{i}{path.suffix}"
        if new_path not in taken:
            return new_path
        i += 1
def is_relative_to(path: Path, *other):
    """Tell whether ``path`` lies under the location described by ``other``.

    This is a backport of ``Path.is_relative_to()`` (available from
    Python 3.9) so that Python 3.8 keeps working. ``other`` is forwarded
    unchanged to ``Path.relative_to()``.

    Returns True when ``path.relative_to(*other)`` succeeds and False when it
    raises ``ValueError``.
    """
    try:
        path.relative_to(*other)
    except ValueError:
        # relative_to() reports "not a sub-path" by raising ValueError.
        return False
    return True
def _preprocess_one(
    input_path: Path,
    output_path: Path,
    sr: int,
    *,
    top_db: int,
    frame_seconds: float,
    hop_seconds: float,
) -> None:
    """Load, peak-normalize, trim and save one audio file as 16-bit PCM WAV.

    Nothing is written when the file cannot be loaded, when it is completely
    silent, or when ``check_hubert_min_duration`` rejects it either before or
    after silence trimming.

    Args:
        input_path: Audio file to read.
        output_path: Destination file handed to ``soundfile.write``.
        sr: Sampling rate passed to ``librosa.load``; the file is loaded as
            mono at this rate.
        top_db: Threshold passed to ``librosa.effects.trim``.
        frame_seconds: Trim analysis frame length in seconds (converted to
            samples with the loaded sampling rate).
        hop_seconds: Trim hop length in seconds (converted to samples with the
            loaded sampling rate).
    """
    try:
        audio, sr = librosa.load(input_path, sr=sr, mono=True)
    # Audioread is the last backend it will attempt, so this is the exception thrown on failure
    except Exception as e:
        # Failure due to attempting to load a file that is not audio, so return early
        LOG.warning(f"Failed to load {input_path} due to {e}")
        return

    if not check_hubert_min_duration(audio, sr):
        LOG.info(f"Skip {input_path} because it is too short.")
        return

    # Adjust volume: scale so the largest absolute sample becomes 1.0.
    # A peak of 0 means the file is pure silence; dividing by it would fill the
    # buffer with NaNs, so such files are skipped instead.
    peak = max(audio.max(), -audio.min())
    if peak == 0:
        LOG.info(f"Skip {input_path} because it is silent.")
        return
    audio /= peak

    # Trim silence
    audio, _ = librosa.effects.trim(
        audio,
        top_db=top_db,
        frame_length=int(frame_seconds * sr),
        hop_length=int(hop_seconds * sr),
    )

    # Trimming may have shortened the clip below the minimum duration.
    if not check_hubert_min_duration(audio, sr):
        LOG.info(f"Skip {input_path} because it is too short.")
        return

    soundfile.write(output_path, audio, samplerate=sr, subtype="PCM_16")
def preprocess_resample(
    input_dir: Path | str,
    output_dir: Path | str,
    sampling_rate: int,
    n_jobs: int = -1,
    *,
    top_db: int = 30,
    frame_seconds: float = 0.1,
    hop_seconds: float = 0.05,
) -> None:
    """Preprocess audio files in input_dir and save them to output_dir.

    Every file matching ``*.*`` below ``input_dir`` whose path relative to
    ``input_dir`` looks like ``<speaker>/.../<file>`` is processed by
    ``_preprocess_one`` and written to ``output_dir/<speaker>/<stem>.wav``.
    Deeper sub-folders are flattened into the speaker folder and name clashes
    are resolved with a numeric suffix. Files located directly in
    ``input_dir`` (no speaker folder) are ignored.

    Args:
        input_dir: Root folder that is searched recursively.
        output_dir: Root folder for the results; speaker sub-folders are
            created as needed.
        sampling_rate: Target sampling rate forwarded to ``_preprocess_one``.
        n_jobs: Number of parallel jobs forwarded to ``joblib.Parallel``.
        top_db: Silence-trimming threshold forwarded to ``_preprocess_one``.
        frame_seconds: Trim frame length in seconds.
        hop_seconds: Trim hop length in seconds.

    Raises:
        ValueError: If nothing matching ``*.*`` is found under ``input_dir``.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    in_paths = list(input_dir.rglob("*.*"))
    if not in_paths:
        raise ValueError(f"No audio files found in {input_dir}")

    legacy_root = Path("dataset_raw") / "44k"
    # Inputs and outputs are collected as pairs so that a skipped input can
    # never shift the pairing between the two.
    in_and_out_paths: list[tuple[Path, Path]] = []
    # Set of already assigned outputs for O(1) uniqueness checks.
    used_out_paths: set[Path] = set()

    for in_path in in_paths:
        # "*.*" also matches directories whose name contains a dot.
        if not in_path.is_file():
            continue

        in_path_relative = in_path.relative_to(input_dir)
        # Legacy layout dataset_raw/44k/<speaker>/...: drop the "44k" level.
        # The last condition guards against input_dir already pointing at the
        # 44k folder, in which case there is no "44k" prefix left to strip.
        if (
            not in_path.is_absolute()
            and is_relative_to(in_path, legacy_root)
            and is_relative_to(in_path_relative, "44k")
        ):
            new_in_path_relative = in_path_relative.relative_to("44k")
            warnings.warn(
                f"Recommended folder structure has changed since v1.0.0. "
                "Please move your dataset directly under dataset_raw folder. "
                f"Recognized {in_path_relative} as {new_in_path_relative}"
            )
            in_path_relative = new_in_path_relative

        # Files without a speaker folder cannot be attributed to a speaker.
        if len(in_path_relative.parts) < 2:
            continue

        speaker_name = in_path_relative.parts[0]
        file_name = in_path_relative.with_suffix(".wav").name
        out_path = output_dir / speaker_name / file_name
        out_path = _get_unique_filename(out_path, used_out_paths)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        used_out_paths.add(out_path)
        in_and_out_paths.append((in_path, out_path))

    with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
        Parallel(n_jobs=n_jobs)(
            delayed(_preprocess_one)(
                *args,
                sr=sampling_rate,
                top_db=top_db,
                frame_seconds=frame_seconds,
                hop_seconds=hop_seconds,
            )
            for args in in_and_out_paths
        )