-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrim.py
36 lines (29 loc) · 1.1 KB
/
trim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from in_out import load_sample, save_sample
import librosa
import numpy as np
from tqdm import tqdm
def trim_sample(y: np.ndarray, top_db: float = 45):
    '''
    Trims a single waveform with librosa.effects.trim
    Input arguments:
    * y (np.ndarray): The waveform to trim
    * top_db (float=45): Forwarded to librosa.effects.trim as its top_db
    threshold
    Returns the first element of the result of librosa.effects.trim, i.e.
    the trimmed signal; the second element is discarded.
    '''
    trim_result = librosa.effects.trim(y, top_db=top_db)
    trimmed_signal = trim_result[0]
    return trimmed_signal
def _trim_file(path, top_db: float = 45):
    '''
    Loads one file, trims it with trim_sample and hands the result back to
    save_sample together with the same path and sample rate.
    Kept at module level so it can be pickled and sent to worker processes.
    '''
    y, sr = load_sample(path)
    save_sample(trim_sample(y, top_db), path, sr)


def trim_directory(directory, top_db: float = 45, max_workers: int = 16):
    '''
    Runs trim_sample on all samples in a directory
    Each file is loaded, trimmed and passed to save_sample with its original
    path (presumably overwriting the file in place - confirm against
    in_out.save_sample).
    Input arguments:
    * directory (str): A path to a directory containing one or more waveform files
    * top_db (float): The threshold at both ends of recordings that has to be
    crossed to be counted as non-silence
    * max_workers (int=16): The number of parallel workers
    Raises whatever exception a worker raised while processing a file.
    '''
    # Only regular files are processed; sub-directories returned by
    # os.listdir cannot be loaded as samples.
    candidates = (os.path.join(directory, fname) for fname in os.listdir(directory))
    paths = [path for path in candidates if os.path.isfile(path)]

    worker = partial(_trim_file, top_db=top_db)
    # The with-block guarantees the pool is shut down, even on error.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Draining the map iterator drives the progress bar and re-raises
        # the first exception raised inside a worker.
        for _ in tqdm(executor.map(worker, paths), total=len(paths)):
            pass