-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfft_generator.py
57 lines (45 loc) · 2.18 KB
/
fft_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pathlib
import argparse
import warnings
import numpy as np
from tqdm import tqdm
from src.models.dataset import find_largest_waveform_size, AudioFileWindowDataset
from src.utils.tsv import TSVEntry, append_tsv
from src.utils.mp_util import round_robin_map
def parse_waveform_segment(tup):
    """Build one TSV row holding the FFT of a single waveform window.

    The work item arrives as a single tuple so the function can be handed
    directly to a map-style helper that passes one argument per call.

    Args:
        tup: ``(cols, fn, wfm, start, stop)`` where ``cols`` is the list of
            TSV column names, ``fn`` identifies the source file, ``wfm`` is
            the sliceable waveform, and ``start``/``stop`` bound the window
            as ``wfm[start:stop]``.

    Returns:
        A ``TSVEntry`` constructed from ``cols`` and the stringified values
        ``[fn, start, stop, fft]``, where ``fft`` is the comma-joined real
        part of ``np.fft.rfft`` over the window.
    """
    columns, filename, waveform, begin, end = tup

    # Only the real component of the transform is kept; the imaginary part
    # is discarded.
    # NOTE(review): if a magnitude spectrum was intended, this should be
    # np.abs(...) instead -- confirm with the consumer of the TSV.
    spectrum = np.fft.rfft(waveform[begin:end]).real

    fft_field = ','.join(str(value) for value in spectrum)
    fields = [str(filename), str(begin), str(end), str(fft_field)]
    return TSVEntry(columns, fields)
if __name__ == '__main__':
    # Command-line entry point: slide a fixed-size window over every waveform
    # yielded by the dataset, compute one FFT row per window position, and
    # append the rows to the output TSV file.
    #
    # NOTE(review): the parser description and the --output help text still
    # mention "KNN", which looks like a leftover from another script. The
    # strings are left untouched here; confirm and reword.
    parser = argparse.ArgumentParser(description='KNN Parser for the dataset to analyze phones')
    parser.add_argument('tsv_file', type=pathlib.Path, help='The TSV file to train from')
    parser.add_argument('clip_dir', type=pathlib.Path, help='The location of the .wav files')
    parser.add_argument('--wave_size', type=int, default=-1,
                        help='The output size of the waveform, for if you\'ve ran this before.')
    parser.add_argument('--window_size', type=int, default=20000,
                        help='The output size of the window to use for the fft.')
    parser.add_argument('--output', type=pathlib.Path, default=pathlib.Path('./fft.tsv'),
                        help='The file that you would like to save the KNN in')
    args = parser.parse_args()

    # A non-positive window would produce empty slices and make the FFT fail
    # deep inside the worker, so reject it up front with a usage error.
    if args.window_size <= 0:
        parser.error('--window_size must be a positive integer')

    tsv_columns = [
        'filename',
        'start',
        'stop',
        'fft',
    ]

    # A negative --wave_size (the default) means "not supplied": derive the
    # size from the audio clips instead.
    if args.wave_size < 0:
        # Fixed: this previously read args.phoneme_dir, which the parser never
        # defines, so every default run died with AttributeError. clip_dir is
        # the only directory argument.
        # NOTE(review): assumes find_largest_waveform_size expects the
        # directory of .wav files -- confirm against its definition.
        max_output_size = find_largest_waveform_size(args.clip_dir)
    else:
        max_output_size = args.wave_size

    # NOTE(review): the meaning of the trailing positional False is not
    # visible from this file; see AudioFileWindowDataset.
    dataset = AudioFileWindowDataset(args.tsv_file, args.clip_dir, max_output_size, False)

    # Silences every warning for the remainder of the run.
    warnings.filterwarnings('ignore')

    # Make sure the output file exists before rows are appended to it.
    args.output.touch()

    for waveform, sr, fname in dataset:
        print(f'parsing {fname} with {len(waveform)} values and sample rate of {sr}')

        # One work item per window start. The "+ 1" includes the final full
        # window that begins at len(waveform) - window_size; without it the
        # last position was skipped and a waveform exactly one window long
        # produced no rows at all. Waveforms shorter than the window yield an
        # empty range, hence no rows.
        window_count = len(waveform) - args.window_size + 1
        windows = [(tsv_columns, fname, waveform, window, window + args.window_size)
                   for window in range(window_count)]

        # Serial alternative, handy when debugging:
        # tsv_entries = list(map(parse_waveform_segment, tqdm(windows, desc='generating ffts')))
        tsv_entries = round_robin_map(windows, parse_waveform_segment)
        append_tsv(args.output, tsv_entries)