recognize.py
import os
import json
import glob
from typing import Dict

import whisper_timestamped as whisper

from utils import extract_speaker_name


def recognize(input_dir: str, names: Dict[str, str], fast: bool = False,
              model_type: str = "small", device: str = "cuda", audio_ext: str = "ogg"):
    # In fast mode, fall back to the smallest Whisper model.
    model_type = "tiny" if fast else model_type
    model = whisper.load_model(model_type, device=device)

    print()
    print("--------------------")
    print("RECOGNIZE")
    print("--------------------")
    print()

    # glob already returns paths that include input_dir, so the matches can be
    # used directly later on without joining them to input_dir a second time.
    files = glob.glob(os.path.join(input_dir, '*.' + audio_ext))
    if not files:
        print(f"  No {audio_ext} files were found at {input_dir}.")
        print()
        return

    print(f"  {len(files)} {audio_ext} files found at {input_dir}.")

    for audio_file in files:
        print(f"  - {audio_file}...")
        speaker = extract_speaker_name(audio_file, audio_ext)

        # Skip speakers that are explicitly mapped to a blank name.
        if speaker in names and (names[speaker] is None or names[speaker] == ''):
            print(f"    Skipping {audio_file} because '{speaker}' is specified as blank.")
            print()
            continue

        audio = whisper.load_audio(audio_file)
        if fast:
            results = whisper.transcribe(model, audio, detect_disfluencies=True, vad="auditok")
        else:
            # Slower but more accurate decoding: beam search plus temperature fallback.
            results = whisper.transcribe(model, audio, detect_disfluencies=True, vad="auditok",
                                         beam_size=5, best_of=5,
                                         temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0))

        # Store the word-level transcription next to the audio file.
        json_file = audio_file + '.words.json'
        with open(json_file, 'w') as f:
            f.write(json.dumps(results))
        print(f"    Saved to {json_file}")
        print()

    print("--------------------")