Skip to content

Commit

Permalink
Refactor WhisperTranscriber initialization and silence duration; update constant references in main and add static methods to configuration classes
Browse files Browse the repository at this point in the history
  • Loading branch information
S0L0GUY committed Jan 30, 2025
1 parent 361da49 commit a5384dc
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 7 deletions.
9 changes: 4 additions & 5 deletions classes/whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class WhisperTranscriber:

def __init__(self):
"""Initialize the WhisperTranscriber with the base Whisper model."""
self.model = whisper.load_model("base")
self.model = whisper.load("base")

def transcribe_file(self, audio_file_path):
"""
Expand Down Expand Up @@ -53,7 +53,7 @@ def get_speech_input(self):
rate = 16000
chunk = 1024
silence_threshold = -40 # Silence threshold in dB
silence_duration = 1000 # Duration of silence in ms (1 second)
silence_duration = 1 # Duration of silence in seconds

# Open the audio stream
stream = p.open(format=audio_format,
Expand Down Expand Up @@ -96,7 +96,7 @@ def get_speech_input(self):
# Save the recorded data to a WAV file
with wave.open('temp.wav', 'wb') as wf:
wf.setnchannels(channels)
wf.setsampwidth(p.get_sample_size(format))
wf.setsampwidth(p.get_sample_size(audio_format))
wf.setframerate(rate)
wf.writeframes(b''.join(frames))

Expand All @@ -115,5 +115,4 @@ def get_speech_input(self):

if text not in unwanted_responses:
return text
else:
return ""
return ""
5 changes: 5 additions & 0 deletions constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@staticmethod
class Network:
"""
Class representing network configuration parameters.
Expand All @@ -11,6 +12,7 @@ class Network:
VRC_PORT = 9000


@staticmethod
class Audio:
"""
A class containing audio device configuration constants.
Expand All @@ -26,6 +28,7 @@ class Audio:
AUDIO_INPUT_INDEX = 16


@staticmethod
class Voice:
"""
A class that defines constants for voice-related configurations in
Expand All @@ -37,6 +40,7 @@ class Voice:
VOICE_NAME = "Zira"


@staticmethod
class LanguageModel:
"""
A class representing configuration settings for a language model.
Expand All @@ -53,6 +57,7 @@ class LanguageModel:
LM_TEMPERATURE = 0.5


@staticmethod
class FilePaths:
"""
A class containing file path constants used in the application.
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def main():
Exception: If an error occurs during the execution of nova.run_code().
"""

osc = VRChatOSC(constant.LOCAL_IP, constant.VRC_PORT)
osc = VRChatOSC(constant.Network.LOCAL_IP, constant.Network.VRC_PORT)
while True:
try:
print("Program Starting...")
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ translate
googletrans
requests
sounddevice
soundfile
soundfile

0 comments on commit a5384dc

Please sign in to comment.