Refactor WhisperTranscriber initialization and silence duration; update constant references in main and add static methods to configuration classes
S0L0GUY · Jan 30, 2025 · a5384dc · a5384dc
1 parent 361da49
commit a5384dc
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 7 deletions.
diff --git a/classes/whisper.py b/classes/whisper.py
@@ -19,7 +19,7 @@ class WhisperTranscriber:
 
     def __init__(self):
         """Initialize the WhisperTranscriber with the base Whisper model."""
-        self.model = whisper.load_model("base")
+        self.model = whisper.load("base")
 
     def transcribe_file(self, audio_file_path):
         """
@@ -53,7 +53,7 @@ def get_speech_input(self):
         rate = 16000
         chunk = 1024
         silence_threshold = -40  # Silence threshold in dB
-        silence_duration = 1000  # Duration of silence in ms (1 second)
+        silence_duration = 1  # Duration of silence in seconds
 
         # Open the audio stream
         stream = p.open(format=audio_format,
@@ -96,7 +96,7 @@ def get_speech_input(self):
         # Save the recorded data to a WAV file
         with wave.open('temp.wav', 'wb') as wf:
             wf.setnchannels(channels)
-            wf.setsampwidth(p.get_sample_size(format))
+            wf.setsampwidth(p.get_sample_size(audio_format))
             wf.setframerate(rate)
             wf.writeframes(b''.join(frames))
 
@@ -115,5 +115,4 @@ def get_speech_input(self):
 
         if text not in unwanted_responses:
             return text
-        else:
-            return ""
+        return ""
diff --git a/constants.py b/constants.py
@@ -1,3 +1,4 @@
+@staticmethod
 class Network:
     """
     Class representing network configuration parameters.
@@ -11,6 +12,7 @@ class Network:
     VRC_PORT = 9000
 
 
+@staticmethod
 class Audio:
     """
     A class containing audio device configuration constants.
@@ -26,6 +28,7 @@ class Audio:
     AUDIO_INPUT_INDEX = 16
 
 
+@staticmethod
 class Voice:
     """
     A class that defines constants for voice-related configurations in
@@ -37,6 +40,7 @@ class Voice:
     VOICE_NAME = "Zira"
 
 
+@staticmethod
 class LanguageModel:
     """
     A class representing configuration settings for a language model.
@@ -53,6 +57,7 @@ class LanguageModel:
     LM_TEMPERATURE = 0.5
 
 
+@staticmethod
 class FilePaths:
     """
     A class containing file path constants used in the application.

diff --git a/main.py b/main.py
@@ -33,7 +33,7 @@ def main():
         Exception: If an error occurs during the execution of nova.run_code().
     """
 
-    osc = VRChatOSC(constant.LOCAL_IP, constant.VRC_PORT)
+    osc = VRChatOSC(constant.Network.LOCAL_IP, constant.Network.VRC_PORT)
     while True:
         try:
             print("Program Starting...")

diff --git a/requirements.txt b/requirements.txt
@@ -10,4 +10,4 @@ translate
 googletrans
 requests
 sounddevice
-soundfile
+soundfile