1
1
'''
2
2
AI Chat Module voice-to-text class
3
3
Randy Mackay, December 2023
4
+
5
+ Audio threshold algorithm courtesy of Primusa on StackOverflow: https://stackoverflow.com/questions/18406570/python-record-audio-on-detected-sound
4
6
'''
5
7
6
8
import time
9
+ import math
10
+ import struct
7
11
8
12
try :
9
13
import pyaudio # install using, "sudo apt-get install python3-pyaudio"
@@ -19,6 +23,15 @@ def __init__(self):
19
23
self .client = None
20
24
self .assistant = None
21
25
26
+ # initialise audio recording
27
+ self .p = pyaudio .PyAudio ()
28
+
29
+ # flag to enable/disable listening and recording
30
+ self .listening_and_recording_enabled = False
31
+
32
+ # audio stream is opened during listening and closed at the end of recording
33
+ self .stream = None
34
+
22
35
# set the OpenAI API key
23
36
def set_api_key (self , api_key_str ):
24
37
self .client = OpenAI (api_key = api_key_str )
@@ -37,34 +50,84 @@ def check_connection(self):
37
50
# return True if connected
38
51
return self .client is not None
39
52
40
- # record audio from microphone
41
- # returns filename of recording or None if failed
42
- def record_audio (self ):
43
- # Initialize PyAudio
44
- p = pyaudio .PyAudio ()
53
+ # listen for noise
54
+ # returns true if noise is detected, false if not
55
def listen_for_noise(self):
    """Open the microphone and block until noise is heard or listening is stopped.

    Returns True if noise was detected while listening was still enabled.  In
    that case the stream is left open in self.stream so that record_audio()
    can continue reading from it.

    Returns False if pyaudio is not initialised, the microphone could not be
    opened, or listening was disabled (see stop_listening_or_recording())
    before any noise was heard.  In the last case the stream is stopped,
    closed and self.stream is reset to None.
    """
    # audio stream parameters
    sample_rate = 44100
    chunk_size = 1024

    # check pyaudio is initialised
    if self.p is None:
        print("chat: pyaudio not initialised")
        return False

    # open stream
    try:
        self.stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
                                  input=True, frames_per_buffer=chunk_size)
    except Exception:
        # deliberately broad (any failure to open means "no microphone") but,
        # unlike a bare except, does not swallow KeyboardInterrupt/SystemExit
        print("chat: failed to connect to microphone")
        return False

    # enable listening and recording
    self.listening_and_recording_enabled = True

    keep_stream_open = False
    try:
        # throw away first 2 seconds of audio which can be noisy
        for _ in range(int(sample_rate / chunk_size * 2)):
            self.stream.read(chunk_size)

        # listen for noise until it is heard or listening is disabled
        noise_detected = False
        while self.listening_and_recording_enabled and not noise_detected:
            data = self.stream.read(chunk_size)
            if self.volume_over_threshold(data):
                noise_detected = True

        # succeed only if listening is still enabled and noise was detected
        keep_stream_open = self.listening_and_recording_enabled and noise_detected
        return keep_stream_open
    finally:
        # stop and close the stream unless it is being handed over for recording.
        # this also runs if a read fails so the stream is never leaked, and the
        # handle is cleared so later code does not try to use a closed stream
        if not keep_stream_open:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None
90
+
91
+ # stop listening for noise or recording audio
92
def stop_listening_or_recording(self):
    """Ask any in-progress listen or record loop to finish.

    Only clears the listening_and_recording_enabled flag; the loops in
    listen_for_noise() and record_audio() check this flag on every pass.
    """
    self.listening_and_recording_enabled = False
94
+
95
+ # record audio from microphone. should only be called after listen_for_noise() returns true
96
+ # returns filename of recording on success
97
+ # returns None if recording failed or the recording volume never reached the threshold (i.e. nothing was heard)
98
+ def record_audio (self ):
99
+ # check pyaudio is initialised
100
+ if self .p is None :
101
+ print ("chat: pyaudio not initialised" )
51
102
return None
52
103
53
- # calculate time recording should stop
104
+ # check stream is open and active
105
+ if self .stream is None or not self .stream .is_active () or self .stream .is_stopped ():
106
+ print ("chat: failed to connect to microphone" )
107
+ return None
108
+
109
+ # record for at least 2 seconds
54
110
curr_time = time .time ()
55
- time_stop = curr_time + 5
111
+ stop_time = curr_time + 2
56
112
57
- # record until specified time
113
+ # record until no noise is heard for 2 seconds or user unchecks the record button
114
+ noise_heard = False
58
115
frames = []
59
- while curr_time < time_stop :
60
- data = stream .read (1024 )
116
+ while curr_time < stop_time and self . listening_and_recording_enabled :
117
+ data = self . stream .read (1024 )
61
118
frames .append (data )
62
119
curr_time = time .time ()
120
+ if self .volume_over_threshold (data ):
121
+ noise_heard = True
122
+ stop_time = curr_time + 2
123
+
124
+ # if no noise was heard, return None
125
+ if not noise_heard :
126
+ return None
63
127
64
- # Stop and close the stream
65
- stream .stop_stream ()
66
- stream .close ()
67
- p .terminate ()
128
+ # stop and close the stream
129
+ self .stream .stop_stream ()
130
+ self .stream .close ()
68
131
69
132
# Save audio file
70
133
wf = wave .open ("recording.wav" , "wb" )
@@ -89,3 +152,25 @@ def convert_audio_to_text(self, audio_filename):
89
152
file = audio_file ,
90
153
response_format = "text" )
91
154
return transcript
155
+
156
+ # return true if the volume of a frame of audio is above a given threshold
157
@staticmethod
def volume_over_threshold(frame, threshold=10):
    """Return True if the RMS volume of a frame of audio is at or above threshold.

    frame: bytes holding signed 16-bit samples in native byte order
    threshold: RMS level to compare against; samples are normalised to the
               range -1 to 1 and the RMS is multiplied by 1000, so a
               full-scale signal gives a volume of about 1000

    Returns False for a frame that holds no complete sample.  A trailing
    partial sample (odd number of bytes) is ignored.
    """
    # size in bytes of one 16-bit sample. calculated with struct so that a new
    # PyAudio instance does not need to be created for every frame
    sample_size = struct.calcsize("h")

    # calculate number of whole samples in the frame
    num_samples = len(frame) // sample_size

    # protect against divide by zero
    if num_samples == 0:
        return False

    # unpack_from reads only the whole samples and ignores any trailing bytes
    shorts = struct.unpack_from("%dh" % num_samples, frame)

    # iterate over the frame and calculate the RMS volume
    sum_squares = 0.0
    for sample in shorts:
        n = sample * (1.0 / 32768.0)
        sum_squares += n * n
    volume_rms = math.sqrt(sum_squares / num_samples) * 1000

    # return true if volume is above threshold
    return volume_rms >= threshold
0 commit comments