@@ -18,34 +18,19 @@
 ############################################################################

 import os
-import time
-from sys import byteorder
 from array import array
-from struct import pack
+import time
 import logging
-
-import pyaudio
 import wave
 import audioop
-import logging
+import pyaudio

 try:
     from pocketsphinx.pocketsphinx import Decoder
     from sphinxbase.sphinxbase import *
-except:
+except Exception:
     logging.info("pocketsphinx not available")

-## GOOGLE Speech API ##
-# [START import_libraries]
-#from __future__ import division
-
-import re
-import sys
-
-#from google.cloud import speech
-#from google.cloud.speech import enums
-#from google.cloud.speech import types
-import pyaudio
 from six.moves import queue
 # [END import_libraries]

@@ -76,7 +61,7 @@ def __init__(self):
             # frames_per_buffer=CHUNK)
             #self.stream_in.start_stream()
             self.stream_in = self.MicrophoneStream(FORMAT, RATE, CHUNK)
-        except Exception as e:
+        except Exception:
             logging.info("Audio: input stream not available")

         #self._google_speech_client = speech.SpeechClient()
@@ -89,36 +74,32 @@ def exit(self):

     def say(self, what, locale='en'):
         if what and "$" in what:
-            os.system('omxplayer sounds/' + what[1:])
-        elif what and len(what):
-            os.system('espeak --stdout -v' + locale + ' -p 90 -a 200 -s 150 -g 10 "' + what + '" 2>>/dev/null | aplay -D hw:1,0')
+            os.system('omxplayer sounds/' + what[1:])
+        elif what and what:
+            os.system('espeak --stdout -v' + locale + ' -p 90 -a 200 -s 150 -g 10 "' + what + '" 2>>/dev/null | aplay -D hw:1,0')

     def normalize(self, snd_data):
         "Average the volume out"
         MAXIMUM = 16384
-        times = float(MAXIMUM)/max(abs(i) for i in snd_data)
+        times = float(MAXIMUM) / max(abs(i) for i in snd_data)

         r = array('h', snd_data)
         c = 0
         for i in snd_data:
             r[c] = int(i * times)
-            c += 1
+            c += 1
         return r

     def record(self, elapse):
-        num_silent = 0
-        snd_started = False
-        c = 0
-
         r = bytearray()
-
         t = time.time()
         with self.stream_in as stream:
             audio_generator = stream.generator()
             for content in audio_generator:
                 r.extend(content)
                 if time.time() - t >= elapse:
                     return r
+        return r

     def record_to_file(self, filename, elapse):
         data = self.record(elapse)
@@ -132,7 +113,7 @@ def record_to_file(self, filename, elapse):
         wf.close()

     def play(self, filename):
-        os.system('omxplayer sounds/' + filename)
+        os.system('omxplayer sounds/' + filename)

         """
         # open the file for reading.
@@ -160,8 +141,6 @@ def play(self, filename):
         """

     def hear(self, level, elapse=1.0):
-        ts_total = time.time()
-
         t = time.time()
         with self.stream_in as stream:
             audio_generator = stream.generator()
@@ -171,9 +150,9 @@ def hear(self, level, elapse=1.0):
                     return True
                 if time.time() - t >= elapse:
                     return False
+        return False

     def speech_recog(self, model):
-
         # Create a decoder with certain model
         config = Decoder.default_config()
         config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
@@ -186,7 +165,6 @@ def speech_recog(self, model):
         decoder = Decoder(config)

         decoder.start_utt()
-        tstamp = time.time()
         recog_text = ''

         with self.stream_in as stream:
@@ -195,38 +173,11 @@ def speech_recog(self, model):
                 decoder.process_raw(content, False, False)
                 if decoder.hyp() and decoder.hyp().hypstr != '':
                     recog_text += decoder.hyp().hypstr
-                    tstamp = time.time()
                 if len(recog_text) > 1:
                     decoder.end_utt()
-                    logging.info("recog text: " + recog_text)
+                    logging.info("recog text: %s", recog_text)
                     return recog_text
-
-    # def speech_recog_google(self, locale):
-    #     config = types.RecognitionConfig(
-    #         encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
-    #         sample_rate_hertz=RATE,
-    #         language_code=locale)
-    #     streaming_config = types.StreamingRecognitionConfig(
-    #         config=config,
-    #         interim_results=False,
-    #         single_utterance=True)
-    #
-    #     t1 = time.time()
-    #     with self.stream_in as stream:
-    #         audio_generator = stream.generator()
-    #         requests = (types.StreamingRecognizeRequest(audio_content=content)
-    #                     for content in audio_generator)
-    #
-    #         responses = self._google_speech_client.streaming_recognize(streaming_config, requests)
-
-    # Now, put the transcription responses to use.
-    # for response in responses:
-    #     if time.time() - t1 > 10:
-    #         return ""
-    #     if response.results:
-    #         result = response.results[0]
-    #         if result.is_final:
-    #             return result.alternatives[0].transcript
+        return recog_text

     class MicrophoneStream(object):
         """Opens a recording stream as a generator yielding the audio chunks."""
@@ -244,21 +195,21 @@ def __enter__(self):
             self._audio_interface = pyaudio.PyAudio()
             self._buff = queue.Queue()
             self._audio_stream = self._audio_interface.open(
-                format=self._format,
-                # The API currently only supports 1-channel (mono) audio
-                # https://goo.gl/z757pE
-                channels=1, rate=self._rate,
-                input=True, frames_per_buffer=self._chunk,
-                # Run the audio stream asynchronously to fill the buffer object.
-                # This is necessary so that the input device's buffer doesn't
-                # overflow while the calling thread makes network requests, etc.
-                stream_callback=self._fill_buffer,
+                format=self._format,
+                # The API currently only supports 1-channel (mono) audio
+                # https://goo.gl/z757pE
+                channels=1, rate=self._rate,
+                input=True, frames_per_buffer=self._chunk,
+                # Run the audio stream asynchronously to fill the buffer object.
+                # This is necessary so that the input device's buffer doesn't
+                # overflow while the calling thread makes network requests, etc.
+                stream_callback=self._fill_buffer,
             )
             self.closed = False

             return self

-        def __exit__(self, type, value, traceback):
+        def __exit__(self, atype, value, traceback):
             self._audio_stream.stop_stream()
             self._audio_stream.close()
             self._audio_interface.terminate()
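
For reference, a minimal sketch of how the MicrophoneStream context manager above is consumed, mirroring the loop that record() and hear() use. It is not part of the change: in the module the class is reached as self.MicrophoneStream with the FORMAT, RATE and CHUNK constants, so the standalone call and the concrete values below are placeholders.

import time
import pyaudio

def capture(seconds=2.0):
    # Placeholder arguments standing in for the module's FORMAT, RATE and CHUNK.
    stream = MicrophoneStream(pyaudio.paInt16, 16000, 1024)
    buf = bytearray()
    t0 = time.time()
    with stream as s:                    # __enter__ opens the pyaudio input stream
        for chunk in s.generator():      # yields raw audio chunks from the internal queue
            buf.extend(chunk)
            if time.time() - t0 >= seconds:
                break                    # leaving the with block stops and closes the stream
    return buf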