-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmy-voice-tree.py
333 lines (315 loc) · 12.8 KB
/
my-voice-tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# my-voice-tree.py
# ----------------
# (c) 2021 Mal Minhas, <[email protected]>
#
# my-voice-tree.py
# ----------------
# Voice controlled RGB Xmas tree from Pi Hut using AWS Transcribe.
# Builds on my-tree.py by adding voice support using AWS.
# Uttering "Christmas tree red" will make the Christmas tree flash red.
# Uttering "Christmas tree green" will make it flash green.
# Uttering "Christmas tree disco" will make it phase different disco hues.
# Uttering "Christmas tree phase" will make it phase with synced hue.
#
# Installation:
# -------------
# See the accompanying readme for full details on how to set up both the
# RGB Xmas tree and the AWS Transcribe and Polly functionality on
# a Raspberry Pi 4.
#
# Implementation:
# --------------
# Cooperative multitasking using Python asyncio to interleave between
# micStream and RGBXmasTree LEDs.
#
# History:
# -------
# 27.11.21 v0.1 First cut
# 28.11.21 v0.2 Voice control tested and working on Raspberry Pi
# 28.11.21 v0.3 Added support for wait looping on network
# 28.11.21 v0.4 Added basic support for playing back mp3 and using Polly
#
import re
import os
import sys
import vlc
import time
import boto3
import awscrt
import asyncio
import threading
import sounddevice
from boto3 import Session
from botocore.exceptions import BotoCoreError, ClientError
from contextlib import closing
from asyncio.subprocess import PIPE
from amazon_transcribe.client import TranscribeStreamingClient
from amazon_transcribe.handlers import TranscriptResultStreamHandler
from amazon_transcribe.model import TranscriptEvent
from tree import RGBXmasTree
from colorzero import Color, Hue
from time import sleep
from random import random
# Create an instance of an RGBXmasTree (shared module-wide by the tree helpers below)
TREE = RGBXmasTree(brightness=0.3)
# LED number for star at the top of the tree
STAR = 3
# LED indices 0..24 split into three interleaved groups: 0,3,6,.. / 1,4,7,.. / 2,5,8,..
TREE_LED_SET = [list(range(25)[::3]), list(range(25)[1::3]), list(range(25)[2::3])]
# Previous value of STATE; restored by waitForPolly once playback has been started
LAST_STATE = 'disco'
# Current mode: one of SUPPORTED_COLORS, or 'speak' / 'generate' while audio is requested
STATE = 'disco'
# Text sent to Polly when the 'generate' command is recognised
TEXT = 'Hello everyone this is your Christmas Tree talking'
# File name (relative to the working directory) of the mp3 played in the 'speak' state
AUDIO = ''
# Single-word commands after "christmas tree" that select a tree mode directly
SUPPORTED_COLORS = ['red','green','blue','yellow','orange','purple',
'white','pink','black','brown','disco','phase']
async def micStream():
    """Async generator yielding ``(audio_bytes, status)`` blocks from the microphone.

    A sounddevice ``RawInputStream`` pushes each captured block onto an
    ``asyncio.Queue`` through its callback; this generator keeps the stream
    open and yields the queued blocks one at a time, forever.
    """
    event_loop = asyncio.get_event_loop()
    pending_blocks = asyncio.Queue()

    def on_audio(indata, frame_count, time_info, status):
        # call_soon_threadsafe is used to enqueue the block, presumably because
        # sounddevice invokes this callback off the event-loop thread.
        event_loop.call_soon_threadsafe(
            pending_blocks.put_nowait, (bytes(indata), status)
        )

    # Audio stream params should match the audio formats for the source
    # language being used, per:
    # https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
    mic = sounddevice.RawInputStream(
        channels=1,
        samplerate=16000,
        callback=on_audio,
        blocksize=1024 * 2,
        dtype="int16",
    )
    # Keep the stream open while handing out chunks as they become available.
    with mic:
        while True:
            yield await pending_blocks.get()
async def writeChunks(stream):
    """Forward every microphone block from micStream() to the transcription stream.

    Args:
        stream: object returned by ``start_stream_transcription``; its
            ``input_stream`` receives the audio events.
    """
    print("writeChunks: ENTER")
    audio_in = stream.input_stream
    # Pump raw audio from the mic generator into the transcription input.
    async for audio_chunk, _status in micStream():
        await audio_in.send_audio_event(audio_chunk=audio_chunk)
    # Only reached if the mic generator ever finishes.
    await stream.input_stream.end_stream()
    print("writeChunks: EXIT")
class TranscribeEventHandler(TranscriptResultStreamHandler):
    """Map transcripts of the form "christmas tree <command> ..." onto STATE changes.

    Recognised commands:
      * any entry of SUPPORTED_COLORS -> STATE becomes that entry
      * speak / talk / talked         -> play 'speech.mp3' (STATE 'speak')
      * sing / saying                 -> play the Christmas song (STATE 'speak')
      * generate <text>               -> synthesize <text> (STATE 'generate')
    """

    # Compiled once for the class instead of once per transcript event.
    # Groups: 1 = "christmas tree", 2 = optional '.', ',' or 's',
    #         3 = the single-word command, 4 = the rest of the utterance.
    _COMMAND_PATTERN = re.compile(r'(christmas tree)(\.|\,|s)?\s+(\w+)(.*)')

    @staticmethod
    def _switch_state(new_state):
        """Move to new_state, remembering the previous state in LAST_STATE."""
        global STATE, LAST_STATE
        if STATE == new_state:
            print(f"We are already in STATE {STATE} - skipping")
        else:
            LAST_STATE = STATE
            STATE = new_state
            print(f"STATE CHANGE: '{new_state}' LAST_STATE={LAST_STATE}")

    async def handle_transcript_event(self, transcript_event: TranscriptEvent):
        """Inspect each alternative of each result and act on the first command found.

        Args:
            transcript_event: event whose ``transcript.results`` are scanned.
        """
        global STATE, TEXT, AUDIO
        for result in transcript_event.transcript.results:
            for i, alt in enumerate(result.alternatives):
                text = alt.transcript.lower()
                print(f"{i}:'{alt.transcript}' ({text})")
                xres = self._COMMAND_PATTERN.match(text)
                if not xres:
                    continue
                print(f"MATCH! xres[0]='{xres[0]}',xres[1]='{xres[1]}',"
                      f"xres[2]='{xres[2]}',xres[3]='{xres[3]}',xres[4]='{xres[4]}'")
                # text is already lower-cased, so the captured word is too.
                command = xres[3]
                if command in SUPPORTED_COLORS:
                    self._switch_state(command)
                    if STATE in ['disco']:
                        # Disco starts from the red/green/blue palette.
                        initXmasTree(darkMode=False)
                    break
                elif command in ['speak', 'talk', 'talked']:
                    AUDIO = 'speech.mp3'
                    self._switch_state('speak')
                    break
                # NOTE(review): 'black mirror' can never equal the single-word
                # capture; "christmas tree black ..." is handled as colour 'black'.
                elif command in ['sing', 'saying', 'black mirror']:
                    AUDIO = '08-I-Wish-it-Could-be-Christmas-Everyday.mp3'
                    self._switch_state('speak')
                    break
                elif command == 'generate':
                    TEXT = xres[4].replace('.', '')
                    # Ignore utterances too short to be worth synthesizing.
                    if len(TEXT.strip()) >= 10:
                        self._switch_state('generate')
                    break
                else:
                    print(f"Cannot handle '{command}'")
def initXmasTree(darkMode):
    """Put every LED of TREE into its starting colour.

    Args:
        darkMode: when true, STATE is set to 'black' and all LEDs (star
            included) are turned black. Otherwise STATE must already be
            'disco'; the three LED groups become red, green and blue and the
            star becomes white.
    """
    global STATE
    print(f"initXmasTree(darkMode={darkMode})")
    if darkMode:
        STATE = 'black'
        for group in TREE_LED_SET:
            for idx in group:
                TREE[idx].color = Color('black')
        # Star goes dark as well
        TREE[STAR].color = Color('black')
        return

    assert STATE in ['disco']
    # One starting colour per LED group
    palette = [Color('red'), Color('green'), Color('blue')]
    for group, shade in zip(TREE_LED_SET, palette):
        for idx in group:
            TREE[idx].color = shade
    # Colour top LED white
    TREE[STAR].color = Color('white')
async def lightUpXmasTree():
    """Drive the LEDs from STATE, refreshing roughly every 10 ms.

    'disco' and 'phase' advance every LED's hue by 10 degrees per pass; any
    other entry of SUPPORTED_COLORS paints the tree in that solid colour.
    States outside SUPPORTED_COLORS leave the LEDs untouched. On any exception
    the tree is closed and KeyboardInterrupt is raised.
    """
    global LAST_STATE
    print("lightUpXmasTree: ENTER")
    initXmasTree(darkMode=False)
    try:
        while True:
            if STATE in ('disco', 'phase'):
                # Hue phase in a slow cycle through all colors
                for group in TREE_LED_SET:
                    for idx in group:
                        TREE[idx].color += Hue(deg=10)
                TREE[STAR].color = Color('white')
                LAST_STATE = STATE
            elif STATE in SUPPORTED_COLORS:
                # Solid color
                solid = Color(STATE)
                for group in TREE_LED_SET:
                    for idx in group:
                        TREE[idx].color = solid
                # The star stays white unless the whole tree is switched off.
                if STATE != 'black':
                    TREE[STAR].color = Color('white')
                LAST_STATE = STATE
            # Any other state (e.g. 'speak') leaves the LEDs as they are.
            await asyncio.sleep(0.01)  # non-blocking
    except BaseException:
        print("Exiting tree")
        TREE.close()
    print("lightUpXmasTree: EXIT")
    raise KeyboardInterrupt
def playMp3(file, length=10):
    """Play an audio file through VLC for a fixed time, then stop.

    Blocks the calling thread for ``length`` seconds, so it is meant to be run
    in a worker thread.

    Args:
        file: path of the audio file to play.
        length: seconds to let playback run before stopping the player.
            Defaults to 10 so callers that pass only ``file`` no longer raise
            TypeError.
    """
    print(f"playMp3({file})")
    player = vlc.MediaPlayer(file)
    player.play()
    # play() returns immediately; hold the player open while the audio runs.
    time.sleep(length)
    player.stop()
def generateMp3WithPolly(text, file):
    """Synthesize ``text`` with Amazon Polly and write the mp3 to ``file``.

    Adapted from the AWS Getting Started example. Uses the 'Joanna' neural
    voice in region us-west-2.

    Args:
        text: the text to synthesize.
        file: path of the mp3 file to create or overwrite.
    """
    print(f"Generating polly file {file} from: '{text}'")
    polly_client = boto3.Session(region_name='us-west-2').client('polly')
    response = polly_client.synthesize_speech(VoiceId='Joanna',
                                              OutputFormat='mp3',
                                              Text=text,
                                              Engine='neural')
    # Close both the Polly audio stream and the output file even if the
    # read or write fails.
    with closing(response['AudioStream']) as audio, open(file, 'wb') as out:
        out.write(audio.read())
async def waitForPolly():
    """Poll STATE every 0.1 s and service the 'speak' and 'generate' states.

    'speak' plays the file named by AUDIO from WORKING_DIR (or '.') in a
    background thread. 'generate' first synthesizes TEXT to generate.mp3 and
    waits for that to finish, then plays it in a background thread. In both
    cases STATE is restored from LAST_STATE as soon as playback has started.
    """
    global STATE, LAST_STATE
    print("waitForPolly: ENTER")
    while True:
        await asyncio.sleep(0.1)
        if STATE == 'speak':
            # An earlier attempt used asyncio.create_subprocess_exec with a
            # local script; threads are used instead to avoid blocking.
            cwd = os.environ.get("WORKING_DIR") or '.'
            speechFile = f'{cwd}/{AUDIO}'
            # The short Polly clip gets 10 s; anything else up to 6 minutes.
            length = 10 if AUDIO == 'speech.mp3' else 360
            print(f"Using vlc to play {speechFile} - non-blocking")
            player_thread = threading.Thread(target=playMp3, args=(speechFile, length), daemon=False)
            player_thread.start()
            print(f"dropping out after starting vlc thread. STATE={STATE}, LAST_STATE={LAST_STATE}")
            STATE = LAST_STATE
            LAST_STATE = 'speak'
            print(f"Switching back to {STATE}")
        elif STATE == 'generate':
            cwd = os.environ.get("WORKING_DIR") or '.'
            speechFile = f'{cwd}/generate.mp3'
            print(f"Generating speech file {speechFile} - blocking")
            polly_thread = threading.Thread(target=generateMp3WithPolly, args=(TEXT, speechFile), daemon=False)
            polly_thread.start()
            # Deliberately blocks (event loop included) until the file exists.
            polly_thread.join()
            print(f"Using vlc to play {speechFile} - non-blocking")
            # playMp3 needs a duration; passing only the file raised TypeError
            # in the thread. Use the same 10 s as the other Polly clip.
            player_thread = threading.Thread(target=playMp3, args=(speechFile, 10), daemon=False)
            player_thread.start()
            print(f"dropping out after starting vlc thread. STATE={STATE}, LAST_STATE={LAST_STATE}")
            STATE = LAST_STATE
            LAST_STATE = 'speak'
            print(f"Switching back to {STATE}")
def synthesizeText(text):
    """Synthesize ``text`` with Amazon Polly and save it as 'speech.mp3'.

    Uses the 'Joanna' neural voice in region us-west-2.

    Args:
        text: the text to synthesize (previously ignored in favour of a
            hard-coded sample sentence).
    """
    polly_client = boto3.Session(region_name='us-west-2').client('polly')
    response = polly_client.synthesize_speech(VoiceId='Joanna',
                                              OutputFormat='mp3',
                                              Text=text,
                                              Engine='neural')
    # Bind the opened file: the original wrote to an undefined name `file`.
    with open('speech.mp3', 'wb') as out:
        out.write(response['AudioStream'].read())
async def initializeVoiceTree():
    """Open a Transcribe streaming session and run all cooperating tasks.

    Runs the mic-to-Transcribe pump, the transcript handler, the LED loop and
    the Polly/playback loop concurrently until one of them raises.
    """
    # Client for the chosen AWS region
    transcribe = TranscribeStreamingClient(region="us-west-2")
    # Start transcription; the returned stream carries audio in, text out
    stream = await transcribe.start_stream_transcription(
        language_code="en-US",
        media_sample_rate_hz=16000,
        media_encoding="pcm",
    )
    handler = TranscribeEventHandler(stream.output_stream)
    jobs = (
        writeChunks(stream),
        handler.handle_events(),
        lightUpXmasTree(),
        waitForPolly(),
    )
    await asyncio.gather(*jobs)
if __name__ == '__main__':
    def initialiseLoop():
        """Run the voice tree on a fresh event loop.

        Returns:
            0 when the run finished or was interrupted with KeyboardInterrupt,
            -1 when it failed and should be retried.
        """
        ret = 0
        loop = None
        print("initialise loop")
        try:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            loop.run_until_complete(initializeVoiceTree())
            print('closing loop')
        except KeyboardInterrupt:
            print('Exiting main loop')
        except awscrt.exceptions.AwsCrtError:
            # Can get here on boot with:
            # AWS_IO_DNS_QUERY_FAILED: A query to dns failed to resolve.
            print('Caught awscrt.exceptions.AwsCrtError')
            ret = -1
        except Exception as e:
            print(e)
            ret = -1
        finally:
            # Close on every path so failed attempts do not leak a loop.
            if loop is not None:
                loop.close()
        return ret

    # Keep retrying (e.g. while the network is still coming up after boot).
    while initialiseLoop() != 0:
        retry = 2
        print(f'Retrying after {retry} secs..')
        sleep(retry)
    sys.exit(0)