 load_dotenv()

+# We will collect the is_final=true messages here so we can use them when the person finishes speaking
+is_finals = []

 async def main():
     try:
@@ -42,31 +44,52 @@ async def main():
         dg_connection = deepgram.listen.asynclive.v("1")

         async def on_open(self, open, **kwargs):
-            print(f"\n\n{open}\n\n")
+            print("Deepgram Connection Open")

         async def on_message(self, result, **kwargs):
+            global is_finals
             sentence = result.channel.alternatives[0].transcript
             if len(sentence) == 0:
                 return
-            print(f"speaker: {sentence}")
+            if result.is_final:
+                # We need to collect these and concatenate them together when we get a speech_final=true
+                # See docs: https://developers.deepgram.com/docs/understand-endpointing-interim-results
+                is_finals.append(sentence)
+
+                # speech_final means we have detected sufficient silence to consider this the end of speech
+                # It is the lowest-latency final result, arriving as soon as the endpointing threshold is reached
+                if result.speech_final:
+                    utterance = ' '.join(is_finals)
+                    print(f"Speech Final: {utterance}")
+                    is_finals = []
+                else:
+                    # These are useful if you need real-time captioning and want to update what the interim results produced
+                    print(f"Is Final: {sentence}")
+            else:
+                # These are useful if you need real-time captioning of what is being spoken
+                print(f"Interim Results: {sentence}")

         async def on_metadata(self, metadata, **kwargs):
-            print(f"\n\n{metadata}\n\n")
+            print(f"Deepgram Metadata: {metadata}")

         async def on_speech_started(self, speech_started, **kwargs):
-            print(f"\n\n{speech_started}\n\n")
+            print("Deepgram Speech Started")

         async def on_utterance_end(self, utterance_end, **kwargs):
-            print(f"\n\n{utterance_end}\n\n")
+            global is_finals
+            if len(is_finals) > 0:
+                utterance = ' '.join(is_finals)
+                print(f"Deepgram Utterance End: {utterance}")
+                is_finals = []

-        def on_close(self, close, **kwargs):
-            print(f"\n\n{close}\n\n")
+        async def on_close(self, close, **kwargs):
+            print("Deepgram Connection Closed")

-        def on_error(self, error, **kwargs):
-            print(f"\n\n{error}\n\n")
+        async def on_error(self, error, **kwargs):
+            print(f"Deepgram Handled Error: {error}")

-        def on_unhandled(self, unhandled, **kwargs):
-            print(f"\n\n{unhandled}\n\n")
+        async def on_unhandled(self, unhandled, **kwargs):
+            print(f"Deepgram Unhandled Websocket Message: {unhandled}")

         dg_connection.on(LiveTranscriptionEvents.Open, on_open)
         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
@@ -80,19 +103,28 @@ def on_unhandled(self, unhandled, **kwargs):
         # connect to websocket
         options: LiveOptions = LiveOptions(
             model="nova-2",
-            punctuate=True,
             language="en-US",
+            # Apply smart formatting to the output
+            smart_format=True,
+            # Raw audio format details
             encoding="linear16",
             channels=1,
             sample_rate=16000,
             # To get UtteranceEnd, the following must be set:
             interim_results=True,
             utterance_end_ms="1000",
             vad_events=True,
+            # Time in milliseconds of silence to wait for before finalizing speech
+            endpointing=300
         )

+        addons = {
+            # Prevent waiting for additional numbers
+            "no_delay": "true"
+        }
+
         print("\n\nStart talking! Press Ctrl+C to stop...\n")
-        if await dg_connection.start(options) is False:
+        if await dg_connection.start(options, addons=addons) is False:
             print("Failed to connect to Deepgram")
             return
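For context, the handlers in this diff assume the surrounding scaffolding from the Deepgram Python SDK v3 async microphone example. The sketch below is an approximation of that scaffolding, not part of this commit: the `DeepgramClientOptions` keepalive config and the `Microphone` helper are taken from the SDK's published examples, and details may differ from the actual file.

    # Sketch only: based on the Deepgram Python SDK v3 async microphone example,
    # not part of this commit. Requires DEEPGRAM_API_KEY in the environment.
    import asyncio
    from dotenv import load_dotenv
    from deepgram import (
        DeepgramClient,
        DeepgramClientOptions,
        LiveTranscriptionEvents,
        LiveOptions,
        Microphone,
    )

    load_dotenv()
    is_finals = []

    async def main():
        try:
            # Keepalive stops the websocket from closing during pauses in audio
            config = DeepgramClientOptions(options={"keepalive": "true"})
            deepgram = DeepgramClient("", config)

            dg_connection = deepgram.listen.asynclive.v("1")

            # ... define and register the event handlers from the diff here ...

            options = LiveOptions(model="nova-2", language="en-US", smart_format=True,
                                  encoding="linear16", channels=1, sample_rate=16000,
                                  interim_results=True, utterance_end_ms="1000",
                                  vad_events=True, endpointing=300)
            addons = {"no_delay": "true"}

            if await dg_connection.start(options, addons=addons) is False:
                print("Failed to connect to Deepgram")
                return

            # Stream the default input device into the websocket
            microphone = Microphone(dg_connection.send)
            microphone.start()

            # Run until interrupted
            try:
                while True:
                    await asyncio.sleep(1)
            except (asyncio.CancelledError, KeyboardInterrupt):
                pass

            microphone.finish()
            await dg_connection.finish()
        except Exception as e:
            print(f"Could not open socket: {e}")

    if __name__ == "__main__":
        asyncio.run(main())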