Skip to content

Commit 81e2940

Browse files
Merge pull request #364 from deepgram/is-final-handling
Handle is_final and endpointing together with utterance end + clean u…
2 parents 3904fcc + 10a8b2e commit 81e2940

File tree

2 files changed

+87
-23
lines changed

2 files changed

+87
-23
lines changed

examples/streaming/async_microphone/main.py

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
load_dotenv()
2020

21+
# We will collect the is_final=true messages here so we can use them when the person finishes speaking
22+
is_finals = []
2123

2224
async def main():
2325
try:
@@ -42,31 +44,52 @@ async def main():
4244
dg_connection = deepgram.listen.asynclive.v("1")
4345

4446
async def on_open(self, open, **kwargs):
45-
print(f"\n\n{open}\n\n")
47+
print(f"Deepgram Connection Open")
4648

4749
async def on_message(self, result, **kwargs):
50+
global is_finals
4851
sentence = result.channel.alternatives[0].transcript
4952
if len(sentence) == 0:
5053
return
51-
print(f"speaker: {sentence}")
54+
if result.is_final:
55+
# We need to collect these and concatenate them together when we get a speech_final=true
56+
# See docs: https://developers.deepgram.com/docs/understand-endpointing-interim-results
57+
is_finals.append(sentence)
58+
59+
# Speech Final means we have detected sufficient silence to consider this the end of speech
60+
# Speech Final is the lowest-latency result, as it triggers as soon as the endpointing value has been reached
61+
if result.speech_final:
62+
utterance = ' '.join(is_finals)
63+
print(f"Speech Final: {utterance}")
64+
is_finals = []
65+
else:
66+
# These are useful if you need real time captioning and update what the Interim Results produced
67+
print(f"Is Final: {sentence}")
68+
else:
69+
# These are useful if you need real time captioning of what is being spoken
70+
print(f"Interim Results: {sentence}")
5271

5372
async def on_metadata(self, metadata, **kwargs):
54-
print(f"\n\n{metadata}\n\n")
73+
print(f"Deepgram Metadata: {metadata}")
5574

5675
async def on_speech_started(self, speech_started, **kwargs):
57-
print(f"\n\n{speech_started}\n\n")
76+
print(f"Deepgram Speech Started")
5877

5978
async def on_utterance_end(self, utterance_end, **kwargs):
60-
print(f"\n\n{utterance_end}\n\n")
79+
global is_finals
80+
if len(is_finals) > 0:
81+
utterance = ' '.join(is_finals)
82+
print(f"Deepgram Utterance End: {utterance}")
83+
is_finals = []
6184

62-
def on_close(self, close, **kwargs):
63-
print(f"\n\n{close}\n\n")
85+
async def on_close(self, close, **kwargs):
86+
print(f"Deepgram Connection Closed")
6487

65-
def on_error(self, error, **kwargs):
66-
print(f"\n\n{error}\n\n")
88+
async def on_error(self, error, **kwargs):
89+
print(f"Deepgram Handled Error: {error}")
6790

68-
def on_unhandled(self, unhandled, **kwargs):
69-
print(f"\n\n{unhandled}\n\n")
91+
async def on_unhandled(self, unhandled, **kwargs):
92+
print(f"Deepgram Unhandled Websocket Message: {unhandled}")
7093

7194
dg_connection.on(LiveTranscriptionEvents.Open, on_open)
7295
dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
@@ -80,19 +103,28 @@ def on_unhandled(self, unhandled, **kwargs):
80103
# connect to websocket
81104
options: LiveOptions = LiveOptions(
82105
model="nova-2",
83-
punctuate=True,
84106
language="en-US",
107+
# Apply smart formatting to the output
108+
smart_format=True,
109+
# Raw audio format details
85110
encoding="linear16",
86111
channels=1,
87112
sample_rate=16000,
88113
# To get UtteranceEnd, the following must be set:
89114
interim_results=True,
90115
utterance_end_ms="1000",
91116
vad_events=True,
117+
# Time in milliseconds of silence to wait for before finalizing speech
118+
endpointing=300
92119
)
93120

121+
addons = {
122+
# Prevent waiting for additional numbers
123+
"no_delay": "true"
124+
}
125+
94126
print("\n\nStart talking! Press Ctrl+C to stop...\n")
95-
if await dg_connection.start(options) is False:
127+
if await dg_connection.start(options, addons=addons) is False:
96128
print("Failed to connect to Deepgram")
97129
return
98130

examples/streaming/microphone/main.py

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
load_dotenv()
1818

19+
# We will collect the is_final=true messages here so we can use them when the person finishes speaking
20+
is_finals = []
1921

2022
def main():
2123
try:
@@ -30,31 +32,52 @@ def main():
3032
dg_connection = deepgram.listen.live.v("1")
3133

3234
def on_open(self, open, **kwargs):
33-
print(f"\n\n{open}\n\n")
35+
print(f"Deepgram Connection Open")
3436

3537
def on_message(self, result, **kwargs):
38+
global is_finals
3639
sentence = result.channel.alternatives[0].transcript
3740
if len(sentence) == 0:
3841
return
39-
print(f"speaker: {sentence}")
42+
if result.is_final:
43+
# We need to collect these and concatenate them together when we get a speech_final=true
44+
# See docs: https://developers.deepgram.com/docs/understand-endpointing-interim-results
45+
is_finals.append(sentence)
46+
47+
# Speech Final means we have detected sufficient silence to consider this the end of speech
48+
# Speech Final is the lowest-latency result, as it triggers as soon as the endpointing value has been reached
49+
if result.speech_final:
50+
utterance = ' '.join(is_finals)
51+
print(f"Speech Final: {utterance}")
52+
is_finals = []
53+
else:
54+
# These are useful if you need real time captioning and update what the Interim Results produced
55+
print(f"Is Final: {sentence}")
56+
else:
57+
# These are useful if you need real time captioning of what is being spoken
58+
print(f"Interim Results: {sentence}")
4059

4160
def on_metadata(self, metadata, **kwargs):
42-
print(f"\n\n{metadata}\n\n")
61+
print(f"Deepgram Metadata: {metadata}")
4362

4463
def on_speech_started(self, speech_started, **kwargs):
45-
print(f"\n\n{speech_started}\n\n")
64+
print(f"Deepgram Speech Started")
4665

4766
def on_utterance_end(self, utterance_end, **kwargs):
48-
print(f"\n\n{utterance_end}\n\n")
67+
global is_finals
68+
if len(is_finals) > 0:
69+
utterance = ' '.join(is_finals)
70+
print(f"Deepgram Utterance End: {utterance}")
71+
is_finals = []
4972

5073
def on_close(self, close, **kwargs):
51-
print(f"\n\n{close}\n\n")
74+
print(f"Deepgram Connection Closed")
5275

5376
def on_error(self, error, **kwargs):
54-
print(f"\n\n{error}\n\n")
77+
print(f"Deepgram Handled Error: {error}")
5578

5679
def on_unhandled(self, unhandled, **kwargs):
57-
print(f"\n\n{unhandled}\n\n")
80+
print(f"Deepgram Unhandled Websocket Message: {unhandled}")
5881

5982
dg_connection.on(LiveTranscriptionEvents.Open, on_open)
6083
dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
@@ -67,19 +90,28 @@ def on_unhandled(self, unhandled, **kwargs):
6790

6891
options: LiveOptions = LiveOptions(
6992
model="nova-2",
70-
punctuate=True,
7193
language="en-US",
94+
# Apply smart formatting to the output
95+
smart_format=True,
96+
# Raw audio format details
7297
encoding="linear16",
7398
channels=1,
7499
sample_rate=16000,
75100
# To get UtteranceEnd, the following must be set:
76101
interim_results=True,
77102
utterance_end_ms="1000",
78103
vad_events=True,
104+
# Time in milliseconds of silence to wait for before finalizing speech
105+
endpointing=300
79106
)
80107

108+
addons = {
109+
# Prevent waiting for additional numbers
110+
"no_delay": "true"
111+
}
112+
81113
print("\n\nPress Enter to stop recording...\n\n")
82-
if dg_connection.start(options) is False:
114+
if dg_connection.start(options, addons=addons) is False:
83115
print("Failed to connect to Deepgram")
84116
return
85117

0 commit comments

Comments
 (0)