Skip to content

Commit f1196b8

Browse files
authored
AISDK-128: Add delete_after_seconds option to streaming and async clients (#58)
1 parent f89d6f3 commit f1196b8

File tree

8 files changed

+53
-34
lines changed

8 files changed

+53
-34
lines changed

Diff for: README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ job = client.submit_job_url("https://example.com/file-to-transcribe.mp3")
5151
[Submit Job](https://www.rev.ai/docs#operation/SubmitTranscriptionJob) endpoint.
5252

5353
If you want to get fancy, both send job methods take `metadata`, `callback_url`,
54-
`skip_diarization`, `skip_punctuation`, `speaker_channels_count`, `custom_vocabularies`, `filter_profanity` and `remove_disfluencies` as optional parameters, these are described in the request body of
54+
`skip_diarization`, `skip_punctuation`, `speaker_channels_count`, `custom_vocabularies`, `filter_profanity`, `remove_disfluencies`, and `delete_after_seconds` as optional parameters; these are described in the request body of
5555
the [Submit Job](https://www.rev.ai/docs#operation/SubmitTranscriptionJob) endpoint.
5656

5757
### Checking your file's status
@@ -159,7 +159,7 @@ If passing in custom functions, make sure you provide the right parameters. See
159159

160160
Once you have a streaming client set up with a `MediaConfig` and access token, you can obtain a transcription generator of your audio. You can also use a custom vocabulary with your streaming job by supplying the optional `custom_vocabulary_id` when starting a connection!
161161

162-
More optional parameters can be supplied when starting a connection, these are `metadata`, `filter_profanity` and `remove_disfluencies`. For a description of these optional parameters look at our [streaming documentation](https://www.rev.ai/docs/streaming#section/WebSocket-Endpoint).
162+
More optional parameters can be supplied when starting a connection: `metadata`, `filter_profanity`, `remove_disfluencies`, and `delete_after_seconds`. For a description of these optional parameters, see our [streaming documentation](https://www.rev.ai/docs/streaming#section/WebSocket-Endpoint).
163163

164164
```python
165165
response_generator = streaming_client.start(AUDIO_GENERATOR, custom_vocabulary_id="CUSTOM VOCAB ID")

Diff for: examples/async_example.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@
4242
# skip_diarization=False,
4343
# custom_vocabularies=None,
4444
# filter_profanity=False,
45-
# remove_disfluencies=False)
45+
# remove_disfluencies=False,
46+
# delete_after_seconds=None)
4647

4748

4849
# Submitting a job with a link to the file you want transcribed
@@ -54,7 +55,8 @@
5455
skip_diarization=False,
5556
custom_vocabularies=custom_vocabularies,
5657
filter_profanity=False,
57-
remove_disfluencies=False)
58+
remove_disfluencies=False,
59+
delete_after_seconds=None)
5860

5961
print("Submitted Job")
6062

Diff for: setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 2.9.0
2+
current_version = 2.10.0
33
commit = True
44
tag = True
55

Diff for: src/rev_ai/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
"""Top-level package for rev_ai"""
33

4-
__version__ = '2.9.0'
4+
__version__ = '2.10.0'
55

66
from .models import Job, JobStatus, Account, Transcript, MediaConfig, CaptionType, CustomVocabulary

Diff for: src/rev_ai/apiclient.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ def submit_job_url(
4545
speaker_channels_count=None,
4646
custom_vocabularies=None,
4747
filter_profanity=False,
48-
remove_disfluencies=False):
48+
remove_disfluencies=False,
49+
delete_after_seconds=None):
4950
"""Submit media given a URL for transcription.
5051
The audio data is downloaded from the URL.
5152
@@ -67,10 +68,9 @@ def submit_job_url(
6768
of a key "phrases" which maps to a list of strings, each of which
6869
represents a phrase you would like the speech recognition to bias
6970
itself toward.
70-
:param filter_profanity: should rev.ai obscure profane words when
71-
transcribing this file
72-
:param remove_disfluencies: should rev.ai remove disfluencies when
73-
transcribing this file
71+
:param filter_profanity: whether to mask profane words
72+
:param remove_disfluencies: whether to exclude filler words like "uh"
73+
:param delete_after_seconds: number of seconds after job completion when job is auto-deleted
7474
:returns: raw response data
7575
:raises: HTTPError
7676
"""
@@ -81,7 +81,7 @@ def submit_job_url(
8181
callback_url, skip_diarization,
8282
skip_punctuation, speaker_channels_count,
8383
custom_vocabularies, filter_profanity,
84-
remove_disfluencies)
84+
remove_disfluencies, delete_after_seconds)
8585

8686
response = self._make_http_request(
8787
"POST",
@@ -100,7 +100,8 @@ def submit_job_local_file(
100100
speaker_channels_count=None,
101101
custom_vocabularies=None,
102102
filter_profanity=False,
103-
remove_disfluencies=False):
103+
remove_disfluencies=False,
104+
delete_after_seconds=None):
104105
"""Submit a local file for transcription.
105106
Note that the content type is inferred if not provided.
106107
@@ -121,10 +122,9 @@ def submit_job_local_file(
121122
recognition to find those phrases. Each dictionary has the key
122123
"phrases" which maps to a list of strings, each of which represents
123124
a phrase you would like the speech recognition to bias itself toward.
124-
:param filter_profanity: should rev.ai obscure profane words when
125-
transcribing this file
126-
:param remove_disfluencies: should rev.ai remove disfluencies when
127-
transcribing this file
125+
:param filter_profanity: whether to mask profane words
126+
:param remove_disfluencies: whether to exclude filler words like "uh"
127+
:param delete_after_seconds: number of seconds after job completion when job is auto-deleted
128128
:returns: raw response data
129129
:raises: HTTPError
130130
"""
@@ -134,7 +134,7 @@ def submit_job_local_file(
134134
payload = self._create_job_options_payload(None, metadata, callback_url, skip_diarization,
135135
skip_punctuation, speaker_channels_count,
136136
custom_vocabularies, filter_profanity,
137-
remove_disfluencies)
137+
remove_disfluencies, delete_after_seconds)
138138

139139
with open(filename, 'rb') as f:
140140
files = {
@@ -372,7 +372,8 @@ def _create_job_options_payload(
372372
speaker_channels_count=None,
373373
custom_vocabularies=None,
374374
filter_profanity=None,
375-
remove_disfluencies=None):
375+
remove_disfluencies=None,
376+
delete_after_seconds=None):
376377
payload = {}
377378
if media_url:
378379
payload['media_url'] = media_url
@@ -393,6 +394,8 @@ def _create_job_options_payload(
393394
payload['filter_profanity'] = filter_profanity
394395
if remove_disfluencies:
395396
payload['remove_disfluencies'] = remove_disfluencies
397+
if delete_after_seconds is not None:
398+
payload['delete_after_seconds'] = delete_after_seconds
396399
return payload
397400

398401
def _create_captions_query(self, speaker_channel):

Diff for: src/rev_ai/streamingclient.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,16 @@ def start(self,
6666
metadata=None,
6767
custom_vocabulary_id=None,
6868
filter_profanity=None,
69-
remove_disfluencies=None):
69+
remove_disfluencies=None,
70+
delete_after_seconds=None):
7071
"""Function to connect the websocket to the URL and start the response
7172
thread
7273
:param generator: generator object that yields binary audio data
7374
:param metadata: metadata to be attached to streaming job
75+
:param custom_vocabulary_id: id of custom vocabulary to be used with this streaming job
76+
:param filter_profanity: whether to mask profane words
77+
:param remove_disfluencies: whether to exclude filler words like "uh"
78+
:param delete_after_seconds: number of seconds after job completion when job is auto-deleted
7479
"""
7580
url = self.base_url + '?' + urlencode({
7681
'access_token': self.access_token,
@@ -90,6 +95,9 @@ def start(self,
9095
if remove_disfluencies:
9196
url += '&' + urlencode({'remove_disfluencies': 'true'})
9297

98+
if delete_after_seconds is not None:
99+
url += '&' + urlencode({'delete_after_seconds': delete_after_seconds})
100+
93101
try:
94102
self.client.connect(url)
95103
except Exception as e:

Diff for: tests/test_job.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,16 @@ def test_submit_job_url_with_success(self, mock_session, make_mock_response):
107107
'skip_punctuation': True,
108108
'speaker_channels_count': 1,
109109
'filter_profanity': True,
110-
'remove_disfluencies': True
110+
'remove_disfluencies': True,
111+
'delete_after_seconds': 0
111112
}
112113
response = make_mock_response(url=JOB_ID_URL, json_data=data)
113114
mock_session.request.return_value = response
114115
client = RevAiAPIClient(TOKEN)
115116

116117
res = client.submit_job_url(MEDIA_URL, METADATA,
117118
CALLBACK_URL, True,
118-
True, 1, CUSTOM_VOCAB, True, True)
119+
True, 1, CUSTOM_VOCAB, True, True, 0)
119120

120121
assert res == Job(JOB_ID,
121122
CREATED_ON,
@@ -134,7 +135,8 @@ def test_submit_job_url_with_success(self, mock_session, make_mock_response):
134135
'speaker_channels_count': 1,
135136
'custom_vocabularies': CUSTOM_VOCAB,
136137
'filter_profanity': True,
137-
'remove_disfluencies': True
138+
'remove_disfluencies': True,
139+
'delete_after_seconds': 0
138140
},
139141
headers=client.default_headers)
140142

@@ -155,7 +157,8 @@ def test_submit_job_local_file_with_success(self, mocker, mock_session, make_moc
155157
'skip_diarization': True,
156158
'speaker_channels_count': 1,
157159
'filter_profanity': True,
158-
'remove_disfluencies': True
160+
'remove_disfluencies': True,
161+
'delete_after_seconds': 0
159162
}
160163
response = make_mock_response(url=JOB_ID_URL, json_data=data)
161164
mock_session.request.return_value = response
@@ -164,7 +167,7 @@ def test_submit_job_local_file_with_success(self, mocker, mock_session, make_moc
164167
with mocker.patch('src.rev_ai.apiclient.open', create=True)() as file:
165168
res = client.submit_job_local_file(FILENAME, METADATA,
166169
CALLBACK_URL, True,
167-
True, 1, CUSTOM_VOCAB, True, True)
170+
True, 1, CUSTOM_VOCAB, True, True, 0)
168171

169172
assert res == Job(JOB_ID,
170173
CREATED_ON,
@@ -186,7 +189,8 @@ def test_submit_job_local_file_with_success(self, mocker, mock_session, make_moc
186189
'speaker_channels_count': 1,
187190
'custom_vocabularies': CUSTOM_VOCAB,
188191
'filter_profanity': True,
189-
'remove_disfluencies': True
192+
'remove_disfluencies': True,
193+
'delete_after_seconds': 0
190194
}, sort_keys=True)
191195
)
192196
},

Diff for: tests/test_streamingclient.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,16 @@ def test_start_success(self, mock_streaming_client, mock_generator, capsys):
5858
metadata = "my metadata"
5959
filter_profanity = 'true'
6060
remove_disfluencies = 'true'
61-
query_dict = {
61+
delete_after_seconds = '0'
62+
expected_query_dict = {
6263
'access_token': mock_streaming_client.access_token,
6364
'content_type': mock_streaming_client.config.get_content_type_string(),
6465
'user_agent': 'RevAi-PythonSDK/{}'.format(__version__),
6566
'custom_vocabulary_id': custom_vocabulary_id,
6667
'metadata': metadata,
6768
'filter_profanity': filter_profanity,
68-
'remove_disfluencies': remove_disfluencies
69+
'remove_disfluencies': remove_disfluencies,
70+
'delete_after_seconds': delete_after_seconds
6971
}
7072
example_data = '{"type":"partial","transcript":"Test"}'
7173
example_connected = '{"type":"connected","id":"testid"}'
@@ -81,11 +83,11 @@ def test_start_success(self, mock_streaming_client, mock_generator, capsys):
8183
mock_streaming_client.client.recv_data.side_effect = data
8284

8385
response_gen = mock_streaming_client.start(mock_generator(), metadata,
84-
custom_vocabulary_id, True)
86+
custom_vocabulary_id, True, True, 0)
8587

8688
assert mock_streaming_client.client.connect.call_count == 1
87-
called_url = mock_streaming_client.client.connect.call_args_list[0].args[0]
88-
validate_query_parameters(called_url, query_dict)
89+
called_url = mock_streaming_client.client.connect.call_args_list[0][0][0]
90+
validate_query_parameters(called_url, expected_query_dict)
8991
mock_streaming_client.client.send_binary.assert_any_call(0)
9092
mock_streaming_client.client.send_binary.assert_any_call(1)
9193
mock_streaming_client.client.send_binary.assert_any_call(2)
@@ -107,8 +109,8 @@ def test_end(self, mock_streaming_client):
107109
mock_streaming_client.client.abort.assert_called_once_with()
108110

109111

110-
def validate_query_parameters(called_url, query_dict):
112+
def validate_query_parameters(called_url, expected_query_dict):
111113
called_query_string = urlparse(called_url).query
112114
called_query_parameters = parse_qs(called_query_string)
113-
for key in called_query_parameters:
114-
assert called_query_parameters[key][0] == query_dict[key]
115+
for key in expected_query_dict:
116+
assert called_query_parameters[key][0] == expected_query_dict[key]

0 commit comments

Comments
 (0)