AISDK-128: Add delete_after_seconds option to streaming and async clients (#58)

KostasRev · web-flow · commit f1196b88e8ff · 2020-08-24T09:46:22.000-05:00
diff --git a/README.md b/README.md
@@ -51,7 +51,7 @@ job = client.submit_job_url("https://example.com/file-to-transcribe.mp3")
 [Submit Job](https://www.rev.ai/docs#operation/SubmitTranscriptionJob) endpoint.
 
 If you want to get fancy, both send job methods take `metadata`, `callback_url`,
-`skip_diarization`, `skip_punctuation`, `speaker_channels_count`, `custom_vocabularies`, `filter_profanity` and `remove_disfluencies` as optional parameters, these are described in the request body of
+`skip_diarization`, `skip_punctuation`, `speaker_channels_count`, `custom_vocabularies`, `filter_profanity`, `remove_disfluencies`, and `delete_after_seconds` as optional parameters; these are described in the request body of
 the [Submit Job](https://www.rev.ai/docs#operation/SubmitTranscriptionJob) endpoint.
 
 ### Checking your file's status
@@ -159,7 +159,7 @@ If passing in custom functions, make sure you provide the right parameters. See
 
 Once you have a streaming client setup with a `MediaConfig` and access token, you can obtain a transcription generator of your audio. You can also use a custom vocabulary with your streaming job by supplying the optional `custom_vocabulary_id` when starting a connection!
 
-More optional parameters can be supplied when starting a connection, these are `metadata`, `filter_profanity` and `remove_disfluencies`. For a description of these optional parameters look at our [streaming documentation](https://www.rev.ai/docs/streaming#section/WebSocket-Endpoint).
+More optional parameters can be supplied when starting a connection: `metadata`, `filter_profanity`, `remove_disfluencies`, and `delete_after_seconds`. For a description of these optional parameters, see our [streaming documentation](https://www.rev.ai/docs/streaming#section/WebSocket-Endpoint).
 
 ```python
 response_generator = streaming_client.start(AUDIO_GENERATOR, custom_vocabulary_id="CUSTOM VOCAB ID")
diff --git a/examples/async_example.py b/examples/async_example.py
@@ -42,7 +42,8 @@
 #                                    skip_diarization=False,
 #                                    custom_vocabularies=None,
 #                                    filter_profanity=False,
-#                                    remove_disfluencies=False)
+#                                    remove_disfluencies=False,
+#                                    delete_after_seconds=None)
 
 
 # Submitting a job with a link to the file you want transcribed
@@ -54,7 +55,8 @@
                             skip_diarization=False,
                             custom_vocabularies=custom_vocabularies,
                             filter_profanity=False,
-                            remove_disfluencies=False)
+                            remove_disfluencies=False,
+                            delete_after_seconds=None)
 
 print("Submitted Job")
 
diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.9.0
+current_version = 2.10.0
 commit = True
 tag = True
 
diff --git a/src/rev_ai/__init__.py b/src/rev_ai/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """Top-level package for rev_ai"""
 
-__version__ = '2.9.0'
+__version__ = '2.10.0'
 
 from .models import Job, JobStatus, Account, Transcript, MediaConfig, CaptionType, CustomVocabulary
diff --git a/src/rev_ai/apiclient.py b/src/rev_ai/apiclient.py
@@ -45,7 +45,8 @@ def submit_job_url(
             speaker_channels_count=None,
             custom_vocabularies=None,
             filter_profanity=False,
-            remove_disfluencies=False):
+            remove_disfluencies=False,
+            delete_after_seconds=None):
         """Submit media given a URL for transcription.
         The audio data is downloaded from the URL.
 
@@ -67,10 +68,9 @@ def submit_job_url(
             of a key "phrases" which maps to a list of strings, each of which
             represents a phrase you would like the speech recognition to bias
             itself toward.
-        :param filter_profanity: should rev.ai obscure profane words when
-                                 transcribing this file
-        :param remove_disfluencies: should rev.ai remove disfluencies when
-                                    transcribing this file
+        :param filter_profanity: whether to mask profane words
+        :param remove_disfluencies: whether to exclude filler words like "uh"
+        :param delete_after_seconds: number of seconds after job completion when job is auto-deleted
         :returns: raw response data
         :raises: HTTPError
         """
@@ -81,7 +81,7 @@ def submit_job_url(
                                                    callback_url, skip_diarization,
                                                    skip_punctuation, speaker_channels_count,
                                                    custom_vocabularies, filter_profanity,
-                                                   remove_disfluencies)
+                                                   remove_disfluencies, delete_after_seconds)
 
         response = self._make_http_request(
             "POST",
@@ -100,7 +100,8 @@ def submit_job_local_file(
             speaker_channels_count=None,
             custom_vocabularies=None,
             filter_profanity=False,
-            remove_disfluencies=False):
+            remove_disfluencies=False,
+            delete_after_seconds=None):
         """Submit a local file for transcription.
         Note that the content type is inferred if not provided.
 
@@ -121,10 +122,9 @@ def submit_job_local_file(
             recognition to find those phrases. Each dictionary has the key
             "phrases" which maps to a list of strings, each of which represents
             a phrase you would like the speech recognition to bias itself toward.
-        :param filter_profanity: should rev.ai obscure profane words when
-                                 transcribing this file
-        :param remove_disfluencies: should rev.ai remove disfluencies when
-                                    transcribing this file
+        :param filter_profanity: whether to mask profane words
+        :param remove_disfluencies: whether to exclude filler words like "uh"
+        :param delete_after_seconds: number of seconds after job completion when job is auto-deleted
         :returns: raw response data
         :raises: HTTPError
         """
@@ -134,7 +134,7 @@ def submit_job_local_file(
         payload = self._create_job_options_payload(None, metadata, callback_url, skip_diarization,
                                                    skip_punctuation, speaker_channels_count,
                                                    custom_vocabularies, filter_profanity,
-                                                   remove_disfluencies)
+                                                   remove_disfluencies, delete_after_seconds)
 
         with open(filename, 'rb') as f:
             files = {
@@ -372,7 +372,8 @@ def _create_job_options_payload(
             speaker_channels_count=None,
             custom_vocabularies=None,
             filter_profanity=None,
-            remove_disfluencies=None):
+            remove_disfluencies=None,
+            delete_after_seconds=None):
         payload = {}
         if media_url:
             payload['media_url'] = media_url
@@ -393,6 +394,8 @@ def _create_job_options_payload(
             payload['filter_profanity'] = filter_profanity
         if remove_disfluencies:
             payload['remove_disfluencies'] = remove_disfluencies
+        if delete_after_seconds is not None:
+            payload['delete_after_seconds'] = delete_after_seconds
         return payload
 
     def _create_captions_query(self, speaker_channel):
diff --git a/src/rev_ai/streamingclient.py b/src/rev_ai/streamingclient.py
@@ -66,11 +66,16 @@ def start(self,
               metadata=None,
               custom_vocabulary_id=None,
               filter_profanity=None,
-              remove_disfluencies=None):
+              remove_disfluencies=None,
+              delete_after_seconds=None):
         """Function to connect the websocket to the URL and start the response
             thread
         :param generator: generator object that yields binary audio data
         :param metadata: metadata to be attached to streaming job
+        :param custom_vocabulary_id: id of custom vocabulary to be used with this streaming job
+        :param filter_profanity: whether to mask profane words
+        :param remove_disfluencies: whether to exclude filler words like "uh"
+        :param delete_after_seconds: number of seconds after job completion when job is auto-deleted
         """
         url = self.base_url + '?' + urlencode({
             'access_token': self.access_token,
@@ -90,6 +95,9 @@ def start(self,
         if remove_disfluencies:
             url += '&' + urlencode({'remove_disfluencies': 'true'})
 
+        if delete_after_seconds is not None:
+            url += '&' + urlencode({'delete_after_seconds': delete_after_seconds})
+
         try:
             self.client.connect(url)
         except Exception as e:
diff --git a/tests/test_job.py b/tests/test_job.py
@@ -107,15 +107,16 @@ def test_submit_job_url_with_success(self, mock_session, make_mock_response):
             'skip_punctuation': True,
             'speaker_channels_count': 1,
             'filter_profanity': True,
-            'remove_disfluencies': True
+            'remove_disfluencies': True,
+            'delete_after_seconds': 0
         }
         response = make_mock_response(url=JOB_ID_URL, json_data=data)
         mock_session.request.return_value = response
         client = RevAiAPIClient(TOKEN)
 
         res = client.submit_job_url(MEDIA_URL, METADATA,
                                     CALLBACK_URL, True,
-                                    True, 1, CUSTOM_VOCAB, True, True)
+                                    True, 1, CUSTOM_VOCAB, True, True, 0)
 
         assert res == Job(JOB_ID,
                           CREATED_ON,
@@ -134,7 +135,8 @@ def test_submit_job_url_with_success(self, mock_session, make_mock_response):
                 'speaker_channels_count': 1,
                 'custom_vocabularies': CUSTOM_VOCAB,
                 'filter_profanity': True,
-                'remove_disfluencies': True
+                'remove_disfluencies': True,
+                'delete_after_seconds': 0
             },
             headers=client.default_headers)
 
@@ -155,7 +157,8 @@ def test_submit_job_local_file_with_success(self, mocker, mock_session, make_moc
             'skip_diarization': True,
             'speaker_channels_count': 1,
             'filter_profanity': True,
-            'remove_disfluencies': True
+            'remove_disfluencies': True,
+            'delete_after_seconds': 0
         }
         response = make_mock_response(url=JOB_ID_URL, json_data=data)
         mock_session.request.return_value = response
@@ -164,7 +167,7 @@ def test_submit_job_local_file_with_success(self, mocker, mock_session, make_moc
         with mocker.patch('src.rev_ai.apiclient.open', create=True)() as file:
             res = client.submit_job_local_file(FILENAME, METADATA,
                                                CALLBACK_URL, True,
-                                               True, 1, CUSTOM_VOCAB, True, True)
+                                               True, 1, CUSTOM_VOCAB, True, True, 0)
 
             assert res == Job(JOB_ID,
                               CREATED_ON,
@@ -186,7 +189,8 @@ def test_submit_job_local_file_with_success(self, mocker, mock_session, make_moc
                             'speaker_channels_count': 1,
                             'custom_vocabularies': CUSTOM_VOCAB,
                             'filter_profanity': True,
-                            'remove_disfluencies': True
+                            'remove_disfluencies': True,
+                            'delete_after_seconds': 0
                         }, sort_keys=True)
                     )
                 },
diff --git a/tests/test_streamingclient.py b/tests/test_streamingclient.py
@@ -58,14 +58,16 @@ def test_start_success(self, mock_streaming_client, mock_generator, capsys):
         metadata = "my metadata"
         filter_profanity = 'true'
         remove_disfluencies = 'true'
-        query_dict = {
+        delete_after_seconds = '0'
+        expected_query_dict = {
             'access_token': mock_streaming_client.access_token,
             'content_type': mock_streaming_client.config.get_content_type_string(),
             'user_agent': 'RevAi-PythonSDK/{}'.format(__version__),
             'custom_vocabulary_id': custom_vocabulary_id,
             'metadata': metadata,
             'filter_profanity': filter_profanity,
-            'remove_disfluencies': remove_disfluencies
+            'remove_disfluencies': remove_disfluencies,
+            'delete_after_seconds': delete_after_seconds
         }
         example_data = '{"type":"partial","transcript":"Test"}'
         example_connected = '{"type":"connected","id":"testid"}'
@@ -81,11 +83,11 @@ def test_start_success(self, mock_streaming_client, mock_generator, capsys):
         mock_streaming_client.client.recv_data.side_effect = data
 
         response_gen = mock_streaming_client.start(mock_generator(), metadata,
-                                                   custom_vocabulary_id, True)
+                                                   custom_vocabulary_id, True, True, 0)
 
         assert mock_streaming_client.client.connect.call_count == 1
-        called_url = mock_streaming_client.client.connect.call_args_list[0].args[0]
-        validate_query_parameters(called_url, query_dict)
+        called_url = mock_streaming_client.client.connect.call_args_list[0][0][0]
+        validate_query_parameters(called_url, expected_query_dict)
         mock_streaming_client.client.send_binary.assert_any_call(0)
         mock_streaming_client.client.send_binary.assert_any_call(1)
         mock_streaming_client.client.send_binary.assert_any_call(2)
@@ -107,8 +109,8 @@ def test_end(self, mock_streaming_client):
         mock_streaming_client.client.abort.assert_called_once_with()
 
 
-def validate_query_parameters(called_url, query_dict):
+def validate_query_parameters(called_url, expected_query_dict):
     called_query_string = urlparse(called_url).query
     called_query_parameters = parse_qs(called_query_string)
-    for key in called_query_parameters:
-        assert called_query_parameters[key][0] == query_dict[key]
+    for key in expected_query_dict:
+        assert called_query_parameters[key][0] == expected_query_dict[key]