Skip to content

Commit 42fdf96

Browse files
feat: support document translation output_format parameter, used to control translated file format
1 parent 178399d commit 42fdf96

File tree

6 files changed

+76
-13
lines changed

6 files changed

+76
-13
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99
### Added
10+
* Added `output_format` parameter to the document upload function, which indicates
11+
the file extension of the desired file format for the translated document.
1012
* Added basic usage example of the library
1113
### Fixed
1214
* Fixed typechecking errors when using `mypy`'s `strict` mode

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,10 @@ arguments, the available `translate_document()` and
244244

245245
- `formality`: same as in [Text translation options](#text-translation-options).
246246
- `glossary`: same as in [Text translation options](#text-translation-options).
247+
- `output_format`: (`translate_document()` only)
248+
file extension of the desired format of the translated file, for example: `'pdf'`. If
249+
unspecified, the translated file will be in the same format as the
250+
input file.
247251

248252
### Glossaries
249253

deepl/__main__.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import os
99
import pathlib
1010
import sys
11-
from typing import List
11+
from typing import List, Optional
1212
from deepl.util import _optional_import
1313

1414
# Program name for integration with click.testing
@@ -51,7 +51,11 @@ def action_languages(translator: deepl.Translator, glossary: bool):
5151

5252

5353
def action_document(
54-
translator: deepl.Translator, file: List[str], dest: str, **kwargs
54+
translator: deepl.Translator,
55+
file: List[str],
56+
dest: str,
57+
output_format: Optional[str],
58+
**kwargs,
5559
):
5660
"""Action function for the document command."""
5761
if not os.path.exists(dest):
@@ -60,7 +64,12 @@ def action_document(
6064
raise Exception("Destination already exists, and is not a directory")
6165

6266
for this_file in file:
63-
output_path = os.path.join(dest, os.path.basename(this_file))
67+
outfile_name = (
68+
this_file
69+
if not output_format
70+
else (os.path.splitext(this_file)[0] + "." + output_format)
71+
)
72+
output_path = os.path.join(dest, os.path.basename(outfile_name))
6473
translator.translate_document_from_filepath(
6574
this_file, output_path, **kwargs
6675
)
@@ -370,6 +379,9 @@ def add_common_arguments(subparser: argparse.ArgumentParser):
370379
parser_document.add_argument(
371380
"file", nargs="+", help="file(s) to be translated."
372381
)
382+
parser_document.add_argument(
383+
"--output-format", type=str, default=None, help="output file extension"
384+
)
373385
parser_document.add_argument(
374386
"dest", help="destination directory to store translated files."
375387
)

deepl/translator.py

+15
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,11 @@ def translate_document_from_filepath(
540540
translation. The exception includes information about the document
541541
request.
542542
"""
543+
# Determine output_format from output path
544+
in_ext = pathlib.PurePath(input_path).suffix.lower()
545+
out_ext = pathlib.PurePath(output_path).suffix.lower()
546+
output_format = None if in_ext == out_ext else out_ext[1:]
547+
543548
with open(input_path, "rb") as in_file:
544549
with open(output_path, "wb") as out_file:
545550
try:
@@ -550,6 +555,7 @@ def translate_document_from_filepath(
550555
source_lang=source_lang,
551556
formality=formality,
552557
glossary=glossary,
558+
output_format=output_format,
553559
)
554560
except Exception as e:
555561
out_file.close()
@@ -566,6 +572,7 @@ def translate_document(
566572
formality: Union[str, Formality] = Formality.DEFAULT,
567573
glossary: Union[str, GlossaryInfo, None] = None,
568574
filename: Optional[str] = None,
575+
output_format: Optional[str] = None,
569576
) -> DocumentStatus:
570577
"""Upload document, translate it into the target language, and download
571578
result.
@@ -585,6 +592,8 @@ def translate_document(
585592
translation. Must match specified source_lang and target_lang.
586593
:param filename: (Optional) Filename including extension, only required
587594
if uploading string or bytes containing file content.
595+
:param output_format: (Optional) Desired output file extension, if
596+
it differs from the input file format.
588597
:return: DocumentStatus when document translation completed, this
589598
allows the number of billed characters to be queried.
590599
@@ -599,6 +608,7 @@ def translate_document(
599608
formality=formality,
600609
glossary=glossary,
601610
filename=filename,
611+
output_format=output_format,
602612
)
603613

604614
try:
@@ -625,6 +635,7 @@ def translate_document_upload(
625635
formality: Union[str, Formality, None] = None,
626636
glossary: Union[str, GlossaryInfo, None] = None,
627637
filename: Optional[str] = None,
638+
output_format: Optional[str] = None,
628639
) -> DocumentHandle:
629640
"""Upload document to be translated and return handle associated with
630641
request.
@@ -642,12 +653,16 @@ def translate_document_upload(
642653
translation. Must match specified source_lang and target_lang.
643654
:param filename: (Optional) Filename including extension, only required
644655
if uploading string or bytes containing file content.
656+
:param output_format: (Optional) Desired output file extension, if
657+
it differs from the input file format.
645658
:return: DocumentHandle with ID and key identifying document.
646659
"""
647660

648661
request_data = self._check_language_and_formality(
649662
source_lang, target_lang, formality, glossary
650663
)
664+
if output_format:
665+
request_data["output_format"] = output_format
651666

652667
files: Dict[str, Any] = {}
653668
if isinstance(input_document, (str, bytes)):

tests/conftest.py

+21-10
Original file line numberDiff line numberDiff line change
@@ -323,27 +323,38 @@ def example_document_translation():
323323

324324

325325
@pytest.fixture
326-
def example_large_document_path(tmpdir):
326+
def example_large_document_translation():
327+
return (example_text["DE"] + "\n") * 1000
328+
329+
330+
@pytest.fixture
331+
def input_dir_path(tmpdir):
327332
tmpdir = pathlib.Path(tmpdir)
328-
path = tmpdir / "input" / "example_document.txt"
329-
path.parent.mkdir()
330-
path.write_text((example_text["EN"] + "\n") * 1000)
333+
path = tmpdir / "input"
334+
path.mkdir(exist_ok=True)
331335
return path
332336

333337

334338
@pytest.fixture
335-
def example_large_document_translation():
336-
return (example_text["DE"] + "\n") * 1000
339+
def output_dir_path(tmpdir):
340+
tmpdir = pathlib.Path(tmpdir)
341+
path = tmpdir / "output"
342+
path.mkdir(exist_ok=True)
343+
return path
337344

338345

339346
@pytest.fixture
340-
def output_document_path(tmpdir):
341-
tmpdir = pathlib.Path(tmpdir)
342-
path = tmpdir / "output" / "example_document.txt"
343-
path.parent.mkdir()
347+
def example_large_document_path(input_dir_path):
348+
path = input_dir_path / "example_document.txt"
349+
path.write_text((example_text["EN"] + "\n") * 1000)
344350
return path
345351

346352

353+
@pytest.fixture
354+
def output_document_path(output_dir_path):
355+
return output_dir_path / "example_document.txt"
356+
357+
347358
# Decorate test functions with "@needs_mock_server" to skip them if a real
348359
# server is used
349360
needs_mock_server = pytest.mark.skipif(

tests/test_translate_document.py

+19
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,25 @@ def test_translate_document_formality(
125125
assert "Wie geht es dir?" == output_document_path.read_text()
126126

127127

128+
@needs_mock_server
129+
def test_document_output_format(
130+
translator,
131+
example_document_path,
132+
output_dir_path,
133+
):
134+
# Mock server supports only TXT and HTML files, so translate TXT->HTML
135+
example_document_path.write_text(example_text["EN"])
136+
output_document_path = output_dir_path / "example.html"
137+
translator.translate_document_from_filepath(
138+
example_document_path,
139+
output_document_path,
140+
**default_lang_args,
141+
)
142+
143+
output = output_document_path.read_text()
144+
assert example_text["DE"] == output
145+
146+
128147
def test_document_failure_during_translation(
129148
translator, example_document_path, output_document_path
130149
):

0 commit comments

Comments
 (0)