Skip to content

Commit 1904622

Browse files
authored
Update fileformattools (#1133)
1 parent 6c83f5e commit 1904622

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

lib/sycamore/sycamore/utils/fileformat_tools.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,14 @@
1+
import logging
2+
import os
13
import subprocess
24
from pathlib import Path
35
from urllib.parse import urlparse
46
from tempfile import NamedTemporaryFile, TemporaryDirectory
57

68
from sycamore.data import Document
79

10+
logger = logging.getLogger(__name__)
11+
812

913
def binary_representation_to_pdf(doc: Document) -> Document:
1014
"""
@@ -29,19 +33,27 @@ def run_libreoffice(source_path, output_path):
2933
)
3034

3135
assert doc.binary_representation is not None
32-
extension = get_file_extension(doc.properties.get("path", "unknown"))
36+
origpath = doc.properties.get("path", "unknown")
37+
extension = get_file_extension(origpath)
3338

3439
with NamedTemporaryFile(suffix=f"{extension}") as temp_file:
3540
temp_file.write(doc.binary_representation)
3641
temp_file.flush()
3742

3843
temp_path = Path(temp_file.name)
3944

45+
pdffile = f"{temp_path.parent}/{temp_path.stem}.pdf"
46+
logger.info(f"Processing {origpath} to {pdffile}")
47+
with open(pdffile + "-path", "w") as pathfile:
48+
pathfile.write(origpath)
49+
4050
run_libreoffice(temp_path, temp_path.parent)
4151

42-
with open(f"{temp_path.parent}/{temp_path.stem}.pdf", "rb") as processed_file:
52+
with open(pdffile, "rb") as processed_file:
4353
doc.binary_representation = processed_file.read()
4454
doc.properties["filetype"] = "application/pdf"
55+
os.unlink(pdffile)
56+
os.unlink(pdffile + "-path")
4557

4658
return doc
4759

0 commit comments

Comments
 (0)