Skip to content

Commit e8c1e18

Browse files
Avoid using close methods on PDFParser objects
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 124ccd6 commit e8c1e18

File tree

3 files changed

+20
-24
lines changed

3 files changed

+20
-24
lines changed

src/textcode/pdf.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,20 @@ def get_text_lines(location, max_pages=5):
2828
extracted_text = BytesIO()
2929
laparams = LAParams()
3030
with open(location, 'rb') as pdf_file:
31-
with contextlib.closing(PDFParser(pdf_file)) as parser:
32-
document = PDFDocument(parser)
33-
if not document.is_extractable:
34-
raise PDFTextExtractionNotAllowed(
35-
'Encrypted PDF document: text extraction is not allowed')
31+
parser = PDFParser(pdf_file)
32+
document = PDFDocument(parser)
33+
if not document.is_extractable:
34+
raise PDFTextExtractionNotAllowed(
35+
'Encrypted PDF document: text extraction is not allowed')
3636

37-
manager = PDFResourceManager()
38-
with contextlib.closing(
39-
TextConverter(manager, extracted_text, laparams=laparams)) as extractor:
40-
interpreter = PDFPageInterpreter(manager, extractor)
41-
pages = PDFPage.create_pages(document)
42-
for page_num, page in enumerate(pages, 1):
43-
interpreter.process_page(page)
44-
if max_pages and page_num == max_pages:
45-
break
37+
manager = PDFResourceManager()
38+
with contextlib.closing(
39+
TextConverter(manager, extracted_text, laparams=laparams)) as extractor:
40+
interpreter = PDFPageInterpreter(manager, extractor)
41+
pages = PDFPage.create_pages(document)
42+
for page_num, page in enumerate(pages, 1):
43+
interpreter.process_page(page)
44+
if max_pages and page_num == max_pages:
45+
break
4646
extracted_text.seek(0)
4747
return extracted_text.readlines()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
license_expressions:
22
- sun-sissl-1.1
3+
- proprietary-license
4+
- cpal-1.0

tests/textcode/test_pdf.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,7 @@ def test_numbered_text_lines_does_not_fail_on_autocad_test_pdf(self):
9898
b'It is a print formatter driv-\n',
9999
b'en by XSL formatting ob-\n',
100100
b'jects (XSL-FO) and an out-\n',
101-
b'(XSL-FO) and an output in-\n',
102-
b'dependent formatter. It is a\n',
103-
b'Java application that reads\n',
104-
b'Line 2 of item 1\n',
105-
b'Apache FOP (Formatting\n',
106-
b'Objects Processor) est\n',
107-
b'une application de mise en\n',
108-
b'The end of the document\n',
109-
b'has now been reached.\n',
110-
]
101+
b'ter1. FOP has a nice logo:\n',
102+
b'Header 1.1 Header 1.2\n',
103+
b'See the FOP website for more information\n'
104+
]

0 commit comments

Comments
 (0)