@@ -28,20 +28,20 @@ def get_text_lines(location, max_pages=5):
2828 extracted_text = BytesIO ()
2929 laparams = LAParams ()
3030 with open (location , 'rb' ) as pdf_file :
31- with contextlib . closing ( PDFParser (pdf_file )) as parser :
32- document = PDFDocument (parser )
33- if not document .is_extractable :
34- raise PDFTextExtractionNotAllowed (
35- 'Encrypted PDF document: text extraction is not allowed' )
31+ parser = PDFParser (pdf_file )
32+ document = PDFDocument (parser )
33+ if not document .is_extractable :
34+ raise PDFTextExtractionNotAllowed (
35+ 'Encrypted PDF document: text extraction is not allowed' )
3636
37- manager = PDFResourceManager ()
38- with contextlib .closing (
39- TextConverter (manager , extracted_text , laparams = laparams )) as extractor :
40- interpreter = PDFPageInterpreter (manager , extractor )
41- pages = PDFPage .create_pages (document )
42- for page_num , page in enumerate (pages , 1 ):
43- interpreter .process_page (page )
44- if max_pages and page_num == max_pages :
45- break
37+ manager = PDFResourceManager ()
38+ with contextlib .closing (
39+ TextConverter (manager , extracted_text , laparams = laparams )) as extractor :
40+ interpreter = PDFPageInterpreter (manager , extractor )
41+ pages = PDFPage .create_pages (document )
42+ for page_num , page in enumerate (pages , 1 ):
43+ interpreter .process_page (page )
44+ if max_pages and page_num == max_pages :
45+ break
4646 extracted_text .seek (0 )
4747 return extracted_text .readlines ()
0 commit comments