34
34
import cStringIO
35
35
import base64
36
36
import zlib
37
+ import math
37
38
38
39
# Package for reading multi-image TIFF files
39
40
from PIL import Image
44
45
import xml .etree
45
46
46
47
# Import PyPDF2
47
- from PyPDF2 import PdfFileMerger , PdfFileReader , PdfFileWriter
48
+ from PyPDF2 import PdfFileMerger , PdfFileReader , PdfFileWriter , utils
48
49
49
50
class PyPdf (object ):
50
51
"""Class to create pdfs from images"""
@@ -54,6 +55,27 @@ def __init__(self, gs):
54
55
self .gs = gs # Pointer to ghostscript object
55
56
pass
56
57
58
def mergeRotateAroundPointPage(self, page, page2, rotation, tx, ty):
    """Merge ``page2`` into ``page`` after rotating it about (tx, ty).

    The transform is composed from three 3x3 matrices: a translation
    by (-tx, -ty), a rotation by ``rotation`` (given in degrees and
    converted to radians here), and a translation back by (tx, ty).
    The six coefficients taken from the first two columns of the
    product are handed to ``page.mergeTransformedPage``.

    Approach taken from:
    http://stackoverflow.com/questions/6041244/how-to-merge-two-landscape-pdf-pages-using-pypdf/17392824#17392824
    (The original author noted it was unclear why PyPDF2's builtin
    page rotation functions did not work.)

    Returns whatever ``page.mergeTransformedPage`` returns.
    """
    angle = math.radians(rotation)
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)

    # Move the pivot point to the origin ...
    to_origin = [[1, 0, 0],
                 [0, 1, 0],
                 [-tx, -ty, 1]]
    # ... rotate about the origin ...
    rotate = [[cos_a, sin_a, 0],
              [-sin_a, cos_a, 0],
              [0, 0, 1]]
    # ... and move the pivot point back where it was.
    from_origin = [[1, 0, 0],
                   [0, 1, 0],
                   [tx, ty, 1]]

    ctm = utils.matrixMultiply(
        utils.matrixMultiply(to_origin, rotate), from_origin)

    # Only the first two columns of each row are passed on, in
    # row-major order: [0][0], [0][1], [1][0], [1][1], [2][0], [2][1].
    coefficients = [ctm[row][col] for row in range(3) for col in range(2)]
    return page.mergeTransformedPage(page2, coefficients)
78
+
57
79
def overlay_hocr_pages (self , dpi , hocr_filenames , orig_pdf_filename ):
58
80
59
81
logging .debug ("Going to overlay following files onto %s" % orig_pdf_filename )
@@ -75,9 +97,19 @@ def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
75
97
text_file = open (text_pg_filename , 'rb' )
76
98
text_files .append (text_file ) # Save this to close after we write the final pdf
77
99
text_pg = self .iter_pdf_page (text_file ).next ()
78
- orig_pg .mergePage (text_pg )
100
+ orig_rotation_angle = int (orig_pg .get ('/Rotate' , 0 ))
101
+
102
+ if orig_rotation_angle != 0 :
103
+ logging .info ("Original Rotation: %s" % orig_pg .get ("/Rotate" , 0 ))
104
+ self .mergeRotateAroundPointPage (orig_pg , text_pg , orig_rotation_angle , text_pg .mediaBox .getWidth ()/ 2 , text_pg .mediaBox .getWidth ()/ 2 )
105
+
106
+ # None of these commands worked for me:
107
+ #orig_pg.rotateCounterClockwise(orig_rotation_angle)
108
+ #orig_pg.mergeRotatedPage(text_pg,text_rotation_angle)
109
+ else :
110
+ orig_pg .mergePerge (text_pg )
111
+ orig_pg .compressContentStreams ()
79
112
writer .addPage (orig_pg )
80
- #text_file.close()
81
113
82
114
pdf_dir , pdf_basename = os .path .split (orig_pdf_filename )
83
115
basename = pdf_basename .split ('.' )[0 ]
0 commit comments