-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
32 lines (24 loc) · 981 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from classes.CoordsGrab import CoordsGrab
import tabula as tb
import easygui
def pdf2excel(file, filepages, coord, colunas):
    """Extract tabular data from a PDF and save it as ``output.xlsx``.

    The tables tabula finds on the requested pages are stacked into a single
    DataFrame, the first extracted row is promoted to column headers, and the
    result is printed and written to ``output.xlsx`` in the current working
    directory.

    Args:
        file: Path of the PDF to read.
        filepages: Page selection forwarded to ``tabula.read_pdf`` as
            ``pages``.
        coord: Extraction area forwarded as ``area``. ``relative_area=True``
            is set, so the values are percentages of the page size rather
            than points.
        colunas: Column split positions forwarded as ``columns``; also
            percentages, because ``relative_columns=True`` is set.

    Raises:
        ValueError: If tabula does not find any table in the given area.
    """
    # Imported locally so the module's top-level import block stays untouched.
    import pandas as pd

    tables = tb.read_pdf(
        file,
        pages=filepages,
        area=coord,
        columns=colunas,
        relative_area=True,
        relative_columns=True,
        # header=None keeps the first row as data so it can be promoted to
        # the header explicitly below.
        pandas_options={'header': None},
        output_format="dataframe",
        stream=True,
    )
    if not tables:
        raise ValueError(f"No table found in {file!r} on pages {filepages!r}")

    # read_pdf returns a list of DataFrames. Using only the first entry would
    # silently drop the tables found on the remaining pages, so stack them.
    combined = pd.concat(tables, ignore_index=True)

    # The first extracted row holds the column titles.
    headers = combined.iloc[0]
    df = combined.iloc[1:]
    df.columns = headers

    print(df)
    df.to_excel('output.xlsx')
# Ask the user which PDF to convert. fileopenbox() returns None when the
# dialog is cancelled, so stop here instead of passing None further down.
filename = easygui.fileopenbox()
if filename is None:
    raise SystemExit("No PDF file selected.")

# Pages 1 through 5 are extracted.
pdfpages = list(range(1, 6))

# CoordsGrab supplies the selected table corners, the column split positions
# and the page size for the chosen file.
CoordsObj = CoordsGrab(filename)
dataPoints = CoordsObj.coords
cols = CoordsObj.cols
pageSize = CoordsObj.size

# pdf2excel calls tabula with relative_area/relative_columns enabled, so the
# coordinates are converted to percentages of the page size.
# NOTE(review): assumes dataPoints holds two (x, y) corners (top-left first)
# and pageSize is (width, height), in the same units -- confirm in CoordsGrab.
topbound = dataPoints[0][1]/pageSize[1]*100
botbound = dataPoints[1][1]/pageSize[1]*100
leftbound = dataPoints[0][0]/pageSize[0]*100
rightbound = dataPoints[1][0]/pageSize[0]*100

# Order passed as tabula's ``area``: top, left, bottom, right.
coords = [topbound, leftbound, botbound, rightbound]

# Rescale the column positions in place (this also updates CoordsObj.cols).
cols[:] = [x/pageSize[0]*100 for x in cols]

pdf2excel(filename, pdfpages, coords, cols)