Skip to content

Commit 1f96e4b

Browse files
committed
Exported PI invoices as PDFs
The PI-specific dataframes will first be converted to HTML tables using Jinja templates, and then converted to PDFs using Chromium. Users of the script must now provide a path to the Chromium/Chrome binary through the env var `CHROME_BIN_PATH`. An HTML template folder has been added, and the test cases for the PI-specific invoice will now check both whether the dataframe is formatted correctly and whether the PDFs are correctly generated. The Dockerfile has been updated to install Chromium.
1 parent e95349a commit 1f96e4b

File tree

5 files changed

+315
-67
lines changed

5 files changed

+315
-67
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ FROM python:3.11-slim
22

33
WORKDIR /app
44

5-
RUN apt-get update && apt-get install -y git
5+
RUN apt-get update && apt-get install -y git chromium
66

77
COPY requirements.txt .
88
RUN pip install -r requirements.txt

process_report/invoices/pi_specific_invoice.py

Lines changed: 111 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
11
import os
2+
import sys
23
from dataclasses import dataclass
4+
import subprocess
5+
import tempfile
6+
import logging
37

48
import pandas
9+
from jinja2 import Environment, FileSystemLoader
510

611
import process_report.invoices.invoice as invoice
712
import process_report.util as util
813

914

15+
TEMPLATE_DIR_PATH = "process_report/templates"
16+
17+
18+
logger = logging.getLogger(__name__)
19+
logging.basicConfig(level=logging.INFO)
20+
21+
1022
@dataclass
1123
class PIInvoice(invoice.Invoice):
1224
"""
@@ -15,6 +27,21 @@ class PIInvoice(invoice.Invoice):
1527
- NewPICreditProcessor
1628
"""
1729

30+
TOTAL_COLUMN_LIST = [
31+
invoice.COST_FIELD,
32+
invoice.CREDIT_FIELD,
33+
invoice.BALANCE_FIELD,
34+
]
35+
36+
DOLLAR_COLUMN_LIST = [
37+
invoice.RATE_FIELD,
38+
invoice.GROUP_BALANCE_FIELD,
39+
invoice.COST_FIELD,
40+
invoice.GROUP_BALANCE_USED_FIELD,
41+
invoice.CREDIT_FIELD,
42+
invoice.BALANCE_FIELD,
43+
]
44+
1845
export_columns_list = [
1946
invoice.INVOICE_DATE_FIELD,
2047
invoice.PROJECT_FIELD,
@@ -43,31 +70,101 @@ def _prepare(self):
4370
]
4471
self.pi_list = self.export_data[invoice.PI_FIELD].unique()
4572

73+
def _get_pi_dataframe(self, data, pi):
    """Build the printable invoice table for a single PI.

    Selects the rows of ``data`` whose PI field equals ``pi``, drops the
    prepay group columns when no row has a group name, appends a "Total"
    row holding the sums of the columns in ``TOTAL_COLUMN_LIST``, prefixes
    the values of the columns in ``DOLLAR_COLUMN_LIST`` with ``$``, and
    replaces every remaining missing value with an empty string.

    Args:
        data: Dataframe containing the invoice rows of all PIs.
        pi: Value of the PI field identifying the rows to keep.

    Returns:
        A new dataframe (``data`` itself is not modified) whose last row
        is the "Total" row.
    """
    pi_rows = data[data[invoice.PI_FIELD] == pi].copy().reset_index(drop=True)

    # The prepay group columns carry no information when no project of
    # this PI belongs to a group, so leave them out of the invoice.
    group_names = pi_rows[invoice.GROUP_NAME_FIELD]
    if pandas.isna(group_names).all():
        prepay_columns = [
            invoice.GROUP_NAME_FIELD,
            invoice.GROUP_INSTITUTION_FIELD,
            invoice.GROUP_BALANCE_FIELD,
            invoice.GROUP_BALANCE_USED_FIELD,
        ]
        pi_rows = pi_rows.drop(columns=prepay_columns)

    # Compute the sums before the total row exists so that the new row
    # does not take part in its own totals.
    totalled_columns = [
        name for name in self.TOTAL_COLUMN_LIST if name in pi_rows.columns
    ]
    totals = [pi_rows[name].sum() for name in totalled_columns]

    # Append an all-empty row, then fill in its label and the sums.
    pi_rows.loc[len(pi_rows)] = None
    total_row = pi_rows.index[-1]
    pi_rows.loc[total_row, invoice.INVOICE_DATE_FIELD] = "Total"
    pi_rows.loc[total_row, totalled_columns] = totals

    def _with_dollar_sign(value):
        # Missing values stay missing so the final fillna blanks them.
        if pandas.isna(value):
            return value
        return f"${value}"

    for name in self.DOLLAR_COLUMN_LIST:
        if name not in pi_rows.columns:
            continue
        pi_rows[name] = pi_rows[name].apply(_with_dollar_sign)

    pi_rows.fillna("", inplace=True)

    return pi_rows
111+
46112
def export(self):
    """Export one PDF invoice per PI into the folder named ``self.name``.

    For every non-null PI in ``self.pi_list`` the PI's rows are rendered
    to a temporary HTML file with the ``pi_invoice.html`` Jinja template,
    and that file is printed to PDF by a headless Chromium/Chrome process.
    The browser binary is read from the ``CHROME_BIN_PATH`` environment
    variable and defaults to ``/usr/bin/chromium``.

    The program exits (``sys.exit``) if the browser binary does not exist.
    A browser run that ends with a non-zero exit code is logged as an
    error, and the remaining PIs are still processed.
    """

    def _create_html_invoice(temp_fd, pi_dataframe):
        # Autoescape so that field values containing HTML metacharacters
        # (for example "&" or "<") are rendered literally instead of
        # being interpreted as markup.
        environment = Environment(
            loader=FileSystemLoader(TEMPLATE_DIR_PATH),
            autoescape=True,
        )
        template = environment.get_template("pi_invoice.html")
        content = template.render(
            data=pi_dataframe,
        )
        temp_fd.write(content)
        # The browser reads the file by name, so the content has to be
        # on disk before it is launched.
        temp_fd.flush()

    def _create_pdf_invoice(temp_fd_name, invoice_pdf_path):
        chrome_binary_location = os.environ.get(
            "CHROME_BIN_PATH", "/usr/bin/chromium"
        )
        if not os.path.exists(chrome_binary_location):
            sys.exit(
                f"Chrome binary does not exist at {chrome_binary_location}. Make sure the env var CHROME_BIN_PATH is set correctly and that Google Chrome is installed"
            )

        result = subprocess.run(
            [
                chrome_binary_location,
                "--headless",
                "--no-sandbox",
                f"--print-to-pdf={invoice_pdf_path}",
                "--no-pdf-header-footer",
                f"file://{temp_fd_name}",
            ],
            capture_output=True,
        )
        # The output is captured, so without this check a failed run
        # would leave a missing PDF and no trace of the reason.
        if result.returncode != 0:
            logger.error(
                "Failed to create %s (exit code %s): %s",
                invoice_pdf_path,
                result.returncode,
                result.stderr.decode(errors="replace"),
            )

    self._filter_columns()

    # self.name is name of folder storing invoices
    os.makedirs(self.name, exist_ok=True)

    for pi in self.pi_list:
        if pandas.isna(pi):
            continue

        pi_dataframe = self._get_pi_dataframe(self.export_data, pi)
        pi_institution = pi_dataframe[invoice.INSTITUTION_FIELD].iat[0]
        invoice_pdf_path = (
            f"{self.name}/{pi_institution}_{pi}_{self.invoice_month}.pdf"
        )

        # The template declares charset utf-8, so write the file as UTF-8
        # regardless of the locale's default encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".html", encoding="utf-8"
        ) as temp_fd:
            _create_html_invoice(temp_fd, pi_dataframe)
            _create_pdf_invoice(temp_fd.name, invoice_pdf_path)
64161

65162
def export_s3(self, s3_bucket):
66163
def _export_s3_pi_invoice(pi_invoice):
67164
pi_invoice_path = os.path.join(self.name, pi_invoice)
68165
striped_invoice_path = os.path.splitext(pi_invoice_path)[0]
69-
output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.csv"
70-
output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.csv"
166+
output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.pdf"
167+
output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.pdf"
71168
s3_bucket.upload_file(pi_invoice_path, output_s3_path)
72169
s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path)
73170

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<!DOCTYPE html>
{#
  Invoice table for a single PI.

  Context:
    data -- a dataframe-like object; the template reads `data.columns`,
            `data.iterrows()` and `data.index`. Its last row is rendered
            as the totals row.
#}
<html lang="en">
<head>
    <meta charset="utf-8">
    <style>
        table {
            font-family: arial, sans-serif;
            border-collapse: collapse;
            width: 100%;
        }
        td, th {
            border: 1px solid #8d8d8d;
            text-align: left;
            padding: 8px;
        }
        th {
            text-align: center;
        }
        tr {
            page-break-inside: avoid;
        }
        tr:nth-child(even) {
            background-color: #dddddd;
        }
        tr:last-child {
            background-color: #dddddd;
            font-weight: bold;
        }
    </style>
</head>

<body>
    <table>
        {# Header row: one cell per dataframe column. #}
        <tr>
            {% for col in data.columns %}
            <th>{{col}}</th>
            {% endfor %}
        </tr>

        {% for i, row in data.iterrows() %}
        <tr>
            {% for field in row %}
                {% if i == data.index[-1] %}
                    {# Last row: show non-empty fields as bold header cells
                       and hide the borders of the empty ones. #}
                    {% if field %}
                    <th>{{field}}</th>
                    {% else %}
                    <td style="border-width: 0;"></td>
                    {% endif %}
                {% else %}
                <td>{{field}}</td>
                {% endif %}
            {% endfor %}
        </tr>
        {% endfor %}
    </table>

    <script>
        // To ensure the HTML invoice table always fits the page when
        // printed to PDF, the width of the page is assigned to be
        // the width of the table

        var table_width = document.getElementsByTagName('table')[0].clientWidth;
        const style = document.createElement('style');
        style.innerHTML = `
            @page {
                size: ${table_width}px 1200px;
            }
        `;
        document.head.appendChild(style);
    </script>
</body>
</html>

0 commit comments

Comments
 (0)