Skip to content

Commit bac60f7

Browse files
committed
Implemented processors for removing nonbillables and validating billable PIs
1 parent d9b838c commit bac60f7

File tree

6 files changed

+134
-79
lines changed

6 files changed

+134
-79
lines changed

process_report/invoices/billable_invoice.py

-27
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
2020
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
2121
PI_S3_FILEPATH = "PIs/PI.csv"
2222

23-
nonbillable_pis: list[str]
24-
nonbillable_projects: list[str]
25-
2623
export_columns_list = [
2724
invoice.INVOICE_DATE_FIELD,
2825
invoice.PROJECT_FIELD,
@@ -61,26 +58,6 @@ def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
6158

6259
return old_pi_df
6360

64-
@staticmethod
65-
def _remove_nonbillables(
66-
data: pandas.DataFrame,
67-
nonbillable_pis: list[str],
68-
nonbillable_projects: list[str],
69-
):
70-
return data[
71-
~data[invoice.PI_FIELD].isin(nonbillable_pis)
72-
& ~data[invoice.PROJECT_FIELD].isin(nonbillable_projects)
73-
]
74-
75-
@staticmethod
76-
def _validate_pi_names(data: pandas.DataFrame):
77-
invalid_pi_projects = data[pandas.isna(data[invoice.PI_FIELD])]
78-
for i, row in invalid_pi_projects.iterrows():
79-
logger.warn(
80-
f"Billable project {row[invoice.PROJECT_FIELD]} has empty PI field"
81-
)
82-
return data[~pandas.isna(data[invoice.PI_FIELD])]
83-
8461
@staticmethod
8562
def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
8663
"""Returns time difference between current invoice month and PI's first invoice month
@@ -101,10 +78,6 @@ def _get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
10178
return month_diff
10279

10380
def _prepare(self):
104-
self.data = self._remove_nonbillables(
105-
self.data, self.nonbillable_pis, self.nonbillable_projects
106-
)
107-
self.data = self._validate_pi_names(self.data)
10881
self.data[invoice.CREDIT_FIELD] = None
10982
self.data[invoice.CREDIT_CODE_FIELD] = None
11083
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]

process_report/process_report.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from process_report.processors import (
1818
validate_pi_alias_processor,
1919
add_institution_processor,
20+
remove_nonbillables_processor,
21+
validate_billable_pi_processor,
2022
)
2123

2224
### PI file field names
@@ -227,15 +229,29 @@ def main():
227229
nonbillable_projects=projects,
228230
)
229231

232+
### Remove nonbillables
233+
234+
remove_nonbillables_proc = remove_nonbillables_processor.RemoveNonbillables(
235+
"", invoice_month, add_institute_proc.data, pi, projects
236+
)
237+
remove_nonbillables_proc.process()
238+
239+
validate_billable_pi_proc = (
240+
validate_billable_pi_processor.ValidateBillablePIsProcessor(
241+
"", invoice_month, remove_nonbillables_proc.data
242+
)
243+
)
244+
validate_billable_pi_proc.process()
245+
246+
### Initialize invoices
247+
230248
if args.upload_to_s3:
231249
backup_to_s3_old_pi_file(old_pi_file)
232250

233251
billable_inv = billable_invoice.BillableInvoice(
234252
name=args.output_file,
235253
invoice_month=invoice_month,
236-
data=add_institute_proc.data.copy(),
237-
nonbillable_pis=pi,
238-
nonbillable_projects=projects,
254+
data=validate_billable_pi_proc.data.copy(),
239255
old_pi_filepath=old_pi_file,
240256
)
241257

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from dataclasses import dataclass
2+
3+
import pandas
4+
5+
from process_report.invoices import invoice
6+
from process_report.processors import processor
7+
8+
9+
@dataclass
class RemoveNonbillables(processor.Processor):
    """Processor that filters nonbillable rows out of the invoice data.

    Attributes:
        nonbillable_pis: Values matched against the ``invoice.PI_FIELD``
            column; rows whose PI appears here are dropped.
        nonbillable_projects: Values matched against the
            ``invoice.PROJECT_FIELD`` column; rows whose project appears
            here are dropped.
    """

    nonbillable_pis: list[str]
    nonbillable_projects: list[str]

    @staticmethod
    def _remove_nonbillables(
        data: pandas.DataFrame,
        nonbillable_pis: list[str],
        nonbillable_projects: list[str],
    ):
        """Return the rows of ``data`` that are neither a nonbillable PI's nor a nonbillable project's.

        A row is kept only when its PI is not in ``nonbillable_pis`` AND its
        project is not in ``nonbillable_projects``.
        """
        pi_is_nonbillable = data[invoice.PI_FIELD].isin(nonbillable_pis)
        project_is_nonbillable = data[invoice.PROJECT_FIELD].isin(
            nonbillable_projects
        )
        # Equivalent to (~pi) & (~project) by De Morgan's law.
        return data[~(pi_is_nonbillable | project_is_nonbillable)]

    def _process(self):
        """Replace ``self.data`` with its billable-only subset."""
        self.data = self._remove_nonbillables(
            self.data,
            self.nonbillable_pis,
            self.nonbillable_projects,
        )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from dataclasses import dataclass
2+
import logging
3+
4+
import pandas
5+
6+
from process_report.invoices import invoice
7+
from process_report.processors import processor
8+
9+
logger = logging.getLogger(__name__)
10+
logging.basicConfig(level=logging.INFO)
11+
12+
13+
@dataclass
class ValidateBillablePIsProcessor(processor.Processor):
    """Processor that drops rows whose PI field is empty, logging each one.

    Rows with a missing (NaN/None) value in the ``invoice.PI_FIELD`` column
    are reported with a warning naming the affected project and are then
    removed from the data.
    """

    @staticmethod
    def _validate_pi_names(data: pandas.DataFrame):
        """Return ``data`` without the rows that have a missing PI.

        Emits one warning per dropped row, identifying the project from the
        ``invoice.PROJECT_FIELD`` column.
        """
        # Compute the mask once and reuse it for both reporting and filtering.
        missing_pi = pandas.isna(data[invoice.PI_FIELD])
        for project in data.loc[missing_pi, invoice.PROJECT_FIELD]:
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            logger.warning(f"Billable project {project} has empty PI field")
        return data[~missing_pi]

    def _process(self):
        """Replace ``self.data`` with the rows that have a PI set."""
        self.data = self._validate_pi_names(self.data)

process_report/tests/unit_tests.py

+40-45
Original file line numberDiff line numberDiff line change
@@ -104,26 +104,6 @@ def test_remove_billables(self):
104104
self.assertNotIn("ProjectE", result_df["Project - Allocation"].tolist())
105105

106106

107-
class TestBillableInvoice(TestCase):
108-
def test_remove_nonbillables(self):
109-
pis = [uuid.uuid4().hex for x in range(10)]
110-
projects = [uuid.uuid4().hex for x in range(10)]
111-
nonbillable_pis = pis[:3]
112-
nonbillable_projects = projects[7:]
113-
billable_pis = pis[3:7]
114-
data = pandas.DataFrame({"Manager (PI)": pis, "Project - Allocation": projects})
115-
116-
test_invoice = test_utils.new_billable_invoice()
117-
data = test_invoice._remove_nonbillables(
118-
data, nonbillable_pis, nonbillable_projects
119-
)
120-
self.assertTrue(data[data["Manager (PI)"].isin(nonbillable_pis)].empty)
121-
self.assertTrue(
122-
data[data["Project - Allocation"].isin(nonbillable_projects)].empty
123-
)
124-
self.assertTrue(data.equals(data[data["Manager (PI)"].isin(billable_pis)]))
125-
126-
127107
class TestMergeCSV(TestCase):
128108
def setUp(self):
129109
self.header = ["ID", "Name", "Age"]
@@ -278,6 +258,46 @@ def test_validate_alias(self):
278258
self.assertTrue(answer_data.equals(validate_pi_alias_proc.data))
279259

280260

261+
class TestRemoveNonbillablesProcessor(TestCase):
    """Unit tests for the nonbillable-removal processor."""

    def test_remove_nonbillables(self):
        """Rows of nonbillable PIs/projects are dropped; only billable PIs remain."""
        all_pis = [uuid.uuid4().hex for _ in range(10)]
        all_projects = [uuid.uuid4().hex for _ in range(10)]
        # First three PIs and last three projects are nonbillable, leaving
        # rows 3..6 as the only billable ones.
        excluded_pis, kept_pis = all_pis[:3], all_pis[3:7]
        excluded_projects = all_projects[7:]
        frame = pandas.DataFrame(
            {"Manager (PI)": all_pis, "Project - Allocation": all_projects}
        )

        proc = test_utils.new_remove_nonbillables_processor()
        filtered = proc._remove_nonbillables(
            frame, excluded_pis, excluded_projects
        )

        pi_column = filtered["Manager (PI)"]
        project_column = filtered["Project - Allocation"]
        self.assertTrue(filtered[pi_column.isin(excluded_pis)].empty)
        self.assertTrue(filtered[project_column.isin(excluded_projects)].empty)
        self.assertTrue(filtered.equals(filtered[pi_column.isin(kept_pis)]))
279+
280+
281+
class TestValidateBillablePIProcessor(TestCase):
    """Unit tests for the billable-PI validation processor."""

    def test_validate_billables(self):
        """A row with a missing PI is present before validation and gone after."""
        pi_values = ["PI1", math.nan, "PI1", "PI2", "PI2"]
        project_values = [
            "ProjectA",
            "ProjectB",
            "ProjectC",
            "ProjectD",
            "ProjectE",
        ]
        frame = pandas.DataFrame(
            {"Manager (PI)": pi_values, "Project - Allocation": project_values}
        )

        # Sanity check: the fixture contains exactly one row without a PI.
        missing_before = frame[pandas.isna(frame["Manager (PI)"])]
        self.assertEqual(1, len(missing_before))

        proc = test_utils.new_validate_billable_pi_processor()
        validated = proc._validate_pi_names(frame)

        missing_after = validated[pandas.isna(validated["Manager (PI)"])]
        self.assertEqual(0, len(missing_after))
299+
300+
281301
class TestMonthUtils(TestCase):
282302
def test_get_month_diff(self):
283303
testcases = [
@@ -707,31 +727,6 @@ def test_apply_BU_subsidy(self):
707727
self.assertEqual(50, output_df.loc[3, "Balance"])
708728

709729

710-
class TestValidateBillables(TestCase):
711-
def setUp(self):
712-
data = {
713-
"Manager (PI)": ["PI1", math.nan, "PI1", "PI2", "PI2"],
714-
"Project - Allocation": [
715-
"ProjectA",
716-
"ProjectB",
717-
"ProjectC",
718-
"ProjectD",
719-
"ProjectE",
720-
],
721-
}
722-
self.dataframe = pandas.DataFrame(data)
723-
724-
def test_validate_billables(self):
725-
self.assertEqual(
726-
1, len(self.dataframe[pandas.isna(self.dataframe["Manager (PI)"])])
727-
)
728-
test_invoice = test_utils.new_billable_invoice()
729-
validated_df = test_invoice._validate_pi_names(self.dataframe)
730-
self.assertEqual(
731-
0, len(validated_df[pandas.isna(validated_df["Manager (PI)"])])
732-
)
733-
734-
735730
class TestExportLenovo(TestCase):
736731
def setUp(self):
737732
data = {

process_report/tests/util.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from process_report.processors import (
1111
add_institution_processor,
1212
validate_pi_alias_processor,
13+
remove_nonbillables_processor,
14+
validate_billable_pi_processor,
1315
)
1416

1517

@@ -25,16 +27,12 @@ def new_billable_invoice(
2527
name="",
2628
invoice_month="0000-00",
2729
data=pandas.DataFrame(),
28-
nonbillable_pis=[],
29-
nonbillable_projects=[],
3030
old_pi_filepath="",
3131
):
3232
return billable_invoice.BillableInvoice(
3333
name,
3434
invoice_month,
3535
data,
36-
nonbillable_pis,
37-
nonbillable_projects,
3836
old_pi_filepath,
3937
)
4038

@@ -73,3 +71,23 @@ def new_validate_pi_alias_processor(
7371
return validate_pi_alias_processor.ValidatePIAliasProcessor(
7472
name, invoice_month, data, alias_map
7573
)
74+
75+
76+
def new_remove_nonbillables_processor(
    name="",
    invoice_month="0000-00",
    data=None,
    nonbillable_pis=None,
    nonbillable_projects=None,
):
    """Build a ``RemoveNonbillables`` processor with test-friendly defaults.

    Args:
        name: Processor name passed through to the constructor.
        invoice_month: Invoice month string passed through to the constructor.
        data: Input DataFrame; a fresh empty DataFrame is used when omitted.
        nonbillable_pis: Nonbillable PI names; a fresh empty list when omitted.
        nonbillable_projects: Nonbillable project names; a fresh empty list
            when omitted.

    Returns:
        The constructed ``RemoveNonbillables`` instance.
    """
    # Defaults are created per call: a mutable default in the signature would
    # be evaluated once and shared by every processor built without arguments.
    if data is None:
        data = pandas.DataFrame()
    if nonbillable_pis is None:
        nonbillable_pis = []
    if nonbillable_projects is None:
        nonbillable_projects = []
    return remove_nonbillables_processor.RemoveNonbillables(
        name, invoice_month, data, nonbillable_pis, nonbillable_projects
    )
86+
87+
88+
def new_validate_billable_pi_processor(
    name="", invoice_month="0000-00", data=None
):
    """Build a ``ValidateBillablePIsProcessor`` with test-friendly defaults.

    Args:
        name: Processor name passed through to the constructor.
        invoice_month: Invoice month string passed through to the constructor.
        data: Input DataFrame; a fresh empty DataFrame is used when omitted.

    Returns:
        The constructed ``ValidateBillablePIsProcessor`` instance.
    """
    # Create the default per call so processors never share one DataFrame
    # object that was instantiated when the function was defined.
    if data is None:
        data = pandas.DataFrame()
    return validate_billable_pi_processor.ValidateBillablePIsProcessor(
        name, invoice_month, data
    )

0 commit comments

Comments
 (0)