Skip to content

Commit eb2d471

Browse files
committed
Implemented discount_invoice to refactor BU-Internal and billable invoice
The BU-Internal and billable invoices now subclass `DiscountInvoice` (defined in the new `discount_invoice` module), an invoice class that implements a function to apply a flat discount to a PI's projects. This reduces code redundancy, since the New-PI credit and the BU subsidy share similar logic. Additional smaller changes were made to improve code readability.
1 parent c09fd98 commit eb2d471

File tree

6 files changed

+159
-88
lines changed

6 files changed

+159
-88
lines changed

process_report/invoices/billable_invoice.py

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,20 @@
11
from dataclasses import dataclass
2-
from decimal import Decimal
32
import logging
43
import sys
54

65
import pandas
76
import pyarrow
87

9-
import process_report.invoices.invoice as invoice
10-
import process_report.util as util
8+
from process_report.invoices import invoice, discount_invoice
9+
from process_report import util
1110

1211

1312
logger = logging.getLogger(__name__)
1413
logging.basicConfig(level=logging.INFO)
1514

1615

1716
@dataclass
18-
class BillableInvoice(invoice.Invoice):
17+
class BillableInvoice(discount_invoice.DiscountInvoice):
1918
NEW_PI_CREDIT_CODE = "0002"
2019
INITIAL_CREDIT_AMOUNT = 1000
2120
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
@@ -89,7 +88,7 @@ def _prepare(self):
8988
self.data = self._validate_pi_names(self.data)
9089
self.data[invoice.CREDIT_FIELD] = None
9190
self.data[invoice.CREDIT_CODE_FIELD] = None
92-
self.data[invoice.BALANCE_FIELD] = Decimal(0)
91+
self.data[invoice.BALANCE_FIELD] = self.data[invoice.COST_FIELD]
9392
self.old_pi_df = self._load_old_pis(self.old_pi_filepath)
9493

9594
def _process(self):
@@ -143,14 +142,17 @@ def get_initial_credit_amount(
143142

144143
current_pi_set = set(data[invoice.PI_FIELD])
145144
for pi in current_pi_set:
146-
pi_projects = data[data[invoice.PI_FIELD] == pi]
145+
credit_eligible_projects = data[
146+
(data[invoice.PI_FIELD] == pi)
147+
& ~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))
148+
]
147149
pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
148150
pi_old_pi_entry = old_pi_df.loc[
149151
old_pi_df[invoice.PI_PI_FIELD] == pi
150152
].squeeze()
151153

152154
if pi_age > 1:
153-
for i, row in pi_projects.iterrows():
155+
for i, row in credit_eligible_projects.iterrows():
154156
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
155157
else:
156158
if pi_age == 0:
@@ -176,25 +178,16 @@ def get_initial_credit_amount(
176178
)
177179
credit_used_field = invoice.PI_2ND_USED
178180

179-
initial_credit = remaining_credit
180-
for i, row in pi_projects.iterrows():
181-
if (
182-
remaining_credit == 0
183-
or row[invoice.SU_TYPE_FIELD] in self.EXCLUDE_SU_TYPES
184-
):
185-
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
186-
else:
187-
project_cost = row[invoice.COST_FIELD]
188-
applied_credit = min(project_cost, remaining_credit)
189-
190-
data.at[i, invoice.CREDIT_FIELD] = applied_credit
191-
data.at[i, invoice.CREDIT_CODE_FIELD] = self.NEW_PI_CREDIT_CODE
192-
data.at[i, invoice.BALANCE_FIELD] = (
193-
row[invoice.COST_FIELD] - applied_credit
194-
)
195-
remaining_credit -= applied_credit
181+
credits_used = self.apply_flat_discount(
182+
data,
183+
credit_eligible_projects,
184+
remaining_credit,
185+
invoice.CREDIT_FIELD,
186+
invoice.BALANCE_FIELD,
187+
invoice.CREDIT_CODE_FIELD,
188+
self.NEW_PI_CREDIT_CODE,
189+
)
196190

197-
credits_used = initial_credit - remaining_credit
198191
if (pi_old_pi_entry[credit_used_field] != 0) and (
199192
credits_used != pi_old_pi_entry[credit_used_field]
200193
):

process_report/invoices/bu_internal_invoice.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
from decimal import Decimal
33

44
import process_report.invoices.invoice as invoice
5+
import process_report.invoices.discount_invoice as discount_invoice
56

67

78
@dataclass
8-
class BUInternalInvoice(invoice.Invoice):
9+
class BUInternalInvoice(discount_invoice.DiscountInvoice):
910
subsidy_amount: int
1011

1112
def _prepare(self):
@@ -34,35 +35,36 @@ def get_project(row):
3435
]
3536

3637
def _process(self):
37-
project_list = self.data["Project"].unique()
38-
data_no_dup = self.data.drop_duplicates("Project", inplace=False)
38+
data_summed_projects = self._sum_project_allocations(self.data)
39+
self.data = self._apply_subsidy(data_summed_projects, self.subsidy_amount)
40+
41+
def _sum_project_allocations(self, dataframe):
42+
"""A project may have multiple allocations, and therefore multiple rows
43+
in the raw invoices. For BU-Internal invoice, we only want 1 row for
44+
each unique project, summing up its allocations' costs"""
45+
project_list = dataframe["Project"].unique()
46+
data_no_dup = dataframe.drop_duplicates("Project", inplace=False)
3947
sum_fields = [invoice.COST_FIELD, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD]
4048
for project in project_list:
41-
project_mask = self.data["Project"] == project
49+
project_mask = dataframe["Project"] == project
4250
no_dup_project_mask = data_no_dup["Project"] == project
4351

44-
sum_fields_sums = self.data[project_mask][sum_fields].sum().values
52+
sum_fields_sums = dataframe[project_mask][sum_fields].sum().values
4553
data_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums
4654

47-
self.data = self._apply_subsidy(data_no_dup, self.subsidy_amount)
55+
return data_no_dup
4856

4957
def _apply_subsidy(self, dataframe, subsidy_amount):
5058
pi_list = dataframe[invoice.PI_FIELD].unique()
5159

5260
for pi in pi_list:
5361
pi_projects = dataframe[dataframe[invoice.PI_FIELD] == pi]
54-
remaining_subsidy = subsidy_amount
55-
for i, row in pi_projects.iterrows():
56-
project_remaining_cost = row[invoice.BALANCE_FIELD]
57-
applied_subsidy = min(project_remaining_cost, remaining_subsidy)
58-
59-
dataframe.at[i, invoice.SUBSIDY_FIELD] = applied_subsidy
60-
dataframe.at[i, invoice.BALANCE_FIELD] = (
61-
row[invoice.BALANCE_FIELD] - applied_subsidy
62-
)
63-
remaining_subsidy -= applied_subsidy
64-
65-
if remaining_subsidy == 0:
66-
break
62+
self.apply_flat_discount(
63+
dataframe,
64+
pi_projects,
65+
subsidy_amount,
66+
invoice.SUBSIDY_FIELD,
67+
invoice.BALANCE_FIELD,
68+
)
6769

6870
return dataframe
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from dataclasses import dataclass
2+
3+
import pandas
4+
5+
import process_report.invoices.invoice as invoice
6+
7+
8+
@dataclass
class DiscountInvoice(invoice.Invoice):
    """
    Invoice base class offering helpers for applying discounts to
    invoice dataframes.
    """

    @staticmethod
    def apply_flat_discount(
        invoice: pandas.DataFrame,
        pi_projects: pandas.DataFrame,
        discount_amount: int,
        discount_field: str,
        balance_field: str,
        code_field: str = None,
        discount_code: str = None,
    ):
        """
        Spread a flat discount over a PI's projects, in row order.

        The rows of `pi_projects` are visited in order; each one receives
        as much of the remaining discount as its balance allows, until the
        discount is exhausted. The results are written straight into
        `invoice` (looked up by the row labels of `pi_projects`), so the
        modified dataframe is not returned.

        The value in `balance_field` is taken to be what the PI would pay
        for the project before this discount is applied.

        When both `code_field` and `discount_code` are given, every project
        visited before the discount ran out gets `discount_code` written to
        its `code_field`: set directly if the field is empty (NA),
        otherwise comma-appended to the existing value.

        :param invoice: Dataframe containing all projects; modified in place
        :param pi_projects: Subset of `invoice` holding the PI's projects the discount applies to
        :param discount_amount: Total discount available to the PI
        :param discount_field: Name of the field receiving the discount applied to each project
        :param balance_field: Name of the balance field
        :param code_field: Name of the discount code field (optional)
        :param discount_code: Code of the discount (optional)
        :return: The amount of discount actually used
        """
        # Whether to tag projects does not depend on the row, so decide once.
        tag_projects = bool(code_field and discount_code)

        discount_left = discount_amount
        for row_label, project in pi_projects.iterrows():
            if discount_left == 0:
                break

            # Balance is read from the `pi_projects` row snapshot, while the
            # writes go to `invoice`.
            balance_before = project[balance_field]
            applied = min(balance_before, discount_left)
            invoice.at[row_label, discount_field] = applied
            invoice.at[row_label, balance_field] = balance_before - applied
            discount_left -= applied

            if not tag_projects:
                continue

            current_code = invoice.at[row_label, code_field]
            invoice.at[row_label, code_field] = (
                discount_code
                if pandas.isna(current_code)
                else current_code + "," + discount_code
            )

        return discount_amount - discount_left

process_report/process_report.py

Lines changed: 10 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22
import os
33
import sys
44
import datetime
5-
import functools
65

76
import json
87
import pandas
9-
import boto3
108
import pyarrow
119

10+
from process_report.util import get_invoice_bucket, process_and_export_invoices
1211
from process_report.invoices import (
1312
lenovo_invoice,
1413
nonbillable_invoice,
@@ -87,22 +86,6 @@ def load_alias(alias_file):
8786
return alias_dict
8887

8988

90-
@functools.lru_cache
91-
def get_invoice_bucket():
92-
try:
93-
s3_resource = boto3.resource(
94-
service_name="s3",
95-
endpoint_url=os.environ.get(
96-
"S3_ENDPOINT", "https://s3.us-east-005.backblazeb2.com"
97-
),
98-
aws_access_key_id=os.environ["S3_KEY_ID"],
99-
aws_secret_access_key=os.environ["S3_APP_KEY"],
100-
)
101-
except KeyError:
102-
print("Error: Please set the environment variables S3_KEY_ID and S3_APP_KEY")
103-
return s3_resource.Bucket(os.environ.get("S3_BUCKET_NAME", "nerc-invoicing"))
104-
105-
10689
def get_iso8601_time():
10790
return datetime.datetime.now().strftime("%Y%m%dT%H%M%SZ")
10891

@@ -249,12 +232,6 @@ def main():
249232
nonbillable_pis=pi,
250233
nonbillable_projects=projects,
251234
)
252-
for invoice in [lenovo_inv, nonbillable_inv]:
253-
invoice.process()
254-
invoice.export()
255-
if args.upload_to_s3:
256-
bucket = get_invoice_bucket()
257-
invoice.export_s3(bucket)
258235

259236
if args.upload_to_s3:
260237
backup_to_s3_old_pi_file(old_pi_file)
@@ -267,35 +244,27 @@ def main():
267244
nonbillable_projects=projects,
268245
old_pi_filepath=old_pi_file,
269246
)
270-
billable_inv.process()
271-
billable_inv.export()
247+
248+
process_and_export_invoices(
249+
[lenovo_inv, nonbillable_inv, billable_inv], args.upload_to_s3
250+
)
272251

273252
nerc_total_inv = NERC_total_invoice.NERCTotalInvoice(
274253
name=args.NERC_total_invoice_file,
275254
invoice_month=invoice_month,
276-
data=billable_inv.data,
255+
data=billable_inv.data.copy(),
277256
)
278-
nerc_total_inv.process()
279-
nerc_total_inv.export()
280-
281-
if args.upload_to_s3:
282-
for invoice in [billable_inv, nerc_total_inv]:
283-
bucket = get_invoice_bucket()
284-
invoice.export_s3(bucket)
285257

286258
bu_internal_inv = bu_internal_invoice.BUInternalInvoice(
287259
name=args.BU_invoice_file,
288260
invoice_month=invoice_month,
289-
data=billable_inv.data,
261+
data=billable_inv.data.copy(),
290262
subsidy_amount=args.BU_subsidy_amount,
291263
)
292-
bu_internal_inv.process()
293-
bu_internal_inv.export()
294-
if args.upload_to_s3:
295-
bucket = get_invoice_bucket()
296-
bu_internal_inv.export_s3(bucket)
297264

298-
export_pi_billables(billable_inv.data, args.output_folder, invoice_month)
265+
process_and_export_invoices([nerc_total_inv, bu_internal_inv], args.upload_to_s3)
266+
267+
export_pi_billables(billable_inv.data.copy(), args.output_folder, invoice_month)
299268

300269
if args.upload_to_s3:
301270
invoice_list = list()

process_report/tests/unit_tests.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import os
66
import uuid
77
import math
8-
from decimal import Decimal
98
from textwrap import dedent
109

1110
from process_report import process_report, util
@@ -419,7 +418,7 @@ def setUp(self):
419418
self.dataframe = pandas.DataFrame(data)
420419
self.dataframe["Credit"] = None
421420
self.dataframe["Credit Code"] = None
422-
self.dataframe["Balance"] = Decimal(0)
421+
self.dataframe["Balance"] = self.dataframe["Cost"]
423422
self.answer_dataframe = pandas.DataFrame(answer_df_dict)
424423
old_pi = [
425424
"PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
@@ -516,7 +515,7 @@ def setUp(self):
516515
)
517516
self.dataframe_no_gpu["Credit"] = None
518517
self.dataframe_no_gpu["Credit Code"] = None
519-
self.dataframe_no_gpu["Balance"] = Decimal(0)
518+
self.dataframe_no_gpu["Balance"] = self.dataframe_no_gpu["Cost"]
520519
old_pi_no_gpu = [
521520
"PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used",
522521
"OldPI,2024-03,500,200,0",

0 commit comments

Comments
 (0)