Skip to content

Commit 745ecff

Browse files
committed
Allow limiting New-PI credit to partner institutions
`process_report.py` now uses `nerc_rates` to determine whether the New-PI credit is limited to institutions that are MGHPCC partners for the given invoice month. To decide whether an institution's MGHPCC partnership is active, the script first checks whether the `mghpcc_partnership_start_date` field is set in `institute_list.yaml`, and then checks whether that start date falls within or before the invoice month.
1 parent 9a9a24b commit 745ecff

File tree

7 files changed

+77
-14
lines changed

7 files changed

+77
-14
lines changed

process_report/institute_list.yaml

-7
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
- display_name: Boston University
55
domains:
66
- bu.edu
7-
- robbaron
87
- display_name: Bentley
98
domains:
109
- bentley.edu
@@ -17,7 +16,6 @@
1716
- display_name: Boston Childrens Hospital
1817
domains:
1918
- childrens.harvard.edu
20-
- rudolph
2119
- display_name: McLean Hospital
2220
domains:
2321
- mclean.harvard.edu
@@ -36,9 +34,6 @@
3634
- display_name: Harvard University
3735
domains:
3836
- harvard.edu
39-
- mmsh
40-
- kmdalton
41-
- francesco.pontiggia
4237
- chemistry.harvard.edu
4338
- display_name: Worcester Polytechnic Institute
4439
domains:
@@ -49,8 +44,6 @@
4944
- display_name: University of Massachusetts Amherst
5045
domains:
5146
- umass.edu
52-
- gstuart
53-
- mzink
5447
- display_name: University of Massachusetts Lowell
5548
domains:
5649
- uml.edu

process_report/invoices/billable_invoice.py

+33-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from dataclasses import dataclass
2+
from datetime import datetime
23
import logging
34
import sys
45

@@ -23,6 +24,7 @@ class BillableInvoice(discount_invoice.DiscountInvoice):
2324
nonbillable_pis: list[str]
2425
nonbillable_projects: list[str]
2526
old_pi_filepath: str
27+
limit_new_pi_credit_to_partners: bool = False
2628

2729
@staticmethod
2830
def _load_old_pis(old_pi_filepath) -> pandas.DataFrame:
@@ -115,6 +117,31 @@ def export_s3(self, s3_bucket):
115117
super().export_s3(s3_bucket)
116118
s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH)
117119

120+
def _filter_partners(self, data):
121+
active_partnerships = list()
122+
institute_list = util.load_institute_list()
123+
for institute_info in institute_list:
124+
if partnership_start_date := institute_info.get(
125+
"mghpcc_partnership_start_date"
126+
):
127+
partnership_year_month = "{:%Y-%m}".format(
128+
datetime.strptime(partnership_start_date, "%Y-%m-%d")
129+
)
130+
if util.get_month_diff(self.invoice_month, partnership_year_month) >= 0:
131+
active_partnerships.append(institute_info["display_name"])
132+
133+
return data[data[invoice.INSTITUTION_FIELD].isin(active_partnerships)]
134+
135+
def _filter_excluded_su_types(self, data):
136+
return data[~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))]
137+
138+
def _get_credit_eligible_projects(self, data: pandas.DataFrame):
139+
filtered_data = self._filter_excluded_su_types(data)
140+
if self.limit_new_pi_credit_to_partners:
141+
filtered_data = self._filter_partners(filtered_data)
142+
143+
return filtered_data
144+
118145
def _apply_credits_new_pi(
119146
self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame
120147
):
@@ -140,19 +167,19 @@ def get_initial_credit_amount(
140167
)
141168
print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}")
142169

143-
current_pi_set = set(data[invoice.PI_FIELD])
170+
credit_eligible_projects = self._get_credit_eligible_projects(data)
171+
current_pi_set = set(credit_eligible_projects[invoice.PI_FIELD])
144172
for pi in current_pi_set:
145-
credit_eligible_projects = data[
146-
(data[invoice.PI_FIELD] == pi)
147-
& ~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))
173+
pi_projects = credit_eligible_projects[
174+
credit_eligible_projects[invoice.PI_FIELD] == pi
148175
]
149176
pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month)
150177
pi_old_pi_entry = old_pi_df.loc[
151178
old_pi_df[invoice.PI_PI_FIELD] == pi
152179
].squeeze()
153180

154181
if pi_age > 1:
155-
for i, row in credit_eligible_projects.iterrows():
182+
for i, row in pi_projects.iterrows():
156183
data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD]
157184
else:
158185
if pi_age == 0:
@@ -180,7 +207,7 @@ def get_initial_credit_amount(
180207

181208
credits_used = self.apply_flat_discount(
182209
data,
183-
credit_eligible_projects,
210+
pi_projects,
184211
remaining_credit,
185212
invoice.CREDIT_FIELD,
186213
invoice.BALANCE_FIELD,

process_report/process_report.py

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import pandas
66
import pyarrow
7+
from nerc_rates import load_from_url
78

89
from process_report import util
910
from process_report.invoices import (
@@ -215,13 +216,17 @@ def main():
215216
if args.upload_to_s3:
216217
backup_to_s3_old_pi_file(old_pi_file)
217218

219+
rates_info = load_from_url()
218220
billable_inv = billable_invoice.BillableInvoice(
219221
name=args.output_file,
220222
invoice_month=invoice_month,
221223
data=merged_dataframe.copy(),
222224
nonbillable_pis=pi,
223225
nonbillable_projects=projects,
224226
old_pi_filepath=old_pi_file,
227+
limit_new_pi_credit_to_partners=rates_info.get_value_at(
228+
"Limit New PI Credit to MGHPCC Partners", invoice_month
229+
),
225230
)
226231

227232
util.process_and_export_invoices(

process_report/tests/unit_tests.py

+35
Original file line numberDiff line numberDiff line change
@@ -833,3 +833,38 @@ def test_upload_to_s3(self, mock_get_time, mock_get_bucket):
833833

834834
for i, call_args in enumerate(mock_bucket.upload_file.call_args_list):
835835
self.assertTrue(answers[i] in call_args)
836+
837+
838+
class TestNERCRates(TestCase):
839+
@mock.patch("process_report.util.load_institute_list")
840+
def test_flag_limit_new_pi_credit(self, mock_load_institute_list):
841+
mock_load_institute_list.return_value = [
842+
{"display_name": "BU", "mghpcc_partnership_start_date": "2024-02-01"},
843+
{"display_name": "HU", "mghpcc_partnership_start_date": "2024-06-31"},
844+
{"display_name": "NEU", "mghpcc_partnership_start_date": "2024-11-01"},
845+
]
846+
sample_df = pandas.DataFrame(
847+
{
848+
"Institution": ["BU", "HU", "NEU", "MIT", "BC"],
849+
}
850+
)
851+
sample_inv = test_utils.new_billable_invoice(
852+
limit_new_pi_credit_to_partners=True
853+
)
854+
855+
# When no partnerships are active
856+
sample_inv.invoice_month = "2024-01"
857+
output_df = sample_inv._filter_partners(sample_df)
858+
self.assertTrue(output_df.empty)
859+
860+
# When some partnerships are active
861+
sample_inv.invoice_month = "2024-06"
862+
output_df = sample_inv._filter_partners(sample_df)
863+
answer_df = pandas.DataFrame({"Institution": ["BU", "HU"]})
864+
self.assertTrue(output_df.equals(answer_df))
865+
866+
# When all partnerships are active
867+
sample_inv.invoice_month = "2024-12"
868+
output_df = sample_inv._filter_partners(sample_df)
869+
answer_df = pandas.DataFrame({"Institution": ["BU", "HU", "NEU"]})
870+
self.assertTrue(output_df.equals(answer_df))

process_report/tests/util.py

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def new_billable_invoice(
2323
nonbillable_pis=[],
2424
nonbillable_projects=[],
2525
old_pi_filepath="",
26+
limit_new_pi_credit_to_partners=False,
2627
):
2728
return billable_invoice.BillableInvoice(
2829
name,
@@ -31,6 +32,7 @@ def new_billable_invoice(
3132
nonbillable_pis,
3233
nonbillable_projects,
3334
old_pi_filepath,
35+
limit_new_pi_credit_to_partners,
3436
)
3537

3638

process_report/util.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_institution_from_pi(institute_map, pi_uname):
4444
institution_domain = institution_domain[institution_domain.find(".") + 1 :]
4545

4646
if institution_name == "":
47-
print(f"Warning: PI name {pi_uname} does not match any institution!")
47+
logger.warn(f"PI name {pi_uname} does not match any institution!")
4848

4949
return institution_name
5050

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
git+https://github.com/CCI-MOC/nerc-rates@74eb4a7#egg=nerc_rates
12
pandas
23
pyarrow
34
boto3

0 commit comments

Comments
 (0)