From ce715221d28e338a1bcca8002f26bd0a516470e5 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Wed, 11 Sep 2024 13:44:40 -0400 Subject: [PATCH] Allow limiting New-PI credit to partner institutions `process_report.py` will now use `nerc_rates` to determine if the New-PI credit is limited to institutions that are MGHPCC partners for the given invoice month. The script determines whether an institution's MGHPCC partnership is active by first checking whether the `mghpcc_partnership_start_date` field is set in `institute_list.yaml`, and then checking whether that start date falls within or before the invoice month. --- process_report/invoices/billable_invoice.py | 38 +++++++++++++++++---- process_report/process_report.py | 5 +++ process_report/tests/unit_tests.py | 35 +++++++++++++++++++ process_report/tests/util.py | 2 ++ requirements.txt | 1 + 5 files changed, 75 insertions(+), 6 deletions(-) diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py index 0cf7d3e..1ddab12 100644 --- a/process_report/invoices/billable_invoice.py +++ b/process_report/invoices/billable_invoice.py @@ -23,6 +23,7 @@ class BillableInvoice(discount_invoice.DiscountInvoice): nonbillable_pis: list[str] nonbillable_projects: list[str] old_pi_filepath: str + limit_new_pi_credit_to_partners: bool = False @staticmethod def _load_old_pis(old_pi_filepath) -> pandas.DataFrame: @@ -115,6 +116,31 @@ def export_s3(self, s3_bucket): super().export_s3(s3_bucket) s3_bucket.upload_file(self.old_pi_filepath, self.PI_S3_FILEPATH) + def _filter_partners(self, data): + active_partnerships = list() + institute_list = util.load_institute_list() + for institute_info in institute_list: + if partnership_start_date := institute_info.get( + "mghpcc_partnership_start_date", None + ): + if ( + util.get_month_diff(self.invoice_month, partnership_start_date[:-3]) + >= 0 + ): + active_partnerships.append(institute_info["display_name"]) + + return 
data[data[invoice.INSTITUTION_FIELD].isin(active_partnerships)] + + def _filter_excluded_su_types(self, data): + return data[~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES))] + + def _get_credit_eligible_projects(self, data: pandas.DataFrame): + filtered_data = self._filter_excluded_su_types(data) + if self.limit_new_pi_credit_to_partners: + filtered_data = self._filter_partners(filtered_data) + + return filtered_data + def _apply_credits_new_pi( self, data: pandas.DataFrame, old_pi_df: pandas.DataFrame ): @@ -140,11 +166,11 @@ def get_initial_credit_amount( ) print(f"New PI Credit set at {new_pi_credit_amount} for {self.invoice_month}") - current_pi_set = set(data[invoice.PI_FIELD]) + credit_eligible_projects = self._get_credit_eligible_projects(data) + current_pi_set = set(credit_eligible_projects[invoice.PI_FIELD]) for pi in current_pi_set: - credit_eligible_projects = data[ - (data[invoice.PI_FIELD] == pi) - & ~(data[invoice.SU_TYPE_FIELD].isin(self.EXCLUDE_SU_TYPES)) + pi_projects = credit_eligible_projects[ + credit_eligible_projects[invoice.PI_FIELD] == pi ] pi_age = self._get_pi_age(old_pi_df, pi, self.invoice_month) pi_old_pi_entry = old_pi_df.loc[ @@ -152,7 +178,7 @@ def get_initial_credit_amount( ].squeeze() if pi_age > 1: - for i, row in credit_eligible_projects.iterrows(): + for i, row in pi_projects.iterrows(): data.at[i, invoice.BALANCE_FIELD] = row[invoice.COST_FIELD] else: if pi_age == 0: @@ -180,7 +206,7 @@ def get_initial_credit_amount( credits_used = self.apply_flat_discount( data, - credit_eligible_projects, + pi_projects, remaining_credit, invoice.CREDIT_FIELD, invoice.BALANCE_FIELD, diff --git a/process_report/process_report.py b/process_report/process_report.py index 7026715..cfe35a1 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -5,6 +5,7 @@ import pandas import pyarrow +from nerc_rates import load_from_url from process_report import util from process_report.invoices import ( @@ -215,6 
+216,7 @@ def main(): if args.upload_to_s3: backup_to_s3_old_pi_file(old_pi_file) + rates_info = load_from_url() billable_inv = billable_invoice.BillableInvoice( name=args.output_file, invoice_month=invoice_month, @@ -222,6 +224,9 @@ def main(): nonbillable_pis=pi, nonbillable_projects=projects, old_pi_filepath=old_pi_file, + limit_new_pi_credit_to_partners=rates_info.get_value_at( + "Limit New PI Credit to MGHPCC Partners", invoice_month + ), ) util.process_and_export_invoices( diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 1fdc725..17f7b9c 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -818,3 +818,38 @@ def test_remove_prefix(self, mock_get_time, mock_get_bucket): process_report.upload_to_s3(filenames, invoice_month) for i, call_args in enumerate(mock_bucket.upload_file.call_args_list): self.assertTrue(answers[i] in call_args) + + +class TestNERCRates(TestCase): + @mock.patch("process_report.util.load_institute_list") + def test_flag_limit_new_pi_credit(self, mock_load_institute_list): + mock_load_institute_list.return_value = [ + {"display_name": "BU", "mghpcc_partnership_start_date": "2024-02-01"}, + {"display_name": "HU", "mghpcc_partnership_start_date": "2024-06-31"}, + {"display_name": "NEU", "mghpcc_partnership_start_date": "2024-11-01"}, + ] + sample_df = pandas.DataFrame( + { + "Institution": ["BU", "HU", "NEU", "MIT", "BC"], + } + ) + sample_inv = test_utils.new_billable_invoice( + limit_new_pi_credit_to_partners=True + ) + + # When no partnerships are active + sample_inv.invoice_month = "2024-01" + output_df = sample_inv._filter_partners(sample_df) + self.assertTrue(output_df.empty) + + # When some partnerships are active + sample_inv.invoice_month = "2024-06" + output_df = sample_inv._filter_partners(sample_df) + answer_df = pandas.DataFrame({"Institution": ["BU", "HU"]}) + self.assertTrue(output_df.equals(answer_df)) + + # When all partnerships are active + 
sample_inv.invoice_month = "2024-12" + output_df = sample_inv._filter_partners(sample_df) + answer_df = pandas.DataFrame({"Institution": ["BU", "HU", "NEU"]}) + self.assertTrue(output_df.equals(answer_df)) diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 0d3d3d6..a8d678d 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -10,6 +10,7 @@ def new_billable_invoice( nonbillable_pis=[], nonbillable_projects=[], old_pi_filepath="", + limit_new_pi_credit_to_partners=False, ): return billable_invoice.BillableInvoice( name, @@ -18,6 +19,7 @@ def new_billable_invoice( nonbillable_pis, nonbillable_projects, old_pi_filepath, + limit_new_pi_credit_to_partners, ) diff --git a/requirements.txt b/requirements.txt index cc0d852..748b45a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +git+https://github.com/CCI-MOC/nerc-rates@74eb4a7#egg=nerc_rates pandas pyarrow boto3