From 6a36c2432a9ba030389f3839728a55663db08ef0 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Tue, 16 Jul 2024 16:35:04 -0400 Subject: [PATCH] Refactored PI-specific invoices The PI-specific invoice is now implemented in the `PIInvoice` class. This class takes in the entire billable invoice and will export the PI invoices to local storage and S3. Because the `upload_to_s3` function is no longer used, it is removed. Since the `export_s3` function in the Invoice class does not need to be concerned about file extensions, the test case for S3 exporting is somewhat simplified, only checking that the format of the output paths is correct. --- .../invoices/pi_specific_invoice.py | 43 +++++++++++++++++ process_report/process_report.py | 46 +++--------------- process_report/tests/unit_tests.py | 47 ++++++++++++------- process_report/tests/util.py | 27 ++++++++++- 4 files changed, 107 insertions(+), 56 deletions(-) create mode 100644 process_report/invoices/pi_specific_invoice.py diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py new file mode 100644 index 0000000..fc2bd63 --- /dev/null +++ b/process_report/invoices/pi_specific_invoice.py @@ -0,0 +1,43 @@ +import os +from dataclasses import dataclass + +import pandas + +import process_report.invoices.invoice as invoice +import process_report.util as util + + +@dataclass +class PIInvoice(invoice.Invoice): + def _prepare(self): + self.pi_list = self.data[invoice.PI_FIELD].unique() + + def export(self): + def _export_pi_invoice(pi): + if pandas.isna(pi): + return + pi_projects = self.data[self.data[invoice.PI_FIELD] == pi] + pi_instituition = pi_projects[invoice.INSTITUTION_FIELD].iat[0] + pi_projects.to_csv( + f"{self.name}/{pi_instituition}_{pi} {self.invoice_month}.csv" + ) + + if not os.path.exists( + self.name + ): # self.name is name of folder storing invoices + os.mkdir(self.name) + + for pi in self.pi_list: + _export_pi_invoice(pi) + + def export_s3(self, s3_bucket): + 
def _export_s3_pi_invoice(pi_invoice): + pi_invoice_path = os.path.join(self.name, pi_invoice) + striped_invoice_path = os.path.splitext(pi_invoice_path)[0] + output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.csv" + output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.csv" + s3_bucket.upload_file(pi_invoice_path, output_s3_path) + s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path) + + for pi_invoice in os.listdir(self.name): + _export_s3_pi_invoice(pi_invoice) diff --git a/process_report/process_report.py b/process_report/process_report.py index 3f309a2..4207152 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -1,5 +1,4 @@ import argparse -import os import sys import datetime @@ -14,6 +13,7 @@ billable_invoice, NERC_total_invoice, bu_internal_invoice, + pi_specific_invoice, ) @@ -262,17 +262,13 @@ def main(): subsidy_amount=args.BU_subsidy_amount, ) - process_and_export_invoices([nerc_total_inv, bu_internal_inv], args.upload_to_s3) - - export_pi_billables(billable_inv.data.copy(), args.output_folder, invoice_month) - - if args.upload_to_s3: - invoice_list = list() - - for pi_invoice in os.listdir(args.output_folder): - invoice_list.append(os.path.join(args.output_folder, pi_invoice)) + pi_inv = pi_specific_invoice.PIInvoice( + name=args.output_folder, invoice_month=invoice_month, data=billable_inv.data + ) - upload_to_s3(invoice_list, invoice_month) + process_and_export_invoices( + [nerc_total_inv, bu_internal_inv, pi_inv], args.upload_to_s3 + ) def fetch_s3_invoices(invoice_month): @@ -390,33 +386,5 @@ def export_billables(dataframe, output_file): dataframe.to_csv(output_file, index=False) -def export_pi_billables(dataframe: pandas.DataFrame, output_folder, invoice_month): - if not os.path.exists(output_folder): - os.mkdir(output_folder) - - pi_list = dataframe[PI_FIELD].unique() - - for pi in pi_list: - if pandas.isna(pi): - 
continue - pi_projects = dataframe[dataframe[PI_FIELD] == pi] - pi_instituition = pi_projects[INSTITUTION_FIELD].iat[0] - pi_projects.to_csv( - output_folder + f"/{pi_instituition}_{pi}_{invoice_month}.csv", index=False - ) - - -def upload_to_s3(invoice_list: list, invoice_month): - invoice_bucket = get_invoice_bucket() - for invoice_filename in invoice_list: - striped_filename = os.path.splitext(invoice_filename)[0] - invoice_s3_path = ( - f"Invoices/{invoice_month}/{striped_filename} {invoice_month}.csv" - ) - invoice_s3_path_archive = f"Invoices/{invoice_month}/Archive/{striped_filename} {invoice_month} {get_iso8601_time()}.csv" - invoice_bucket.upload_file(invoice_filename, invoice_s3_path) - invoice_bucket.upload_file(invoice_filename, invoice_s3_path_archive) - - if __name__ == "__main__": main() diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 411b55a..b2eeb3a 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -182,12 +182,13 @@ def setUp(self): def test_export_pi(self): output_dir = tempfile.TemporaryDirectory() - process_report.export_pi_billables( - self.dataframe, output_dir.name, self.invoice_month + pi_inv = test_utils.new_pi_specific_invoice( + output_dir.name, invoice_month=self.invoice_month, data=self.dataframe ) - - pi_csv_1 = f'{self.dataframe["Institution"][0]}_{self.dataframe["Manager (PI)"][0]}_{self.dataframe["Invoice Month"][0]}.csv' - pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]}_{self.dataframe["Invoice Month"][3]}.csv' + pi_inv.process() + pi_inv.export() + pi_csv_1 = f'{self.dataframe["Institution"][0]}_{self.dataframe["Manager (PI)"][0]} {self.dataframe["Invoice Month"][0]}.csv' + pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]} {self.dataframe["Invoice Month"][3]}.csv' self.assertIn(pi_csv_1, os.listdir(output_dir.name)) self.assertIn(pi_csv_2, os.listdir(output_dir.name)) 
self.assertEqual( @@ -789,32 +790,46 @@ def test_process_lenovo(self): class TestUploadToS3(TestCase): @mock.patch("process_report.process_report.get_invoice_bucket") - @mock.patch("process_report.process_report.get_iso8601_time") - def test_remove_prefix(self, mock_get_time, mock_get_bucket): + @mock.patch("process_report.util.get_iso8601_time") + def test_upload_to_s3(self, mock_get_time, mock_get_bucket): mock_bucket = mock.MagicMock() mock_get_bucket.return_value = mock_bucket mock_get_time.return_value = "0" invoice_month = "2024-03" - filenames = ["test.csv", "test2.test.csv", "test3"] + filenames = ["test-test", "test2.test", "test3"] + sample_base_invoice = test_utils.new_base_invoice(invoice_month=invoice_month) + answers = [ - ("test.csv", f"Invoices/{invoice_month}/test {invoice_month}.csv"), ( - "test.csv", - f"Invoices/{invoice_month}/Archive/test {invoice_month} 0.csv", + f"test-test {invoice_month}.csv", + f"Invoices/{invoice_month}/test-test {invoice_month}.csv", ), ( - "test2.test.csv", + f"test-test {invoice_month}.csv", + f"Invoices/{invoice_month}/Archive/test-test {invoice_month} 0.csv", + ), + ( + f"test2.test {invoice_month}.csv", f"Invoices/{invoice_month}/test2.test {invoice_month}.csv", ), ( - "test2.test.csv", + f"test2.test {invoice_month}.csv", f"Invoices/{invoice_month}/Archive/test2.test {invoice_month} 0.csv", ), - ("test3", f"Invoices/{invoice_month}/test3 {invoice_month}.csv"), - ("test3", f"Invoices/{invoice_month}/Archive/test3 {invoice_month} 0.csv"), + ( + f"test3 {invoice_month}.csv", + f"Invoices/{invoice_month}/test3 {invoice_month}.csv", + ), + ( + f"test3 {invoice_month}.csv", + f"Invoices/{invoice_month}/Archive/test3 {invoice_month} 0.csv", + ), ] - process_report.upload_to_s3(filenames, invoice_month) + for filename in filenames: + sample_base_invoice.name = filename + sample_base_invoice.export_s3(mock_bucket) + for i, call_args in enumerate(mock_bucket.upload_file.call_args_list): self.assertTrue(answers[i] in 
call_args) diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 0d3d3d6..95148d1 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -1,6 +1,19 @@ import pandas -from process_report.invoices import billable_invoice, bu_internal_invoice +from process_report.invoices import ( + invoice, + billable_invoice, + bu_internal_invoice, + pi_specific_invoice, +) + + +def new_base_invoice( + name="", + invoice_month="0000-00", + data=pandas.DataFrame(), +): + return invoice.Invoice(name, invoice_month, data) def new_billable_invoice( @@ -27,3 +40,15 @@ def new_bu_internal_invoice( return bu_internal_invoice.BUInternalInvoice( name, invoice_month, data, subsidy_amount ) + + +def new_pi_specific_invoice( + name="", + invoice_month="0000-00", + data=pandas.DataFrame(), +): + return pi_specific_invoice.PIInvoice( + name, + invoice_month, + data, + )