cal-itp
diff --git a/‎dla/iija/_data_utils.py
Lines changed: 119 additions & 18 deletions b/‎dla/iija/_data_utils.py
Lines changed: 119 additions & 18 deletions
diff --git a/‎dla/iija/_script_utils.py
Lines changed: 0 additions & 5 deletions b/‎dla/iija/_script_utils.py
Lines changed: 0 additions & 5 deletions
@@ -6,25 +6,16 @@
 If using the get_list_of_words function, remove the comment out hashtag from the import nltk and re in this script 
 AND run a `! pip install nltk` in the first cell of your notebook
 '''
-
-
 import pandas as pd
 from siuba import *
-
 from calitp_data_analysis.sql import to_snakecase
-
 import _script_utils
-
-
 GCS_FILE_PATH  = 'gs://calitp-analytics-data/data-analyses/dla/dla-iija'
 
-
 # import nltk
 # from nltk.corpus import stopwords
 # from nltk.tokenize import word_tokenize, sent_tokenize
 # import re
-
-
 def read_data_all():
     proj = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/CopyofFMIS_Projects_Universe_IIJA_Reporting_4.xls", 
                            # sheet_name='FMIS 5 Projects  ', header=[3]
@@ -42,6 +33,10 @@ def read_data_all():
     return proj
 
 
+'''
+Program Code
+Functions
+'''
 # def update_program_code_list():
 
 #     ## read in the program codes
@@ -59,11 +54,6 @@ def read_data_all():
 
 #     return program_codes 
 
-
-'''
-Updated version of the update_program_code_list to alter program names if needed.
-
-'''
 def update_program_code_list2():
     updated_codes = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/program_codes/FY21-22ProgramCodesAsOf5-25-2022.v2_expanded090823.xlsx"))
     updated_codes = updated_codes>>select(_.iija_program_code, _.new_description)
@@ -87,21 +77,132 @@ def add_program_to_row(row):
 
     return program_codes
 
+def add_program_to_row(row):
+    if "Program" not in row["program_name"]:
+        return row["program_name"] + " Program"
+    else:
+        return row["program_name"]
+
+def load_program_codes_og() -> pd.DataFrame:
+    df = to_snakecase(
+        pd.read_excel(
+            f"{GCS_FILE_PATH}/program_codes/Copy of lst_IIJA_Code_20230908.xlsx"
+        )
+    )[["iija_program_code", "description", "program_name"]]
+    return df
+
+def load_program_codes_sept_2023() -> pd.DataFrame:
+    df = to_snakecase(
+        pd.read_excel(
+            f"{GCS_FILE_PATH}/program_codes/FY21-22ProgramCodesAsOf5-25-2022.v2_expanded090823.xlsx"
+        )
+    )[["iija_program_code", "new_description"]]
+    return df
+
+def load_program_codes_jan_2025() -> pd.DataFrame:
+    df = to_snakecase(
+        pd.read_excel(f"{GCS_FILE_PATH}/program_codes/Ycodes_01.2025.xlsx")
+    )[["program_code", "short_name", "program_code_description", "funding_type_code"]]
+
+    df = df.rename(
+        columns={
+            "program_code": "iija_program_code",
+        }
+    )
+    df.short_name = df.short_name.str.title()
+    return df
+
+def update_program_code_list_2025():
+    """
+    On January 2025, we received a new list of updated codes.
+    Merge this new list with codes received originally and in
+    September 2023.
+    """
+    # Load original codes
+    original_codes_df = load_program_codes_og()
+
+    # Load September 2023 codes
+    program_codes_sept_2023 = load_program_codes_sept_2023()
+
+    # Merge original + September first
+    m1 = pd.merge(
+        program_codes_sept_2023,
+        original_codes_df,
+        on="iija_program_code",
+        how="outer",
+        indicator=True,
+    )
+
+    # Clean up description
+    m1["new_description"] = (
+        m1["new_description"].str.strip().fillna(m1.description)
+    )
+
+    # Delete unnecessary columns
+    m1 = m1.drop(columns={"description", "_merge"})
+
+    # Load January 2025 code
+    program_codes_jan_2025 = load_program_codes_jan_2025()
+
+    # Merge m1 with program codes from January 2025.
+    m2 = pd.merge(
+        program_codes_jan_2025,
+        m1,
+        on="iija_program_code",
+        how="outer",
+        indicator=True,
+    )
+    # Update descriptions
+    m2["2025_description"] = (
+        m2["program_code_description"].str.strip().fillna(m2.new_description)
+    )
+
+    # Update program names
+    m2["2025_program_name"] = m2.program_name.fillna(m2.short_name)
+
+    # Delete outdated columns
+    m2 = m2.drop(
+        columns=[
+            "short_name",
+            "program_name",
+            "program_code_description",
+            "new_description",
+            "_merge",
+        ]
+    )
+
+    # Rename to match original sheet
+    m2 = m2.rename(
+        columns={
+            "2025_description": "new_description",
+            "2025_program_name": "program_name",
+        }
+    )
+
+    # Add program to another program names without the string "program"
+    m2["program_name"] = m2.apply(add_program_to_row, axis=1)
+    return m2
 
-## Function to add the updated program codes to the data
 def add_new_codes(df):
+    """
+    Function to add the updated program codes to the data
+    """
     #new_codes = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/FY21-22ProgramCodesAsOf5-25-2022.v2.xlsx"))
     #code_map = dict(new_codes[['iija_program_code', 'new_description']].values)
 
     ## adding updated program codes 05/11/23
-    new_codes = update_program_code_list2()
+    #new_codes = update_program_code_list2()
+    
+    ## adding updated program codes 1/30/25
+    new_codes = update_program_code_list_2025
     code_map = dict(new_codes[['iija_program_code', 'program_name']].values)
 
     df['program_code_description'] = df.program_code.map(code_map)
     df['summary_recipient_defined_text_field_1_value'] = df['summary_recipient_defined_text_field_1_value'].astype(str)
 
-    df.loc[df.program_code =='ER01', 'program_code_description'] = 'Emergency Relieve Funding'
-    df.loc[df.program_code =='ER03', 'program_code_description'] = 'Emergency Relieve Funding'
+    # Amanda: January 2025, notified this should be called emergency supplement funding
+    #df.loc[df.program_code =='ER01', 'program_code_description'] = 'Emergency Relieve Funding'
+    #df.loc[df.program_code =='ER03', 'program_code_description'] = 'Emergency Relieve Funding'
 
     return df
 
 
@@ -6,17 +6,13 @@
 - add project types that classify project type
 - create a public-friendly project title 
 '''
-
 import numpy as np
 import pandas as pd
 from siuba import *
 
 import dla_utils
-
 from calitp_data_analysis.sql import to_snakecase
-
 import _data_utils
-
 import intake
 
 # import nltk
@@ -27,7 +23,6 @@
 
 GCS_FILE_PATH  = 'gs://calitp-analytics-data/data-analyses/dla/dla-iija'
 
-
 def _prep_data(file_name): 
     proj = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/{file_name}"))