6
6
If using the get_list_of_words function, uncomment the nltk and re import lines in this script
7
7
AND run a `! pip install nltk` in the first cell of your notebook
8
8
'''
9
-
10
-
11
9
import pandas as pd
12
10
from siuba import *
13
-
14
11
from calitp_data_analysis .sql import to_snakecase
15
-
16
12
import _script_utils
17
-
18
-
19
13
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/dla-iija'
20
14
21
-
22
15
# import nltk
23
16
# from nltk.corpus import stopwords
24
17
# from nltk.tokenize import word_tokenize, sent_tokenize
25
18
# import re
26
-
27
-
28
19
def read_data_all ():
29
20
proj = to_snakecase (pd .read_excel (f"{ GCS_FILE_PATH } /CopyofFMIS_Projects_Universe_IIJA_Reporting_4.xls" ,
30
21
# sheet_name='FMIS 5 Projects ', header=[3]
@@ -42,6 +33,10 @@ def read_data_all():
42
33
return proj
43
34
44
35
36
+ '''
37
+ Program Code
38
+ Functions
39
+ '''
45
40
# def update_program_code_list():
46
41
47
42
# ## read in the program codes
@@ -59,11 +54,6 @@ def read_data_all():
59
54
60
55
# return program_codes
61
56
62
-
63
- '''
64
- Updated version of the update_program_code_list to alter program names if needed.
65
-
66
- '''
67
57
def update_program_code_list2 ():
68
58
updated_codes = to_snakecase (pd .read_excel (f"{ GCS_FILE_PATH } /program_codes/FY21-22ProgramCodesAsOf5-25-2022.v2_expanded090823.xlsx" ))
69
59
updated_codes = updated_codes >> select (_ .iija_program_code , _ .new_description )
@@ -87,21 +77,132 @@ def add_program_to_row(row):
87
77
88
78
return program_codes
89
79
80
def add_program_to_row(row):
    """
    Return the row's program name, guaranteed to contain the word "Program".

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``df.apply(..., axis=1)``)
        Must expose a ``"program_name"`` entry.

    Returns
    -------
    The program name with " Program" appended when the word "Program" is
    not already part of it. Non-string values (NaN/None left behind by an
    outer merge) are returned unchanged instead of raising a TypeError on
    the membership test.
    """
    name = row["program_name"]

    # Missing names come through as float NaN / None; `"Program" in nan`
    # would raise, so hand them back untouched.
    if not isinstance(name, str):
        return name

    if "Program" in name:
        return name
    return name + " Program"
85
+
86
def load_program_codes_og() -> pd.DataFrame:
    """
    Read the original program code workbook stored under GCS_FILE_PATH.

    The sheet is passed through to_snakecase and trimmed to the
    iija_program_code, description and program_name columns.
    """
    workbook = f"{GCS_FILE_PATH}/program_codes/Copy of lst_IIJA_Code_20230908.xlsx"
    keep_cols = ["iija_program_code", "description", "program_name"]

    codes = to_snakecase(pd.read_excel(workbook))
    return codes[keep_cols]
93
+
94
def load_program_codes_sept_2023() -> pd.DataFrame:
    """
    Read the September 2023 expanded program code workbook stored under
    GCS_FILE_PATH.

    The sheet is passed through to_snakecase and trimmed to the
    iija_program_code and new_description columns.
    """
    workbook = (
        f"{GCS_FILE_PATH}/program_codes/"
        "FY21-22ProgramCodesAsOf5-25-2022.v2_expanded090823.xlsx"
    )
    keep_cols = ["iija_program_code", "new_description"]

    codes = to_snakecase(pd.read_excel(workbook))
    return codes[keep_cols]
101
+
102
def load_program_codes_jan_2025() -> pd.DataFrame:
    """
    Read the January 2025 program code workbook stored under GCS_FILE_PATH.

    The sheet is passed through to_snakecase, trimmed to four columns,
    `program_code` is renamed to `iija_program_code` so it lines up with
    the other program code tables, and `short_name` is title-cased.
    """
    keep_cols = [
        "program_code",
        "short_name",
        "program_code_description",
        "funding_type_code",
    ]

    raw = to_snakecase(
        pd.read_excel(f"{GCS_FILE_PATH}/program_codes/Ycodes_01.2025.xlsx")
    )
    codes = raw[keep_cols].rename(columns={"program_code": "iija_program_code"})

    # Title-case the short names, e.g. "BRIDGE FORMULA" -> "Bridge Formula".
    codes["short_name"] = codes["short_name"].str.title()
    return codes
114
+
115
def update_program_code_list_2025():
    """
    Build the combined program code lookup as of January 2025.

    A new list of codes was received in January 2025. It is merged with
    the original list and the September 2023 list so that every code seen
    in any of the three sources is kept (outer merges).

    Precedence when more than one source has a value:
      * description:  January 2025, then September 2023, then original
      * program name: original `program_name`, then January 2025 `short_name`

    Returns a DataFrame with the columns iija_program_code,
    funding_type_code, new_description and program_name.
    """
    merge_key = "iija_program_code"

    # --- Step 1: original list + September 2023 list -------------------
    og_codes = load_program_codes_og()
    sept_codes = load_program_codes_sept_2023()

    og_and_sept = sept_codes.merge(
        og_codes,
        on=merge_key,
        how="outer",
        indicator=True,
    )

    # Prefer the (whitespace-trimmed) September description and fall back
    # to the original description where September has none.
    trimmed_sept_desc = og_and_sept["new_description"].str.strip()
    og_and_sept["new_description"] = trimmed_sept_desc.fillna(
        og_and_sept["description"]
    )
    og_and_sept = og_and_sept.drop(columns=["description", "_merge"])

    # --- Step 2: bring in the January 2025 list ------------------------
    jan_codes = load_program_codes_jan_2025()

    all_codes = jan_codes.merge(
        og_and_sept,
        on=merge_key,
        how="outer",
        indicator=True,
    )

    # Prefer the January 2025 description, else whatever step 1 produced.
    trimmed_jan_desc = all_codes["program_code_description"].str.strip()
    all_codes["2025_description"] = trimmed_jan_desc.fillna(
        all_codes["new_description"]
    )

    # Keep the existing program name, else use the January 2025 short name.
    all_codes["2025_program_name"] = all_codes["program_name"].fillna(
        all_codes["short_name"]
    )

    # Swap the superseded columns for the consolidated ones, using the
    # column names of the original sheet.
    superseded = [
        "short_name",
        "program_name",
        "program_code_description",
        "new_description",
        "_merge",
    ]
    final_names = {
        "2025_description": "new_description",
        "2025_program_name": "program_name",
    }
    all_codes = all_codes.drop(columns=superseded).rename(columns=final_names)

    # Append " Program" to any program name that lacks the word "Program".
    all_codes["program_name"] = all_codes.apply(add_program_to_row, axis=1)
    return all_codes
90
185
91
- ## Function to add the updated program codes to the data
92
186
def add_new_codes(df):
    """
    Add the updated program code descriptions to the data.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns `program_code` and
        `summary_recipient_defined_text_field_1_value`.

    Returns
    -------
    pd.DataFrame
        The same DataFrame (modified in place) with:
          * `program_code_description` set to the program name mapped from
            `program_code`; codes missing from the lookup become NaN.
          * `summary_recipient_defined_text_field_1_value` cast to str.
    """
    # Program codes as updated on 1/30/25. The function has to be called:
    # assigning the bare function object makes the column selection below
    # fail with "'function' object is not subscriptable".
    new_codes = update_program_code_list_2025()

    code_map = dict(
        zip(new_codes["iija_program_code"], new_codes["program_name"])
    )

    df["program_code_description"] = df.program_code.map(code_map)
    df["summary_recipient_defined_text_field_1_value"] = df[
        "summary_recipient_defined_text_field_1_value"
    ].astype(str)

    # NOTE (Amanda, January 2025): ER01 / ER03 were previously hard-coded
    # to 'Emergency Relieve Funding'. We were notified this should be
    # called emergency supplement funding, so the override is no longer
    # applied here.

    return df
107
208
0 commit comments