Skip to content

Commit 7ad46c6

Browse files
committed
working on locodes
1 parent 2ddbaf7 commit 7ad46c6

File tree

4 files changed

+4117
-473
lines changed

4 files changed

+4117
-473
lines changed

dla/iija/_data_utils.py

Lines changed: 119 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,16 @@
66
If using the get_list_of_words function, uncomment the nltk and re imports in this script
77
AND run a `! pip install nltk` in the first cell of your notebook
88
'''
9-
10-
119
import pandas as pd
1210
from siuba import *
13-
1411
from calitp_data_analysis.sql import to_snakecase
15-
1612
import _script_utils
17-
18-
1913
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/dla-iija'
2014

21-
2215
# import nltk
2316
# from nltk.corpus import stopwords
2417
# from nltk.tokenize import word_tokenize, sent_tokenize
2518
# import re
26-
27-
2819
def read_data_all():
2920
proj = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/CopyofFMIS_Projects_Universe_IIJA_Reporting_4.xls",
3021
# sheet_name='FMIS 5 Projects ', header=[3]
@@ -42,6 +33,10 @@ def read_data_all():
4233
return proj
4334

4435

36+
'''
37+
Program Code
38+
Functions
39+
'''
4540
# def update_program_code_list():
4641

4742
# ## read in the program codes
@@ -59,11 +54,6 @@ def read_data_all():
5954

6055
# return program_codes
6156

62-
63-
'''
64-
Updated version of the update_program_code_list to alter program names if needed.
65-
66-
'''
6757
def update_program_code_list2():
6858
updated_codes = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/program_codes/FY21-22ProgramCodesAsOf5-25-2022.v2_expanded090823.xlsx"))
6959
updated_codes = updated_codes>>select(_.iija_program_code, _.new_description)
@@ -87,21 +77,132 @@ def add_program_to_row(row):
8777

8878
return program_codes
8979

80+
def add_program_to_row(row):
    """
    Return the row's program name with " Program" appended when needed.

    Parameters
    ----------
    row : mapping-like (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
        Must provide a ``"program_name"`` entry.

    Returns
    -------
    The program name, with " Program" appended when the (case-sensitive)
    substring "Program" is absent. Values that are not strings (NaN, None)
    are returned unchanged.
    """
    name = row["program_name"]

    # Missing names (NaN/None) are not strings; the substring test below
    # would raise a TypeError on them, so pass them through untouched.
    if not isinstance(name, str):
        return name

    if "Program" in name:
        return name
    return name + " Program"
85+
86+
def load_program_codes_og() -> pd.DataFrame:
    """
    Read the original IIJA program code workbook from GCS.

    Column names are passed through to_snakecase, and only the
    iija_program_code, description and program_name columns are returned.
    """
    workbook_path = f"{GCS_FILE_PATH}/program_codes/Copy of lst_IIJA_Code_20230908.xlsx"
    keep_cols = ["iija_program_code", "description", "program_name"]

    raw_codes = pd.read_excel(workbook_path)
    return to_snakecase(raw_codes)[keep_cols]
93+
94+
def load_program_codes_sept_2023() -> pd.DataFrame:
    """
    Read the September 2023 program code workbook from GCS.

    Column names are passed through to_snakecase, and only the
    iija_program_code and new_description columns are returned.
    """
    workbook_path = (
        f"{GCS_FILE_PATH}/program_codes/FY21-22ProgramCodesAsOf5-25-2022.v2_expanded090823.xlsx"
    )
    keep_cols = ["iija_program_code", "new_description"]

    raw_codes = pd.read_excel(workbook_path)
    return to_snakecase(raw_codes)[keep_cols]
101+
102+
def load_program_codes_jan_2025() -> pd.DataFrame:
    """
    Read the January 2025 program code workbook from GCS.

    Keeps the program_code, short_name, program_code_description and
    funding_type_code columns (after to_snakecase), renames program_code
    to iija_program_code, and title-cases short_name.
    """
    workbook_path = f"{GCS_FILE_PATH}/program_codes/Ycodes_01.2025.xlsx"
    keep_cols = [
        "program_code",
        "short_name",
        "program_code_description",
        "funding_type_code",
    ]

    codes = to_snakecase(pd.read_excel(workbook_path))[keep_cols]

    # Use the same key column name as the other program code lists.
    codes = codes.rename(columns={"program_code": "iija_program_code"})
    codes["short_name"] = codes["short_name"].str.title()
    return codes
114+
115+
def update_program_code_list_2025():
    """
    Combine the three program code lists into one table.

    In January 2025 we received a new list of updated codes. This merges
    that list with the codes received originally and in September 2023.

    Returns a DataFrame with the columns iija_program_code,
    funding_type_code, new_description and program_name.
    """
    # Step 1: original codes + September 2023 codes.
    og_codes = load_program_codes_og()
    sept_2023_codes = load_program_codes_sept_2023()
    sept_and_og = sept_2023_codes.merge(
        og_codes,
        on="iija_program_code",
        how="outer",
        indicator=True,
    )

    # Prefer the (stripped) September description; fall back to the original.
    stripped_sept_desc = sept_and_og["new_description"].str.strip()
    sept_and_og["new_description"] = stripped_sept_desc.fillna(
        sept_and_og["description"]
    )
    sept_and_og = sept_and_og.drop(columns=["description", "_merge"])

    # Step 2: January 2025 codes + the result of step 1.
    jan_2025_codes = load_program_codes_jan_2025()
    combined = jan_2025_codes.merge(
        sept_and_og,
        on="iija_program_code",
        how="outer",
        indicator=True,
    )

    # Prefer the (stripped) January 2025 description; fall back to step 1's.
    stripped_2025_desc = combined["program_code_description"].str.strip()
    combined["2025_description"] = stripped_2025_desc.fillna(
        combined["new_description"]
    )

    # Keep the existing program name; use the 2025 short name only when absent.
    combined["2025_program_name"] = combined["program_name"].fillna(
        combined["short_name"]
    )

    # Drop the superseded columns, then rename to match the original sheet.
    outdated_cols = [
        "short_name",
        "program_name",
        "program_code_description",
        "new_description",
        "_merge",
    ]
    combined = combined.drop(columns=outdated_cols).rename(
        columns={
            "2025_description": "new_description",
            "2025_program_name": "program_name",
        }
    )

    # Append " Program" to program names lacking the string "Program".
    combined["program_name"] = combined.apply(add_program_to_row, axis=1)
    return combined
90185

91-
## Function to add the updated program codes to the data
92186
def add_new_codes(df):
    """
    Add the updated program code names to the data.

    Maps each row's ``program_code`` to the ``program_name`` from the code
    list built by ``update_program_code_list_2025`` (codes updated 1/30/25)
    and stores the result in ``program_code_description``. Codes without a
    match in the list get NaN. Also casts
    ``summary_recipient_defined_text_field_1_value`` to ``str``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``program_code`` and
        ``summary_recipient_defined_text_field_1_value`` columns.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object, modified in place.
    """
    # The builder must be *called* here; assigning the bare function object
    # made the subscript on the next line fail with
    # "'function' object is not subscriptable".
    new_codes = update_program_code_list_2025()
    code_map = dict(new_codes[['iija_program_code', 'program_name']].values)

    df['program_code_description'] = df.program_code.map(code_map)
    df['summary_recipient_defined_text_field_1_value'] = df['summary_recipient_defined_text_field_1_value'].astype(str)

    # NOTE (Amanda, January 2025): notified that ER01/ER03 should be called
    # emergency supplement funding, so the former hard-coded
    # 'Emergency Relieve Funding' override for those two codes is no longer
    # applied here.

    return df
107208

dla/iija/_script_utils.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,13 @@
66
- add project types that classify project type
77
- create a public-friendly project title
88
'''
9-
109
import numpy as np
1110
import pandas as pd
1211
from siuba import *
1312

1413
import dla_utils
15-
1614
from calitp_data_analysis.sql import to_snakecase
17-
1815
import _data_utils
19-
2016
import intake
2117

2218
# import nltk
@@ -27,7 +23,6 @@
2723

2824
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/dla/dla-iija'
2925

30-
3126
def _prep_data(file_name):
3227
proj = to_snakecase(pd.read_excel(f"{GCS_FILE_PATH}/{file_name}"))
3328

0 commit comments

Comments
 (0)