Skip to content

Commit 4c7fe4e

Browse files
authored
Merge pull request #907 from cal-itp/tiffany-switch-imports
Tiffany switch imports from `shared_utils` to `calitp_data_analysis`
2 parents ddc3c78 + ba9bc38 commit 4c7fe4e

File tree

133 files changed

+405
-504
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

133 files changed

+405
-504
lines changed

bus_service_increase/A3_service_increase_estimator.ipynb

+1-3
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
"ix = pd.IndexSlice\n",
2121
"\n",
2222
"from utils import *\n",
23-
"import shared_utils\n",
24-
"\n",
2523
"from siuba import *"
2624
]
2725
},
@@ -704,7 +702,7 @@
704702
"name": "python",
705703
"nbconvert_exporter": "python",
706704
"pygments_lexer": "ipython3",
707-
"version": "3.9.7"
705+
"version": "3.9.13"
708706
}
709707
},
710708
"nbformat": 4,

bus_service_increase/C1_transit_near_highways.ipynb

+13-40
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,16 @@
2727
],
2828
"source": [
2929
"import branca\n",
30+
"import folium\n",
3031
"import geopandas as gpd\n",
3132
"import intake\n",
32-
"import ipywidgets as widgets\n",
3333
"import pandas as pd\n",
3434
"\n",
3535
"from IPython.display import Markdown, HTML\n",
3636
"\n",
3737
"import setup_corridors_stats\n",
3838
"from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
39-
"from shared_utils import geography_utils\n",
40-
"from shared_utils import calitp_color_palette as cp\n",
39+
"from calitp_data_analysis import calitp_color_palette as cp\n",
4140
"\n",
4241
"catalog = intake.open_catalog(\"./*.yml\")"
4342
]
@@ -145,53 +144,27 @@
145144
" cp.CALITP_CATEGORY_BRIGHT_COLORS[0], #blue\n",
146145
" cp.CALITP_CATEGORY_BRIGHT_COLORS[1] # orange\n",
147146
" ],\n",
148-
" )\n",
149-
" \n",
150-
" # Instead of using county centroid, calculate centroid from transit_df\n",
151-
" # Otherwise, it's too zoomed out from where transit routes are\n",
152-
" transit_centroid = (to_map\n",
153-
" .to_crs(geography_utils.WGS84).geometry.centroid\n",
154-
" .iloc[0]\n",
155-
" )\n",
156-
"\n",
157-
" LAYERS_DICT = {\n",
158-
" \"Highways\": {\"df\": hwy_df,\n",
159-
" \"plot_col\": \"Route\",\n",
160-
" \"popup_dict\": hwys_popup_dict, \n",
161-
" \"tooltip_dict\": hwys_popup_dict,\n",
162-
" \"colorscale\": hwys_color,\n",
163-
" },\n",
164-
" \"Transit Routes\": {\"df\": to_map,\n",
165-
" \"plot_col\": \"parallel\",\n",
166-
" \"popup_dict\": transit_popup_dict, \n",
167-
" \"tooltip_dict\": transit_popup_dict,\n",
168-
" \"colorscale\": colorscale,\n",
169-
" },\n",
170-
" }\n",
147+
" ) \n",
171148
" \n",
172149
" LEGEND_URL = (\n",
173150
" \"https://github.com/cal-itp/data-analyses/raw/\"\n",
174151
" \"main/bus_service_increase/\"\n",
175152
" \"img/legend_intersecting_parallel.png\"\n",
176153
" )\n",
177-
" \n",
178-
" LEGEND_DICT = {\n",
179-
" \"legend_url\": LEGEND_URL,\n",
180-
" \"legend_bottom\": 85,\n",
181-
" \"legend_left\": 5,\n",
182-
" }\n",
183154
" \n",
155+
" fig = hwy_df.explore(\n",
156+
" \"Route\", tiles = \"CartoDB Positron\",\n",
157+
" cmap = colorscale, tooltip = list(hwys_popup_dict.keys()),\n",
158+
" name = \"Highways\",\n",
159+
" )\n",
184160
" \n",
185-
" fig = map_utils.make_folium_multiple_layers_map(\n",
186-
" LAYERS_DICT,\n",
187-
" fig_width = 700, fig_height = 700, \n",
188-
" zoom=11, \n",
189-
" centroid = [round(transit_centroid.y,2), \n",
190-
" round(transit_centroid.x, 2)], \n",
191-
" title=f\"Parallel vs Intersecting Lines for {to_map.itp_id.iloc[0]}\",\n",
192-
" legend_dict = LEGEND_DICT\n",
161+
" fig = to_map.explore(\"parallel\",\n",
162+
" m=fig, cmap = colorscale, name=\"Transit Routes\",\n",
163+
" tooltip = list(transit_popup_dict.keys())\n",
193164
" )\n",
194165
" \n",
166+
" folium.LayerControl().add_to(fig)\n",
167+
"\n",
195168
" display(fig)\n",
196169
" #fig.save(f\"{IMG_PATH}parallel_{operator_name}.html\")\n",
197170
" #print(f\"{operator_name} map saved\")"

bus_service_increase/C3_debug_notinshapes.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@
5151
"\n",
5252
"import create_parallel_corridors\n",
5353
"from bus_service_utils import utils\n",
54-
"from shared_utils import geography_utils\n",
55-
"from shared_utils import calitp_color_palette as cp\n",
54+
"from calitp_data_analysis import portfolio_utils\n",
55+
"from calitp_data_analysis import calitp_color_palette as cp\n",
5656
"\n",
5757
"IMG_PATH = create_parallel_corridors.IMG_PATH\n",
5858
"DATA_PATH = create_parallel_corridors.DATA_PATH\n",
@@ -553,7 +553,7 @@
553553
" \"addl_service_hrs\", \"service_hours_annual\", \n",
554554
" \"addl_service_hrs_annual\"\n",
555555
" ]\n",
556-
"a1 = geography_utils.aggregate_by_geography(service_increase,\n",
556+
"a1 = portfolio_utils.aggregate_by_geography(service_increase,\n",
557557
" group_cols = [\"itp_id\", \"day_name\", \"tract_type\"],\n",
558558
" sum_cols = sum_cols,\n",
559559
" )"

bus_service_increase/C4_select_routes.ipynb

+5-5
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434
"import matplotlib.pyplot as plt\n",
3535
"import pandas as pd\n",
3636
"\n",
37-
"import shared_utils\n",
38-
"from bus_service_utils import utils"
37+
"from bus_service_utils import utils as bus_utils\n",
38+
"from calitp_data_analysis import utils"
3939
]
4040
},
4141
{
@@ -45,8 +45,8 @@
4545
"metadata": {},
4646
"outputs": [],
4747
"source": [
48-
"gdf = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, \n",
49-
" \"parallel_or_intersecting\")"
48+
"gdf = utils.download_geoparquet(utils.GCS_FILE_PATH, \n",
49+
" \"parallel_or_intersecting\")"
5050
]
5151
},
5252
{
@@ -136,7 +136,7 @@
136136
"name": "python",
137137
"nbconvert_exporter": "python",
138138
"pygments_lexer": "ipython3",
139-
"version": "3.10.5"
139+
"version": "3.9.13"
140140
}
141141
},
142142
"nbformat": 4,

bus_service_increase/C7_target_highway_corridors.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131
"\n",
3232
"import setup_corridors_stats\n",
3333
"from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
34-
"from shared_utils import geography_utils, styleguide\n",
35-
"from shared_utils import calitp_color_palette as cp\n",
34+
"from calitp_data_analysis import styleguide\n",
35+
"from calitp_data_analysis import calitp_color_palette as cp\n",
3636
"\n",
3737
"alt.themes.register(\"calitp_theme\", styleguide.calitp_theme)\n",
3838
"\n",

bus_service_increase/D1_setup_parallel_trips_with_stops.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,17 @@
1515

1616
os.environ["CALITP_BQ_MAX_BYTES"] = str(130_000_000_000)
1717

18-
import shared_utils
19-
from bus_service_utils import utils
18+
from shared_utils import gtfs_utils, rt_dates, rt_utils
19+
from bus_service_utils import utils as bus_utils
20+
from calitp_data_analysis import geography_utils, utils
2021

21-
ANALYSIS_DATE = shared_utils.rt_dates.PMAC["Q2_2022"]
22-
COMPILED_CACHED = f"{shared_utils.rt_utils.GCS_FILE_PATH}compiled_cached_views/"
22+
ANALYSIS_DATE = rt_dates.PMAC["Q2_2022"]
23+
COMPILED_CACHED = f"{rt_utils.GCS_FILE_PATH}compiled_cached_views/"
2324

2425

2526
def grab_service_hours(selected_date: str,
2627
valid_trip_keys: list) -> pd.DataFrame:
27-
daily_service_hours = shared_utils.gtfs_utils.get_trips(
28+
daily_service_hours = gtfs_utils.get_trips(
2829
selected_date = selected_date,
2930
itp_id_list = None,
3031
# Keep more columns, route_id, shape_id, direction_id so the metrolink fix
@@ -36,7 +37,7 @@ def grab_service_hours(selected_date: str,
3637
)
3738

3839
daily_service_hours.to_parquet(
39-
f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
40+
f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
4041

4142

4243
def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
@@ -45,7 +46,7 @@ def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
4546
f"{COMPILED_CACHED}trips_{selected_date}.parquet")
4647

4748
daily_service_hours = pd.read_parquet(
48-
f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
49+
f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
4950

5051
df = dd.merge(
5152
trips,
@@ -120,7 +121,7 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame,
120121
stop_times_for_trip,
121122
on = ["calitp_itp_id", "stop_id"],
122123
how = "inner"
123-
).to_crs(shared_utils.geography_utils.WGS84)
124+
).to_crs(geography_utils.WGS84)
124125

125126

126127
stop_times_with_geom2 = (stop_times_with_geom.drop(
@@ -146,8 +147,8 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame,
146147

147148
trips_with_stops = grab_stops_for_trip_selected(one_trip, ANALYSIS_DATE)
148149

149-
shared_utils.utils.geoparquet_gcs_export(
150+
utils.geoparquet_gcs_export(
150151
trips_with_stops,
151-
utils.GCS_FILE_PATH,
152+
bus_utils.GCS_FILE_PATH,
152153
f"trips_with_stops_{ANALYSIS_DATE}"
153154
)

bus_service_increase/D4_make_gmaps_results.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from datetime import datetime
1313
from loguru import logger
1414

15-
import shared_utils
16-
from bus_service_utils import utils
15+
from calitp_data_analysis import geography_utils, utils
16+
from bus_service_utils import utils as bus_utils
1717
from D1_setup_parallel_trips_with_stops import ANALYSIS_DATE, COMPILED_CACHED
1818

1919
logger.add("./logs/make_gmaps_results.log")
@@ -22,7 +22,7 @@
2222
level="INFO")
2323

2424
DATA_PATH = "./gmaps_cache/"
25-
GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"
25+
GCS_FILE_PATH = f"{bus_utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"
2626

2727
def grab_cached_results(df: pd.DataFrame) -> (list, list):
2828
result_ids = list(df.identifier_num)
@@ -32,7 +32,7 @@ def grab_cached_results(df: pd.DataFrame) -> (list, list):
3232

3333
for i in result_ids:
3434
try:
35-
json_dict = utils.open_request_json(i,
35+
json_dict = bus_utils.open_request_json(i,
3636
data_path = DATA_PATH,
3737
gcs_file_path = GCS_FILE_PATH
3838
)
@@ -71,7 +71,7 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
7171
if __name__ == "__main__":
7272
time0 = datetime.now()
7373

74-
df = pd.read_parquet(f"{utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")
74+
df = pd.read_parquet(f"{bus_utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")
7575

7676
successful_ids, durations = grab_cached_results(df)
7777
logger.info("Grabbed cached results")
@@ -106,11 +106,11 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
106106
how = "inner",
107107
# many on right because trip_ids can share same shape_id
108108
validate = "1:m"
109-
).to_crs(shared_utils.geography_utils.WGS84)
109+
).to_crs(geography_utils.WGS84)
110110

111-
shared_utils.utils.geoparquet_gcs_export(gdf,
112-
utils.GCS_FILE_PATH,
113-
f"gmaps_results_{ANALYSIS_DATE}")
111+
utils.geoparquet_gcs_export(gdf,
112+
bus_utils.GCS_FILE_PATH,
113+
f"gmaps_results_{ANALYSIS_DATE}")
114114

115115
end = datetime.now()
116116
logger.info(f"Total execution: {end - time0}")

bus_service_increase/D5_make_stripplot_data.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
from calitp_data_analysis.tables import tbls
1414
from siuba import *
1515

16-
import shared_utils
1716
import D2_setup_gmaps as setup_gmaps
1817
import E2_aggregated_route_stats as aggregated_route_stats
19-
from bus_service_utils import utils
18+
from bus_service_utils import utils as bus_utils
19+
from calitp_data_analysis import utils
20+
from shared_utils import portfolio_utils, rt_utils
2021
from D1_setup_parallel_trips_with_stops import (ANALYSIS_DATE, COMPILED_CACHED,
2122
merge_trips_with_service_hours)
2223

@@ -62,7 +63,7 @@ def add_trip_time_of_day(trips: pd.DataFrame) -> pd.DataFrame:
6263
# Add time-of-day
6364
df = df.assign(
6465
time_of_day = df.apply(
65-
lambda x: shared_utils.rt_utils.categorize_time_of_day(
66+
lambda x: rt_utils.categorize_time_of_day(
6667
x.trip_first_departure),
6768
axis=1)
6869
)
@@ -193,7 +194,7 @@ def add_route_group(df: gpd.GeoDataFrame,
193194

194195
# Use agency_name from our views.gtfs_schedule.agency instead of Airtable?
195196
def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
196-
agency_names = shared_utils.portfolio_utils.add_agency_name(
197+
agency_names = portfolio_utils.add_agency_name(
197198
selected_date = ANALYSIS_DATE)
198199

199200
df2 = pd.merge(
@@ -209,7 +210,7 @@ def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
209210

210211
def merge_in_airtable(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
211212
# Don't use name from Airtable. But, use district.
212-
caltrans_districts = shared_utils.portfolio_utils.add_caltrans_district()
213+
caltrans_districts = portfolio_utils.add_caltrans_district()
213214

214215
# Airtable gives us fewer duplicates than doing tbl.gtfs_schedule.agency()
215216
# But naming should be done with tbl.gtfs_schedule.agency because that's what's used
@@ -231,7 +232,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
231232
under quarterly performance objective work.
232233
"""
233234
route_categories = (gpd.read_parquet(
234-
f"{utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
235+
f"{bus_utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
235236
.rename(columns = {"itp_id": "calitp_itp_id"})
236237
)
237238

@@ -244,7 +245,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
244245
)
245246

246247
# Clean up route_name
247-
route_names = shared_utils.portfolio_utils.add_route_name(ANALYSIS_DATE)
248+
route_names = portfolio_utils.add_route_name(ANALYSIS_DATE)
248249

249250
gdf3 = pd.merge(
250251
gdf2,
@@ -330,7 +331,7 @@ def assemble_data(analysis_date: str, threshold: float = 1.5,
330331
gdf = assemble_data(ANALYSIS_DATE, threshold = 1.5,
331332
service_time_cutoffs = SERVICE_TIME_CUTOFFS)
332333

333-
shared_utils.utils.geoparquet_gcs_export(
334+
utils.geoparquet_gcs_export(
334335
gdf,
335-
utils.GCS_FILE_PATH,
336+
bus_utils.GCS_FILE_PATH,
336337
f"competitive_route_variability_{ANALYSIS_DATE}")

bus_service_increase/E1_get_buses_on_shn.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
1515
from bus_service_utils import create_parallel_corridors, utils
16-
from shared_utils import geography_utils, utils
16+
from calitp_data_analysis import geography_utils, utils
1717

1818
catalog = intake.open_catalog("./*.yml")
1919

bus_service_increase/E2_aggregated_route_stats.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818
import geopandas as gpd
1919
import pandas as pd
2020

21-
from shared_utils import (geography_utils, gtfs_utils,
22-
rt_utils, portfolio_utils, utils
23-
)
21+
from shared_utils import gtfs_utils, portfolio_utils, rt_utils
22+
from calitp_data_analysis import utils
2423
from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
2524
from bus_service_utils import gtfs_build
2625

@@ -254,7 +253,7 @@ def calculate_mean_speed_by_route(analysis_date: str,
254253

255254
# Each trip is 1 observation, just take the average (not weighted)
256255
# to get route-level mean_speed_mph
257-
mean_speed = geography_utils.aggregate_by_geography(
256+
mean_speed = portfolio_utils.aggregate_by_geography(
258257
df,
259258
group_cols = group_cols,
260259
mean_cols = ["mean_speed_mph"]
@@ -281,7 +280,7 @@ def get_competitive_routes() -> pd.DataFrame:
281280
"num_competitive", "pct_trips_competitive",
282281
]
283282

284-
route_df = geography_utils.aggregate_by_geography(
283+
route_df = portfolio_utils.aggregate_by_geography(
285284
trip_df,
286285
group_cols = route_level_cols,
287286
mean_cols = ["bus_multiplier", "bus_difference"],

0 commit comments

Comments
 (0)