cal-itp
diff --git a/bus_service_increase/A3_service_increase_estimator.ipynb b/bus_service_increase/A3_service_increase_estimator.ipynb
Lines changed: 1 addition & 3 deletions
diff --git a/bus_service_increase/C1_transit_near_highways.ipynb b/bus_service_increase/C1_transit_near_highways.ipynb
Lines changed: 13 additions & 40 deletions
diff --git a/bus_service_increase/C3_debug_notinshapes.ipynb b/bus_service_increase/C3_debug_notinshapes.ipynb
Lines changed: 3 additions & 3 deletions
diff --git a/bus_service_increase/C4_select_routes.ipynb b/bus_service_increase/C4_select_routes.ipynb
Lines changed: 5 additions & 5 deletions
diff --git a/bus_service_increase/C7_target_highway_corridors.ipynb b/bus_service_increase/C7_target_highway_corridors.ipynb
Lines changed: 2 additions & 2 deletions
diff --git a/bus_service_increase/D1_setup_parallel_trips_with_stops.py b/bus_service_increase/D1_setup_parallel_trips_with_stops.py
Lines changed: 11 additions & 10 deletions
diff --git a/bus_service_increase/D4_make_gmaps_results.py b/bus_service_increase/D4_make_gmaps_results.py
Lines changed: 9 additions & 9 deletions
diff --git a/bus_service_increase/D5_make_stripplot_data.py b/bus_service_increase/D5_make_stripplot_data.py
Lines changed: 10 additions & 9 deletions
diff --git a/bus_service_increase/E1_get_buses_on_shn.py b/bus_service_increase/E1_get_buses_on_shn.py
Lines changed: 1 addition & 1 deletion
diff --git a/bus_service_increase/E2_aggregated_route_stats.py b/bus_service_increase/E2_aggregated_route_stats.py
Lines changed: 4 additions & 5 deletions
@@ -20,8 +20,6 @@
     "ix = pd.IndexSlice\n",
     "\n",
     "from utils import *\n",
-    "import shared_utils\n",
-    "\n",
     "from siuba import *"
    ]
   },
@@ -704,7 +702,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.13"
   }
  },
  "nbformat": 4,
 
@@ -27,17 +27,16 @@
    ],
    "source": [
     "import branca\n",
+    "import folium\n",
     "import geopandas as gpd\n",
     "import intake\n",
-    "import ipywidgets as widgets\n",
     "import pandas as pd\n",
     "\n",
     "from IPython.display import Markdown, HTML\n",
     "\n",
     "import setup_corridors_stats\n",
     "from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
-    "from shared_utils import geography_utils\n",
-    "from shared_utils import calitp_color_palette as cp\n",
+    "from calitp_data_analysis import calitp_color_palette as cp\n",
     "\n",
     "catalog = intake.open_catalog(\"./*.yml\")"
    ]
@@ -145,53 +144,27 @@
     "            cp.CALITP_CATEGORY_BRIGHT_COLORS[0], #blue\n",
     "            cp.CALITP_CATEGORY_BRIGHT_COLORS[1] # orange\n",
     "        ],\n",
-    "    )\n",
-    "    \n",
-    "    # Instead of using county centroid, calculate centroid from transit_df\n",
-    "    # Otherwise, it's too zoomed out from where transit routes are\n",
-    "    transit_centroid = (to_map\n",
-    "                        .to_crs(geography_utils.WGS84).geometry.centroid\n",
-    "                        .iloc[0]\n",
-    "                       )\n",
-    "\n",
-    "    LAYERS_DICT = {\n",
-    "        \"Highways\": {\"df\": hwy_df,\n",
-    "            \"plot_col\": \"Route\",\n",
-    "            \"popup_dict\": hwys_popup_dict, \n",
-    "            \"tooltip_dict\": hwys_popup_dict,\n",
-    "            \"colorscale\": hwys_color,\n",
-    "        },\n",
-    "        \"Transit Routes\": {\"df\": to_map,\n",
-    "            \"plot_col\": \"parallel\",\n",
-    "            \"popup_dict\": transit_popup_dict, \n",
-    "            \"tooltip_dict\": transit_popup_dict,\n",
-    "            \"colorscale\": colorscale,\n",
-    "        },\n",
-    "    }\n",
+    "    ) \n",
     "    \n",
     "    LEGEND_URL = (\n",
     "        \"https://github.com/cal-itp/data-analyses/raw/\"\n",
     "        \"main/bus_service_increase/\"\n",
     "        \"img/legend_intersecting_parallel.png\"\n",
     "    )\n",
-    "    \n",
-    "    LEGEND_DICT = {\n",
-    "        \"legend_url\": LEGEND_URL,\n",
-    "        \"legend_bottom\": 85,\n",
-    "        \"legend_left\": 5,\n",
-    "    }\n",
     "     \n",
+    "    fig = hwy_df.explore(\n",
+    "        \"Route\", tiles = \"CartoDB Positron\",\n",
+    "        cmap = colorscale, tooltip = list(hwys_popup_dict.keys()),\n",
+    "        name = \"Highways\",\n",
+    "    )\n",
     "    \n",
-    "    fig = map_utils.make_folium_multiple_layers_map(\n",
-    "        LAYERS_DICT,\n",
-    "        fig_width = 700, fig_height = 700, \n",
-    "        zoom=11, \n",
-    "        centroid = [round(transit_centroid.y,2), \n",
-    "                    round(transit_centroid.x, 2)], \n",
-    "        title=f\"Parallel vs Intersecting Lines for {to_map.itp_id.iloc[0]}\",\n",
-    "        legend_dict = LEGEND_DICT\n",
+    "    fig = to_map.explore(\"parallel\",\n",
+    "        m=fig, cmap = colorscale, name=\"Transit Routes\",\n",
+    "        tooltip = list(transit_popup_dict.keys())\n",
     "    )\n",
     "    \n",
+    "    folium.LayerControl().add_to(fig)\n",
+    "\n",
     "    display(fig)\n",
     "    #fig.save(f\"{IMG_PATH}parallel_{operator_name}.html\")\n",
     "    #print(f\"{operator_name} map saved\")"
 
@@ -51,8 +51,8 @@
     "\n",
     "import create_parallel_corridors\n",
     "from bus_service_utils import utils\n",
-    "from shared_utils import geography_utils\n",
-    "from shared_utils import calitp_color_palette as cp\n",
+    "from calitp_data_analysis import portfolio_utils\n",
+    "from calitp_data_analysis import calitp_color_palette as cp\n",
     "\n",
     "IMG_PATH = create_parallel_corridors.IMG_PATH\n",
     "DATA_PATH = create_parallel_corridors.DATA_PATH\n",
@@ -553,7 +553,7 @@
     "            \"addl_service_hrs\", \"service_hours_annual\", \n",
     "            \"addl_service_hrs_annual\"\n",
     "           ]\n",
-    "a1 = geography_utils.aggregate_by_geography(service_increase,\n",
+    "a1 = portfolio_utils.aggregate_by_geography(service_increase,\n",
     "                                       group_cols = [\"itp_id\", \"day_name\", \"tract_type\"],\n",
     "                                       sum_cols = sum_cols,\n",
     "                                      )"
 
@@ -34,8 +34,8 @@
     "import matplotlib.pyplot as plt\n",
     "import pandas as pd\n",
     "\n",
-    "import shared_utils\n",
-    "from bus_service_utils import utils"
+    "from bus_service_utils import utils as bus_utils\n",
+    "from calitp_data_analysis import utils"
    ]
   },
   {
@@ -45,8 +45,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "gdf = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, \n",
-    "                                             \"parallel_or_intersecting\")"
+    "gdf = utils.download_geoparquet(bus_utils.GCS_FILE_PATH, \n",
+    "                                \"parallel_or_intersecting\")"
    ]
   },
   {
@@ -136,7 +136,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.5"
+   "version": "3.9.13"
   }
  },
  "nbformat": 4,
 
@@ -31,8 +31,8 @@
     "\n",
     "import setup_corridors_stats\n",
     "from create_parallel_corridors import IMG_PATH, DATA_PATH\n",
-    "from shared_utils import geography_utils, styleguide\n",
-    "from shared_utils import calitp_color_palette as cp\n",
+    "from calitp_data_analysis import styleguide\n",
+    "from calitp_data_analysis import calitp_color_palette as cp\n",
     "\n",
     "alt.themes.register(\"calitp_theme\", styleguide.calitp_theme)\n",
     "\n",
 
@@ -15,16 +15,17 @@
 
 os.environ["CALITP_BQ_MAX_BYTES"] = str(130_000_000_000)
 
-import shared_utils
-from bus_service_utils import utils
+from shared_utils import gtfs_utils, rt_dates, rt_utils
+from bus_service_utils import utils as bus_utils
+from calitp_data_analysis import geography_utils, utils
 
-ANALYSIS_DATE = shared_utils.rt_dates.PMAC["Q2_2022"]
-COMPILED_CACHED = f"{shared_utils.rt_utils.GCS_FILE_PATH}compiled_cached_views/"
+ANALYSIS_DATE = rt_dates.PMAC["Q2_2022"]
+COMPILED_CACHED = f"{rt_utils.GCS_FILE_PATH}compiled_cached_views/"
 
 
 def grab_service_hours(selected_date: str, 
                        valid_trip_keys: list) -> pd.DataFrame:
-    daily_service_hours = shared_utils.gtfs_utils.get_trips(
+    daily_service_hours = gtfs_utils.get_trips(
         selected_date = selected_date,
         itp_id_list = None,
         # Keep more columns, route_id, shape_id, direction_id so the metrolink fix 
@@ -36,7 +37,7 @@ def grab_service_hours(selected_date: str,
     )
 
     daily_service_hours.to_parquet(
-        f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
+        f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
 
 
 def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
@@ -45,7 +46,7 @@ def merge_trips_with_service_hours(selected_date: str)-> pd.DataFrame:
         f"{COMPILED_CACHED}trips_{selected_date}.parquet")
 
     daily_service_hours = pd.read_parquet(
-        f"{utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
+        f"{bus_utils.GCS_FILE_PATH}service_hours_{selected_date}.parquet")
 
     df = dd.merge(
         trips, 
@@ -120,7 +121,7 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame,
         stop_times_for_trip,
         on = ["calitp_itp_id", "stop_id"],
         how = "inner"
-    ).to_crs(shared_utils.geography_utils.WGS84)
+    ).to_crs(geography_utils.WGS84)
 
 
     stop_times_with_geom2 = (stop_times_with_geom.drop(
@@ -146,8 +147,8 @@ def grab_stops_for_trip_selected(trip_df: dd.DataFrame,
 
     trips_with_stops = grab_stops_for_trip_selected(one_trip, ANALYSIS_DATE)
 
-    shared_utils.utils.geoparquet_gcs_export(
+    utils.geoparquet_gcs_export(
         trips_with_stops,
-        utils.GCS_FILE_PATH,
+        bus_utils.GCS_FILE_PATH,
         f"trips_with_stops_{ANALYSIS_DATE}"
     )
@@ -12,8 +12,8 @@
 from datetime import datetime
 from loguru import logger
 
-import shared_utils
-from bus_service_utils import utils
+from calitp_data_analysis import geography_utils, utils
+from bus_service_utils import utils as bus_utils
 from D1_setup_parallel_trips_with_stops import ANALYSIS_DATE, COMPILED_CACHED
 
 logger.add("./logs/make_gmaps_results.log")
@@ -22,7 +22,7 @@
            level="INFO")
 
 DATA_PATH = "./gmaps_cache/"
-GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"
+GCS_FILE_PATH = f"{bus_utils.GCS_FILE_PATH}gmaps_cache_{ANALYSIS_DATE}/"
 
 def grab_cached_results(df: pd.DataFrame) -> (list, list):
     result_ids = list(df.identifier_num)
@@ -32,7 +32,7 @@ def grab_cached_results(df: pd.DataFrame) -> (list, list):
 
     for i in result_ids:
         try:
-            json_dict = utils.open_request_json(i, 
+            json_dict = bus_utils.open_request_json(i, 
                                                 data_path = DATA_PATH, 
                                                 gcs_file_path = GCS_FILE_PATH
                                    )
@@ -71,7 +71,7 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
 if __name__ == "__main__":    
     time0 = datetime.now()
 
-    df = pd.read_parquet(f"{utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")
+    df = pd.read_parquet(f"{bus_utils.GCS_FILE_PATH}gmaps_df_{ANALYSIS_DATE}.parquet")
 
     successful_ids, durations = grab_cached_results(df)
     logger.info("Grabbed cached results")
@@ -106,11 +106,11 @@ def compare_travel_time_by_mode(df: pd.DataFrame) -> pd.DataFrame:
         how = "inner",
         # many on right because trip_ids can share same shape_id
         validate = "1:m"
-    ).to_crs(shared_utils.geography_utils.WGS84)
+    ).to_crs(geography_utils.WGS84)
 
-    shared_utils.utils.geoparquet_gcs_export(gdf, 
-                                             utils.GCS_FILE_PATH, 
-                                             f"gmaps_results_{ANALYSIS_DATE}")
+    utils.geoparquet_gcs_export(gdf, 
+                                bus_utils.GCS_FILE_PATH, 
+                                f"gmaps_results_{ANALYSIS_DATE}")
 
     end = datetime.now()
     logger.info(f"Total execution: {end - time0}")
 
@@ -13,10 +13,11 @@
 from calitp_data_analysis.tables import tbls
 from siuba import *
 
-import shared_utils
 import D2_setup_gmaps as setup_gmaps
 import E2_aggregated_route_stats as aggregated_route_stats 
-from bus_service_utils import utils
+from bus_service_utils import utils as bus_utils
+from calitp_data_analysis import utils
+from shared_utils import portfolio_utils, rt_utils
 from D1_setup_parallel_trips_with_stops import (ANALYSIS_DATE, COMPILED_CACHED,
                                                 merge_trips_with_service_hours)
 
@@ -62,7 +63,7 @@ def add_trip_time_of_day(trips: pd.DataFrame) -> pd.DataFrame:
     # Add time-of-day
     df = df.assign(
         time_of_day = df.apply(
-            lambda x: shared_utils.rt_utils.categorize_time_of_day(
+            lambda x: rt_utils.categorize_time_of_day(
                 x.trip_first_departure), 
             axis=1)
     )
@@ -193,7 +194,7 @@ def add_route_group(df: gpd.GeoDataFrame,
 
 # Use agency_name from our views.gtfs_schedule.agency instead of Airtable?
 def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
-    agency_names = shared_utils.portfolio_utils.add_agency_name(
+    agency_names = portfolio_utils.add_agency_name(
         selected_date = ANALYSIS_DATE)
 
     df2 = pd.merge(
@@ -209,7 +210,7 @@ def merge_in_agency_name(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
 
 def merge_in_airtable(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     # Don't use name from Airtable. But, use district.
-    caltrans_districts = shared_utils.portfolio_utils.add_caltrans_district()
+    caltrans_districts = portfolio_utils.add_caltrans_district()
 
     # Airtable gives us fewer duplicates than doing tbl.gtfs_schedule.agency()
     # But naming should be done with tbl.gtfs_schedule.agency because that's what's used
@@ -231,7 +232,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     under quarterly performance objective work.
     """
     route_categories = (gpd.read_parquet(
-        f"{utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
+        f"{bus_utils.GCS_FILE_PATH}routes_categorized_{ANALYSIS_DATE}.parquet")
         .rename(columns = {"itp_id": "calitp_itp_id"})
     )
 
@@ -244,7 +245,7 @@ def add_route_categories(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     )
 
     # Clean up route_name
-    route_names = shared_utils.portfolio_utils.add_route_name(ANALYSIS_DATE)
+    route_names = portfolio_utils.add_route_name(ANALYSIS_DATE)
 
     gdf3 = pd.merge(
         gdf2,
@@ -330,7 +331,7 @@ def assemble_data(analysis_date: str, threshold: float = 1.5,
     gdf = assemble_data(ANALYSIS_DATE, threshold = 1.5, 
                         service_time_cutoffs = SERVICE_TIME_CUTOFFS)
 
-    shared_utils.utils.geoparquet_gcs_export(
+    utils.geoparquet_gcs_export(
         gdf, 
-        utils.GCS_FILE_PATH, 
+        bus_utils.GCS_FILE_PATH, 
         f"competitive_route_variability_{ANALYSIS_DATE}")
@@ -13,7 +13,7 @@
 
 from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
 from bus_service_utils import create_parallel_corridors, utils
-from shared_utils import geography_utils, utils
+from calitp_data_analysis import geography_utils, utils
 
 catalog = intake.open_catalog("./*.yml")
 
 
@@ -18,9 +18,8 @@
 import geopandas as gpd
 import pandas as pd
 
-from shared_utils import (geography_utils, gtfs_utils, 
-                          rt_utils, portfolio_utils, utils
-                         )
+from shared_utils import gtfs_utils, portfolio_utils, rt_utils
+from calitp_data_analysis import utils
 from E0_bus_oppor_vars import GCS_FILE_PATH, ANALYSIS_DATE, COMPILED_CACHED_GCS
 from bus_service_utils import gtfs_build
 
@@ -254,7 +253,7 @@ def calculate_mean_speed_by_route(analysis_date: str,
 
     # Each trip is 1 observation, just take the average (not weighted)
     # to get route-level mean_speed_mph
-    mean_speed = geography_utils.aggregate_by_geography(
+    mean_speed = portfolio_utils.aggregate_by_geography(
         df,
         group_cols = group_cols,
         mean_cols = ["mean_speed_mph"]
@@ -281,7 +280,7 @@ def get_competitive_routes() -> pd.DataFrame:
         "num_competitive", "pct_trips_competitive",
     ]
 
-    route_df = geography_utils.aggregate_by_geography(
+    route_df = portfolio_utils.aggregate_by_geography(
         trip_df,
         group_cols = route_level_cols,
         mean_cols = ["bus_multiplier", "bus_difference"],
Original file line number	Diff line number	Diff line change
`@@ -34,8 +34,8 @@`
`34`	`34`	`"import matplotlib.pyplot as plt\n",`
`35`	`35`	`"import pandas as pd\n",`
`36`	`36`	`"\n",`
`37`		`- "import shared_utils\n",`
`38`		`- "from bus_service_utils import utils"`
	`37`	`+ "from bus_service_utils import utils as bus_utils\n",`
	`38`	`+ "from calitp_data_analysis import utils"`
`39`	`39`	`]`
`40`	`40`	`},`
`41`	`41`	`{`
`@@ -45,8 +45,8 @@`
`45`	`45`	`"metadata": {},`
`46`	`46`	`"outputs": [],`
`47`	`47`	`"source": [`
`48`		`- "gdf = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, \n",`
`49`		`- " \"parallel_or_intersecting\")"`
	`48`	`+ "gdf = utils.download_geoparquet(bus_utils.GCS_FILE_PATH, \n",`
	`49`	`+ " \"parallel_or_intersecting\")"`
`50`	`50`	`]`
`51`	`51`	`},`
`52`	`52`	`{`
`@@ -136,7 +136,7 @@`
`136`	`136`	`"name": "python",`
`137`	`137`	`"nbconvert_exporter": "python",`
`138`	`138`	`"pygments_lexer": "ipython3",`
`139`		`- "version": "3.10.5"`
	`139`	`+ "version": "3.9.13"`
`140`	`140`	`}`
`141`	`141`	`},`
`142`	`142`	`"nbformat": 4,`