diff --git a/sb125_analyses/corridor_study/_utils.py b/sb125_analyses/corridor_study/_utils.py
new file mode 100644
index 000000000..aba4dcdc6
--- /dev/null
+++ b/sb125_analyses/corridor_study/_utils.py
@@ -0,0 +1,113 @@
+"""
+Replica and Streetlight Analysis Utils
+"""
+
+import pandas as pd
+from siuba import *
+import ast
+
+from calitp_data_analysis.sql import to_snakecase
+
+import altair as alt
+from calitp_data_analysis import calitp_color_palette as cp
+
+
+"""
+Replica Analysis Utils
+"""
+## Summarize Replica public-transit trip data into DataFrames that are easier to analyze
def get_tranist_agency_counts(df, primary_mode_col, transit_mode_col, transit_agency_col, activity_id_col):
    """Summarize public-transit trips by agency combination and by transit mode.

    Only rows whose ``primary_mode_col`` equals ``"public_transit"`` are used.

    Args:
        df: trips DataFrame.
        primary_mode_col: name of the column holding the primary mode.
        transit_mode_col: name of the column holding the transit mode text.
        transit_agency_col: name of the column holding the ", "-separated
            transit agency text.
        activity_id_col: name of the column whose unique values are counted.

    Returns:
        A tuple ``(agencies, modes)``:

        * ``agencies`` has one row per (primary mode, transit mode, transit
          agency) combination with ``n`` = number of unique
          ``activity_id_col`` values, sorted by ``n`` descending, plus
          ``agency_count`` (distinct lower-cased ", "-separated names in the
          agency text) and ``n_modes_taken`` (whitespace-separated tokens in
          the transit mode text).
        * ``modes`` has one row per transit mode value with its row count
          ``n``, sorted by ``n`` descending, plus ``n_modes_taken``.

        The text columns are cast to ``str`` before counting, so a missing
        value becomes the literal ``"nan"`` and is counted as one entry.
    """

    def _n_tokens(text):
        # Number of whitespace-separated tokens in the mode text.
        return len(text.split())

    # Both summaries start from the same public-transit subset.
    transit_trips = df >> filter(_[primary_mode_col] == "public_transit")

    # --- trips per (primary mode, transit mode, agency) combination ---
    agency_summary = (
        transit_trips
        >> group_by(_[primary_mode_col], _[transit_mode_col], _[transit_agency_col])
        >> summarize(n=_[activity_id_col].nunique())
        >> arrange(-_.n)
    )

    agency_summary[transit_mode_col] = agency_summary[transit_mode_col].astype(str)
    agency_summary[transit_agency_col] = agency_summary[transit_agency_col].astype(str)

    lowered_agencies = agency_summary[transit_agency_col].str.lower()
    agency_summary['agency_count'] = [
        len(set(names.split(", "))) for names in lowered_agencies
    ]
    agency_summary['n_modes_taken'] = agency_summary[transit_mode_col].apply(_n_tokens)

    # --- row counts per transit mode value ---
    mode_summary = transit_trips >> count(_[transit_mode_col]) >> arrange(-_.n)

    mode_summary[transit_mode_col] = mode_summary[transit_mode_col].astype(str)
    mode_summary['n_modes_taken'] = mode_summary[transit_mode_col].apply(_n_tokens)

    return agency_summary, mode_summary
+
def get_list_of_agencies(df, transit_agency_col):
    """Return the set of title-cased agency names found in one column.

    Each value in ``df[transit_agency_col]`` is treated as a ", "-separated
    list of agency names. Every name is title-cased with ``str.title()`` and
    collected into a single set.

    The names are split directly rather than being pasted into a Python list
    literal and parsed, so names containing apostrophes or backslashes are
    handled, and a single-row frame works.

    Args:
        df: DataFrame holding the agency column.
        transit_agency_col: name of the column with ", "-separated agency text.

    Returns:
        set[str]: unique title-cased agency names. Missing values and empty
        names are skipped, so an empty column yields an empty set.
    """
    agency_names = set()

    # dropna() skips missing cells; str() guards against non-string values.
    for entry in df[transit_agency_col].dropna():
        for name in str(entry).title().split(", "):
            if name:
                agency_names.add(name)

    return agency_names
+
def get_dummies_by_agency(df, col):
    """Add one count column per agency plus a ``unique_agencies`` column.

    Each value in ``df[col]`` is treated as a ", "-separated list of agency
    names. For every distinct name a column with that name is added, holding
    how many times the name appears in the row's list. Names are compared as
    whole tokens, not as regular expressions or substrings, so "BART" is not
    counted inside "BART Shuttle" and names with regex characters such as
    "SFMTA (Muni)" are counted correctly.

    Args:
        df: DataFrame to annotate. It is modified in place.
        col: name of the column with ", "-separated agency text.

    Returns:
        The same ``df`` object, with the new agency columns (added in sorted
        name order) and ``unique_agencies``: the row's distinct agency names,
        sorted and joined with ", ". Rows with a missing value get 0 in every
        agency column and an empty ``unique_agencies`` string.

    Note:
        An agency whose name equals an existing column name overwrites that
        column.
    """
    # Split each row once; non-string (missing) cells become an empty list.
    tokens_per_row = df[col].apply(
        lambda value: value.split(", ") if isinstance(value, str) else []
    )

    # Sorted so the added columns come out in a deterministic order.
    all_agencies = sorted({name for tokens in tokens_per_row for name in tokens})

    for agency in all_agencies:
        # Bind `agency` as a default so each lambda keeps its own name.
        df[agency] = tokens_per_row.apply(
            lambda tokens, agency=agency: tokens.count(agency)
        )

    df['unique_agencies'] = tokens_per_row.apply(
        lambda tokens: ', '.join(sorted(set(tokens)))
    )

    return df
+
def get_agencies_occurances(df):
    """Total the per-agency indicator columns into an (agency, n_trips) table.

    Every column of ``df`` that is not one of the known non-agency columns is
    treated as an agency column, and its values are summed over all rows.

    The sum is taken over the numeric agency columns only. Summing after a
    transpose would also include the string ``agency`` label column, which
    raises ``TypeError`` on pandas versions where ``numeric_only`` defaults
    to ``False``.

    Args:
        df: DataFrame with one numeric column per agency alongside the
            summary columns listed in ``non_agency_cols`` below.

    Returns:
        DataFrame with two columns: ``agency`` (the column name) and
        ``n_trips`` (the column total). Non-numeric columns are ignored.

    NOTE(review): ``n_trips`` is the plain sum of each agency column and is
    not weighted by the ``n`` column -- confirm this is the intended meaning
    before reading it as a trip count.
    """
    # Columns that describe the row rather than an individual agency.
    non_agency_cols = {
        'transit_agency', 'unique_agencies', 'primary_mode', 'transit_submode',
        'n', 'agency_count', 'n_modes_taken',
    }
    agency_cols = [name for name in df.columns if name not in non_agency_cols]

    # Column-wise totals; numeric_only keeps stray text columns out of the sum.
    totals = df[agency_cols].sum(axis=0, numeric_only=True)

    return totals.rename_axis('agency').reset_index(name='n_trips')
+
+"""
+Streetlight Analysis Utils
+"""
+
+
diff --git a/sb125_analyses/corridor_study/data_downloads_baybridge.ipynb b/sb125_analyses/corridor_study/data_downloads_baybridge.ipynb
new file mode 100644
index 000000000..181e33062
--- /dev/null
+++ b/sb125_analyses/corridor_study/data_downloads_baybridge.ipynb
@@ -0,0 +1,5807 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "82a53f1d-2622-4cb0-bfdd-36ceec652215",
+ "metadata": {},
+ "source": [
+ "# SB1 Big Data Downloads\n",
+ "An analysis of the corridor study data downloaded from Streetlight and Replica"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "45233485-2055-499a-a89e-fc154fd56e63",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_483/4150955979.py:10: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n",
+ " from IPython.core.display import display\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from siuba import *\n",
+ "\n",
+ "from calitp_data_analysis.sql import to_snakecase\n",
+ "\n",
+ "import altair as alt\n",
+ "from calitp_data_analysis import calitp_color_palette as cp\n",
+ "\n",
+ "from IPython.display import Markdown, HTML, display_html, display\n",
+ "from IPython.core.display import display\n",
+ "\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "577fe167-1d9d-4676-999c-ccb2b89575ff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import _utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "651aae92-5188-4676-8e5f-3c040f77077c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.set_option('display.max_columns', 500)\n",
+ "pd.set_option('display.max_colwidth', 100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "af402da0-0b03-4c71-a1af-19f97f67cef1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/sb125/corridor_study_data/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "71c1deda-8e83-45d8-a0f5-20b36b7051c0",
+ "metadata": {},
+ "source": [
+ "### Read in Replica Data Spring 2023"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "0e5c1b44-c981-4a9a-b6ea-e1cb2a7a279e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "replica_trips = \"replica-bay_bridge-03_06_24-trips_dataset.csv\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "b0bd67f0-6f82-4f2c-8da0-3627a2813571",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "replica_ppl = \"replica-bay_bridge-03_06_24-people_dataset.csv\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9f14685-0953-4b1d-a636-882230fef1d5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "cb3dea26-4e6d-4dc7-acc8-27b899a0a537",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_483/2643890524.py:1: DtypeWarning: Columns (2,4,5,9,10,11,18,25,26,29,30,35,36,40,43,48,50) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ca697494-b872-4de8-afd9-c538a455364c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
Sample of data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\" Sample of data
\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "4713b1a5-ccac-4e1b-8552-850285cad53e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " origin_building_use | \n",
+ " trip_duration_minutes | \n",
+ " trip_taker_language | \n",
+ " origin_trct_2020 | \n",
+ " trip_taker_household_id | \n",
+ " trip_taker_sex | \n",
+ " trip_taker_home_bgrp_2020 | \n",
+ " origin_st_2020 | \n",
+ " trip_taker_home_st_2020 | \n",
+ " trip_taker_race_ethnicity | \n",
+ " transit_agency | \n",
+ " transit_route | \n",
+ " trip_taker_resident_type | \n",
+ " vehicle_type | \n",
+ " trip_taker_age | \n",
+ " activity_id | \n",
+ " destination_bgrp_2020 | \n",
+ " destination_building_use | \n",
+ " trip_taker_wfh | \n",
+ " destination_st_2020 | \n",
+ " destination_cty_2020 | \n",
+ " trip_taker_work_bgrp_2020 | \n",
+ " destination_trct_2020 | \n",
+ " trip_taker_household_income | \n",
+ " origin_bgrp_2020 | \n",
+ " trip_taker_commute_mode | \n",
+ " trip_taker_available_vehicles | \n",
+ " primary_mode | \n",
+ " previous_trip_purpose | \n",
+ " trip_taker_building_type | \n",
+ " transit_submode | \n",
+ " trip_taker_work_st_2020 | \n",
+ " trip_taker_household_size | \n",
+ " origin_cty_2020 | \n",
+ " destination_land_use | \n",
+ " trip_taker_industry | \n",
+ " trip_taker_tenure | \n",
+ " vehicle_fuel_type | \n",
+ " trip_taker_home_trct_2020 | \n",
+ " trip_taker_work_trct_2020 | \n",
+ " trip_taker_education | \n",
+ " trip_start_time | \n",
+ " trip_taker_individual_income | \n",
+ " trip_taker_employment_status | \n",
+ " trip_purpose | \n",
+ " origin_land_use | \n",
+ " trip_taker_work_cty_2020 | \n",
+ " trip_distance_miles | \n",
+ " trip_taker_school_grade_attending | \n",
+ " trip_taker_home_cty_2020 | \n",
+ " trip_taker_person_id | \n",
+ " trip_end_time | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 199078 | \n",
+ " multi_family | \n",
+ " 46 | \n",
+ " other | \n",
+ " 4501.01 (Alameda, CA) | \n",
+ " 69970950706205725 | \n",
+ " male | \n",
+ " 2 (Tract 4501.01, Alameda, CA) | \n",
+ " California | \n",
+ " California | \n",
+ " white_not_hispanic_or_latino | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " core | \n",
+ " unknown_vehicle_type | \n",
+ " 39.0 | \n",
+ " 4486197526664593040 | \n",
+ " 2 (Tract 105, San Francisco, CA) | \n",
+ " non_retail_attraction | \n",
+ " in_person | \n",
+ " California | \n",
+ " San Francisco | \n",
+ " 2 (Tract 105, San Francisco, CA) | \n",
+ " 105 (San Francisco, CA) | \n",
+ " 237966.0 | \n",
+ " 2 (Tract 4501.01, Alameda, CA) | \n",
+ " public_transit | \n",
+ " one | \n",
+ " auto_passenger | \n",
+ " home | \n",
+ " multiple_units | \n",
+ " NaN | \n",
+ " California | \n",
+ " 4.0 | \n",
+ " Alameda | \n",
+ " non_retail_attraction | \n",
+ " naics31_33 | \n",
+ " renter | \n",
+ " unknown_fuel_type | \n",
+ " 4501.01 (Alameda, CA) | \n",
+ " 105 (San Francisco, CA) | \n",
+ " advanced_degree | \n",
+ " 04:53:00 | \n",
+ " 237966.0 | \n",
+ " employed | \n",
+ " work | \n",
+ " multi_family | \n",
+ " San Francisco | \n",
+ " 36.7 | \n",
+ " not_attending_school | \n",
+ " Alameda | \n",
+ " 11699013211020046684 | \n",
+ " 05:39:20 | \n",
+ "
\n",
+ " \n",
+ " 362910 | \n",
+ " multi_family | \n",
+ " 65 | \n",
+ " english | \n",
+ " 615.06 (San Francisco, CA) | \n",
+ " 14710396768871133663 | \n",
+ " female | \n",
+ " 1 (Tract 615.06, San Francisco, CA) | \n",
+ " California | \n",
+ " California | \n",
+ " white_not_hispanic_or_latino | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " core | \n",
+ " unknown_vehicle_type | \n",
+ " 30.0 | \n",
+ " 3388798362605091248 | \n",
+ " 1 (Tract 3551.13, Contra Costa, CA) | \n",
+ " single_family | \n",
+ " in_person | \n",
+ " California | \n",
+ " Contra Costa | \n",
+ " 1 (Tract 615.01, San Francisco, CA) | \n",
+ " 3551.13 (Contra Costa, CA) | \n",
+ " 237586.0 | \n",
+ " 1 (Tract 615.06, San Francisco, CA) | \n",
+ " walking | \n",
+ " one | \n",
+ " auto_passenger | \n",
+ " home | \n",
+ " multiple_units | \n",
+ " NaN | \n",
+ " California | \n",
+ " 2.0 | \n",
+ " San Francisco | \n",
+ " single_family | \n",
+ " naics5151 | \n",
+ " renter | \n",
+ " unknown_fuel_type | \n",
+ " 615.06 (San Francisco, CA) | \n",
+ " 615.01 (San Francisco, CA) | \n",
+ " bachelors_degree | \n",
+ " 17:25:00 | \n",
+ " 77510.0 | \n",
+ " employed | \n",
+ " social | \n",
+ " mixed_use | \n",
+ " San Francisco | \n",
+ " 33.7 | \n",
+ " not_attending_school | \n",
+ " San Francisco | \n",
+ " 14279596460102489210 | \n",
+ " 18:30:49 | \n",
+ "
\n",
+ " \n",
+ " 397490 | \n",
+ " education | \n",
+ " 53 | \n",
+ " indo_european | \n",
+ " 301.02 (San Francisco, CA) | \n",
+ " 11603413512180790232 | \n",
+ " male | \n",
+ " 5 (Tract 4381, Alameda, CA) | \n",
+ " California | \n",
+ " California | \n",
+ " two_races_not_hispanic_or_latino | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " core | \n",
+ " unknown_vehicle_type | \n",
+ " 46.0 | \n",
+ " 17748416797468343771 | \n",
+ " 5 (Tract 4381, Alameda, CA) | \n",
+ " multi_family | \n",
+ " in_person | \n",
+ " California | \n",
+ " Alameda | \n",
+ " 3 (Tract 301.02, San Francisco, CA) | \n",
+ " 4381 (Alameda, CA) | \n",
+ " 101548.0 | \n",
+ " 3 (Tract 301.02, San Francisco, CA) | \n",
+ " private_auto | \n",
+ " two | \n",
+ " private_auto | \n",
+ " work | \n",
+ " several_units | \n",
+ " NaN | \n",
+ " California | \n",
+ " 4.0 | \n",
+ " San Francisco | \n",
+ " multi_family | \n",
+ " naics61 | \n",
+ " owner | \n",
+ " other_non_bev | \n",
+ " 4381 (Alameda, CA) | \n",
+ " 301.02 (San Francisco, CA) | \n",
+ " some_college | \n",
+ " 20:53:05 | \n",
+ " 71051.0 | \n",
+ " employed | \n",
+ " home | \n",
+ " education | \n",
+ " San Francisco | \n",
+ " 34.1 | \n",
+ " not_attending_school | \n",
+ " Alameda | \n",
+ " 9371860226741686287 | \n",
+ " 21:46:49 | \n",
+ "
\n",
+ " \n",
+ " 646286 | \n",
+ " single_family | \n",
+ " 42 | \n",
+ " asian_pacific | \n",
+ " 3851 (Contra Costa, CA) | \n",
+ " 8856161571093041221 | \n",
+ " female | \n",
+ " 2 (Tract 3851, Contra Costa, CA) | \n",
+ " California | \n",
+ " California | \n",
+ " asian_not_hispanic_or_latino | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " core | \n",
+ " unknown_vehicle_type | \n",
+ " 61.0 | \n",
+ " 1096586971710081424 | \n",
+ " 1 (Tract 177, San Francisco, CA) | \n",
+ " retail | \n",
+ " unemployed_under_16_not_in_labor_force | \n",
+ " California | \n",
+ " San Francisco | \n",
+ " Does not have work/school location | \n",
+ " 177 (San Francisco, CA) | \n",
+ " 163758.0 | \n",
+ " 2 (Tract 3851, Contra Costa, CA) | \n",
+ " other_travel_mode | \n",
+ " two | \n",
+ " auto_passenger | \n",
+ " home | \n",
+ " single_family | \n",
+ " NaN | \n",
+ " Does not have work/school location | \n",
+ " 4.0 | \n",
+ " Contra Costa | \n",
+ " mixed_use | \n",
+ " not_working | \n",
+ " owner | \n",
+ " unknown_fuel_type | \n",
+ " 3851 (Contra Costa, CA) | \n",
+ " Does not have work/school location | \n",
+ " high_school | \n",
+ " 17:10:00 | \n",
+ " 11123.0 | \n",
+ " not_in_labor_force | \n",
+ " eat | \n",
+ " single_family | \n",
+ " Does not have work/school location | \n",
+ " 17.5 | \n",
+ " not_attending_school | \n",
+ " Contra Costa | \n",
+ " 15224202816917079679 | \n",
+ " 17:52:23 | \n",
+ "
\n",
+ " \n",
+ " 432031 | \n",
+ " single_family | \n",
+ " 30 | \n",
+ " english | \n",
+ " 3790 (Contra Costa, CA) | \n",
+ " 11125850078941310816 | \n",
+ " male | \n",
+ " 1 (Tract 3790, Contra Costa, CA) | \n",
+ " California | \n",
+ " California | \n",
+ " white_not_hispanic_or_latino | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " core | \n",
+ " unknown_vehicle_type | \n",
+ " 58.0 | \n",
+ " 17878107943481256906 | \n",
+ " 3 (Tract 151, San Francisco, CA) | \n",
+ " office | \n",
+ " in_person | \n",
+ " California | \n",
+ " San Francisco | \n",
+ " 3 (Tract 151, San Francisco, CA) | \n",
+ " 151 (San Francisco, CA) | \n",
+ " 191291.0 | \n",
+ " 1 (Tract 3790, Contra Costa, CA) | \n",
+ " private_auto | \n",
+ " two | \n",
+ " auto_passenger | \n",
+ " home | \n",
+ " single_family | \n",
+ " NaN | \n",
+ " California | \n",
+ " 3.0 | \n",
+ " Contra Costa | \n",
+ " mixed_use | \n",
+ " naics42 | \n",
+ " owner | \n",
+ " unknown_fuel_type | \n",
+ " 3790 (Contra Costa, CA) | \n",
+ " 151 (San Francisco, CA) | \n",
+ " some_college | \n",
+ " 13:50:00 | \n",
+ " 81981.0 | \n",
+ " employed | \n",
+ " work | \n",
+ " single_family | \n",
+ " San Francisco | \n",
+ " 18.4 | \n",
+ " not_attending_school | \n",
+ " Contra Costa | \n",
+ " 17663033273047637228 | \n",
+ " 14:20:30 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " origin_building_use trip_duration_minutes trip_taker_language \\\n",
+ "199078 multi_family 46 other \n",
+ "362910 multi_family 65 english \n",
+ "397490 education 53 indo_european \n",
+ "646286 single_family 42 asian_pacific \n",
+ "432031 single_family 30 english \n",
+ "\n",
+ " origin_trct_2020 trip_taker_household_id trip_taker_sex \\\n",
+ "199078 4501.01 (Alameda, CA) 69970950706205725 male \n",
+ "362910 615.06 (San Francisco, CA) 14710396768871133663 female \n",
+ "397490 301.02 (San Francisco, CA) 11603413512180790232 male \n",
+ "646286 3851 (Contra Costa, CA) 8856161571093041221 female \n",
+ "432031 3790 (Contra Costa, CA) 11125850078941310816 male \n",
+ "\n",
+ " trip_taker_home_bgrp_2020 origin_st_2020 \\\n",
+ "199078 2 (Tract 4501.01, Alameda, CA) California \n",
+ "362910 1 (Tract 615.06, San Francisco, CA) California \n",
+ "397490 5 (Tract 4381, Alameda, CA) California \n",
+ "646286 2 (Tract 3851, Contra Costa, CA) California \n",
+ "432031 1 (Tract 3790, Contra Costa, CA) California \n",
+ "\n",
+ " trip_taker_home_st_2020 trip_taker_race_ethnicity \\\n",
+ "199078 California white_not_hispanic_or_latino \n",
+ "362910 California white_not_hispanic_or_latino \n",
+ "397490 California two_races_not_hispanic_or_latino \n",
+ "646286 California asian_not_hispanic_or_latino \n",
+ "432031 California white_not_hispanic_or_latino \n",
+ "\n",
+ " transit_agency transit_route trip_taker_resident_type \\\n",
+ "199078 NaN NaN core \n",
+ "362910 NaN NaN core \n",
+ "397490 NaN NaN core \n",
+ "646286 NaN NaN core \n",
+ "432031 NaN NaN core \n",
+ "\n",
+ " vehicle_type trip_taker_age activity_id \\\n",
+ "199078 unknown_vehicle_type 39.0 4486197526664593040 \n",
+ "362910 unknown_vehicle_type 30.0 3388798362605091248 \n",
+ "397490 unknown_vehicle_type 46.0 17748416797468343771 \n",
+ "646286 unknown_vehicle_type 61.0 1096586971710081424 \n",
+ "432031 unknown_vehicle_type 58.0 17878107943481256906 \n",
+ "\n",
+ " destination_bgrp_2020 destination_building_use \\\n",
+ "199078 2 (Tract 105, San Francisco, CA) non_retail_attraction \n",
+ "362910 1 (Tract 3551.13, Contra Costa, CA) single_family \n",
+ "397490 5 (Tract 4381, Alameda, CA) multi_family \n",
+ "646286 1 (Tract 177, San Francisco, CA) retail \n",
+ "432031 3 (Tract 151, San Francisco, CA) office \n",
+ "\n",
+ " trip_taker_wfh destination_st_2020 \\\n",
+ "199078 in_person California \n",
+ "362910 in_person California \n",
+ "397490 in_person California \n",
+ "646286 unemployed_under_16_not_in_labor_force California \n",
+ "432031 in_person California \n",
+ "\n",
+ " destination_cty_2020 trip_taker_work_bgrp_2020 \\\n",
+ "199078 San Francisco 2 (Tract 105, San Francisco, CA) \n",
+ "362910 Contra Costa 1 (Tract 615.01, San Francisco, CA) \n",
+ "397490 Alameda 3 (Tract 301.02, San Francisco, CA) \n",
+ "646286 San Francisco Does not have work/school location \n",
+ "432031 San Francisco 3 (Tract 151, San Francisco, CA) \n",
+ "\n",
+ " destination_trct_2020 trip_taker_household_income \\\n",
+ "199078 105 (San Francisco, CA) 237966.0 \n",
+ "362910 3551.13 (Contra Costa, CA) 237586.0 \n",
+ "397490 4381 (Alameda, CA) 101548.0 \n",
+ "646286 177 (San Francisco, CA) 163758.0 \n",
+ "432031 151 (San Francisco, CA) 191291.0 \n",
+ "\n",
+ " origin_bgrp_2020 trip_taker_commute_mode \\\n",
+ "199078 2 (Tract 4501.01, Alameda, CA) public_transit \n",
+ "362910 1 (Tract 615.06, San Francisco, CA) walking \n",
+ "397490 3 (Tract 301.02, San Francisco, CA) private_auto \n",
+ "646286 2 (Tract 3851, Contra Costa, CA) other_travel_mode \n",
+ "432031 1 (Tract 3790, Contra Costa, CA) private_auto \n",
+ "\n",
+ " trip_taker_available_vehicles primary_mode previous_trip_purpose \\\n",
+ "199078 one auto_passenger home \n",
+ "362910 one auto_passenger home \n",
+ "397490 two private_auto work \n",
+ "646286 two auto_passenger home \n",
+ "432031 two auto_passenger home \n",
+ "\n",
+ " trip_taker_building_type transit_submode \\\n",
+ "199078 multiple_units NaN \n",
+ "362910 multiple_units NaN \n",
+ "397490 several_units NaN \n",
+ "646286 single_family NaN \n",
+ "432031 single_family NaN \n",
+ "\n",
+ " trip_taker_work_st_2020 trip_taker_household_size \\\n",
+ "199078 California 4.0 \n",
+ "362910 California 2.0 \n",
+ "397490 California 4.0 \n",
+ "646286 Does not have work/school location 4.0 \n",
+ "432031 California 3.0 \n",
+ "\n",
+ " origin_cty_2020 destination_land_use trip_taker_industry \\\n",
+ "199078 Alameda non_retail_attraction naics31_33 \n",
+ "362910 San Francisco single_family naics5151 \n",
+ "397490 San Francisco multi_family naics61 \n",
+ "646286 Contra Costa mixed_use not_working \n",
+ "432031 Contra Costa mixed_use naics42 \n",
+ "\n",
+ " trip_taker_tenure vehicle_fuel_type trip_taker_home_trct_2020 \\\n",
+ "199078 renter unknown_fuel_type 4501.01 (Alameda, CA) \n",
+ "362910 renter unknown_fuel_type 615.06 (San Francisco, CA) \n",
+ "397490 owner other_non_bev 4381 (Alameda, CA) \n",
+ "646286 owner unknown_fuel_type 3851 (Contra Costa, CA) \n",
+ "432031 owner unknown_fuel_type 3790 (Contra Costa, CA) \n",
+ "\n",
+ " trip_taker_work_trct_2020 trip_taker_education \\\n",
+ "199078 105 (San Francisco, CA) advanced_degree \n",
+ "362910 615.01 (San Francisco, CA) bachelors_degree \n",
+ "397490 301.02 (San Francisco, CA) some_college \n",
+ "646286 Does not have work/school location high_school \n",
+ "432031 151 (San Francisco, CA) some_college \n",
+ "\n",
+ " trip_start_time trip_taker_individual_income \\\n",
+ "199078 04:53:00 237966.0 \n",
+ "362910 17:25:00 77510.0 \n",
+ "397490 20:53:05 71051.0 \n",
+ "646286 17:10:00 11123.0 \n",
+ "432031 13:50:00 81981.0 \n",
+ "\n",
+ " trip_taker_employment_status trip_purpose origin_land_use \\\n",
+ "199078 employed work multi_family \n",
+ "362910 employed social mixed_use \n",
+ "397490 employed home education \n",
+ "646286 not_in_labor_force eat single_family \n",
+ "432031 employed work single_family \n",
+ "\n",
+ " trip_taker_work_cty_2020 trip_distance_miles \\\n",
+ "199078 San Francisco 36.7 \n",
+ "362910 San Francisco 33.7 \n",
+ "397490 San Francisco 34.1 \n",
+ "646286 Does not have work/school location 17.5 \n",
+ "432031 San Francisco 18.4 \n",
+ "\n",
+ " trip_taker_school_grade_attending trip_taker_home_cty_2020 \\\n",
+ "199078 not_attending_school Alameda \n",
+ "362910 not_attending_school San Francisco \n",
+ "397490 not_attending_school Alameda \n",
+ "646286 not_attending_school Contra Costa \n",
+ "432031 not_attending_school Contra Costa \n",
+ "\n",
+ " trip_taker_person_id trip_end_time \n",
+ "199078 11699013211020046684 05:39:20 \n",
+ "362910 14279596460102489210 18:30:49 \n",
+ "397490 9371860226741686287 21:46:49 \n",
+ "646286 15224202816917079679 17:52:23 \n",
+ "432031 17663033273047637228 14:20:30 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips.sample(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2199852-9b6e-46ca-86fd-abe71148d13f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "75945ef2-eab7-469a-baad-ee563d70c309",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " Columns in Replica Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\" Columns in Replica Trips Data
\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "1f3e4cbb-d211-40b1-b4bb-2c7cb43b33a9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['origin_building_use', 'trip_duration_minutes', 'trip_taker_language',\n",
+ " 'origin_trct_2020', 'trip_taker_household_id', 'trip_taker_sex',\n",
+ " 'trip_taker_home_bgrp_2020', 'origin_st_2020',\n",
+ " 'trip_taker_home_st_2020', 'trip_taker_race_ethnicity',\n",
+ " 'transit_agency', 'transit_route', 'trip_taker_resident_type',\n",
+ " 'vehicle_type', 'trip_taker_age', 'activity_id',\n",
+ " 'destination_bgrp_2020', 'destination_building_use', 'trip_taker_wfh',\n",
+ " 'destination_st_2020', 'destination_cty_2020',\n",
+ " 'trip_taker_work_bgrp_2020', 'destination_trct_2020',\n",
+ " 'trip_taker_household_income', 'origin_bgrp_2020',\n",
+ " 'trip_taker_commute_mode', 'trip_taker_available_vehicles',\n",
+ " 'primary_mode', 'previous_trip_purpose', 'trip_taker_building_type',\n",
+ " 'transit_submode', 'trip_taker_work_st_2020',\n",
+ " 'trip_taker_household_size', 'origin_cty_2020', 'destination_land_use',\n",
+ " 'trip_taker_industry', 'trip_taker_tenure', 'vehicle_fuel_type',\n",
+ " 'trip_taker_home_trct_2020', 'trip_taker_work_trct_2020',\n",
+ " 'trip_taker_education', 'trip_start_time',\n",
+ " 'trip_taker_individual_income', 'trip_taker_employment_status',\n",
+ " 'trip_purpose', 'origin_land_use', 'trip_taker_work_cty_2020',\n",
+ " 'trip_distance_miles', 'trip_taker_school_grade_attending',\n",
+ " 'trip_taker_home_cty_2020', 'trip_taker_person_id', 'trip_end_time'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e60fb5c7-a04b-48f3-8282-ca98a54dadd2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "92f9a5f4-08f1-43fc-ae01-c6bd404891d6",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "#### Data Exploration: Replica Trips\n",
+ "* Summarizing the data that we exported from Replica\n",
+ "* Existing visualizations in replica: \n",
+ " * Primary Mode\n",
+ " * Trip Purpose\n",
+ " * Starting hour \n",
+ " * Trip Duration/Distance\n",
+ " * Origin Destination\n",
+ " * Vehicle Fuel type\n",
+ " * Transit Routes/Stops/Sub mode/Agency\n",
+ " * Household Income\n",
+ " * Race and Ethnicity\n",
+ " * Private Auto Availability\n",
+ " * Age\n",
+ " * Employment/School Status\n",
+ " \n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "d7f3b44a-cd77-42e9-b0d8-7b9d228b37d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trips = r_trips>>group_by(_.primary_mode)>>summarize(avg_trip_time = _.trip_duration_minutes.mean(),\n",
+ " number_trips = _.activity_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "05893215-e993-44dc-9c4b-1aa7d94815cd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(trips)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\"),\n",
+ " y=alt.Y(\"avg_trip_time\"),\n",
+ " color=alt.Color(\"avg_trip_time\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)\n",
+ " ), tooltip=trips.columns.tolist())\n",
+ " .properties(title = \"Average Trip Time\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "65681a29-7f1b-42ed-9e0e-371a84a29cbd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(trips)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\"),\n",
+ " y=alt.Y(\"number_trips\"),\n",
+ " color=alt.Color(\"number_trips\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,)\n",
+ " ), tooltip=trips.columns.tolist())\n",
+ " .properties(title=\"Number of Trips\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "0d60fe54-dc88-4570-8610-125b37b5917d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Length of Trips dataframe
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Length of Trips dataframe
\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "4f98abae-ae6b-4b4e-9427-b606a557582f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "674864"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(r_trips)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "55c6ff06-afed-45dc-b764-93188c2b9958",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Number of unique activity ids in data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Number of unique activity ids in data
\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "27c00872-5880-4464-84a5-e91423ce7895",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " number_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 674864 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " number_trips\n",
+ "0 674864"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>summarize(number_trips = _.activity_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "a20cbce8-e354-4aa3-99db-666a13576b5e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Number of unique trip taker ids
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Number of unique trip taker ids
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "2def0aaf-9182-4393-8049-2dfb95749585",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " number_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 309596 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " number_trips\n",
+ "0 309596"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>summarize(number_trips = _.trip_taker_person_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "47fecd22-8276-48b8-9c74-d3a94906bbae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "race_ethnicity_by_mode = r_trips>>group_by(_.primary_mode)>>count(_.trip_taker_race_ethnicity)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "4cbeb867-916b-48aa-9949-d769fcd5af5a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Trip Taker Race and Ethnicity by Mode
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Trip Taker Race and Ethnicity by Mode
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "5e895499-2142-432d-8402-c035f81326b6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(race_ethnicity_by_mode)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\"),\n",
+ " y=alt.Y(\"n\"),\n",
+ " color=alt.Color(\"trip_taker_race_ethnicity\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=race_ethnicity_by_mode[\"trip_taker_race_ethnicity\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=race_ethnicity_by_mode.columns.tolist())\n",
+ " \n",
+ " .properties(\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da3bbe59-9b06-471f-beb1-4762b8939600",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "500ad806-51ed-47e9-88a5-fc8a41edd7aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Transit Mode Splits
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Transit Mode Splits\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "d8cd6354-0433-4134-ad6e-9df92122cbd2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count.\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "261d7f48-f7c3-4ea7-a526-6f813ca16ede",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_agency_count = (r_trips\n",
+ "# >>filter(_.primary_mode==\"public_transit\")\n",
+ "# >>group_by(_.primary_mode, _.transit_submode, _.transit_agency)\n",
+ "# >>summarize(n =_.activity_id.nunique())\n",
+ "# >>arrange(-_.n))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "e4516beb-8277-4fd4-aed1-23af65fc4a28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_agency_count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "727511b8-3141-40e4-b654-30aaae8200d9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Most common transit mode combinations
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Most common transit mode combinations\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "f3c31371-899f-4474-85b6-86e78e89209b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_modes = (r_trips\n",
+ "# >>filter(_.primary_mode ==\"public_transit\")\n",
+ "# >>count(_.transit_submode)>>arrange(-_.n))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "657ff2f3-0d0d-419d-b3d5-512212897fb1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_modes.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "80daf7a1-b947-469c-b3b0-09d0302714b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "##### unnesting the transit submode and agencies to get counts. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "edd81120-9756-4edb-b713-1586a9fd5021",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agencies_test, mode_test = _utils.get_tranist_agency_counts(r_trips, \"primary_mode\", \"transit_submode\", \"transit_agency\", \"activity_id\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "2025391f-537e-432a-a2f4-d2d6ce57af04",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " primary_mode | \n",
+ " transit_submode | \n",
+ " transit_agency | \n",
+ " n | \n",
+ " agency_count | \n",
+ " n_modes_taken | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " public_transit | \n",
+ " bus | \n",
+ " AC TRANSIT | \n",
+ " 2788 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " public_transit | \n",
+ " bus, bus | \n",
+ " San Francisco Municipal Transportation Agency, AC TRANSIT | \n",
+ " 632 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " public_transit | \n",
+ " bus, bus | \n",
+ " AC TRANSIT, San Francisco Municipal Transportation Agency | \n",
+ " 516 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " public_transit | \n",
+ " bus, bus | \n",
+ " AC TRANSIT, AC TRANSIT | \n",
+ " 263 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " public_transit | \n",
+ " bus, light_rail | \n",
+ " AC TRANSIT, San Francisco Municipal Transportation Agency | \n",
+ " 253 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 128 | \n",
+ " public_transit | \n",
+ " rail, bus | \n",
+ " Caltrain, SamTrans | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 130 | \n",
+ " public_transit | \n",
+ " rail, bus, bus | \n",
+ " Amtrak, AC TRANSIT, AC TRANSIT | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 133 | \n",
+ " public_transit | \n",
+ " rail, light_rail, bus | \n",
+ " Caltrain, San Francisco Municipal Transportation Agency, AC TRANSIT | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " public_transit | \n",
+ " rail, subway, bus, bus | \n",
+ " Caltrain, Bay Area Rapid Transit, San Francisco Municipal Transportation Agency, AC TRANSIT | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 141 | \n",
+ " public_transit | \n",
+ " subway, bus, bus, bus | \n",
+ " Bay Area Rapid Transit, San Francisco Municipal Transportation Agency, SolTrans, AC TRANSIT | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
144 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " primary_mode transit_submode \\\n",
+ "0 public_transit bus \n",
+ "12 public_transit bus, bus \n",
+ "8 public_transit bus, bus \n",
+ "6 public_transit bus, bus \n",
+ "75 public_transit bus, light_rail \n",
+ ".. ... ... \n",
+ "128 public_transit rail, bus \n",
+ "130 public_transit rail, bus, bus \n",
+ "133 public_transit rail, light_rail, bus \n",
+ "136 public_transit rail, subway, bus, bus \n",
+ "141 public_transit subway, bus, bus, bus \n",
+ "\n",
+ " transit_agency \\\n",
+ "0 AC TRANSIT \n",
+ "12 San Francisco Municipal Transportation Agency, AC TRANSIT \n",
+ "8 AC TRANSIT, San Francisco Municipal Transportation Agency \n",
+ "6 AC TRANSIT, AC TRANSIT \n",
+ "75 AC TRANSIT, San Francisco Municipal Transportation Agency \n",
+ ".. ... \n",
+ "128 Caltrain, SamTrans \n",
+ "130 Amtrak, AC TRANSIT, AC TRANSIT \n",
+ "133 Caltrain, San Francisco Municipal Transportation Agency, AC TRANSIT \n",
+ "136 Caltrain, Bay Area Rapid Transit, San Francisco Municipal Transportation Agency, AC TRANSIT \n",
+ "141 Bay Area Rapid Transit, San Francisco Municipal Transportation Agency, SolTrans, AC TRANSIT \n",
+ "\n",
+ " n agency_count n_modes_taken \n",
+ "0 2788 1 1 \n",
+ "12 632 2 2 \n",
+ "8 516 2 2 \n",
+ "6 263 1 2 \n",
+ "75 253 2 2 \n",
+ ".. ... ... ... \n",
+ "128 1 2 2 \n",
+ "130 1 2 3 \n",
+ "133 1 3 3 \n",
+ "136 1 4 4 \n",
+ "141 1 4 4 \n",
+ "\n",
+ "[144 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "agencies_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "589d2a23-b528-4de9-b6a0-a10f88a6da5c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "505f47b1-229a-4cd1-90b8-b83b4ccdd6c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "modes_count = agencies_test>>group_by(_.n_modes_taken)>>summarize(n_trips = _.n.sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "97746912-39f0-4c85-80d3-a00ed47922a8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# modes_count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "5353a2e4-762d-4a27-8f32-267c2c55bb90",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart((modes_count))\n",
+ " .mark_bar(size=60)\n",
+ " .encode(\n",
+ " x=alt.X(\"n_modes_taken\", title =\"Number of Modes Taken per Trip\"),\n",
+ " y=alt.Y(\"n_trips\", title = \"Number of Trips\"),\n",
+ " color=alt.Color(\"n_trips\", title = \"Number of Trips\",\n",
+ " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n",
+ " tooltip=modes_count.columns.tolist())\n",
+ " \n",
+ " .properties(title = \"How Many Modes are Taken Per Trip\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "fb8565b9-989a-459a-bede-433de30252e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_agency_count>>group_by(_.agency_count)>>count(_.n_modes_taken)>>arrange(-_.n)\n",
+ "agency_mode_trips = agencies_test>>group_by(_.agency_count, _.n_modes_taken)>>summarize(ntrips=_.n.sum())>>arrange(-_.ntrips)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "b03dbe0a-a759-4fa9-849b-b31b318f86cd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart((agency_mode_trips))\n",
+ " .mark_circle(size=100)\n",
+ " .encode(\n",
+ " x=alt.X(\"agency_count\", title =\"Number of Agencies\"),\n",
+ " y=alt.Y(\"n_modes_taken\", title = \"Number of Modes Taken\"),\n",
+ " color=alt.Color(\"ntrips\", title = \"Number of Trips\",\n",
+ " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n",
+ " tooltip=agency_mode_trips.columns.tolist())\n",
+ " \n",
+ " .properties(title = \"Number of Agencies and Modes Used Per Trip\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "91273d26-1fd1-42a1-b829-a77398c606fc",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "6bd200d7-c0b9-4649-adf1-e025d216328b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "##### Getting columns for each agency and counts "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "e8f5f457-ce9f-4bfb-99f6-82e6eb7d11f0",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "agency_list = _utils.get_list_of_agencies(agencies_test, \"transit_agency\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "f303acae-da1b-44a5-98b2-2f8f5b98f56a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Agencies Identified in Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'Ac Transit',\n",
+ " 'Amtrak',\n",
+ " 'Bay Area Rapid Transit',\n",
+ " 'Caltrain',\n",
+ " 'Fairfield And Suisun Transit',\n",
+ " 'Marin Transit',\n",
+ " 'Nan',\n",
+ " 'Sacramento Regional Transit',\n",
+ " 'Samtrans',\n",
+ " 'San Francisco Bay Ferry',\n",
+ " 'San Francisco Municipal Transportation Agency',\n",
+ " 'San Joaquin Regional Transit District (Rtd)',\n",
+ " 'Santa Cruz Metro',\n",
+ " 'Soltrans',\n",
+ " 'The S',\n",
+ " 'Tri Delta Transit',\n",
+ " 'Vta'}"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Agencies Identified in Trips Data\")) \n",
+ "\n",
+ "(agency_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d737409e-38b2-4aa9-8cac-4ad3b8e22294",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6d5ebb21-1aaf-4743-aa0a-feb58ed44da2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2de92f7b-2e28-4388-880f-9e34b8b603ea",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "89479d3a-eeb5-491a-a064-49a77194109e",
+ "metadata": {},
+ "source": [
+ "##### Trying ChatGPT approach"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "bb062bcd-4322-4970-9115-7ea91ba7041c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = _utils.get_dummies_by_agency(agencies_test, \"transit_agency\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6a011752-5211-40b4-a108-a8036fdbacce",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "97bc34c4-636f-46ca-a257-95e4af242584",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "##### Identifying trips with one agency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "46856b31-47ac-408c-903e-19d8b04283f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cols_to_keep = [\"transit_submode\",\"unique_agencies\", \"n\",\"n_modes_taken\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "2603e7e0-389a-49fb-93e9-5dc1d52c6012",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# chart = (alt.Chart(df)\n",
+ "# .mark_circle(size=100)\n",
+ "# .encode(\n",
+ "# x=alt.X(\"n_modes_taken\", title=\"Number of Modes taken\"),\n",
+ "# y=alt.Y(\"n\", title=\"Number of Trips\"),\n",
+ "# color = alt.Color(\"agency_count\", title=\"Number of Unique Agencies\",\n",
+ "# scale=alt.Scale(\n",
+ "# range=cp.CALITP_DIVERGING_COLORS,\n",
+ "# domain=df[\"agency_count\"].unique().tolist())\n",
+ "# ),\n",
+ "# tooltip=cols_to_keep)\n",
+ "# .properties(title = (\"Transit Trips Agency Breakdown\"), width=500,\n",
+ "# height=300)\n",
+ "# )\n",
+ "# chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "149ed63c-436c-45fe-a526-5b1bf9f3d6fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# (df>>filter(_.agency_count==1)>>arrange(-_.n))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "7228c540-3033-4023-8145-91f8aee23eeb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (alt.Chart((df>>filter(_.agency_count==1)))\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"unique_agencies\", title=\"Agency\"),\n",
+ " y=alt.Y(\"n\", title=\"Number of Trips\"),\n",
+ " color = alt.Color(\"n_modes_taken\", title=\"Number of Modes Taken\",\n",
+ " scale=alt.Scale(\n",
+ " range=cp.CALITP_SEQUENTIAL_COLORS,)),\n",
+ " tooltip=cols_to_keep)\n",
+ " .properties(title = (\"Transit Trips With Only One Agency\"), width=800,\n",
+ " height=500)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "60df338f-801f-49a1-a26f-0be5e03bddf3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Most Common Agency Combination
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " transit_agency | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " AC TRANSIT | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " San Francisco Municipal Transportation Agency, AC TRANSIT | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " AC TRANSIT, San Francisco Municipal Transportation Agency | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " AC TRANSIT, AC TRANSIT | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " AC TRANSIT, San Francisco Municipal Transportation Agency | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " transit_agency\n",
+ "0 AC TRANSIT\n",
+ "12 San Francisco Municipal Transportation Agency, AC TRANSIT\n",
+ "8 AC TRANSIT, San Francisco Municipal Transportation Agency\n",
+ "6 AC TRANSIT, AC TRANSIT\n",
+ "75 AC TRANSIT, San Francisco Municipal Transportation Agency"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Most Common Agency Combination\")) \n",
+ "\n",
+ "(df>>arrange(-_.n)>>select(_.transit_agency)).head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "786394ef-be26-4e0d-94e7-35386d6d67c0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fd43ab95-0391-407b-b6b0-185034ba9528",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "8af03d60-4537-44d3-b82f-d857625e47c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/jovyan/data-analyses/sb125_analyses/corridor_study/_utils.py:102: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
+ " df_agencies['n_trips'] = df_agencies[list(df_agencies.columns)].sum(axis=1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "df_agencies = _utils.get_agencies_occurances(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "6951b590-cef8-44bd-b27d-0b18893609c5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " agency | \n",
+ " n_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Sacramento Regional Transit | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " SamTrans | \n",
+ " 19 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " nan | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " The S | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " San Francisco Municipal Transportation Agency | \n",
+ " 175 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Caltrain | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Marin Transit | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Tri Delta Transit | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Santa Cruz Metro | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " VTA | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Fairfield and Suisun Transit | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " San Joaquin Regional Transit District (RTD) | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " SolTrans | \n",
+ " 29 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " San Francisco Bay Ferry | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " AC TRANSIT | \n",
+ " 111 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Bay Area Rapid Transit | \n",
+ " 37 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Amtrak | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " agency n_trips\n",
+ "0 Sacramento Regional Transit 6\n",
+ "1 SamTrans 19\n",
+ "2 nan 1\n",
+ "3 The S 3\n",
+ "4 San Francisco Municipal Transportation Agency 175\n",
+ "5 Caltrain 20\n",
+ "6 Marin Transit 1\n",
+ "7 Tri Delta Transit 1\n",
+ "8 Santa Cruz Metro 1\n",
+ "9 VTA 16\n",
+ "10 Fairfield and Suisun Transit 1\n",
+ "11 San Joaquin Regional Transit District (RTD) 0\n",
+ "12 SolTrans 29\n",
+ "13 San Francisco Bay Ferry 3\n",
+ "14 AC TRANSIT 111\n",
+ "15 Bay Area Rapid Transit 37\n",
+ "16 Amtrak 10"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_agencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "f98c76b9-b83f-42c3-aa12-702ade72ac38",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tooltip_cols = [\"agency\", \"n_trips\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "f1267faf-e28c-4208-ae79-43b31f67f705",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df_agencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "bf160b81-1bc2-4374-9529-6400252d5e4f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (alt.Chart(df_agencies)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"agency\", title = \"Agency Name\"),\n",
+ " y=alt.Y(\"n_trips\", title= \"Number of boardings reported for trips (One person taking two AC Transit trips will count as 2)\"),\n",
+ " color=alt.Color(\"n_trips\", scale=alt.Scale(range = cp.CALITP_SEQUENTIAL_COLORS)),\n",
+ " tooltip = tooltip_cols)\n",
+ " .properties(title = \"Number of Times Each Agency Was Used for Trips over the Bay Bridge\",\n",
+ " width=800,\n",
+ " height=500)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2b5b0cb2-465f-4e1f-83f4-7ae31396d012",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a59f170f-b2e0-41b3-ae7a-1ae4eca12596",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "16b45917-adc3-44d0-8d76-15ccd4083d26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Trips by Resident Type
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Trips by Resident Type\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "5163926e-82bd-4c25-9486-45229d09a0d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trip_by_res_type = (r_trips\n",
+ " >>group_by(_.primary_mode,_.trip_taker_resident_type)\n",
+ " >>summarize(number_trips = _.activity_id.nunique())\n",
+ " >>arrange(_.primary_mode))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "636e7771-0ba7-42e9-af1d-e3e998ff8599",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " primary_mode | \n",
+ " trip_taker_resident_type | \n",
+ " number_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " auto_passenger | \n",
+ " core | \n",
+ " 367739 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " auto_passenger | \n",
+ " visitor | \n",
+ " 23552 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " commercial | \n",
+ " NaN | \n",
+ " 11897 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " on_demand_auto | \n",
+ " core | \n",
+ " 24372 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " on_demand_auto | \n",
+ " visitor | \n",
+ " 2581 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " private_auto | \n",
+ " core | \n",
+ " 232486 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " private_auto | \n",
+ " visitor | \n",
+ " 6204 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " public_transit | \n",
+ " core | \n",
+ " 5986 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " public_transit | \n",
+ " visitor | \n",
+ " 47 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " primary_mode trip_taker_resident_type number_trips\n",
+ "0 auto_passenger core 367739\n",
+ "1 auto_passenger visitor 23552\n",
+ "2 commercial NaN 11897\n",
+ "3 on_demand_auto core 24372\n",
+ "4 on_demand_auto visitor 2581\n",
+ "5 private_auto core 232486\n",
+ "6 private_auto visitor 6204\n",
+ "7 public_transit core 5986\n",
+ "8 public_transit visitor 47"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trip_by_res_type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "90c5d683-91c5-4663-accd-4e45027a3e49",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(trip_by_res_type)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\", title = \"Mode\"),\n",
+ " y=alt.Y(\"number_trips\", title = \"Number of Trips\"),\n",
+ " color=alt.Color(\"trip_taker_resident_type\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=trip_by_res_type[\"trip_taker_resident_type\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=trip_by_res_type.columns.tolist())\n",
+ " .properties(title = \"Trips by Resident Type\",\n",
+ " width=800,\n",
+ " height=500)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f7b51fbb-8b87-4863-bcd9-50ed5047d7d6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "878c5c8e-18c3-456a-b641-67ad010f5101",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Unique Household Ids
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Unique Household Ids\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "2a31bf27-ccc4-4ce3-8b6a-31dbd14caaf2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_taker_household_id | \n",
+ " n | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 248686 | \n",
+ " | \n",
+ " 27897 | \n",
+ "
\n",
+ " \n",
+ " 262322 | \n",
+ " NaN | \n",
+ " 16384 | \n",
+ "
\n",
+ " \n",
+ " 110604 | \n",
+ " 8194219563640751815 | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ " 82299 | \n",
+ " 6097771312917788596 | \n",
+ " 27 | \n",
+ "
\n",
+ " \n",
+ " 74914 | \n",
+ " 5552467881171814730 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 262312 | \n",
+ " 9988489366890220188 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 262316 | \n",
+ " 9991342309439330560 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 262317 | \n",
+ " 9992490903429866665 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 262318 | \n",
+ " 9992599661009943006 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 262319 | \n",
+ " 9993588483694519152 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
262323 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_taker_household_id n\n",
+ "248686 27897\n",
+ "262322 NaN 16384\n",
+ "110604 8194219563640751815 28\n",
+ "82299 6097771312917788596 27\n",
+ "74914 5552467881171814730 22\n",
+ "... ... ...\n",
+ "262312 9988489366890220188 1\n",
+ "262316 9991342309439330560 1\n",
+ "262317 9992490903429866665 1\n",
+ "262318 9992599661009943006 1\n",
+ "262319 9993588483694519152 1\n",
+ "\n",
+ "[262323 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>count(_.trip_taker_household_id)>>arrange(-_.n)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "eb9780a3-b996-45af-9cc3-b41629ec0e55",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Checking one household id
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Checking one household id\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "6fc3be54-b8dc-4cf8-893e-565ffa02934d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "28\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " origin_building_use | \n",
+ " trip_duration_minutes | \n",
+ " trip_taker_language | \n",
+ " origin_trct_2020 | \n",
+ " trip_taker_household_id | \n",
+ " trip_taker_sex | \n",
+ " trip_taker_home_bgrp_2020 | \n",
+ " origin_st_2020 | \n",
+ " trip_taker_home_st_2020 | \n",
+ " trip_taker_race_ethnicity | \n",
+ " transit_agency | \n",
+ " transit_route | \n",
+ " trip_taker_resident_type | \n",
+ " vehicle_type | \n",
+ " trip_taker_age | \n",
+ " activity_id | \n",
+ " destination_bgrp_2020 | \n",
+ " destination_building_use | \n",
+ " trip_taker_wfh | \n",
+ " destination_st_2020 | \n",
+ " destination_cty_2020 | \n",
+ " trip_taker_work_bgrp_2020 | \n",
+ " destination_trct_2020 | \n",
+ " trip_taker_household_income | \n",
+ " origin_bgrp_2020 | \n",
+ " trip_taker_commute_mode | \n",
+ " trip_taker_available_vehicles | \n",
+ " primary_mode | \n",
+ " previous_trip_purpose | \n",
+ " trip_taker_building_type | \n",
+ " transit_submode | \n",
+ " trip_taker_work_st_2020 | \n",
+ " trip_taker_household_size | \n",
+ " origin_cty_2020 | \n",
+ " destination_land_use | \n",
+ " trip_taker_industry | \n",
+ " trip_taker_tenure | \n",
+ " vehicle_fuel_type | \n",
+ " trip_taker_home_trct_2020 | \n",
+ " trip_taker_work_trct_2020 | \n",
+ " trip_taker_education | \n",
+ " trip_start_time | \n",
+ " trip_taker_individual_income | \n",
+ " trip_taker_employment_status | \n",
+ " trip_purpose | \n",
+ " origin_land_use | \n",
+ " trip_taker_work_cty_2020 | \n",
+ " trip_distance_miles | \n",
+ " trip_taker_school_grade_attending | \n",
+ " trip_taker_home_cty_2020 | \n",
+ " trip_taker_person_id | \n",
+ " trip_end_time | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 594856 | \n",
+ " single_family | \n",
+ " 11 | \n",
+ " indo_european | \n",
+ " 179.03 (San Francisco, CA) | \n",
+ " 8194219563640751815 | \n",
+ " male | \n",
+ " 1 (Tract 179.03, San Francisco, CA) | \n",
+ " California | \n",
+ " California | \n",
+ " white_not_hispanic_or_latino | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " core | \n",
+ " unknown_vehicle_type | \n",
+ " 27.0 | \n",
+ " 7878347104837895638 | \n",
+ " 3 (Tract 615.06, San Francisco, CA) | \n",
+ " retail | \n",
+ " remote | \n",
+ " California | \n",
+ " San Francisco | \n",
+ " 1 (Tract 179.03, San Francisco, CA) | \n",
+ " 615.06 (San Francisco, CA) | \n",
+ " 1200990.0 | \n",
+ " 1 (Tract 179.03, San Francisco, CA) | \n",
+ " worked_from_home | \n",
+ " three_plus | \n",
+ " private_auto | \n",
+ " home | \n",
+ " single_family | \n",
+ " NaN | \n",
+ " California | \n",
+ " 10.0 | \n",
+ " San Francisco | \n",
+ " mixed_use | \n",
+ " naics54 | \n",
+ " renter | \n",
+ " other_non_bev | \n",
+ " 179.03 (San Francisco, CA) | \n",
+ " 179.03 (San Francisco, CA) | \n",
+ " bachelors_degree | \n",
+ " 11:44:00 | \n",
+ " 218735.0 | \n",
+ " employed | \n",
+ " shop | \n",
+ " single_family | \n",
+ " San Francisco | \n",
+ " 4.0 | \n",
+ " not_attending_school | \n",
+ " San Francisco | \n",
+ " 14409692340574959811 | \n",
+ " 11:55:07 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " origin_building_use trip_duration_minutes trip_taker_language \\\n",
+ "594856 single_family 11 indo_european \n",
+ "\n",
+ " origin_trct_2020 trip_taker_household_id trip_taker_sex \\\n",
+ "594856 179.03 (San Francisco, CA) 8194219563640751815 male \n",
+ "\n",
+ " trip_taker_home_bgrp_2020 origin_st_2020 \\\n",
+ "594856 1 (Tract 179.03, San Francisco, CA) California \n",
+ "\n",
+ " trip_taker_home_st_2020 trip_taker_race_ethnicity transit_agency \\\n",
+ "594856 California white_not_hispanic_or_latino NaN \n",
+ "\n",
+ " transit_route trip_taker_resident_type vehicle_type \\\n",
+ "594856 NaN core unknown_vehicle_type \n",
+ "\n",
+ " trip_taker_age activity_id \\\n",
+ "594856 27.0 7878347104837895638 \n",
+ "\n",
+ " destination_bgrp_2020 destination_building_use \\\n",
+ "594856 3 (Tract 615.06, San Francisco, CA) retail \n",
+ "\n",
+ " trip_taker_wfh destination_st_2020 destination_cty_2020 \\\n",
+ "594856 remote California San Francisco \n",
+ "\n",
+ " trip_taker_work_bgrp_2020 destination_trct_2020 \\\n",
+ "594856 1 (Tract 179.03, San Francisco, CA) 615.06 (San Francisco, CA) \n",
+ "\n",
+ " trip_taker_household_income origin_bgrp_2020 \\\n",
+ "594856 1200990.0 1 (Tract 179.03, San Francisco, CA) \n",
+ "\n",
+ " trip_taker_commute_mode trip_taker_available_vehicles primary_mode \\\n",
+ "594856 worked_from_home three_plus private_auto \n",
+ "\n",
+ " previous_trip_purpose trip_taker_building_type transit_submode \\\n",
+ "594856 home single_family NaN \n",
+ "\n",
+ " trip_taker_work_st_2020 trip_taker_household_size origin_cty_2020 \\\n",
+ "594856 California 10.0 San Francisco \n",
+ "\n",
+ " destination_land_use trip_taker_industry trip_taker_tenure \\\n",
+ "594856 mixed_use naics54 renter \n",
+ "\n",
+ " vehicle_fuel_type trip_taker_home_trct_2020 \\\n",
+ "594856 other_non_bev 179.03 (San Francisco, CA) \n",
+ "\n",
+ " trip_taker_work_trct_2020 trip_taker_education trip_start_time \\\n",
+ "594856 179.03 (San Francisco, CA) bachelors_degree 11:44:00 \n",
+ "\n",
+ " trip_taker_individual_income trip_taker_employment_status \\\n",
+ "594856 218735.0 employed \n",
+ "\n",
+ " trip_purpose origin_land_use trip_taker_work_cty_2020 \\\n",
+ "594856 shop single_family San Francisco \n",
+ "\n",
+ " trip_distance_miles trip_taker_school_grade_attending \\\n",
+ "594856 4.0 not_attending_school \n",
+ "\n",
+ " trip_taker_home_cty_2020 trip_taker_person_id trip_end_time \n",
+ "594856 San Francisco 14409692340574959811 11:55:07 "
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print(len(r_trips>>filter(_.trip_taker_household_id == 8194219563640751815)))\n",
+ "(r_trips>>filter(_.trip_taker_household_id == 8194219563640751815)).sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "4eccfdf8-429e-43df-899e-f7d77e051831",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_taker_person_id | \n",
+ " trip_taker_age | \n",
+ " trip_taker_sex | \n",
+ " trip_taker_household_size | \n",
+ " n | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1325181746552868554 | \n",
+ " 22.0 | \n",
+ " female | \n",
+ " 10.0 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2310331066900902679 | \n",
+ " 30.0 | \n",
+ " female | \n",
+ " 10.0 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4746831412975349070 | \n",
+ " 34.0 | \n",
+ " female | \n",
+ " 10.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 10066255028734967962 | \n",
+ " 28.0 | \n",
+ " male | \n",
+ " 10.0 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 11058477761487230854 | \n",
+ " 32.0 | \n",
+ " male | \n",
+ " 10.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 12894946300265742193 | \n",
+ " 39.0 | \n",
+ " male | \n",
+ " 10.0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 14409692340574959811 | \n",
+ " 27.0 | \n",
+ " male | \n",
+ " 10.0 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_taker_person_id trip_taker_age trip_taker_sex \\\n",
+ "0 1325181746552868554 22.0 female \n",
+ "1 2310331066900902679 30.0 female \n",
+ "2 4746831412975349070 34.0 female \n",
+ "3 10066255028734967962 28.0 male \n",
+ "4 11058477761487230854 32.0 male \n",
+ "5 12894946300265742193 39.0 male \n",
+ "6 14409692340574959811 27.0 male \n",
+ "\n",
+ " trip_taker_household_size n \n",
+ "0 10.0 6 \n",
+ "1 10.0 6 \n",
+ "2 10.0 2 \n",
+ "3 10.0 4 \n",
+ "4 10.0 2 \n",
+ "5 10.0 2 \n",
+ "6 10.0 6 "
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "## checking one household id\n",
+ "r_trips>>filter(_.trip_taker_household_id == 8194219563640751815)>>count(_.trip_taker_person_id, _.trip_taker_age,\n",
+ " _.trip_taker_sex, _.trip_taker_household_size)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c929c77d-0173-4a7d-be49-762349daf5cb",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ab226b47-4366-4fb9-aaae-e0aca760f9b3",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cd041141-2a65-45e8-a6ba-9b41cbffecbd",
+ "metadata": {},
+ "source": [
+ "#### Adding in Replica's People Dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "dea31a60-e986-41e7-b31b-1e5403c9e041",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_483/1951040339.py:1: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " r_ppl = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_ppl}\"))\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_ppl = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_ppl}\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2cf7ed4b-7524-4e9a-858a-857fcf70950c",
+ "metadata": {},
+ "source": [
+ "Replica people dataframe sample"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "42bc1220-b404-4aca-8214-43992c55163b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " person_id | \n",
+ " work_st_2020 | \n",
+ " household_income | \n",
+ " resident_type | \n",
+ " language | \n",
+ " available_vehicles | \n",
+ " race_ethnicity | \n",
+ " education | \n",
+ " work_bgrp_2020 | \n",
+ " wfh | \n",
+ " home_cty_2020 | \n",
+ " home_trct_2020 | \n",
+ " tenure | \n",
+ " industry | \n",
+ " work_trct_2020 | \n",
+ " school_grade_attending | \n",
+ " building_type | \n",
+ " commute_mode | \n",
+ " employment_status | \n",
+ " work_cty_2020 | \n",
+ " individual_income | \n",
+ " home_st_2020 | \n",
+ " sex | \n",
+ " household_size | \n",
+ " home_bgrp_2020 | \n",
+ " household_id | \n",
+ " age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 237204 | \n",
+ " 16410964223051134474 | \n",
+ " California | \n",
+ " 120704.0 | \n",
+ " core | \n",
+ " english | \n",
+ " three_plus | \n",
+ " white_not_hispanic_or_latino | \n",
+ " some_college | \n",
+ " 3 (Tract 3211.01, Contra Costa, CA) | \n",
+ " employed_not_working | \n",
+ " Contra Costa | \n",
+ " 3211.01 (Contra Costa, CA) | \n",
+ " owner | \n",
+ " naics812910 | \n",
+ " 3211.01 (Contra Costa, CA) | \n",
+ " not_attending_school | \n",
+ " single_family | \n",
+ " private_auto | \n",
+ " employed | \n",
+ " Contra Costa | \n",
+ " 33528.0 | \n",
+ " California | \n",
+ " male | \n",
+ " 4.0 | \n",
+ " 4 (Tract 3211.01, Contra Costa, CA) | \n",
+ " 503623235255197327 | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " person_id work_st_2020 household_income resident_type \\\n",
+ "237204 16410964223051134474 California 120704.0 core \n",
+ "\n",
+ " language available_vehicles race_ethnicity \\\n",
+ "237204 english three_plus white_not_hispanic_or_latino \n",
+ "\n",
+ " education work_bgrp_2020 \\\n",
+ "237204 some_college 3 (Tract 3211.01, Contra Costa, CA) \n",
+ "\n",
+ " wfh home_cty_2020 home_trct_2020 tenure \\\n",
+ "237204 employed_not_working Contra Costa 3211.01 (Contra Costa, CA) owner \n",
+ "\n",
+ " industry work_trct_2020 school_grade_attending \\\n",
+ "237204 naics812910 3211.01 (Contra Costa, CA) not_attending_school \n",
+ "\n",
+ " building_type commute_mode employment_status work_cty_2020 \\\n",
+ "237204 single_family private_auto employed Contra Costa \n",
+ "\n",
+ " individual_income home_st_2020 sex household_size \\\n",
+ "237204 33528.0 California male 4.0 \n",
+ "\n",
+ " home_bgrp_2020 household_id age \n",
+ "237204 4 (Tract 3211.01, Contra Costa, CA) 503623235255197327 30.0 "
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_ppl.sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "d2a1647f-ed67-4c33-a089-b72375323a7d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Number of Travelers by Resident Type: Replica People Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Number of Travelers by Resident Type: Replica People Data
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "570daf6d-06b8-46e5-9664-a71c5a093073",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " resident_type | \n",
+ " _unique_ids | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " core | \n",
+ " 278158 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " visitor | \n",
+ " 23034 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " resident_type _unique_ids\n",
+ "0 core 278158\n",
+ "1 visitor 23034"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_ppl>>group_by(_.resident_type)>>summarize(_unique_ids = _.person_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "1483249f-602c-4805-bcc4-d55e018022ad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Number of Travelers by Resident Type: Replica Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Number of Travelers by Resident Type: Replica Trips Data
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "5c1894f0-bd42-4014-89d4-6e8d43809319",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_taker_resident_type | \n",
+ " _unique_ids | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " core | \n",
+ " 282789 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " visitor | \n",
+ " 26806 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " NaN | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_taker_resident_type _unique_ids\n",
+ "0 core 282789\n",
+ "1 visitor 26806\n",
+ "2 NaN 1"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>group_by(_.trip_taker_resident_type)>>summarize(_unique_ids = _.trip_taker_person_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "2f4b67bf-78ab-4aa8-bbb0-e382a8f78c9a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Traveler Demographics: Replica People Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Fitered for Core Residents"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " commute_mode | \n",
+ " sex | \n",
+ " n_ppl | \n",
+ " avg_h_income | \n",
+ " avg_p_income | \n",
+ " avg_age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " auto_passenger | \n",
+ " female | \n",
+ " 10908 | \n",
+ " 175717.982765 | \n",
+ " 70255.626054 | \n",
+ " 42.234507 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " auto_passenger | \n",
+ " male | \n",
+ " 11605 | \n",
+ " 177241.394916 | \n",
+ " 82072.291685 | \n",
+ " 42.185523 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " biking | \n",
+ " female | \n",
+ " 234 | \n",
+ " 248565.141026 | \n",
+ " 82592.418803 | \n",
+ " 36.085470 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " biking | \n",
+ " male | \n",
+ " 527 | \n",
+ " 219563.083491 | \n",
+ " 120152.859583 | \n",
+ " 38.426945 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " other_travel_mode | \n",
+ " female | \n",
+ " 36176 | \n",
+ " 126129.721003 | \n",
+ " 21413.682939 | \n",
+ " 55.161599 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " other_travel_mode | \n",
+ " male | \n",
+ " 26290 | \n",
+ " 116775.549106 | \n",
+ " 33563.078813 | \n",
+ " 54.328262 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " private_auto | \n",
+ " female | \n",
+ " 45312 | \n",
+ " 186499.659406 | \n",
+ " 76492.005341 | \n",
+ " 42.626302 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " private_auto | \n",
+ " male | \n",
+ " 59762 | \n",
+ " 190643.201265 | \n",
+ " 100712.386918 | \n",
+ " 42.872093 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " public_transit | \n",
+ " female | \n",
+ " 22008 | \n",
+ " 193540.921347 | \n",
+ " 84074.072110 | \n",
+ " 41.921937 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " public_transit | \n",
+ " male | \n",
+ " 26480 | \n",
+ " 207838.001699 | \n",
+ " 125445.942485 | \n",
+ " 42.259290 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " walking | \n",
+ " female | \n",
+ " 1332 | \n",
+ " 160144.725976 | \n",
+ " 63737.956456 | \n",
+ " 38.041291 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " walking | \n",
+ " male | \n",
+ " 1494 | \n",
+ " 177570.705489 | \n",
+ " 100413.455154 | \n",
+ " 37.495315 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " worked_from_home | \n",
+ " female | \n",
+ " 17409 | \n",
+ " 226232.999655 | \n",
+ " 97016.438853 | \n",
+ " 41.219829 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " worked_from_home | \n",
+ " male | \n",
+ " 18621 | \n",
+ " 253921.717631 | \n",
+ " 137849.973686 | \n",
+ " 41.092208 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " commute_mode sex n_ppl avg_h_income avg_p_income avg_age\n",
+ "0 auto_passenger female 10908 175717.982765 70255.626054 42.234507\n",
+ "1 auto_passenger male 11605 177241.394916 82072.291685 42.185523\n",
+ "2 biking female 234 248565.141026 82592.418803 36.085470\n",
+ "3 biking male 527 219563.083491 120152.859583 38.426945\n",
+ "4 other_travel_mode female 36176 126129.721003 21413.682939 55.161599\n",
+ "5 other_travel_mode male 26290 116775.549106 33563.078813 54.328262\n",
+ "6 private_auto female 45312 186499.659406 76492.005341 42.626302\n",
+ "7 private_auto male 59762 190643.201265 100712.386918 42.872093\n",
+ "8 public_transit female 22008 193540.921347 84074.072110 41.921937\n",
+ "9 public_transit male 26480 207838.001699 125445.942485 42.259290\n",
+ "10 walking female 1332 160144.725976 63737.956456 38.041291\n",
+ "11 walking male 1494 177570.705489 100413.455154 37.495315\n",
+ "12 worked_from_home female 17409 226232.999655 97016.438853 41.219829\n",
+ "13 worked_from_home male 18621 253921.717631 137849.973686 41.092208"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Traveler Demographics: Replica People Data
\")) \n",
+ "display(HTML(\"Fitered for Core Residents\")) \n",
+ "(r_ppl\n",
+ " >>filter(_.home_bgrp_2020!=\"Visitor (no home location)\")\n",
+ " >>group_by(_.commute_mode, _.sex)\n",
+ " >>summarize(\n",
+ " n_ppl = _.person_id.nunique(),\n",
+ " avg_h_income = _.household_income.mean(),\n",
+ " avg_p_income = _.individual_income.mean(),\n",
+ " avg_age = _.age.mean())\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0891b62-c968-4ea0-bd9b-753d299a5054",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "2e4bd058-9c4d-4989-a5fc-db6cf6130bf2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Traveler Demographics: Replica Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Fitered for Core Residents"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " primary_mode | \n",
+ " trip_taker_sex | \n",
+ " n_ppl | \n",
+ " avg_h_income | \n",
+ " avg_p_income | \n",
+ " avg_age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " auto_passenger | \n",
+ " female | \n",
+ " 99350 | \n",
+ " 173666.662228 | \n",
+ " 65104.398490 | \n",
+ " 45.064633 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " auto_passenger | \n",
+ " male | \n",
+ " 105359 | \n",
+ " 187590.985442 | \n",
+ " 97296.509379 | \n",
+ " 43.991868 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " commercial | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " on_demand_auto | \n",
+ " female | \n",
+ " 9051 | \n",
+ " 186226.833189 | \n",
+ " 79990.395102 | \n",
+ " 42.900399 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " on_demand_auto | \n",
+ " male | \n",
+ " 10159 | \n",
+ " 202467.780725 | \n",
+ " 111915.392968 | \n",
+ " 42.436605 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " private_auto | \n",
+ " female | \n",
+ " 68268 | \n",
+ " 174867.937251 | \n",
+ " 62299.064752 | \n",
+ " 45.316785 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " private_auto | \n",
+ " male | \n",
+ " 75223 | \n",
+ " 182301.952228 | \n",
+ " 90322.373466 | \n",
+ " 44.265320 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " public_transit | \n",
+ " female | \n",
+ " 2256 | \n",
+ " 186720.499643 | \n",
+ " 82496.554922 | \n",
+ " 39.104494 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " public_transit | \n",
+ " male | \n",
+ " 2535 | \n",
+ " 205334.441232 | \n",
+ " 114041.914833 | \n",
+ " 38.312382 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " primary_mode trip_taker_sex n_ppl avg_h_income avg_p_income \\\n",
+ "0 auto_passenger female 99350 173666.662228 65104.398490 \n",
+ "1 auto_passenger male 105359 187590.985442 97296.509379 \n",
+ "2 commercial NaN 1 NaN NaN \n",
+ "3 on_demand_auto female 9051 186226.833189 79990.395102 \n",
+ "4 on_demand_auto male 10159 202467.780725 111915.392968 \n",
+ "5 private_auto female 68268 174867.937251 62299.064752 \n",
+ "6 private_auto male 75223 182301.952228 90322.373466 \n",
+ "7 public_transit female 2256 186720.499643 82496.554922 \n",
+ "8 public_transit male 2535 205334.441232 114041.914833 \n",
+ "\n",
+ " avg_age \n",
+ "0 45.064633 \n",
+ "1 43.991868 \n",
+ "2 NaN \n",
+ "3 42.900399 \n",
+ "4 42.436605 \n",
+ "5 45.316785 \n",
+ "6 44.265320 \n",
+ "7 39.104494 \n",
+ "8 38.312382 "
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Traveler Demographics: Replica Trips Data
\")) \n",
+ "display(HTML(\"Fitered for Core Residents\")) \n",
+ "(r_trips\n",
+ " >>filter(_.trip_taker_home_bgrp_2020!=\"Visitor (no home location)\")\n",
+ " >>group_by(_.primary_mode, _.trip_taker_sex)\n",
+ " >>summarize(\n",
+ " n_ppl = _.trip_taker_person_id.nunique(),\n",
+ " avg_h_income = _.trip_taker_household_income.mean(),\n",
+ " avg_p_income = _.trip_taker_individual_income.mean(),\n",
+ " avg_age = _.trip_taker_age.mean())\n",
+ ")\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "7b637cdd-9560-4d31-bfeb-bffa6cbb55d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "replica_people_demographics = (r_ppl\n",
+ " >>group_by(_.commute_mode, _.sex)\n",
+ " >>summarize(\n",
+ " n_ppl = _.person_id.nunique(),\n",
+ " avg_h_income = _.household_income.mean(),\n",
+ " avg_p_income = _.individual_income.mean(),\n",
+ " avg_age = _.age.mean())\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "bcaa0855-faf6-4eb0-aa8e-48cb3348baeb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " commute_mode | \n",
+ " sex | \n",
+ " n_ppl | \n",
+ " avg_h_income | \n",
+ " avg_p_income | \n",
+ " avg_age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " auto_passenger | \n",
+ " female | \n",
+ " 10908 | \n",
+ " 175717.982765 | \n",
+ " 70255.626054 | \n",
+ " 42.234507 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " auto_passenger | \n",
+ " male | \n",
+ " 11605 | \n",
+ " 177241.394916 | \n",
+ " 82072.291685 | \n",
+ " 42.185523 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " biking | \n",
+ " female | \n",
+ " 234 | \n",
+ " 248565.141026 | \n",
+ " 82592.418803 | \n",
+ " 36.085470 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " biking | \n",
+ " male | \n",
+ " 527 | \n",
+ " 219563.083491 | \n",
+ " 120152.859583 | \n",
+ " 38.426945 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " other_travel_mode | \n",
+ " female | \n",
+ " 36176 | \n",
+ " 126129.721003 | \n",
+ " 21413.682939 | \n",
+ " 55.161599 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " other_travel_mode | \n",
+ " male | \n",
+ " 26290 | \n",
+ " 116775.549106 | \n",
+ " 33563.078813 | \n",
+ " 54.328262 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " private_auto | \n",
+ " female | \n",
+ " 45312 | \n",
+ " 186499.659406 | \n",
+ " 76492.005341 | \n",
+ " 42.626302 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " private_auto | \n",
+ " male | \n",
+ " 59762 | \n",
+ " 190643.201265 | \n",
+ " 100712.386918 | \n",
+ " 42.872093 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " public_transit | \n",
+ " female | \n",
+ " 22008 | \n",
+ " 193540.921347 | \n",
+ " 84074.072110 | \n",
+ " 41.921937 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " public_transit | \n",
+ " male | \n",
+ " 26480 | \n",
+ " 207838.001699 | \n",
+ " 125445.942485 | \n",
+ " 42.259290 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " walking | \n",
+ " female | \n",
+ " 1332 | \n",
+ " 160144.725976 | \n",
+ " 63737.956456 | \n",
+ " 38.041291 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " walking | \n",
+ " male | \n",
+ " 1494 | \n",
+ " 177570.705489 | \n",
+ " 100413.455154 | \n",
+ " 37.495315 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " worked_from_home | \n",
+ " female | \n",
+ " 17409 | \n",
+ " 226232.999655 | \n",
+ " 97016.438853 | \n",
+ " 41.219829 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " worked_from_home | \n",
+ " male | \n",
+ " 18621 | \n",
+ " 253921.717631 | \n",
+ " 137849.973686 | \n",
+ " 41.092208 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 23034 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " commute_mode sex n_ppl avg_h_income avg_p_income avg_age\n",
+ "0 auto_passenger female 10908 175717.982765 70255.626054 42.234507\n",
+ "1 auto_passenger male 11605 177241.394916 82072.291685 42.185523\n",
+ "2 biking female 234 248565.141026 82592.418803 36.085470\n",
+ "3 biking male 527 219563.083491 120152.859583 38.426945\n",
+ "4 other_travel_mode female 36176 126129.721003 21413.682939 55.161599\n",
+ "5 other_travel_mode male 26290 116775.549106 33563.078813 54.328262\n",
+ "6 private_auto female 45312 186499.659406 76492.005341 42.626302\n",
+ "7 private_auto male 59762 190643.201265 100712.386918 42.872093\n",
+ "8 public_transit female 22008 193540.921347 84074.072110 41.921937\n",
+ "9 public_transit male 26480 207838.001699 125445.942485 42.259290\n",
+ "10 walking female 1332 160144.725976 63737.956456 38.041291\n",
+ "11 walking male 1494 177570.705489 100413.455154 37.495315\n",
+ "12 worked_from_home female 17409 226232.999655 97016.438853 41.219829\n",
+ "13 worked_from_home male 18621 253921.717631 137849.973686 41.092208\n",
+ "14 NaN NaN 23034 NaN NaN NaN"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "replica_people_demographics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "7f7c1066-c820-43d3-a68e-0dcdfc1dcdce",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(replica_people_demographics)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"commute_mode\", title = \"Mode\"),\n",
+ " y=alt.Y(\"n_ppl\", title = \"Number of People\"),\n",
+ " color=alt.Color(\"commute_mode\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n",
+ " tooltip=replica_people_demographics.columns.tolist())\n",
+ " .properties(title = \"Trips by Resident Type\",\n",
+ " width=800,\n",
+ " height=500)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a1dba8b5-7356-441f-8e6c-0f98e0f80c4c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "97f56b20-f299-43f5-bad4-2a42362962e2",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### Read in Streetlight Data: 2022"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "6f3cf136-7531-4b62-b050-169b7bd1c4ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "streetlight = \"streetlight_bay_bridge_corridor_study_corridor_study.csv\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "id": "47943ff0-2949-409b-a9ad-ca5efea3b52d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sl_data = to_snakecase(pd.read_csv(f\"{GCS_PATH}{streetlight}\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "28fc95f8-5898-486f-81f3-f0ac7885431c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Streetlight Data Sample
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Streetlight Data Sample
\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "id": "3398c3fa-856d-49a0-8d2d-7994f3f50df1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " data_periods | \n",
+ " mode_of_travel | \n",
+ " zone_id | \n",
+ " zone_name | \n",
+ " road_classification | \n",
+ " line_zone_length__miles_ | \n",
+ " zone_is_pass_through | \n",
+ " zone_direction__degrees_ | \n",
+ " zone_cardinal_direction | \n",
+ " zone_is_bi_direction | \n",
+ " day_type | \n",
+ " day_part | \n",
+ " average_daily_segment_traffic__stl_volume_ | \n",
+ " avg_segment_speed__mph_ | \n",
+ " avg_segment_travel_time__sec_ | \n",
+ " free_flow_speed__mph_ | \n",
+ " vehicle_miles_of_travel__stl_volume_ | \n",
+ " travel_time_index | \n",
+ " congested_segment | \n",
+ " _85th_speed_percentile | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1411 | \n",
+ " Jan 01, 2022 - Dec 31, 2022 | \n",
+ " All Vehicles CVD Plus - StL All Vehicles Volume | \n",
+ " 1130705657 | \n",
+ " San Francisco – Oakland Bay Bridge / 52527662 / 1 | \n",
+ " Motorway | \n",
+ " 0.104 | \n",
+ " yes | \n",
+ " 40 | \n",
+ " EAST | \n",
+ " no | \n",
+ " 2: Weekend Day (Sa-Su) | \n",
+ " 01: 12am (12am-1am) | \n",
+ " 3029 | \n",
+ " 61 | \n",
+ " 7 | \n",
+ " 65.986 | \n",
+ " 314.44 | \n",
+ " 1.08 | \n",
+ " False | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " data_periods \\\n",
+ "1411 Jan 01, 2022 - Dec 31, 2022 \n",
+ "\n",
+ " mode_of_travel zone_id \\\n",
+ "1411 All Vehicles CVD Plus - StL All Vehicles Volume 1130705657 \n",
+ "\n",
+ " zone_name road_classification \\\n",
+ "1411 San Francisco – Oakland Bay Bridge / 52527662 / 1 Motorway \n",
+ "\n",
+ " line_zone_length__miles_ zone_is_pass_through zone_direction__degrees_ \\\n",
+ "1411 0.104 yes 40 \n",
+ "\n",
+ " zone_cardinal_direction zone_is_bi_direction day_type \\\n",
+ "1411 EAST no 2: Weekend Day (Sa-Su) \n",
+ "\n",
+ " day_part average_daily_segment_traffic__stl_volume_ \\\n",
+ "1411 01: 12am (12am-1am) 3029 \n",
+ "\n",
+ " avg_segment_speed__mph_ avg_segment_travel_time__sec_ \\\n",
+ "1411 61 7 \n",
+ "\n",
+ " free_flow_speed__mph_ vehicle_miles_of_travel__stl_volume_ \\\n",
+ "1411 65.986 314.44 \n",
+ "\n",
+ " travel_time_index congested_segment _85th_speed_percentile \n",
+ "1411 1.08 False 70 "
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sl_data.sample()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3a3d1c17-1d5b-4ddf-aac3-78b8156ceca4",
+ "metadata": {},
+ "source": [
+ "#### Data Explorations: Streetlight"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "id": "da10c3eb-31bd-437e-b546-82472d66f964",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Length of the Streetlight Data"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "1710"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Length of the Streetlight Data\")) \n",
+ "\n",
+ "len(sl_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "5e403ce9-33bc-4453-9323-df46fad1f38a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sl_data>>count(_.zone_id, _.zone_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "cd1ce590-2019-43f0-ba09-786520479ad8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " day_part | \n",
+ " n | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 00: All Day (12am-12am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 01: 12am (12am-1am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 02: Early AM (12am-6am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 03: 1am (1am-2am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 04: 2am (2am-3am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 05: 3am (3am-4am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 06: 4am (4am-5am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 07: 5am (5am-6am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 08: 6am (6am-7am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 09: Peak AM (6am-10am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 10: 7am (7am-8am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 11: 8am (8am-9am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 12: 9am (9am-10am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 13: 10am (10am-11am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 14: Mid-Day (10am-4pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 15: 11am (11am-12noon) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 16: 12pm (12noon-1pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 17: 1pm (1pm-2pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 18: 2pm (2pm-3pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 19: 3pm (3pm-4pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 20: 4pm (4pm-5pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 21: Peak PM (4pm-8pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 22: 5pm (5pm-6pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 23: 6pm (6pm-7pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 24: 7pm (7pm-8pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 25: 8pm (8pm-9pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 26: Late PM (8pm-12am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 27: 9pm (9pm-10pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 28: 10pm (10pm-11pm) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 29: 11pm (11pm-12am) | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " day_part n\n",
+ "0 00: All Day (12am-12am) 3\n",
+ "1 01: 12am (12am-1am) 3\n",
+ "2 02: Early AM (12am-6am) 3\n",
+ "3 03: 1am (1am-2am) 3\n",
+ "4 04: 2am (2am-3am) 3\n",
+ "5 05: 3am (3am-4am) 3\n",
+ "6 06: 4am (4am-5am) 3\n",
+ "7 07: 5am (5am-6am) 3\n",
+ "8 08: 6am (6am-7am) 3\n",
+ "9 09: Peak AM (6am-10am) 3\n",
+ "10 10: 7am (7am-8am) 3\n",
+ "11 11: 8am (8am-9am) 3\n",
+ "12 12: 9am (9am-10am) 3\n",
+ "13 13: 10am (10am-11am) 3\n",
+ "14 14: Mid-Day (10am-4pm) 3\n",
+ "15 15: 11am (11am-12noon) 3\n",
+ "16 16: 12pm (12noon-1pm) 3\n",
+ "17 17: 1pm (1pm-2pm) 3\n",
+ "18 18: 2pm (2pm-3pm) 3\n",
+ "19 19: 3pm (3pm-4pm) 3\n",
+ "20 20: 4pm (4pm-5pm) 3\n",
+ "21 21: Peak PM (4pm-8pm) 3\n",
+ "22 22: 5pm (5pm-6pm) 3\n",
+ "23 23: 6pm (6pm-7pm) 3\n",
+ "24 24: 7pm (7pm-8pm) 3\n",
+ "25 25: 8pm (8pm-9pm) 3\n",
+ "26 26: Late PM (8pm-12am) 3\n",
+ "27 27: 9pm (9pm-10pm) 3\n",
+ "28 28: 10pm (10pm-11pm) 3\n",
+ "29 29: 11pm (11pm-12am) 3"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sl_data>>filter(_.zone_id==1133975975)>>count(_.day_part)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5d362f46-cfe6-4d13-b3ea-67f491a66897",
+ "metadata": {},
+ "source": [
+ "* The data here shows that each segment is broken out into hourly time slots and is also aggregated into peak time periods. Moving forward, we will use `All Day` for `day_part` and `All Days` for `day_type`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "57c890a6-c667-4917-a090-b85f1aadcad7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sl_data_single_day = sl_data>>filter(_.day_part == '00: All Day (12am-12am)', _.day_type == '0: All Days (M-Su)')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "0a059c82-c7c8-43b9-92a6-178b42101828",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# len(sl_data_single_day)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "id": "67213733-9905-42bd-912e-cf943cd66b40",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sl_data_single_day>>count(_.zone_id, _.zone_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "id": "2e497c77-91ef-490f-a288-abe4e2c740dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sl_data_single_day>>filter(_.zone_id==1133975975)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "id": "28292f62-b2ab-46cb-ac08-b728facdfdc0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " data_periods | \n",
+ " mode_of_travel | \n",
+ " zone_id | \n",
+ " zone_name | \n",
+ " road_classification | \n",
+ " line_zone_length__miles_ | \n",
+ " zone_is_pass_through | \n",
+ " zone_direction__degrees_ | \n",
+ " zone_cardinal_direction | \n",
+ " zone_is_bi_direction | \n",
+ " day_type | \n",
+ " day_part | \n",
+ " average_daily_segment_traffic__stl_volume_ | \n",
+ " avg_segment_speed__mph_ | \n",
+ " avg_segment_travel_time__sec_ | \n",
+ " free_flow_speed__mph_ | \n",
+ " vehicle_miles_of_travel__stl_volume_ | \n",
+ " travel_time_index | \n",
+ " congested_segment | \n",
+ " _85th_speed_percentile | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1440 | \n",
+ " Jan 01, 2022 - Dec 31, 2022 | \n",
+ " All Vehicles CVD Plus - StL All Vehicles Volume | \n",
+ " 1132861884 | \n",
+ " I 80 / 236348365 / 1 | \n",
+ " Motorway | \n",
+ " 0.006 | \n",
+ " yes | \n",
+ " 85 | \n",
+ " EAST | \n",
+ " no | \n",
+ " 0: All Days (M-Su) | \n",
+ " 00: All Day (12am-12am) | \n",
+ " 128581 | \n",
+ " 63 | \n",
+ " 2 | \n",
+ " 70.532 | \n",
+ " 725.2 | \n",
+ " 1.12 | \n",
+ " False | \n",
+ " 72 | \n",
+ "
\n",
+ " \n",
+ " 360 | \n",
+ " Jan 01, 2022 - Dec 31, 2022 | \n",
+ " All Vehicles CVD Plus - StL All Vehicles Volume | \n",
+ " 1036056766 | \n",
+ " San Francisco – Oakland Bay Bridge / 52721870 / 1 | \n",
+ " Motorway | \n",
+ " 0.073 | \n",
+ " yes | \n",
+ " 37 | \n",
+ " EAST | \n",
+ " no | \n",
+ " 0: All Days (M-Su) | \n",
+ " 00: All Day (12am-12am) | \n",
+ " 158493 | \n",
+ " 51 | \n",
+ " 5 | \n",
+ " 67.267 | \n",
+ " 11532.2 | \n",
+ " 1.32 | \n",
+ " True | \n",
+ " 66 | \n",
+ "
\n",
+ " \n",
+ " 270 | \n",
+ " Jan 01, 2022 - Dec 31, 2022 | \n",
+ " All Vehicles CVD Plus - StL All Vehicles Volume | \n",
+ " 1032819756 | \n",
+ " San Francisco – Oakland Bay Bridge / 11415208 / 1 | \n",
+ " Motorway | \n",
+ " 0.109 | \n",
+ " yes | \n",
+ " 220 | \n",
+ " WEST | \n",
+ " no | \n",
+ " 0: All Days (M-Su) | \n",
+ " 00: All Day (12am-12am) | \n",
+ " 132245 | \n",
+ " 49 | \n",
+ " 10 | \n",
+ " 68.596 | \n",
+ " 14450.0 | \n",
+ " 1.41 | \n",
+ " True | \n",
+ " 67 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " data_periods \\\n",
+ "1440 Jan 01, 2022 - Dec 31, 2022 \n",
+ "360 Jan 01, 2022 - Dec 31, 2022 \n",
+ "270 Jan 01, 2022 - Dec 31, 2022 \n",
+ "\n",
+ " mode_of_travel zone_id \\\n",
+ "1440 All Vehicles CVD Plus - StL All Vehicles Volume 1132861884 \n",
+ "360 All Vehicles CVD Plus - StL All Vehicles Volume 1036056766 \n",
+ "270 All Vehicles CVD Plus - StL All Vehicles Volume 1032819756 \n",
+ "\n",
+ " zone_name road_classification \\\n",
+ "1440 I 80 / 236348365 / 1 Motorway \n",
+ "360 San Francisco – Oakland Bay Bridge / 52721870 / 1 Motorway \n",
+ "270 San Francisco – Oakland Bay Bridge / 11415208 / 1 Motorway \n",
+ "\n",
+ " line_zone_length__miles_ zone_is_pass_through zone_direction__degrees_ \\\n",
+ "1440 0.006 yes 85 \n",
+ "360 0.073 yes 37 \n",
+ "270 0.109 yes 220 \n",
+ "\n",
+ " zone_cardinal_direction zone_is_bi_direction day_type \\\n",
+ "1440 EAST no 0: All Days (M-Su) \n",
+ "360 EAST no 0: All Days (M-Su) \n",
+ "270 WEST no 0: All Days (M-Su) \n",
+ "\n",
+ " day_part average_daily_segment_traffic__stl_volume_ \\\n",
+ "1440 00: All Day (12am-12am) 128581 \n",
+ "360 00: All Day (12am-12am) 158493 \n",
+ "270 00: All Day (12am-12am) 132245 \n",
+ "\n",
+ " avg_segment_speed__mph_ avg_segment_travel_time__sec_ \\\n",
+ "1440 63 2 \n",
+ "360 51 5 \n",
+ "270 49 10 \n",
+ "\n",
+ " free_flow_speed__mph_ vehicle_miles_of_travel__stl_volume_ \\\n",
+ "1440 70.532 725.2 \n",
+ "360 67.267 11532.2 \n",
+ "270 68.596 14450.0 \n",
+ "\n",
+ " travel_time_index congested_segment _85th_speed_percentile \n",
+ "1440 1.12 False 72 \n",
+ "360 1.32 True 66 \n",
+ "270 1.41 True 67 "
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sl_data_single_day.sample(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "id": "14211b7a-38f9-4b55-b669-67fdae44aa80",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sl_data_single_day>>group_by(_.zone_cardinal_direction, _.day_type, _.day_part)>>summarize(avg_speed= _.avg_segment_speed__mph_.mean(),\n",
+ "# avg_travel_time = _.avg_segment_travel_time__sec_.mean(),\n",
+ "# avg_volume = _.average_daily_segment_traffic__stl_volume_.mean(),\n",
+ "# sum_volume = _.average_daily_segment_traffic__stl_volume_.sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "id": "778d58f0-25b7-4cf7-b60a-10e773055f48",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sl_peaks = (sl_data>>filter(\n",
+ " _.day_part != \"01: 12am (12am-1am)\",\n",
+ " _.day_part != \"03: 1am (1am-2am)\",\n",
+ " _.day_part != \"04: 2am (2am-3am)\",\n",
+ " _.day_part != \"05: 3am (3am-4am)\",\n",
+ " _.day_part != \"06: 4am (4am-5am)\",\n",
+ " _.day_part != \"07: 5am (5am-6am)\",\n",
+ " _.day_part != \"08: 6am (6am-7am)\",\n",
+ " _.day_part != \"10: 7am (7am-8am)\",\n",
+ " _.day_part != \"11: 8am (8am-9am)\",\n",
+ " _.day_part != \"12: 9am (9am-10am)\",\n",
+ " _.day_part != \"13: 10am (10am-11am)\",\n",
+ " _.day_part != \"15: 11am (11am-12noon)\",\n",
+ " _.day_part != \"16: 12pm (12noon-1pm)\",\n",
+ " _.day_part != \"17: 1pm (1pm-2pm)\",\n",
+ " _.day_part != \"18: 2pm (2pm-3pm)\",\n",
+ " _.day_part != \"19: 3pm (3pm-4pm)\",\n",
+ " _.day_part != \"20: 4pm (4pm-5pm)\",\n",
+ " _.day_part != \"22: 5pm (5pm-6pm)\",\n",
+ " _.day_part != \"23: 6pm (6pm-7pm)\",\n",
+ " _.day_part != \"24: 7pm (7pm-8pm)\",\n",
+ " _.day_part != \"25: 8pm (8pm-9pm)\",\n",
+ " _.day_part != \"27: 9pm (9pm-10pm)\",\n",
+ " _.day_part != \"28: 10pm (10pm-11pm)\",\n",
+ " _.day_part !=\"29: 11pm (11pm-12am)\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f2fa572-8bb8-4b9f-a299-48df2b838c31",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "id": "29eab191-4eaa-445c-b444-49048135ef3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sl_hourly = (sl_data>>filter(_.day_part != \"00: All Day (12am-12am)\",\n",
+ " _.day_part != \"02: Early AM (12am-6am)\",\n",
+ " _.day_part != \"09: Peak AM (6am-10am)\",\n",
+ " _.day_part != \"14: Mid-Day (10am-4pm)\",\n",
+ " _.day_part != \"21: Peak PM (4pm-8pm)\", _.day_part !=\"26: Late PM (8pm-12am)\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4cd2a638-cce1-4c94-8444-4aa4844bf7f0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "id": "896b63a9-b881-418a-ba40-dbea63f81614",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sl_peaks_agg = sl_peaks>>group_by(_.zone_cardinal_direction,\n",
+ " _.day_type, _.day_part)>>summarize(avg_speed= _.avg_segment_speed__mph_.mean(),\n",
+ " avg_travel_time = _.avg_segment_travel_time__sec_.mean(),\n",
+ " avg_volume = _.average_daily_segment_traffic__stl_volume_.mean(),\n",
+ " sum_volume = _.average_daily_segment_traffic__stl_volume_.sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "id": "cac29684-36ac-488b-b2d1-044ab5ef86c0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sl_peaks_agg.sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "id": "eedc968c-8306-449a-a2e3-774719d8e256",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(sl_peaks_agg)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"day_part\"),\n",
+ " y=alt.Y(\"avg_speed\"),\n",
+ " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=sl_peaks_agg[\"zone_cardinal_direction\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=sl_peaks_agg.columns.tolist(),\n",
+ " )\n",
+ " .properties(title = \"Average Speed by Direction by Day Part\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "id": "637afffc-560f-4485-985a-d6ee73dd65a7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 92,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(sl_peaks_agg)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"day_part\"),\n",
+ " y=alt.Y(\"sum_volume\"),\n",
+ " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=sl_peaks_agg[\"zone_cardinal_direction\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=sl_peaks_agg.columns.tolist(),\n",
+ " )\n",
+ " .properties(title = \"Average Volume by Direction by Day Part\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6dade438-083b-446d-b1f7-d1542a3e60e6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d954475-9f22-418b-bb31-0ab9799a459e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "id": "b2eaa07a-2d05-4ed5-a405-a474500a643e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sl_hourly_agg = sl_hourly>>filter(_.day_type==\"0: All Days (M-Su)\")>>group_by(_.zone_cardinal_direction,\n",
+ " _.day_type, _.day_part)>>summarize(avg_speed= _.avg_segment_speed__mph_.mean(),\n",
+ " avg_travel_time = _.avg_segment_travel_time__sec_.mean(),\n",
+ " avg_volume = _.average_daily_segment_traffic__stl_volume_.mean(),\n",
+ " sum_volume = _.average_daily_segment_traffic__stl_volume_.sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "id": "1ab3835d-5f8e-4721-9c29-7bd107bad334",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# sl_hourly_agg.sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "10efa809-ec77-4127-aa62-a838dd005c7d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "id": "8cc145e6-b3d9-4b31-9a0d-270431ebf5fb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(sl_hourly_agg)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"day_part\"),\n",
+ " y=alt.Y(\"avg_speed\"),\n",
+ " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=sl_hourly_agg[\"zone_cardinal_direction\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=sl_hourly_agg.columns.tolist(),\n",
+ " )\n",
+ " .properties(title = \"Average Speed by Hour\", \n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "id": "b54b867c-d073-4280-bfd3-2fa5bd387016",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 96,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(sl_hourly_agg)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"day_part\"),\n",
+ " y=alt.Y(\"sum_volume\"),\n",
+ " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=sl_hourly_agg[\"zone_cardinal_direction\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=sl_hourly_agg.columns.tolist(),\n",
+ " )\n",
+ " .properties(title = \"Total Volume by Hour\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd1987e0-957e-4b30-a940-86f3c9c0fd39",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "76157cad-aeda-4ba4-9f98-77c677e6b6c6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bfe9e847-b78f-4e81-b735-708df21764a0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "006dfcc5-bf3f-4a33-a654-601fbb232b71",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ecded8d2-70ae-4a9a-8592-cc5e9b9cd8df",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d007800-e20a-4991-b99d-f6cf74521b05",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbc819b1-0dfc-46be-90bd-b136263dc22c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "state": {},
+ "version_major": 2,
+ "version_minor": 0
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/sb125_analyses/corridor_study/data_downloads_skirball.ipynb b/sb125_analyses/corridor_study/data_downloads_skirball.ipynb
new file mode 100644
index 000000000..268ec1842
--- /dev/null
+++ b/sb125_analyses/corridor_study/data_downloads_skirball.ipynb
@@ -0,0 +1,4459 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "82a53f1d-2622-4cb0-bfdd-36ceec652215",
+ "metadata": {},
+ "source": [
+ "# SB1 Big Data Downloads: I-405 Skirball\n",
+ "An analysis of the corridor study data downloaded from Streetlight and Replica"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "45233485-2055-499a-a89e-fc154fd56e63",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_516/2202862553.py:12: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n",
+ " from IPython.core.display import display\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from siuba import *\n",
+ "import ast\n",
+ "\n",
+ "\n",
+ "from calitp_data_analysis.sql import to_snakecase\n",
+ "\n",
+ "import altair as alt\n",
+ "from calitp_data_analysis import calitp_color_palette as cp\n",
+ "\n",
+ "from IPython.display import Markdown, HTML, display_html, display\n",
+ "from IPython.core.display import display\n",
+ "\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "9e5db6b9-2add-42e7-b820-1b075dc3bbcc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from dla_utils import _dla_utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "c0ea35c1-246d-4356-ad1c-83fe9cc437ff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import _utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "651aae92-5188-4676-8e5f-3c040f77077c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.set_option('display.max_columns', 500)\n",
+ "pd.set_option('display.max_colwidth', 100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "af402da0-0b03-4c71-a1af-19f97f67cef1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/sb125/corridor_study_data/\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "71c1deda-8e83-45d8-a0f5-20b36b7051c0",
+ "metadata": {},
+ "source": [
+ "### Read in Replica Data Spring 2023"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "0e5c1b44-c981-4a9a-b6ea-e1cb2a7a279e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "replica_trips = \"replica-405_skirball-05_24_24-trips_dataset.csv\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9f14685-0953-4b1d-a636-882230fef1d5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "cb3dea26-4e6d-4dc7-acc8-27b899a0a537",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_516/2643890524.py:1: DtypeWarning: Columns (18,19,20,25,26,28,29,30,31,33,36,38,39,40,41,42,43) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ca697494-b872-4de8-afd9-c538a455364c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " Sample of data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\" Sample of data
\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "4713b1a5-ccac-4e1b-8552-850285cad53e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " activity_id | \n",
+ " origin_bgrp_2020 | \n",
+ " origin_trct_2020 | \n",
+ " origin_cty_2020 | \n",
+ " origin_st_2020 | \n",
+ " destination_bgrp_2020 | \n",
+ " destination_trct_2020 | \n",
+ " destination_cty_2020 | \n",
+ " destination_st_2020 | \n",
+ " primary_mode | \n",
+ " trip_purpose | \n",
+ " previous_trip_purpose | \n",
+ " trip_start_time | \n",
+ " trip_end_time | \n",
+ " trip_duration_minutes | \n",
+ " trip_distance_miles | \n",
+ " vehicle_type | \n",
+ " vehicle_fuel_type | \n",
+ " transit_submode | \n",
+ " transit_agency | \n",
+ " transit_route | \n",
+ " origin_land_use | \n",
+ " origin_building_use | \n",
+ " destination_land_use | \n",
+ " destination_building_use | \n",
+ " trip_taker_person_id | \n",
+ " trip_taker_household_id | \n",
+ " trip_taker_age | \n",
+ " trip_taker_sex | \n",
+ " trip_taker_race_ethnicity | \n",
+ " trip_taker_employment_status | \n",
+ " trip_taker_wfh | \n",
+ " trip_taker_individual_income | \n",
+ " trip_taker_commute_mode | \n",
+ " trip_taker_household_size | \n",
+ " trip_taker_household_income | \n",
+ " trip_taker_available_vehicles | \n",
+ " trip_taker_resident_type | \n",
+ " trip_taker_industry | \n",
+ " trip_taker_building_type | \n",
+ " trip_taker_school_grade_attending | \n",
+ " trip_taker_education | \n",
+ " trip_taker_tenure | \n",
+ " trip_taker_language | \n",
+ " trip_taker_home_bgrp_2020 | \n",
+ " trip_taker_home_trct_2020 | \n",
+ " trip_taker_home_cty_2020 | \n",
+ " trip_taker_home_st_2020 | \n",
+ " trip_taker_work_bgrp_2020 | \n",
+ " trip_taker_work_trct_2020 | \n",
+ " trip_taker_work_cty_2020 | \n",
+ " trip_taker_work_st_2020 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 257013 | \n",
+ " 9302896571626397264 | \n",
+ " 2 (Tract 2169.02, Los Angeles, CA) | \n",
+ " 2169.02 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 1 (Tract 1281.02, Los Angeles, CA) | \n",
+ " 1281.02 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " private_auto | \n",
+ " home | \n",
+ " social | \n",
+ " 12:17:00 | \n",
+ " 12:50:00 | \n",
+ " 33 | \n",
+ " 18.90 | \n",
+ " unknown_vehicle_type | \n",
+ " other_non_bev | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " multi_family | \n",
+ " multi_family | \n",
+ " 7579849294298285797 | \n",
+ " 9607203881269597076 | \n",
+ " 33.00 | \n",
+ " male | \n",
+ " white_not_hispanic_or_latino | \n",
+ " employed | \n",
+ " in_person | \n",
+ " 59,218.00 | \n",
+ " private_auto | \n",
+ " 3.00 | \n",
+ " 184,761.00 | \n",
+ " two | \n",
+ " core | \n",
+ " naics23 | \n",
+ " several_units | \n",
+ " not_attending_school | \n",
+ " some_college | \n",
+ " renter | \n",
+ " english | \n",
+ " 1 (Tract 1281.02, Los Angeles, CA) | \n",
+ " 1281.02 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 1 (Tract 2171.01, Los Angeles, CA) | \n",
+ " 2171.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ "
\n",
+ " \n",
+ " 366384 | \n",
+ " 10044791123655181857 | \n",
+ " 2 (Tract 5033.01, Los Angeles, CA) | \n",
+ " 5033.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 2 (Tract 2622, Los Angeles, CA) | \n",
+ " 2622 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " auto_passenger | \n",
+ " home | \n",
+ " social | \n",
+ " 12:55:16 | \n",
+ " 13:57:55 | \n",
+ " 62 | \n",
+ " 35.70 | \n",
+ " unknown_vehicle_type | \n",
+ " unknown_fuel_type | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " 1348767672027219422 | \n",
+ " 14444669387201913369 | \n",
+ " 27.00 | \n",
+ " female | \n",
+ " black_not_hispanic_or_latino | \n",
+ " employed | \n",
+ " in_person | \n",
+ " 94,986.00 | \n",
+ " private_auto | \n",
+ " 2.00 | \n",
+ " 94,986.00 | \n",
+ " three_plus | \n",
+ " core | \n",
+ " naics61 | \n",
+ " single_family | \n",
+ " not_attending_school | \n",
+ " advanced_degree | \n",
+ " owner | \n",
+ " english | \n",
+ " 2 (Tract 2622, Los Angeles, CA) | \n",
+ " 2622 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 2 (Tract 1397.01, Los Angeles, CA) | \n",
+ " 1397.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ "
\n",
+ " \n",
+ " 404087 | \n",
+ " 8025058747789155362 | \n",
+ " 1 (Tract 2623.03, Los Angeles, CA) | \n",
+ " 2623.03 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 2 (Tract 1041.03, Los Angeles, CA) | \n",
+ " 1041.03 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " private_auto | \n",
+ " home | \n",
+ " social | \n",
+ " 13:39:16 | \n",
+ " 14:19:59 | \n",
+ " 40 | \n",
+ " 21.00 | \n",
+ " unknown_vehicle_type | \n",
+ " other_non_bev | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " 15874717675890836160 | \n",
+ " 5139820635381915520 | \n",
+ " 52.00 | \n",
+ " female | \n",
+ " hispanic_or_latino_origin | \n",
+ " not_in_labor_force | \n",
+ " unemployed_under_16_not_in_labor_force | \n",
+ " 0.00 | \n",
+ " other_travel_mode | \n",
+ " 13.00 | \n",
+ " 115,058.00 | \n",
+ " three_plus | \n",
+ " core | \n",
+ " not_working | \n",
+ " single_family | \n",
+ " not_attending_school | \n",
+ " k_12 | \n",
+ " owner | \n",
+ " spanish | \n",
+ " 2 (Tract 1041.03, Los Angeles, CA) | \n",
+ " 1041.03 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " Does not have work/school location | \n",
+ " Does not have work/school location | \n",
+ " Does not have work/school location | \n",
+ " Does not have work/school location | \n",
+ "
\n",
+ " \n",
+ " 213866 | \n",
+ " 7281458345523096603 | \n",
+ " 1 (Tract 1375.01, Los Angeles, CA) | \n",
+ " 1375.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 3 (Tract 7014.02, Los Angeles, CA) | \n",
+ " 7014.02 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " auto_passenger | \n",
+ " work | \n",
+ " home | \n",
+ " 06:32:00 | \n",
+ " 07:14:10 | \n",
+ " 42 | \n",
+ " 21.40 | \n",
+ " unknown_vehicle_type | \n",
+ " unknown_fuel_type | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " mixed_use | \n",
+ " healthcare | \n",
+ " 6116471627154071969 | \n",
+ " 7757742330168392134 | \n",
+ " 34.00 | \n",
+ " male | \n",
+ " white_not_hispanic_or_latino | \n",
+ " employed | \n",
+ " in_person | \n",
+ " 129,138.00 | \n",
+ " private_auto | \n",
+ " 2.00 | \n",
+ " 129,138.00 | \n",
+ " two | \n",
+ " core | \n",
+ " naics622110 | \n",
+ " single_family | \n",
+ " not_attending_school | \n",
+ " advanced_degree | \n",
+ " renter | \n",
+ " indo_european | \n",
+ " 1 (Tract 1375.01, Los Angeles, CA) | \n",
+ " 1375.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 3 (Tract 7014.02, Los Angeles, CA) | \n",
+ " 7014.02 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ "
\n",
+ " \n",
+ " 403783 | \n",
+ " 16782161229821888706 | \n",
+ " 3 (Tract 7028.01, Los Angeles, CA) | \n",
+ " 7028.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 2 (Tract 1066.04, Los Angeles, CA) | \n",
+ " 1066.04 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " auto_passenger | \n",
+ " home | \n",
+ " shop | \n",
+ " 16:58:00 | \n",
+ " 18:02:01 | \n",
+ " 64 | \n",
+ " 23.40 | \n",
+ " unknown_vehicle_type | \n",
+ " unknown_fuel_type | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " office | \n",
+ " office | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " 14388114560565293720 | \n",
+ " 4788446096359409371 | \n",
+ " 88.00 | \n",
+ " male | \n",
+ " hispanic_or_latino_origin | \n",
+ " not_in_labor_force | \n",
+ " unemployed_under_16_not_in_labor_force | \n",
+ " 26,495.00 | \n",
+ " other_travel_mode | \n",
+ " 1.00 | \n",
+ " 26,495.00 | \n",
+ " zero | \n",
+ " core | \n",
+ " not_working | \n",
+ " single_family | \n",
+ " not_attending_school | \n",
+ " k_12 | \n",
+ " renter | \n",
+ " spanish | \n",
+ " 2 (Tract 1066.04, Los Angeles, CA) | \n",
+ " 1066.04 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " Does not have work/school location | \n",
+ " Does not have work/school location | \n",
+ " Does not have work/school location | \n",
+ " Does not have work/school location | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " activity_id origin_bgrp_2020 \\\n",
+ "257013 9302896571626397264 2 (Tract 2169.02, Los Angeles, CA) \n",
+ "366384 10044791123655181857 2 (Tract 5033.01, Los Angeles, CA) \n",
+ "404087 8025058747789155362 1 (Tract 2623.03, Los Angeles, CA) \n",
+ "213866 7281458345523096603 1 (Tract 1375.01, Los Angeles, CA) \n",
+ "403783 16782161229821888706 3 (Tract 7028.01, Los Angeles, CA) \n",
+ "\n",
+ " origin_trct_2020 origin_cty_2020 origin_st_2020 \\\n",
+ "257013 2169.02 (Los Angeles, CA) Los Angeles County, CA California \n",
+ "366384 5033.01 (Los Angeles, CA) Los Angeles County, CA California \n",
+ "404087 2623.03 (Los Angeles, CA) Los Angeles County, CA California \n",
+ "213866 1375.01 (Los Angeles, CA) Los Angeles County, CA California \n",
+ "403783 7028.01 (Los Angeles, CA) Los Angeles County, CA California \n",
+ "\n",
+ " destination_bgrp_2020 destination_trct_2020 \\\n",
+ "257013 1 (Tract 1281.02, Los Angeles, CA) 1281.02 (Los Angeles, CA) \n",
+ "366384 2 (Tract 2622, Los Angeles, CA) 2622 (Los Angeles, CA) \n",
+ "404087 2 (Tract 1041.03, Los Angeles, CA) 1041.03 (Los Angeles, CA) \n",
+ "213866 3 (Tract 7014.02, Los Angeles, CA) 7014.02 (Los Angeles, CA) \n",
+ "403783 2 (Tract 1066.04, Los Angeles, CA) 1066.04 (Los Angeles, CA) \n",
+ "\n",
+ " destination_cty_2020 destination_st_2020 primary_mode \\\n",
+ "257013 Los Angeles County, CA California private_auto \n",
+ "366384 Los Angeles County, CA California auto_passenger \n",
+ "404087 Los Angeles County, CA California private_auto \n",
+ "213866 Los Angeles County, CA California auto_passenger \n",
+ "403783 Los Angeles County, CA California auto_passenger \n",
+ "\n",
+ " trip_purpose previous_trip_purpose trip_start_time trip_end_time \\\n",
+ "257013 home social 12:17:00 12:50:00 \n",
+ "366384 home social 12:55:16 13:57:55 \n",
+ "404087 home social 13:39:16 14:19:59 \n",
+ "213866 work home 06:32:00 07:14:10 \n",
+ "403783 home shop 16:58:00 18:02:01 \n",
+ "\n",
+ " trip_duration_minutes trip_distance_miles vehicle_type \\\n",
+ "257013 33 18.90 unknown_vehicle_type \n",
+ "366384 62 35.70 unknown_vehicle_type \n",
+ "404087 40 21.00 unknown_vehicle_type \n",
+ "213866 42 21.40 unknown_vehicle_type \n",
+ "403783 64 23.40 unknown_vehicle_type \n",
+ "\n",
+ " vehicle_fuel_type transit_submode transit_agency transit_route \\\n",
+ "257013 other_non_bev NaN NaN NaN \n",
+ "366384 unknown_fuel_type NaN NaN NaN \n",
+ "404087 other_non_bev NaN NaN NaN \n",
+ "213866 unknown_fuel_type NaN NaN NaN \n",
+ "403783 unknown_fuel_type NaN NaN NaN \n",
+ "\n",
+ " origin_land_use origin_building_use destination_land_use \\\n",
+ "257013 single_family single_family multi_family \n",
+ "366384 single_family single_family single_family \n",
+ "404087 single_family single_family single_family \n",
+ "213866 single_family single_family mixed_use \n",
+ "403783 office office single_family \n",
+ "\n",
+ " destination_building_use trip_taker_person_id trip_taker_household_id \\\n",
+ "257013 multi_family 7579849294298285797 9607203881269597076 \n",
+ "366384 single_family 1348767672027219422 14444669387201913369 \n",
+ "404087 single_family 15874717675890836160 5139820635381915520 \n",
+ "213866 healthcare 6116471627154071969 7757742330168392134 \n",
+ "403783 single_family 14388114560565293720 4788446096359409371 \n",
+ "\n",
+ " trip_taker_age trip_taker_sex trip_taker_race_ethnicity \\\n",
+ "257013 33.00 male white_not_hispanic_or_latino \n",
+ "366384 27.00 female black_not_hispanic_or_latino \n",
+ "404087 52.00 female hispanic_or_latino_origin \n",
+ "213866 34.00 male white_not_hispanic_or_latino \n",
+ "403783 88.00 male hispanic_or_latino_origin \n",
+ "\n",
+ " trip_taker_employment_status trip_taker_wfh \\\n",
+ "257013 employed in_person \n",
+ "366384 employed in_person \n",
+ "404087 not_in_labor_force unemployed_under_16_not_in_labor_force \n",
+ "213866 employed in_person \n",
+ "403783 not_in_labor_force unemployed_under_16_not_in_labor_force \n",
+ "\n",
+ " trip_taker_individual_income trip_taker_commute_mode \\\n",
+ "257013 59,218.00 private_auto \n",
+ "366384 94,986.00 private_auto \n",
+ "404087 0.00 other_travel_mode \n",
+ "213866 129,138.00 private_auto \n",
+ "403783 26,495.00 other_travel_mode \n",
+ "\n",
+ " trip_taker_household_size trip_taker_household_income \\\n",
+ "257013 3.00 184,761.00 \n",
+ "366384 2.00 94,986.00 \n",
+ "404087 13.00 115,058.00 \n",
+ "213866 2.00 129,138.00 \n",
+ "403783 1.00 26,495.00 \n",
+ "\n",
+ " trip_taker_available_vehicles trip_taker_resident_type \\\n",
+ "257013 two core \n",
+ "366384 three_plus core \n",
+ "404087 three_plus core \n",
+ "213866 two core \n",
+ "403783 zero core \n",
+ "\n",
+ " trip_taker_industry trip_taker_building_type \\\n",
+ "257013 naics23 several_units \n",
+ "366384 naics61 single_family \n",
+ "404087 not_working single_family \n",
+ "213866 naics622110 single_family \n",
+ "403783 not_working single_family \n",
+ "\n",
+ " trip_taker_school_grade_attending trip_taker_education \\\n",
+ "257013 not_attending_school some_college \n",
+ "366384 not_attending_school advanced_degree \n",
+ "404087 not_attending_school k_12 \n",
+ "213866 not_attending_school advanced_degree \n",
+ "403783 not_attending_school k_12 \n",
+ "\n",
+ " trip_taker_tenure trip_taker_language \\\n",
+ "257013 renter english \n",
+ "366384 owner english \n",
+ "404087 owner spanish \n",
+ "213866 renter indo_european \n",
+ "403783 renter spanish \n",
+ "\n",
+ " trip_taker_home_bgrp_2020 trip_taker_home_trct_2020 \\\n",
+ "257013 1 (Tract 1281.02, Los Angeles, CA) 1281.02 (Los Angeles, CA) \n",
+ "366384 2 (Tract 2622, Los Angeles, CA) 2622 (Los Angeles, CA) \n",
+ "404087 2 (Tract 1041.03, Los Angeles, CA) 1041.03 (Los Angeles, CA) \n",
+ "213866 1 (Tract 1375.01, Los Angeles, CA) 1375.01 (Los Angeles, CA) \n",
+ "403783 2 (Tract 1066.04, Los Angeles, CA) 1066.04 (Los Angeles, CA) \n",
+ "\n",
+ " trip_taker_home_cty_2020 trip_taker_home_st_2020 \\\n",
+ "257013 Los Angeles County, CA California \n",
+ "366384 Los Angeles County, CA California \n",
+ "404087 Los Angeles County, CA California \n",
+ "213866 Los Angeles County, CA California \n",
+ "403783 Los Angeles County, CA California \n",
+ "\n",
+ " trip_taker_work_bgrp_2020 \\\n",
+ "257013 1 (Tract 2171.01, Los Angeles, CA) \n",
+ "366384 2 (Tract 1397.01, Los Angeles, CA) \n",
+ "404087 Does not have work/school location \n",
+ "213866 3 (Tract 7014.02, Los Angeles, CA) \n",
+ "403783 Does not have work/school location \n",
+ "\n",
+ " trip_taker_work_trct_2020 \\\n",
+ "257013 2171.01 (Los Angeles, CA) \n",
+ "366384 1397.01 (Los Angeles, CA) \n",
+ "404087 Does not have work/school location \n",
+ "213866 7014.02 (Los Angeles, CA) \n",
+ "403783 Does not have work/school location \n",
+ "\n",
+ " trip_taker_work_cty_2020 trip_taker_work_st_2020 \n",
+ "257013 Los Angeles County, CA California \n",
+ "366384 Los Angeles County, CA California \n",
+ "404087 Does not have work/school location Does not have work/school location \n",
+ "213866 Los Angeles County, CA California \n",
+ "403783 Does not have work/school location Does not have work/school location "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips.sample(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2199852-9b6e-46ca-86fd-abe71148d13f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "75945ef2-eab7-469a-baad-ee563d70c309",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " Columns in Replica Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\" Columns in Replica Trips Data
\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "1f3e4cbb-d211-40b1-b4bb-2c7cb43b33a9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['activity_id', 'origin_bgrp_2020', 'origin_trct_2020',\n",
+ " 'origin_cty_2020', 'origin_st_2020', 'destination_bgrp_2020',\n",
+ " 'destination_trct_2020', 'destination_cty_2020', 'destination_st_2020',\n",
+ " 'primary_mode', 'trip_purpose', 'previous_trip_purpose',\n",
+ " 'trip_start_time', 'trip_end_time', 'trip_duration_minutes',\n",
+ " 'trip_distance_miles', 'vehicle_type', 'vehicle_fuel_type',\n",
+ " 'transit_submode', 'transit_agency', 'transit_route', 'origin_land_use',\n",
+ " 'origin_building_use', 'destination_land_use',\n",
+ " 'destination_building_use', 'trip_taker_person_id',\n",
+ " 'trip_taker_household_id', 'trip_taker_age', 'trip_taker_sex',\n",
+ " 'trip_taker_race_ethnicity', 'trip_taker_employment_status',\n",
+ " 'trip_taker_wfh', 'trip_taker_individual_income',\n",
+ " 'trip_taker_commute_mode', 'trip_taker_household_size',\n",
+ " 'trip_taker_household_income', 'trip_taker_available_vehicles',\n",
+ " 'trip_taker_resident_type', 'trip_taker_industry',\n",
+ " 'trip_taker_building_type', 'trip_taker_school_grade_attending',\n",
+ " 'trip_taker_education', 'trip_taker_tenure', 'trip_taker_language',\n",
+ " 'trip_taker_home_bgrp_2020', 'trip_taker_home_trct_2020',\n",
+ " 'trip_taker_home_cty_2020', 'trip_taker_home_st_2020',\n",
+ " 'trip_taker_work_bgrp_2020', 'trip_taker_work_trct_2020',\n",
+ " 'trip_taker_work_cty_2020', 'trip_taker_work_st_2020'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e60fb5c7-a04b-48f3-8282-ca98a54dadd2",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "92f9a5f4-08f1-43fc-ae01-c6bd404891d6",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "#### Data Exploration: Replica Trips\n",
+ "* Summarizing the data that we exported from Replica\n",
+ "* Existing visualizations in Replica: \n",
+ " * Primary Mode\n",
+ " * Trip Purpose\n",
+ " * Starting hour \n",
+ " * Trip Duration/Distance\n",
+ " * Origin Destination\n",
+ " * Vehicle Fuel type\n",
+ " * Transit Routes/Stops/Sub mode/Agency\n",
+ " * Household Income\n",
+ " * Race and Ethnicity\n",
+ " * Private Auto Availability\n",
+ " * Age\n",
+ " * Employment/School Status\n",
+ " \n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "c191620c-cef4-446a-bf6b-e17222f47ab6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# def basic_bar_chart(df, x_col, y_col, color_col):\n",
+ "\n",
+ "# chart = (alt.Chart(df)\n",
+ "# .mark_bar()\n",
+ "# .encode(\n",
+ "# x=alt.X(x_col, title=labeling(x_col)),\n",
+ "# y=alt.Y(y_col, title=labeling(y_col)),\n",
+ "# color = (alt.Color(color_col,\n",
+ "# scale=alt.Scale(\n",
+ "# range=cp.CALITP_CATEGORY_BRIGHT_COLORS),\n",
+ "# legend=alt.Legend(title=(labeling(color_col)), symbolLimit=10)\n",
+ "# )),\n",
+ "# tooltip=[alt.Tooltip(x_col, title=labeling(x_col)),\n",
+ "# alt.Tooltip(y_col, title=labeling(y_col))]\n",
+ "# )\n",
+ "# )\n",
+ "\n",
+ "# chart=styleguide.preset_chart_config(chart)\n",
+ "# chart = add_tooltip(chart, labeling(x_col), labeling(y_col))\n",
+ "# return chart"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "d7f3b44a-cd77-42e9-b0d8-7b9d228b37d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trips = r_trips>>group_by(_.primary_mode)>>summarize(avg_trip_time = _.trip_duration_minutes.mean(),\n",
+ " number_trips = _.activity_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "3c77c803-9ecb-4cb4-9d1d-d7e2fa5d4074",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 5 entries, 0 to 4\n",
+ "Data columns (total 3 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 primary_mode 5 non-null object \n",
+ " 1 avg_trip_time 5 non-null float64\n",
+ " 2 number_trips 5 non-null int64 \n",
+ "dtypes: float64(1), int64(1), object(1)\n",
+ "memory usage: 248.0+ bytes\n"
+ ]
+ }
+ ],
+ "source": [
+ "trips.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "05893215-e993-44dc-9c4b-1aa7d94815cd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(trips)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\"),\n",
+ " y=alt.Y(\"avg_trip_time\"),\n",
+ " color=alt.Color(\"avg_trip_time\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)\n",
+ " ), tooltip=trips.columns.tolist())\n",
+ " .properties(title = \"Average Trip Time\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "65681a29-7f1b-42ed-9e0e-371a84a29cbd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(trips)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\"),\n",
+ " y=alt.Y(\"number_trips\"),\n",
+ " color=alt.Color(\"number_trips\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,)\n",
+ " ), tooltip=trips.columns.tolist())\n",
+ " .properties(title=\"Number of Trips\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "0d60fe54-dc88-4570-8610-125b37b5917d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Length of Trips dataframe
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Length of Trips dataframe
\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "4f98abae-ae6b-4b4e-9427-b606a557582f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "466756"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(r_trips)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "55c6ff06-afed-45dc-b764-93188c2b9958",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Number of unique activity ids in data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Number of unique activity ids in data
\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "27c00872-5880-4464-84a5-e91423ce7895",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " number_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 466756 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " number_trips\n",
+ "0 466756"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>summarize(number_trips = _.activity_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "a20cbce8-e354-4aa3-99db-666a13576b5e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Number of unique trip taker ids
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Number of unique trip taker ids
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "2def0aaf-9182-4393-8049-2dfb95749585",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " number_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 281798 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " number_trips\n",
+ "0 281798"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>summarize(number_trips = _.trip_taker_person_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "47fecd22-8276-48b8-9c74-d3a94906bbae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "race_ethnicity_by_mode = r_trips>>group_by(_.primary_mode)>>count(_.trip_taker_race_ethnicity)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "4cbeb867-916b-48aa-9949-d769fcd5af5a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Trip Taker Race and Ethnicity by Mode
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Trip Taker Race and Ethnicity by Mode
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "5e895499-2142-432d-8402-c035f81326b6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(race_ethnicity_by_mode)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\"),\n",
+ " y=alt.Y(\"n\"),\n",
+ " color=alt.Color(\"trip_taker_race_ethnicity\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=race_ethnicity_by_mode[\"trip_taker_race_ethnicity\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=race_ethnicity_by_mode.columns.tolist())\n",
+ " \n",
+ " .properties(\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da3bbe59-9b06-471f-beb1-4762b8939600",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "500ad806-51ed-47e9-88a5-fc8a41edd7aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Transit Mode Splits
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Transit Mode Splits
\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "d8cd6354-0433-4134-ad6e-9df92122cbd2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count.\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "261d7f48-f7c3-4ea7-a526-6f813ca16ede",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_agency_count = (r_trips\n",
+ "# >>filter(_.primary_mode==\"public_transit\")\n",
+ "# >>group_by(_.primary_mode, _.transit_submode, _.transit_agency)\n",
+ "# >>summarize(n =_.activity_id.nunique())\n",
+ "# >>arrange(-_.n))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "e4516beb-8277-4fd4-aed1-23af65fc4a28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_agency_count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "727511b8-3141-40e4-b654-30aaae8200d9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Most common transit mode combinations
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Most common transit mode combinations
\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f3c31371-899f-4474-85b6-86e78e89209b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "657ff2f3-0d0d-419d-b3d5-512212897fb1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ptt_modes.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "80daf7a1-b947-469c-b3b0-09d0302714b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "##### unnesting the transit submode and agencies to get counts. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "edd81120-9756-4edb-b713-1586a9fd5021",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " activity_id | \n",
+ " origin_bgrp_2020 | \n",
+ " origin_trct_2020 | \n",
+ " origin_cty_2020 | \n",
+ " origin_st_2020 | \n",
+ " destination_bgrp_2020 | \n",
+ " destination_trct_2020 | \n",
+ " destination_cty_2020 | \n",
+ " destination_st_2020 | \n",
+ " primary_mode | \n",
+ " trip_purpose | \n",
+ " previous_trip_purpose | \n",
+ " trip_start_time | \n",
+ " trip_end_time | \n",
+ " trip_duration_minutes | \n",
+ " trip_distance_miles | \n",
+ " vehicle_type | \n",
+ " vehicle_fuel_type | \n",
+ " transit_submode | \n",
+ " transit_agency | \n",
+ " transit_route | \n",
+ " origin_land_use | \n",
+ " origin_building_use | \n",
+ " destination_land_use | \n",
+ " destination_building_use | \n",
+ " trip_taker_person_id | \n",
+ " trip_taker_household_id | \n",
+ " trip_taker_age | \n",
+ " trip_taker_sex | \n",
+ " trip_taker_race_ethnicity | \n",
+ " trip_taker_employment_status | \n",
+ " trip_taker_wfh | \n",
+ " trip_taker_individual_income | \n",
+ " trip_taker_commute_mode | \n",
+ " trip_taker_household_size | \n",
+ " trip_taker_household_income | \n",
+ " trip_taker_available_vehicles | \n",
+ " trip_taker_resident_type | \n",
+ " trip_taker_industry | \n",
+ " trip_taker_building_type | \n",
+ " trip_taker_school_grade_attending | \n",
+ " trip_taker_education | \n",
+ " trip_taker_tenure | \n",
+ " trip_taker_language | \n",
+ " trip_taker_home_bgrp_2020 | \n",
+ " trip_taker_home_trct_2020 | \n",
+ " trip_taker_home_cty_2020 | \n",
+ " trip_taker_home_st_2020 | \n",
+ " trip_taker_work_bgrp_2020 | \n",
+ " trip_taker_work_trct_2020 | \n",
+ " trip_taker_work_cty_2020 | \n",
+ " trip_taker_work_st_2020 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 61898 | \n",
+ " 7706952349763026875 | \n",
+ " 2 (Tract 1111, Los Angeles, CA) | \n",
+ " 1111 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 1 (Tract 2060.51, Los Angeles, CA) | \n",
+ " 2060.51 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " public_transit | \n",
+ " work | \n",
+ " home | \n",
+ " 05:35:00 | \n",
+ " 07:53:14 | \n",
+ " 138 | \n",
+ " 29.80 | \n",
+ " unknown_vehicle_type | \n",
+ " unknown_fuel_type | \n",
+ " bus, bus | \n",
+ " LADOTMVN, Metro - Los Angeles | \n",
+ " CE573, Metro Rapid Line | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " retail | \n",
+ " retail | \n",
+ " 15982840578351068133 | \n",
+ " 16243967259505879865 | \n",
+ " 48.00 | \n",
+ " male | \n",
+ " hispanic_or_latino_origin | \n",
+ " employed | \n",
+ " in_person | \n",
+ " 46,903.00 | \n",
+ " private_auto | \n",
+ " 5.00 | \n",
+ " 88,596.00 | \n",
+ " zero | \n",
+ " core | \n",
+ " naics56 | \n",
+ " single_family | \n",
+ " not_attending_school | \n",
+ " high_school | \n",
+ " renter | \n",
+ " spanish | \n",
+ " 2 (Tract 1111, Los Angeles, CA) | \n",
+ " 1111 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 1 (Tract 2060.51, Los Angeles, CA) | \n",
+ " 2060.51 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " activity_id origin_bgrp_2020 \\\n",
+ "61898 7706952349763026875 2 (Tract 1111, Los Angeles, CA) \n",
+ "\n",
+ " origin_trct_2020 origin_cty_2020 origin_st_2020 \\\n",
+ "61898 1111 (Los Angeles, CA) Los Angeles County, CA California \n",
+ "\n",
+ " destination_bgrp_2020 destination_trct_2020 \\\n",
+ "61898 1 (Tract 2060.51, Los Angeles, CA) 2060.51 (Los Angeles, CA) \n",
+ "\n",
+ " destination_cty_2020 destination_st_2020 primary_mode \\\n",
+ "61898 Los Angeles County, CA California public_transit \n",
+ "\n",
+ " trip_purpose previous_trip_purpose trip_start_time trip_end_time \\\n",
+ "61898 work home 05:35:00 07:53:14 \n",
+ "\n",
+ " trip_duration_minutes trip_distance_miles vehicle_type \\\n",
+ "61898 138 29.80 unknown_vehicle_type \n",
+ "\n",
+ " vehicle_fuel_type transit_submode transit_agency \\\n",
+ "61898 unknown_fuel_type bus, bus LADOTMVN, Metro - Los Angeles \n",
+ "\n",
+ " transit_route origin_land_use origin_building_use \\\n",
+ "61898 CE573, Metro Rapid Line single_family single_family \n",
+ "\n",
+ " destination_land_use destination_building_use trip_taker_person_id \\\n",
+ "61898 retail retail 15982840578351068133 \n",
+ "\n",
+ " trip_taker_household_id trip_taker_age trip_taker_sex \\\n",
+ "61898 16243967259505879865 48.00 male \n",
+ "\n",
+ " trip_taker_race_ethnicity trip_taker_employment_status trip_taker_wfh \\\n",
+ "61898 hispanic_or_latino_origin employed in_person \n",
+ "\n",
+ " trip_taker_individual_income trip_taker_commute_mode \\\n",
+ "61898 46,903.00 private_auto \n",
+ "\n",
+ " trip_taker_household_size trip_taker_household_income \\\n",
+ "61898 5.00 88,596.00 \n",
+ "\n",
+ " trip_taker_available_vehicles trip_taker_resident_type \\\n",
+ "61898 zero core \n",
+ "\n",
+ " trip_taker_industry trip_taker_building_type \\\n",
+ "61898 naics56 single_family \n",
+ "\n",
+ " trip_taker_school_grade_attending trip_taker_education \\\n",
+ "61898 not_attending_school high_school \n",
+ "\n",
+ " trip_taker_tenure trip_taker_language trip_taker_home_bgrp_2020 \\\n",
+ "61898 renter spanish 2 (Tract 1111, Los Angeles, CA) \n",
+ "\n",
+ " trip_taker_home_trct_2020 trip_taker_home_cty_2020 \\\n",
+ "61898 1111 (Los Angeles, CA) Los Angeles County, CA \n",
+ "\n",
+ " trip_taker_home_st_2020 trip_taker_work_bgrp_2020 \\\n",
+ "61898 California 1 (Tract 2060.51, Los Angeles, CA) \n",
+ "\n",
+ " trip_taker_work_trct_2020 trip_taker_work_cty_2020 \\\n",
+ "61898 2060.51 (Los Angeles, CA) Los Angeles County, CA \n",
+ "\n",
+ " trip_taker_work_st_2020 \n",
+ "61898 California "
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(r_trips>>filter(_.primary_mode==\"public_transit\")).sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2025391f-537e-432a-a2f4-d2d6ce57af04",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1af1d8ca-813c-44bc-ab93-c35bb11f0ea5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "eb27ab6b-6244-406f-b41a-55503a77521a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agencies_test, mode_test = _utils.get_tranist_agency_counts(r_trips, \"primary_mode\", \"transit_submode\", \"transit_agency\", \"activity_id\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "92ed5077-3524-4c39-8cf4-bf1b7922da2a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " primary_mode | \n",
+ " transit_submode | \n",
+ " transit_agency | \n",
+ " n | \n",
+ " agency_count | \n",
+ " n_modes_taken | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 15 | \n",
+ " public_transit | \n",
+ " bus, bus | \n",
+ " Metro - Los Angeles, Metro - Los Angeles | \n",
+ " 501 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " public_transit | \n",
+ " bus, bus, bus | \n",
+ " Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles | \n",
+ " 375 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " public_transit | \n",
+ " bus, bus, bus | \n",
+ " Big Blue Bus, Metro - Los Angeles, Metro - Los Angeles | \n",
+ " 192 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " public_transit | \n",
+ " bus, bus | \n",
+ " Big Blue Bus, Metro - Los Angeles | \n",
+ " 171 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " public_transit | \n",
+ " bus | \n",
+ " Metro - Los Angeles | \n",
+ " 145 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 118 | \n",
+ " public_transit | \n",
+ " rail, bus, bus, bus | \n",
+ " Amtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 119 | \n",
+ " public_transit | \n",
+ " rail, bus, bus, rail | \n",
+ " Amtrak, Big Blue Bus, Metro - Los Angeles, Amtrak | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 120 | \n",
+ " public_transit | \n",
+ " rail, bus, light_rail, bus | \n",
+ " Amtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 121 | \n",
+ " public_transit | \n",
+ " subway, bus | \n",
+ " Metro - Los Angeles, Metro - Los Angeles | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 122 | \n",
+ " public_transit | \n",
+ " subway, bus, bus | \n",
+ " Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
123 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " primary_mode transit_submode \\\n",
+ "15 public_transit bus, bus \n",
+ "57 public_transit bus, bus, bus \n",
+ "27 public_transit bus, bus, bus \n",
+ "5 public_transit bus, bus \n",
+ "1 public_transit bus \n",
+ ".. ... ... \n",
+ "118 public_transit rail, bus, bus, bus \n",
+ "119 public_transit rail, bus, bus, rail \n",
+ "120 public_transit rail, bus, light_rail, bus \n",
+ "121 public_transit subway, bus \n",
+ "122 public_transit subway, bus, bus \n",
+ "\n",
+ " transit_agency \\\n",
+ "15 Metro - Los Angeles, Metro - Los Angeles \n",
+ "57 Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n",
+ "27 Big Blue Bus, Metro - Los Angeles, Metro - Los Angeles \n",
+ "5 Big Blue Bus, Metro - Los Angeles \n",
+ "1 Metro - Los Angeles \n",
+ ".. ... \n",
+ "118 Amtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n",
+ "119 Amtrak, Big Blue Bus, Metro - Los Angeles, Amtrak \n",
+ "120 Amtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n",
+ "121 Metro - Los Angeles, Metro - Los Angeles \n",
+ "122 Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n",
+ "\n",
+ " n agency_count n_modes_taken \n",
+ "15 501 1 2 \n",
+ "57 375 1 3 \n",
+ "27 192 2 3 \n",
+ "5 171 2 2 \n",
+ "1 145 1 1 \n",
+ ".. ... ... ... \n",
+ "118 1 2 4 \n",
+ "119 1 3 4 \n",
+ "120 1 2 4 \n",
+ "121 1 1 2 \n",
+ "122 1 1 3 \n",
+ "\n",
+ "[123 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "agencies_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "0cd5c4f6-116a-41a9-b67c-d4146994ee36",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " transit_submode | \n",
+ " n | \n",
+ " n_modes_taken | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " bus, bus, bus | \n",
+ " 855 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " bus, bus | \n",
+ " 842 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " bus | \n",
+ " 163 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " bus, light_rail, bus | \n",
+ " 124 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " bus, bus, bus, bus | \n",
+ " 85 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " light_rail, bus | \n",
+ " 56 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " light_rail, bus, bus | \n",
+ " 24 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " bus, light_rail, bus, bus | \n",
+ " 18 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " light_rail, light_rail, bus | \n",
+ " 12 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " bus, bus, subway | \n",
+ " 7 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " bus, light_rail, light_rail, bus | \n",
+ " 7 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " rail, bus, bus | \n",
+ " 5 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " bus, bus, light_rail | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " light_rail, bus, bus, bus | \n",
+ " 2 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " rail, bus, bus, bus | \n",
+ " 2 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " bus, bus, light_rail, bus | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " bus, light_rail | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " bus, rail | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " rail, bus, bus, rail | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " rail, bus, light_rail, bus | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " subway, bus | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " subway, bus, bus | \n",
+ " 1 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " transit_submode n n_modes_taken\n",
+ "2 bus, bus, bus 855 3\n",
+ "1 bus, bus 842 2\n",
+ "0 bus 163 1\n",
+ "8 bus, light_rail, bus 124 3\n",
+ "3 bus, bus, bus, bus 85 4\n",
+ "12 light_rail, bus 56 2\n",
+ "13 light_rail, bus, bus 24 3\n",
+ "9 bus, light_rail, bus, bus 18 4\n",
+ "15 light_rail, light_rail, bus 12 3\n",
+ "6 bus, bus, subway 7 3\n",
+ "10 bus, light_rail, light_rail, bus 7 4\n",
+ "16 rail, bus, bus 5 3\n",
+ "4 bus, bus, light_rail 2 3\n",
+ "14 light_rail, bus, bus, bus 2 4\n",
+ "17 rail, bus, bus, bus 2 4\n",
+ "5 bus, bus, light_rail, bus 1 4\n",
+ "7 bus, light_rail 1 2\n",
+ "11 bus, rail 1 2\n",
+ "18 rail, bus, bus, rail 1 4\n",
+ "19 rail, bus, light_rail, bus 1 4\n",
+ "20 subway, bus 1 2\n",
+ "21 subway, bus, bus 1 3"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mode_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d93bd0e-f30d-4a53-9077-25818eef0cf7",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "cb12d509-d786-458c-894a-193c80666bb6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## This line of code gets all agencies listed, even if agency is named twice. \n",
+ "## Need code that counts the unique agencies\n",
+ "#ptt_agency_count['agency_count'] = ptt_agency_count.transit_agency.apply(lambda x: len(x.split(\", \")))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0b628473-3842-44df-bbe0-9e17a1e250f7",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "589d2a23-b528-4de9-b6a0-a10f88a6da5c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "505f47b1-229a-4cd1-90b8-b83b4ccdd6c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "modes_count = agencies_test>>group_by(_.n_modes_taken)>>summarize(n_trips = _.n.sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "97746912-39f0-4c85-80d3-a00ed47922a8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " n_modes_taken | \n",
+ " n_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 163 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 901 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 1030 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 117 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " n_modes_taken n_trips\n",
+ "0 1 163\n",
+ "1 2 901\n",
+ "2 3 1030\n",
+ "3 4 117"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "modes_count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "5353a2e4-762d-4a27-8f32-267c2c55bb90",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart((modes_count))\n",
+ " .mark_bar(size=60)\n",
+ " .encode(\n",
+ " x=alt.X(\"n_modes_taken\", title =\"Number of Modes Taken per Trip\"),\n",
+ " y=alt.Y(\"n_trips\", title = \"Number of Trips\"),\n",
+ " color=alt.Color(\"n_trips\", title = \"Number of Trips\",\n",
+ " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n",
+ " tooltip=modes_count.columns.tolist())\n",
+ " \n",
+ " .properties(title = \"How Many Modes are Taken Per Trip\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "71d43883-c5fe-4dff-9084-41426c399cec",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "68cafc15-d68a-49ea-9e03-48bbcb6513d5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "fb8565b9-989a-459a-bede-433de30252e7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agency_mode_trips = agencies_test>>group_by(_.agency_count, _.n_modes_taken)>>summarize(ntrips=_.n.sum())>>arrange(-_.ntrips)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "b03dbe0a-a759-4fa9-849b-b31b318f86cd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart((agency_mode_trips))\n",
+ " .mark_circle(size=100)\n",
+ " .encode(\n",
+ " x=alt.X(\"agency_count\", title =\"Number of Agencies\"),\n",
+ " y=alt.Y(\"n_modes_taken\", title = \"Number of Modes Taken\"),\n",
+ " color=alt.Color(\"ntrips\", title = \"Number of Trips\",\n",
+ " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n",
+ " tooltip=agency_mode_trips.columns.tolist())\n",
+ " \n",
+ " .properties(title = \"Number of Agencies vs. Number of Modes Taken Per Trip\",\n",
+ " width=800,\n",
+ " height=300)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "91273d26-1fd1-42a1-b829-a77398c606fc",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "6bd200d7-c0b9-4649-adf1-e025d216328b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "##### Getting columns for each agency and counts "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ebb28116-cc54-43ce-ac77-9dc9e798d58a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c39646f-7503-4c96-a594-5bbe5960d9f0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "e8f5f457-ce9f-4bfb-99f6-82e6eb7d11f0",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# ptt_agency_count['transit_agency'].unique().tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c1af5bd-c08e-472e-9b71-aac3c29ceb71",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "3c399936-bc62-491c-b71d-89a861e02fc7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agency_list = _utils.get_list_of_agencies(agencies_test, \"transit_agency\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "416d16f0-3128-4857-9136-69f0403ce5cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# len(agency_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "f303acae-da1b-44a5-98b2-2f8f5b98f56a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Agencies Identified in Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'Amtrak',\n",
+ " 'Avta',\n",
+ " 'Beach Cities Transit-City Of Redondo Beach',\n",
+ " 'Big Blue Bus',\n",
+ " 'Culver Citybus',\n",
+ " 'Foothill Transit',\n",
+ " 'Gtrans',\n",
+ " 'La Go Bus',\n",
+ " 'Ladot126',\n",
+ " 'Ladotdt',\n",
+ " 'Ladotmvn',\n",
+ " 'Ladotmvs',\n",
+ " 'Long Beach Transit',\n",
+ " 'Metro - Los Angeles',\n",
+ " 'Metrolink Trains',\n",
+ " 'Santa Clarita Transit'}"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Agencies Identified in Trips Data
\")) \n",
+ "\n",
+ "(agency_list)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8b121b63-083b-4898-9eb1-cb919698d9ca",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "34eada82-1f1c-4411-8dd4-3bb71b0ddb16",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "71af6e25-52b0-45e9-ba2b-a00a670e3010",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### making a copy of r_trips\n",
+ "# df = r_trips.copy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5bdc0792-219c-4877-880c-cd7d84db2303",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2de92f7b-2e28-4388-880f-9e34b8b603ea",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "89479d3a-eeb5-491a-a064-49a77194109e",
+ "metadata": {},
+ "source": [
+ "##### Trying ChatGPT approach"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "bd6f46a5-404f-41eb-9fd8-4600451e6d20",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# agency_indv_count = (df>>filter(_.primary_mode==\"public_transit\")>>select(_.transit_agency))\n",
+ "# agency_indv_count['transit_agency'] = agency_indv_count['transit_agency'].astype(str)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "33c93185-2e76-4257-840e-c7634f3cae05",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# agency_indv_count"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5a50d438-a9c1-4ade-8a38-0e17227dac09",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "045be8fb-d8fd-4019-8282-df53602b122b",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "efc4eb07-c0f9-4bc2-87bb-cac95f606657",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = _utils.get_dummies_by_agency(agencies_test, \"transit_agency\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ca4b0345-f196-40cc-8b26-f42abb58f199",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "97bc34c4-636f-46ca-a257-95e4af242584",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "##### Identifying trips with one agency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "46856b31-47ac-408c-903e-19d8b04283f7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cols_to_keep = [\"transit_submode\",\"unique_agencies\", \"n\",\"n_modes_taken\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "2603e7e0-389a-49fb-93e9-5dc1d52c6012",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# chart = (alt.Chart(df)\n",
+ "# .mark_circle(size=100)\n",
+ "# .encode(\n",
+ "# x=alt.X(\"n_modes_taken\", title=\"Number of Modes taken\"),\n",
+ "# y=alt.Y(\"n\", title=\"Number of Trips\"),\n",
+ "# color = alt.Color(\"agency_count\", title=\"Number of Unique Agencies\",\n",
+ "# scale=alt.Scale(\n",
+ "# range=cp.CALITP_DIVERGING_COLORS,\n",
+ "# domain=df[\"agency_count\"].unique().tolist())\n",
+ "# ),\n",
+ "# tooltip=cols_to_keep)\n",
+ "# .properties(title = (\"Transit Trips Agency Breakdown\"), width=500,\n",
+ "# height=300)\n",
+ "# )\n",
+ "# chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "149ed63c-436c-45fe-a526-5b1bf9f3d6fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# (df>>filter(_.agency_count==1)>>arrange(-_.n))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d400fa0e-2bb1-4d8a-9a63-6865afe33897",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "7228c540-3033-4023-8145-91f8aee23eeb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (alt.Chart((df>>filter(_.agency_count==1)))\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"unique_agencies\", title=\"Agency\"),\n",
+ " y=alt.Y(\"n\", title=\"Number of Trips\"),\n",
+ " color = alt.Color(\"n_modes_taken\", title=\"Number of Modes Taken\",\n",
+ " scale=alt.Scale(\n",
+ " range=cp.CALITP_SEQUENTIAL_COLORS,)),\n",
+ " tooltip=cols_to_keep)\n",
+ " .properties(title = (\"Transit Trips With Only One Agency\"), width=800,\n",
+ " height=500)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "60df338f-801f-49a1-a26f-0be5e03bddf3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Most Common Agency Combination
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " transit_agency | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 15 | \n",
+ " Metro - Los Angeles, Metro - Los Angeles | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Big Blue Bus, Metro - Los Angeles, Metro - Los Angeles | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Big Blue Bus, Metro - Los Angeles | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Metro - Los Angeles | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " transit_agency\n",
+ "15 Metro - Los Angeles, Metro - Los Angeles\n",
+ "57 Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles\n",
+ "27 Big Blue Bus, Metro - Los Angeles, Metro - Los Angeles\n",
+ "5 Big Blue Bus, Metro - Los Angeles\n",
+ "1 Metro - Los Angeles"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Most Common Agency Combination
\")) \n",
+ "\n",
+ "(df>>arrange(-_.n)>>select(_.transit_agency)).head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "786394ef-be26-4e0d-94e7-35386d6d67c0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fd43ab95-0391-407b-b6b0-185034ba9528",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "febd910a-2f1d-4b99-bb3b-76adb11f0b63",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "96b794f2-bcfd-4afb-9ce5-8793c15fe199",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/jovyan/data-analyses/sb125_analyses/corridor_study/_utils.py:102: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
+ " df_agencies['n_trips'] = df_agencies[list(df_agencies.columns)].sum(axis=1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "df_agencies = _utils.get_agencies_occurances(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "80cbffde-6686-4921-b543-84e52f97a0c5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " agency | \n",
+ " n_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Foothill Transit | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " LADOTMVS | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " LADOTMVN | \n",
+ " 54 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " AVTA | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Beach Cities Transit-City of Redondo Beach | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Metro - Los Angeles | \n",
+ " 195 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Long Beach Transit | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " LADOT126 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Santa Clarita Transit | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Metrolink Trains | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " LADOTDT | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Culver CityBus | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " GTrans | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Amtrak | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " LA Go Bus | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Big Blue Bus | \n",
+ " 37 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " agency n_trips\n",
+ "0 Foothill Transit 1\n",
+ "1 LADOTMVS 4\n",
+ "2 LADOTMVN 54\n",
+ "3 AVTA 4\n",
+ "4 Beach Cities Transit-City of Redondo Beach 2\n",
+ "5 Metro - Los Angeles 195\n",
+ "6 Long Beach Transit 1\n",
+ "7 LADOT126 4\n",
+ "8 Santa Clarita Transit 28\n",
+ "9 Metrolink Trains 1\n",
+ "10 LADOTDT 3\n",
+ "11 Culver CityBus 28\n",
+ "12 GTrans 1\n",
+ "13 Amtrak 8\n",
+ "14 LA Go Bus 1\n",
+ "15 Big Blue Bus 37"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_agencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "f98c76b9-b83f-42c3-aa12-702ade72ac38",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tooltip_cols = [\"agency\", \"n_trips\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "bf160b81-1bc2-4374-9529-6400252d5e4f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (alt.Chart(df_agencies)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"agency\", title = \"Agency Name\"),\n",
+ " y=alt.Y(\"n_trips\", title= \"Number of boardings reported for trips (One person taking two LA Metro trips will count as 2)\"),\n",
+ " color=alt.Color(\"n_trips\", scale=alt.Scale(range = cp.CALITP_SEQUENTIAL_COLORS)),\n",
+ " tooltip = tooltip_cols)\n",
+ " .properties(title = \"Number of Times an Agency was used for Trip Taking\",\n",
+ " width=800,\n",
+ " height=500)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a59f170f-b2e0-41b3-ae7a-1ae4eca12596",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "16b45917-adc3-44d0-8d76-15ccd4083d26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Trips by Resident Type
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Trips by Resident Type
\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "5163926e-82bd-4c25-9486-45229d09a0d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trip_by_res_type = (r_trips\n",
+ " >>group_by(_.primary_mode,_.trip_taker_resident_type)\n",
+ " >>summarize(number_trips = _.activity_id.nunique())\n",
+ " >>arrange(_.primary_mode))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "636e7771-0ba7-42e9-af1d-e3e998ff8599",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " primary_mode | \n",
+ " trip_taker_resident_type | \n",
+ " number_trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " auto_passenger | \n",
+ " core | \n",
+ " 116685 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " auto_passenger | \n",
+ " donut | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " auto_passenger | \n",
+ " visitor | \n",
+ " 28818 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " commercial | \n",
+ " NaN | \n",
+ " 10154 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " on_demand_auto | \n",
+ " core | \n",
+ " 3191 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " on_demand_auto | \n",
+ " visitor | \n",
+ " 595 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " private_auto | \n",
+ " core | \n",
+ " 294704 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " private_auto | \n",
+ " donut | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " private_auto | \n",
+ " visitor | \n",
+ " 10394 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " private_auto | \n",
+ " NaN | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " public_transit | \n",
+ " core | \n",
+ " 2155 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " public_transit | \n",
+ " visitor | \n",
+ " 56 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " primary_mode trip_taker_resident_type number_trips\n",
+ "0 auto_passenger core 116685\n",
+ "1 auto_passenger donut 2\n",
+ "2 auto_passenger visitor 28818\n",
+ "3 commercial NaN 10154\n",
+ "4 on_demand_auto core 3191\n",
+ "5 on_demand_auto visitor 595\n",
+ "6 private_auto core 294704\n",
+ "7 private_auto donut 1\n",
+ "8 private_auto visitor 10394\n",
+ "9 private_auto NaN 1\n",
+ "10 public_transit core 2155\n",
+ "11 public_transit visitor 56"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trip_by_res_type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "90c5d683-91c5-4663-accd-4e45027a3e49",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ "alt.Chart(...)"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chart = (\n",
+ " alt.Chart(trip_by_res_type)\n",
+ " .mark_bar()\n",
+ " .encode(\n",
+ " x=alt.X(\"primary_mode\", title = \"Mode\"),\n",
+ " y=alt.Y(\"number_trips\", title = \"Number of Trips\"),\n",
+ " color=alt.Color(\"trip_taker_resident_type\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n",
+ " domain=trip_by_res_type[\"trip_taker_resident_type\"].unique().tolist())\n",
+ " ),\n",
+ " tooltip=trip_by_res_type.columns.tolist())\n",
+ " .properties(title = \"Trips by Resident Type\",\n",
+ " width=800,\n",
+ " height=500)\n",
+ " )\n",
+ "chart "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f7b51fbb-8b87-4863-bcd9-50ed5047d7d6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "878c5c8e-18c3-456a-b641-67ad010f5101",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Unique Household Ids
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Unique Household Ids
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "2a31bf27-ccc4-4ce3-8b6a-31dbd14caaf2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_taker_household_id | \n",
+ " n | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 160790 | \n",
+ " | \n",
+ " 33634 | \n",
+ "
\n",
+ " \n",
+ " 267303 | \n",
+ " NaN | \n",
+ " 16384 | \n",
+ "
\n",
+ " \n",
+ " 79436 | \n",
+ " 9146817053558612433 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " 23626 | \n",
+ " 2709972826117271852 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " 30303 | \n",
+ " 3488167635549658463 | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 267296 | \n",
+ " 9999130854496671765 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 267299 | \n",
+ " 999930961444594419 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 267300 | \n",
+ " 9999534358623422158 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 267301 | \n",
+ " 9999886348099258237 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 267302 | \n",
+ " 9999902755724147714 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
267304 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_taker_household_id n\n",
+ "160790 33634\n",
+ "267303 NaN 16384\n",
+ "79436 9146817053558612433 16\n",
+ "23626 2709972826117271852 15\n",
+ "30303 3488167635549658463 14\n",
+ "... ... ...\n",
+ "267296 9999130854496671765 1\n",
+ "267299 999930961444594419 1\n",
+ "267300 9999534358623422158 1\n",
+ "267301 9999886348099258237 1\n",
+ "267302 9999902755724147714 1\n",
+ "\n",
+ "[267304 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>count(_.trip_taker_household_id)>>arrange(-_.n)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "eb9780a3-b996-45af-9cc3-b41629ec0e55",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Checking one household id
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Checking one household id
\")) \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "6fc3be54-b8dc-4cf8-893e-565ffa02934d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "16\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " activity_id | \n",
+ " origin_bgrp_2020 | \n",
+ " origin_trct_2020 | \n",
+ " origin_cty_2020 | \n",
+ " origin_st_2020 | \n",
+ " destination_bgrp_2020 | \n",
+ " destination_trct_2020 | \n",
+ " destination_cty_2020 | \n",
+ " destination_st_2020 | \n",
+ " primary_mode | \n",
+ " trip_purpose | \n",
+ " previous_trip_purpose | \n",
+ " trip_start_time | \n",
+ " trip_end_time | \n",
+ " trip_duration_minutes | \n",
+ " trip_distance_miles | \n",
+ " vehicle_type | \n",
+ " vehicle_fuel_type | \n",
+ " transit_submode | \n",
+ " transit_agency | \n",
+ " transit_route | \n",
+ " origin_land_use | \n",
+ " origin_building_use | \n",
+ " destination_land_use | \n",
+ " destination_building_use | \n",
+ " trip_taker_person_id | \n",
+ " trip_taker_household_id | \n",
+ " trip_taker_age | \n",
+ " trip_taker_sex | \n",
+ " trip_taker_race_ethnicity | \n",
+ " trip_taker_employment_status | \n",
+ " trip_taker_wfh | \n",
+ " trip_taker_individual_income | \n",
+ " trip_taker_commute_mode | \n",
+ " trip_taker_household_size | \n",
+ " trip_taker_household_income | \n",
+ " trip_taker_available_vehicles | \n",
+ " trip_taker_resident_type | \n",
+ " trip_taker_industry | \n",
+ " trip_taker_building_type | \n",
+ " trip_taker_school_grade_attending | \n",
+ " trip_taker_education | \n",
+ " trip_taker_tenure | \n",
+ " trip_taker_language | \n",
+ " trip_taker_home_bgrp_2020 | \n",
+ " trip_taker_home_trct_2020 | \n",
+ " trip_taker_home_cty_2020 | \n",
+ " trip_taker_home_st_2020 | \n",
+ " trip_taker_work_bgrp_2020 | \n",
+ " trip_taker_work_trct_2020 | \n",
+ " trip_taker_work_cty_2020 | \n",
+ " trip_taker_work_st_2020 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 304389 | \n",
+ " 11797067000884676673 | \n",
+ " 1 (Tract 7010, Los Angeles, CA) | \n",
+ " 7010 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 1 (Tract 2623.01, Los Angeles, CA) | \n",
+ " 2623.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " auto_passenger | \n",
+ " home | \n",
+ " work | \n",
+ " 18:33:00 | \n",
+ " 18:55:47 | \n",
+ " 22 | \n",
+ " 10.00 | \n",
+ " unknown_vehicle_type | \n",
+ " unknown_fuel_type | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " office | \n",
+ " office | \n",
+ " single_family | \n",
+ " single_family | \n",
+ " 8423484308586815884 | \n",
+ " 9146817053558612433 | \n",
+ " 32.00 | \n",
+ " female | \n",
+ " asian_not_hispanic_or_latino | \n",
+ " employed | \n",
+ " in_person | \n",
+ " 17,261.00 | \n",
+ " private_auto | \n",
+ " 11.00 | \n",
+ " 318,660.00 | \n",
+ " three_plus | \n",
+ " core | \n",
+ " naics812199 | \n",
+ " single_family | \n",
+ " not_attending_school | \n",
+ " some_college | \n",
+ " owner | \n",
+ " asian_pacific | \n",
+ " 1 (Tract 2623.01, Los Angeles, CA) | \n",
+ " 2623.01 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ " 1 (Tract 7010, Los Angeles, CA) | \n",
+ " 7010 (Los Angeles, CA) | \n",
+ " Los Angeles County, CA | \n",
+ " California | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " activity_id origin_bgrp_2020 \\\n",
+ "304389 11797067000884676673 1 (Tract 7010, Los Angeles, CA) \n",
+ "\n",
+ " origin_trct_2020 origin_cty_2020 origin_st_2020 \\\n",
+ "304389 7010 (Los Angeles, CA) Los Angeles County, CA California \n",
+ "\n",
+ " destination_bgrp_2020 destination_trct_2020 \\\n",
+ "304389 1 (Tract 2623.01, Los Angeles, CA) 2623.01 (Los Angeles, CA) \n",
+ "\n",
+ " destination_cty_2020 destination_st_2020 primary_mode \\\n",
+ "304389 Los Angeles County, CA California auto_passenger \n",
+ "\n",
+ " trip_purpose previous_trip_purpose trip_start_time trip_end_time \\\n",
+ "304389 home work 18:33:00 18:55:47 \n",
+ "\n",
+ " trip_duration_minutes trip_distance_miles vehicle_type \\\n",
+ "304389 22 10.00 unknown_vehicle_type \n",
+ "\n",
+ " vehicle_fuel_type transit_submode transit_agency transit_route \\\n",
+ "304389 unknown_fuel_type NaN NaN NaN \n",
+ "\n",
+ " origin_land_use origin_building_use destination_land_use \\\n",
+ "304389 office office single_family \n",
+ "\n",
+ " destination_building_use trip_taker_person_id trip_taker_household_id \\\n",
+ "304389 single_family 8423484308586815884 9146817053558612433 \n",
+ "\n",
+ " trip_taker_age trip_taker_sex trip_taker_race_ethnicity \\\n",
+ "304389 32.00 female asian_not_hispanic_or_latino \n",
+ "\n",
+ " trip_taker_employment_status trip_taker_wfh \\\n",
+ "304389 employed in_person \n",
+ "\n",
+ " trip_taker_individual_income trip_taker_commute_mode \\\n",
+ "304389 17,261.00 private_auto \n",
+ "\n",
+ " trip_taker_household_size trip_taker_household_income \\\n",
+ "304389 11.00 318,660.00 \n",
+ "\n",
+ " trip_taker_available_vehicles trip_taker_resident_type \\\n",
+ "304389 three_plus core \n",
+ "\n",
+ " trip_taker_industry trip_taker_building_type \\\n",
+ "304389 naics812199 single_family \n",
+ "\n",
+ " trip_taker_school_grade_attending trip_taker_education \\\n",
+ "304389 not_attending_school some_college \n",
+ "\n",
+ " trip_taker_tenure trip_taker_language \\\n",
+ "304389 owner asian_pacific \n",
+ "\n",
+ " trip_taker_home_bgrp_2020 trip_taker_home_trct_2020 \\\n",
+ "304389 1 (Tract 2623.01, Los Angeles, CA) 2623.01 (Los Angeles, CA) \n",
+ "\n",
+ " trip_taker_home_cty_2020 trip_taker_home_st_2020 \\\n",
+ "304389 Los Angeles County, CA California \n",
+ "\n",
+ " trip_taker_work_bgrp_2020 trip_taker_work_trct_2020 \\\n",
+ "304389 1 (Tract 7010, Los Angeles, CA) 7010 (Los Angeles, CA) \n",
+ "\n",
+ " trip_taker_work_cty_2020 trip_taker_work_st_2020 \n",
+ "304389 Los Angeles County, CA California "
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print(len(r_trips>>filter(_.trip_taker_household_id == 9146817053558612433)))\n",
+ "(r_trips>>filter(_.trip_taker_household_id == 9146817053558612433)).sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "4eccfdf8-429e-43df-899e-f7d77e051831",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_taker_person_id | \n",
+ " trip_taker_age | \n",
+ " trip_taker_sex | \n",
+ " trip_taker_household_size | \n",
+ " n | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 4508016952300979367 | \n",
+ " 25.00 | \n",
+ " female | \n",
+ " 11.00 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4736398356956282212 | \n",
+ " 38.00 | \n",
+ " female | \n",
+ " 11.00 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 8423484308586815884 | \n",
+ " 32.00 | \n",
+ " female | \n",
+ " 11.00 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 12395012647665989305 | \n",
+ " 21.00 | \n",
+ " female | \n",
+ " 11.00 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 14673592671547908751 | \n",
+ " 18.00 | \n",
+ " female | \n",
+ " 11.00 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 16545634957747565398 | \n",
+ " 49.00 | \n",
+ " female | \n",
+ " 11.00 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 16684115452596066816 | \n",
+ " 53.00 | \n",
+ " male | \n",
+ " 11.00 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_taker_person_id trip_taker_age trip_taker_sex \\\n",
+ "0 4508016952300979367 25.00 female \n",
+ "1 4736398356956282212 38.00 female \n",
+ "2 8423484308586815884 32.00 female \n",
+ "3 12395012647665989305 21.00 female \n",
+ "4 14673592671547908751 18.00 female \n",
+ "5 16545634957747565398 49.00 female \n",
+ "6 16684115452596066816 53.00 male \n",
+ "\n",
+ " trip_taker_household_size n \n",
+ "0 11.00 5 \n",
+ "1 11.00 2 \n",
+ "2 11.00 2 \n",
+ "3 11.00 1 \n",
+ "4 11.00 4 \n",
+ "5 11.00 1 \n",
+ "6 11.00 1 "
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "## checking one household id\n",
+ "r_trips>>filter(_.trip_taker_household_id == 9146817053558612433)>>count(_.trip_taker_person_id, _.trip_taker_age,\n",
+ " _.trip_taker_sex, _.trip_taker_household_size)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c929c77d-0173-4a7d-be49-762349daf5cb",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ab226b47-4366-4fb9-aaae-e0aca760f9b3",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "1483249f-602c-4805-bcc4-d55e018022ad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Number of Travelers by Resident Type: Replica Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Number of Travelers by Resident Type: Replica Trips Data
\")) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "5c1894f0-bd42-4014-89d4-6e8d43809319",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_taker_resident_type | \n",
+ " _unique_ids | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " core | \n",
+ " 246597 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " donut | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " visitor | \n",
+ " 35198 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " NaN | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_taker_resident_type _unique_ids\n",
+ "0 core 246597\n",
+ "1 donut 2\n",
+ "2 visitor 35198\n",
+ "3 NaN 1"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r_trips>>group_by(_.trip_taker_resident_type)>>summarize(_unique_ids = _.trip_taker_person_id.nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0891b62-c968-4ea0-bd9b-753d299a5054",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "2e4bd058-9c4d-4989-a5fc-db6cf6130bf2",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Traveler Demographics: Replica Trips Data
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "Filtered for Core Residents"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " primary_mode | \n",
+ " trip_taker_sex | \n",
+ " n_ppl | \n",
+ " avg_h_income | \n",
+ " avg_p_income | \n",
+ " avg_age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " auto_passenger | \n",
+ " female | \n",
+ " 44266 | \n",
+ " 162,899.90 | \n",
+ " 57,807.91 | \n",
+ " 43.65 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " auto_passenger | \n",
+ " male | \n",
+ " 43836 | \n",
+ " 173,439.46 | \n",
+ " 87,237.70 | \n",
+ " 42.86 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " commercial | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " on_demand_auto | \n",
+ " female | \n",
+ " 1560 | \n",
+ " 181,325.52 | \n",
+ " 58,599.19 | \n",
+ " 46.58 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " on_demand_auto | \n",
+ " male | \n",
+ " 1532 | \n",
+ " 181,278.14 | \n",
+ " 92,035.74 | \n",
+ " 43.23 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " private_auto | \n",
+ " female | \n",
+ " 90634 | \n",
+ " 167,383.19 | \n",
+ " 62,989.92 | \n",
+ " 43.79 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " private_auto | \n",
+ " male | \n",
+ " 99967 | \n",
+ " 174,477.16 | \n",
+ " 94,470.93 | \n",
+ " 43.81 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " private_auto | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " public_transit | \n",
+ " female | \n",
+ " 927 | \n",
+ " 81,553.45 | \n",
+ " 39,058.57 | \n",
+ " 42.90 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " public_transit | \n",
+ " male | \n",
+ " 979 | \n",
+ " 97,733.91 | \n",
+ " 44,295.90 | \n",
+ " 39.82 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " primary_mode trip_taker_sex n_ppl avg_h_income avg_p_income avg_age\n",
+ "0 auto_passenger female 44266 162,899.90 57,807.91 43.65\n",
+ "1 auto_passenger male 43836 173,439.46 87,237.70 42.86\n",
+ "2 commercial NaN 1 NaN NaN NaN\n",
+ "3 on_demand_auto female 1560 181,325.52 58,599.19 46.58\n",
+ "4 on_demand_auto male 1532 181,278.14 92,035.74 43.23\n",
+ "5 private_auto female 90634 167,383.19 62,989.92 43.79\n",
+ "6 private_auto male 99967 174,477.16 94,470.93 43.81\n",
+ "7 private_auto NaN 1 NaN NaN NaN\n",
+ "8 public_transit female 927 81,553.45 39,058.57 42.90\n",
+ "9 public_transit male 979 97,733.91 44,295.90 39.82"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "display(HTML(\"Traveler Demographics: Replica Trips Data
\")) \n",
+ "display(HTML(\"Filtered for Core Residents\")) \n",
+ "(r_trips\n",
+ " >>filter(_.trip_taker_home_bgrp_2020!=\"Visitor (no home location)\")\n",
+ " >>group_by(_.primary_mode, _.trip_taker_sex)\n",
+ " >>summarize(\n",
+ " n_ppl = _.trip_taker_person_id.nunique(),\n",
+ " avg_h_income = _.trip_taker_household_income.mean(),\n",
+ " avg_p_income = _.trip_taker_individual_income.mean(),\n",
+ " avg_age = _.trip_taker_age.mean())\n",
+ ")\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a1dba8b5-7356-441f-8e6c-0f98e0f80c4c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aa84cc8c-8b17-41b6-abdf-c8397ef1e706",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "101bbd51-38b9-4791-8119-f0dc3002b6a6",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbc819b1-0dfc-46be-90bd-b136263dc22c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "state": {},
+ "version_major": 2,
+ "version_minor": 0
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}