"""
Replica and Streetlight Analysis Utils
"""

import pandas as pd
from siuba import *
import ast

from calitp_data_analysis.sql import to_snakecase

import altair as alt
from calitp_data_analysis import calitp_color_palette as cp


# ---------------------------------------------------------------------------
# Replica Analysis Utils
# ---------------------------------------------------------------------------

# Separator used between agency names inside a single cell.
_AGENCY_SEP = ", "

# Columns that get_agencies_occurances skips by default because they are
# not per-agency indicator columns.
_NON_AGENCY_COLS = frozenset(
    {
        "transit_agency",
        "unique_agencies",
        "primary_mode",
        "transit_submode",
        "n",
        "agency_count",
        "n_modes_taken",
    }
)


def _split_agencies(value, sep=_AGENCY_SEP):
    """Split one cell into a list of agency names.

    Missing values (None / NaN) and empty tokens yield no names.
    """
    if pd.isna(value):
        return []
    return [token for token in str(value).split(sep) if token]


def get_tranist_agency_counts(df, primary_mode_col, transit_mode_col, transit_agency_col, activity_id_col):
    """Summarize public-transit trips by agency combination and by mode.

    Only rows where ``primary_mode_col`` equals ``"public_transit"`` are used.

    Parameters
    ----------
    df : pandas.DataFrame
        Trip-level data.
    primary_mode_col, transit_mode_col, transit_agency_col, activity_id_col : str
        Names of the columns holding the primary mode, the transit
        mode string, the transit agency string and the trip identifier.

    Returns
    -------
    (agencies, modes) : tuple of pandas.DataFrame
        ``agencies`` has one row per (primary mode, transit mode, agency)
        group with ``n`` (distinct ``activity_id_col`` values),
        ``agency_count`` (distinct lower-cased names in the ", "-separated
        agency string) and ``n_modes_taken``. ``modes`` has one row per
        transit mode string with its row count ``n`` and ``n_modes_taken``.
        Both are sorted by ``n`` descending.
    """
    # Filter once; both summaries start from the same subset.
    transit_trips = df >> filter(_[primary_mode_col] == "public_transit")

    ## df with the agency counts
    agencies = (
        transit_trips
        >> group_by(_[primary_mode_col], _[transit_mode_col], _[transit_agency_col])
        >> summarize(n=_[activity_id_col].nunique())
        >> arrange(-_.n)
    )

    agencies[transit_mode_col] = agencies[transit_mode_col].astype(str)
    agencies[transit_agency_col] = agencies[transit_agency_col].astype(str)

    # Case-insensitive count of distinct agencies in each agency string.
    agencies["agency_count"] = [
        len(set(names.split(_AGENCY_SEP)))
        for names in agencies[transit_agency_col].str.lower()
    ]
    # NOTE(review): modes are counted by whitespace-splitting, unlike the
    # ", " split used for agencies; a mode name containing a space would be
    # counted more than once -- confirm against the data.
    agencies["n_modes_taken"] = agencies[transit_mode_col].apply(lambda x: len(x.split()))

    ## df with the mode counts
    modes = transit_trips >> count(_[transit_mode_col]) >> arrange(-_.n)

    modes[transit_mode_col] = modes[transit_mode_col].astype(str)
    modes["n_modes_taken"] = modes[transit_mode_col].apply(lambda x: len(x.split()))

    return agencies, modes


def get_list_of_agencies(df, transit_agency_col):
    """Return the set of title-cased agency names found in a column.

    Each cell of ``transit_agency_col`` is expected to hold one or more
    agency names separated by ", ". Names are title-cased with
    ``str.title`` (so e.g. "BART" becomes "Bart"), matching the previous
    behaviour of this function.

    Missing values are skipped and an empty frame yields an empty set.
    Names are split directly instead of being round-tripped through
    ``ast.literal_eval``, so names containing quotes no longer fail.
    """
    agency_names = set()
    for cell in df[transit_agency_col]:
        agency_names.update(name.title() for name in _split_agencies(cell))
    return agency_names


def get_dummies_by_agency(df, col):
    """Add one occurrence-count column per agency found in ``df[col]``.

    Each cell of ``col`` is split on ", ". For every distinct agency name a
    column with that name is added holding how many times that exact name
    appears in the row. A ``unique_agencies`` column is also added with the
    row's distinct names, sorted and joined with ", ".

    Names are compared as whole tokens, not as regular expressions or
    substrings, so "BART" is not counted inside "BART Shuttle" and names
    containing regex metacharacters are handled.

    ``df`` is modified in place and also returned.
    """
    tokens_per_row = df[col].apply(_split_agencies)

    # Sorted so the order of the added columns is deterministic.
    all_agencies = sorted(set().union(*tokens_per_row))

    for agency in all_agencies:
        df[agency] = tokens_per_row.apply(lambda tokens, name=agency: tokens.count(name))

    ### adding column for unique agencies list
    df["unique_agencies"] = tokens_per_row.apply(
        lambda tokens: _AGENCY_SEP.join(sorted(set(tokens)))
    )

    return df


def get_agencies_occurances(df, non_agency_cols=None):
    """Total each per-agency column into an (agency, n_trips) table.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing per-agency numeric columns, e.g. the output of
        ``get_dummies_by_agency``.
    non_agency_cols : iterable of str, optional
        Column names to ignore. Defaults to the bookkeeping columns
        produced by the other helpers in this module.

    Returns
    -------
    pandas.DataFrame
        Columns ``agency`` (the source column name) and ``n_trips`` (the
        sum of that column over all rows).

    NOTE(review): ``n_trips`` is the plain sum of each agency column; it is
    not weighted by the ``n`` column -- confirm this is the intended count.
    """
    excluded = _NON_AGENCY_COLS if non_agency_cols is None else set(non_agency_cols)
    agency_cols = [name for name in df.columns if name not in excluded]

    # Sum column-wise over numeric data only, so the string label column is
    # never part of the sum (the old transpose + row-wise sum included it).
    totals = df.loc[:, agency_cols].sum(numeric_only=True)

    return totals.rename_axis("agency").reset_index(name="n_trips")


# Correctly spelled aliases; the original names are kept for existing callers.
get_transit_agency_counts = get_tranist_agency_counts
get_agencies_occurrences = get_agencies_occurances


# ---------------------------------------------------------------------------
# Streetlight Analysis Utils
# ---------------------------------------------------------------------------
b/sb125_analyses/corridor_study/data_downloads_baybridge.ipynb @@ -0,0 +1,5807 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "82a53f1d-2622-4cb0-bfdd-36ceec652215", + "metadata": {}, + "source": [ + "# SB1 Big Data Downloads\n", + "An analysis into the corridor analysis data downloads from Streetlight and Replica" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "45233485-2055-499a-a89e-fc154fd56e63", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_483/4150955979.py:10: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n", + " from IPython.core.display import display\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from siuba import *\n", + "\n", + "from calitp_data_analysis.sql import to_snakecase\n", + "\n", + "import altair as alt\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "\n", + "from IPython.display import Markdown, HTML, display_html, display\n", + "from IPython.core.display import display\n", + "\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "577fe167-1d9d-4676-999c-ccb2b89575ff", + "metadata": {}, + "outputs": [], + "source": [ + "import _utils" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "651aae92-5188-4676-8e5f-3c040f77077c", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', 500)\n", + "pd.set_option('display.max_colwidth', 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "af402da0-0b03-4c71-a1af-19f97f67cef1", + "metadata": {}, + "outputs": [], + "source": [ + "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/sb125/corridor_study_data/\"" + ] + }, + { + "cell_type": "markdown", + "id": "71c1deda-8e83-45d8-a0f5-20b36b7051c0", + "metadata": {}, + "source": [ + "### Read in Replica Data Spring 2023" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0e5c1b44-c981-4a9a-b6ea-e1cb2a7a279e", + "metadata": {}, + "outputs": [], + "source": [ + "replica_trips = \"replica-bay_bridge-03_06_24-trips_dataset.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b0bd67f0-6f82-4f2c-8da0-3627a2813571", + "metadata": {}, + "outputs": [], + "source": [ + "replica_ppl = \"replica-bay_bridge-03_06_24-people_dataset.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9f14685-0953-4b1d-a636-882230fef1d5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cb3dea26-4e6d-4dc7-acc8-27b899a0a537", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_483/2643890524.py:1: DtypeWarning: Columns (2,4,5,9,10,11,18,25,26,29,30,35,36,40,43,48,50) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))\n" + ] + } + ], + "source": [ + "r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ca697494-b872-4de8-afd9-c538a455364c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Sample of data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Sample of data

\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4713b1a5-ccac-4e1b-8552-850285cad53e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
origin_building_usetrip_duration_minutestrip_taker_languageorigin_trct_2020trip_taker_household_idtrip_taker_sextrip_taker_home_bgrp_2020origin_st_2020trip_taker_home_st_2020trip_taker_race_ethnicitytransit_agencytransit_routetrip_taker_resident_typevehicle_typetrip_taker_ageactivity_iddestination_bgrp_2020destination_building_usetrip_taker_wfhdestination_st_2020destination_cty_2020trip_taker_work_bgrp_2020destination_trct_2020trip_taker_household_incomeorigin_bgrp_2020trip_taker_commute_modetrip_taker_available_vehiclesprimary_modeprevious_trip_purposetrip_taker_building_typetransit_submodetrip_taker_work_st_2020trip_taker_household_sizeorigin_cty_2020destination_land_usetrip_taker_industrytrip_taker_tenurevehicle_fuel_typetrip_taker_home_trct_2020trip_taker_work_trct_2020trip_taker_educationtrip_start_timetrip_taker_individual_incometrip_taker_employment_statustrip_purposeorigin_land_usetrip_taker_work_cty_2020trip_distance_milestrip_taker_school_grade_attendingtrip_taker_home_cty_2020trip_taker_person_idtrip_end_time
199078multi_family46other4501.01 (Alameda, CA)69970950706205725male2 (Tract 4501.01, Alameda, CA)CaliforniaCaliforniawhite_not_hispanic_or_latinoNaNNaNcoreunknown_vehicle_type39.044861975266645930402 (Tract 105, San Francisco, CA)non_retail_attractionin_personCaliforniaSan Francisco2 (Tract 105, San Francisco, CA)105 (San Francisco, CA)237966.02 (Tract 4501.01, Alameda, CA)public_transitoneauto_passengerhomemultiple_unitsNaNCalifornia4.0Alamedanon_retail_attractionnaics31_33renterunknown_fuel_type4501.01 (Alameda, CA)105 (San Francisco, CA)advanced_degree04:53:00237966.0employedworkmulti_familySan Francisco36.7not_attending_schoolAlameda1169901321102004668405:39:20
362910multi_family65english615.06 (San Francisco, CA)14710396768871133663female1 (Tract 615.06, San Francisco, CA)CaliforniaCaliforniawhite_not_hispanic_or_latinoNaNNaNcoreunknown_vehicle_type30.033887983626050912481 (Tract 3551.13, Contra Costa, CA)single_familyin_personCaliforniaContra Costa1 (Tract 615.01, San Francisco, CA)3551.13 (Contra Costa, CA)237586.01 (Tract 615.06, San Francisco, CA)walkingoneauto_passengerhomemultiple_unitsNaNCalifornia2.0San Franciscosingle_familynaics5151renterunknown_fuel_type615.06 (San Francisco, CA)615.01 (San Francisco, CA)bachelors_degree17:25:0077510.0employedsocialmixed_useSan Francisco33.7not_attending_schoolSan Francisco1427959646010248921018:30:49
397490education53indo_european301.02 (San Francisco, CA)11603413512180790232male5 (Tract 4381, Alameda, CA)CaliforniaCaliforniatwo_races_not_hispanic_or_latinoNaNNaNcoreunknown_vehicle_type46.0177484167974683437715 (Tract 4381, Alameda, CA)multi_familyin_personCaliforniaAlameda3 (Tract 301.02, San Francisco, CA)4381 (Alameda, CA)101548.03 (Tract 301.02, San Francisco, CA)private_autotwoprivate_autoworkseveral_unitsNaNCalifornia4.0San Franciscomulti_familynaics61ownerother_non_bev4381 (Alameda, CA)301.02 (San Francisco, CA)some_college20:53:0571051.0employedhomeeducationSan Francisco34.1not_attending_schoolAlameda937186022674168628721:46:49
646286single_family42asian_pacific3851 (Contra Costa, CA)8856161571093041221female2 (Tract 3851, Contra Costa, CA)CaliforniaCaliforniaasian_not_hispanic_or_latinoNaNNaNcoreunknown_vehicle_type61.010965869717100814241 (Tract 177, San Francisco, CA)retailunemployed_under_16_not_in_labor_forceCaliforniaSan FranciscoDoes not have work/school location177 (San Francisco, CA)163758.02 (Tract 3851, Contra Costa, CA)other_travel_modetwoauto_passengerhomesingle_familyNaNDoes not have work/school location4.0Contra Costamixed_usenot_workingownerunknown_fuel_type3851 (Contra Costa, CA)Does not have work/school locationhigh_school17:10:0011123.0not_in_labor_forceeatsingle_familyDoes not have work/school location17.5not_attending_schoolContra Costa1522420281691707967917:52:23
432031single_family30english3790 (Contra Costa, CA)11125850078941310816male1 (Tract 3790, Contra Costa, CA)CaliforniaCaliforniawhite_not_hispanic_or_latinoNaNNaNcoreunknown_vehicle_type58.0178781079434812569063 (Tract 151, San Francisco, CA)officein_personCaliforniaSan Francisco3 (Tract 151, San Francisco, CA)151 (San Francisco, CA)191291.01 (Tract 3790, Contra Costa, CA)private_autotwoauto_passengerhomesingle_familyNaNCalifornia3.0Contra Costamixed_usenaics42ownerunknown_fuel_type3790 (Contra Costa, CA)151 (San Francisco, CA)some_college13:50:0081981.0employedworksingle_familySan Francisco18.4not_attending_schoolContra Costa1766303327304763722814:20:30
\n", + "
" + ], + "text/plain": [ + " origin_building_use trip_duration_minutes trip_taker_language \\\n", + "199078 multi_family 46 other \n", + "362910 multi_family 65 english \n", + "397490 education 53 indo_european \n", + "646286 single_family 42 asian_pacific \n", + "432031 single_family 30 english \n", + "\n", + " origin_trct_2020 trip_taker_household_id trip_taker_sex \\\n", + "199078 4501.01 (Alameda, CA) 69970950706205725 male \n", + "362910 615.06 (San Francisco, CA) 14710396768871133663 female \n", + "397490 301.02 (San Francisco, CA) 11603413512180790232 male \n", + "646286 3851 (Contra Costa, CA) 8856161571093041221 female \n", + "432031 3790 (Contra Costa, CA) 11125850078941310816 male \n", + "\n", + " trip_taker_home_bgrp_2020 origin_st_2020 \\\n", + "199078 2 (Tract 4501.01, Alameda, CA) California \n", + "362910 1 (Tract 615.06, San Francisco, CA) California \n", + "397490 5 (Tract 4381, Alameda, CA) California \n", + "646286 2 (Tract 3851, Contra Costa, CA) California \n", + "432031 1 (Tract 3790, Contra Costa, CA) California \n", + "\n", + " trip_taker_home_st_2020 trip_taker_race_ethnicity \\\n", + "199078 California white_not_hispanic_or_latino \n", + "362910 California white_not_hispanic_or_latino \n", + "397490 California two_races_not_hispanic_or_latino \n", + "646286 California asian_not_hispanic_or_latino \n", + "432031 California white_not_hispanic_or_latino \n", + "\n", + " transit_agency transit_route trip_taker_resident_type \\\n", + "199078 NaN NaN core \n", + "362910 NaN NaN core \n", + "397490 NaN NaN core \n", + "646286 NaN NaN core \n", + "432031 NaN NaN core \n", + "\n", + " vehicle_type trip_taker_age activity_id \\\n", + "199078 unknown_vehicle_type 39.0 4486197526664593040 \n", + "362910 unknown_vehicle_type 30.0 3388798362605091248 \n", + "397490 unknown_vehicle_type 46.0 17748416797468343771 \n", + "646286 unknown_vehicle_type 61.0 1096586971710081424 \n", + "432031 unknown_vehicle_type 58.0 17878107943481256906 \n", + "\n", + " 
destination_bgrp_2020 destination_building_use \\\n", + "199078 2 (Tract 105, San Francisco, CA) non_retail_attraction \n", + "362910 1 (Tract 3551.13, Contra Costa, CA) single_family \n", + "397490 5 (Tract 4381, Alameda, CA) multi_family \n", + "646286 1 (Tract 177, San Francisco, CA) retail \n", + "432031 3 (Tract 151, San Francisco, CA) office \n", + "\n", + " trip_taker_wfh destination_st_2020 \\\n", + "199078 in_person California \n", + "362910 in_person California \n", + "397490 in_person California \n", + "646286 unemployed_under_16_not_in_labor_force California \n", + "432031 in_person California \n", + "\n", + " destination_cty_2020 trip_taker_work_bgrp_2020 \\\n", + "199078 San Francisco 2 (Tract 105, San Francisco, CA) \n", + "362910 Contra Costa 1 (Tract 615.01, San Francisco, CA) \n", + "397490 Alameda 3 (Tract 301.02, San Francisco, CA) \n", + "646286 San Francisco Does not have work/school location \n", + "432031 San Francisco 3 (Tract 151, San Francisco, CA) \n", + "\n", + " destination_trct_2020 trip_taker_household_income \\\n", + "199078 105 (San Francisco, CA) 237966.0 \n", + "362910 3551.13 (Contra Costa, CA) 237586.0 \n", + "397490 4381 (Alameda, CA) 101548.0 \n", + "646286 177 (San Francisco, CA) 163758.0 \n", + "432031 151 (San Francisco, CA) 191291.0 \n", + "\n", + " origin_bgrp_2020 trip_taker_commute_mode \\\n", + "199078 2 (Tract 4501.01, Alameda, CA) public_transit \n", + "362910 1 (Tract 615.06, San Francisco, CA) walking \n", + "397490 3 (Tract 301.02, San Francisco, CA) private_auto \n", + "646286 2 (Tract 3851, Contra Costa, CA) other_travel_mode \n", + "432031 1 (Tract 3790, Contra Costa, CA) private_auto \n", + "\n", + " trip_taker_available_vehicles primary_mode previous_trip_purpose \\\n", + "199078 one auto_passenger home \n", + "362910 one auto_passenger home \n", + "397490 two private_auto work \n", + "646286 two auto_passenger home \n", + "432031 two auto_passenger home \n", + "\n", + " trip_taker_building_type 
transit_submode \\\n", + "199078 multiple_units NaN \n", + "362910 multiple_units NaN \n", + "397490 several_units NaN \n", + "646286 single_family NaN \n", + "432031 single_family NaN \n", + "\n", + " trip_taker_work_st_2020 trip_taker_household_size \\\n", + "199078 California 4.0 \n", + "362910 California 2.0 \n", + "397490 California 4.0 \n", + "646286 Does not have work/school location 4.0 \n", + "432031 California 3.0 \n", + "\n", + " origin_cty_2020 destination_land_use trip_taker_industry \\\n", + "199078 Alameda non_retail_attraction naics31_33 \n", + "362910 San Francisco single_family naics5151 \n", + "397490 San Francisco multi_family naics61 \n", + "646286 Contra Costa mixed_use not_working \n", + "432031 Contra Costa mixed_use naics42 \n", + "\n", + " trip_taker_tenure vehicle_fuel_type trip_taker_home_trct_2020 \\\n", + "199078 renter unknown_fuel_type 4501.01 (Alameda, CA) \n", + "362910 renter unknown_fuel_type 615.06 (San Francisco, CA) \n", + "397490 owner other_non_bev 4381 (Alameda, CA) \n", + "646286 owner unknown_fuel_type 3851 (Contra Costa, CA) \n", + "432031 owner unknown_fuel_type 3790 (Contra Costa, CA) \n", + "\n", + " trip_taker_work_trct_2020 trip_taker_education \\\n", + "199078 105 (San Francisco, CA) advanced_degree \n", + "362910 615.01 (San Francisco, CA) bachelors_degree \n", + "397490 301.02 (San Francisco, CA) some_college \n", + "646286 Does not have work/school location high_school \n", + "432031 151 (San Francisco, CA) some_college \n", + "\n", + " trip_start_time trip_taker_individual_income \\\n", + "199078 04:53:00 237966.0 \n", + "362910 17:25:00 77510.0 \n", + "397490 20:53:05 71051.0 \n", + "646286 17:10:00 11123.0 \n", + "432031 13:50:00 81981.0 \n", + "\n", + " trip_taker_employment_status trip_purpose origin_land_use \\\n", + "199078 employed work multi_family \n", + "362910 employed social mixed_use \n", + "397490 employed home education \n", + "646286 not_in_labor_force eat single_family \n", + "432031 employed 
work single_family \n", + "\n", + " trip_taker_work_cty_2020 trip_distance_miles \\\n", + "199078 San Francisco 36.7 \n", + "362910 San Francisco 33.7 \n", + "397490 San Francisco 34.1 \n", + "646286 Does not have work/school location 17.5 \n", + "432031 San Francisco 18.4 \n", + "\n", + " trip_taker_school_grade_attending trip_taker_home_cty_2020 \\\n", + "199078 not_attending_school Alameda \n", + "362910 not_attending_school San Francisco \n", + "397490 not_attending_school Alameda \n", + "646286 not_attending_school Contra Costa \n", + "432031 not_attending_school Contra Costa \n", + "\n", + " trip_taker_person_id trip_end_time \n", + "199078 11699013211020046684 05:39:20 \n", + "362910 14279596460102489210 18:30:49 \n", + "397490 9371860226741686287 21:46:49 \n", + "646286 15224202816917079679 17:52:23 \n", + "432031 17663033273047637228 14:20:30 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2199852-9b6e-46ca-86fd-abe71148d13f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "75945ef2-eab7-469a-baad-ee563d70c309", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Columns in Replica Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Columns in Replica Trips Data

\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1f3e4cbb-d211-40b1-b4bb-2c7cb43b33a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['origin_building_use', 'trip_duration_minutes', 'trip_taker_language',\n", + " 'origin_trct_2020', 'trip_taker_household_id', 'trip_taker_sex',\n", + " 'trip_taker_home_bgrp_2020', 'origin_st_2020',\n", + " 'trip_taker_home_st_2020', 'trip_taker_race_ethnicity',\n", + " 'transit_agency', 'transit_route', 'trip_taker_resident_type',\n", + " 'vehicle_type', 'trip_taker_age', 'activity_id',\n", + " 'destination_bgrp_2020', 'destination_building_use', 'trip_taker_wfh',\n", + " 'destination_st_2020', 'destination_cty_2020',\n", + " 'trip_taker_work_bgrp_2020', 'destination_trct_2020',\n", + " 'trip_taker_household_income', 'origin_bgrp_2020',\n", + " 'trip_taker_commute_mode', 'trip_taker_available_vehicles',\n", + " 'primary_mode', 'previous_trip_purpose', 'trip_taker_building_type',\n", + " 'transit_submode', 'trip_taker_work_st_2020',\n", + " 'trip_taker_household_size', 'origin_cty_2020', 'destination_land_use',\n", + " 'trip_taker_industry', 'trip_taker_tenure', 'vehicle_fuel_type',\n", + " 'trip_taker_home_trct_2020', 'trip_taker_work_trct_2020',\n", + " 'trip_taker_education', 'trip_start_time',\n", + " 'trip_taker_individual_income', 'trip_taker_employment_status',\n", + " 'trip_purpose', 'origin_land_use', 'trip_taker_work_cty_2020',\n", + " 'trip_distance_miles', 'trip_taker_school_grade_attending',\n", + " 'trip_taker_home_cty_2020', 'trip_taker_person_id', 'trip_end_time'],\n", + " dtype='object')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e60fb5c7-a04b-48f3-8282-ca98a54dadd2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "92f9a5f4-08f1-43fc-ae01-c6bd404891d6", + 
"metadata": { + "tags": [] + }, + "source": [ + "#### Data Exploration: Replica Trips\n", + "* Summarizing the data that we exported from Replica\n", + "* Existing visualizations in replica: \n", + " * Primary Mode\n", + " * Trip Purpose\n", + " * Starting hour \n", + " * Trip Duration/Distance\n", + " * Origin Destination\n", + " * Vehicle Fuel type\n", + " * Transit Routes/Stops/Sub mode/Agency\n", + " * Household Income\n", + " * Race and Ethnicity\n", + " * Private Auto Availability\n", + " * Age\n", + " * Employment/School Status\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d7f3b44a-cd77-42e9-b0d8-7b9d228b37d5", + "metadata": {}, + "outputs": [], + "source": [ + "trips = r_trips>>group_by(_.primary_mode)>>summarize(avg_trip_time = _.trip_duration_minutes.mean(),\n", + " number_trips = _.activity_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "05893215-e993-44dc-9c4b-1aa7d94815cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(trips)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\"),\n", + " y=alt.Y(\"avg_trip_time\"),\n", + " color=alt.Color(\"avg_trip_time\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)\n", + " ), tooltip=trips.columns.tolist())\n", + " .properties(title = \"Average Trip Time\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "65681a29-7f1b-42ed-9e0e-371a84a29cbd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(trips)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\"),\n", + " y=alt.Y(\"number_trips\"),\n", + " color=alt.Color(\"number_trips\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,)\n", + " ), tooltip=trips.columns.tolist())\n", + " .properties(title=\"Number of Trips\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0d60fe54-dc88-4570-8610-125b37b5917d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Length of Trips dataframe

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Length of Trips dataframe

\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "4f98abae-ae6b-4b4e-9427-b606a557582f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "674864" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(r_trips)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "55c6ff06-afed-45dc-b764-93188c2b9958", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Number of unique activity ids in data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Number of unique activity ids in data

\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "27c00872-5880-4464-84a5-e91423ce7895", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
number_trips
0674864
\n", + "
" + ], + "text/plain": [ + " number_trips\n", + "0 674864" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>summarize(number_trips = _.activity_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a20cbce8-e354-4aa3-99db-666a13576b5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Number of unique trip taker ids

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Number of unique trip taker ids

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "2def0aaf-9182-4393-8049-2dfb95749585", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
number_trips
0309596
\n", + "
" + ], + "text/plain": [ + " number_trips\n", + "0 309596" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>summarize(number_trips = _.trip_taker_person_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "47fecd22-8276-48b8-9c74-d3a94906bbae", + "metadata": {}, + "outputs": [], + "source": [ + "race_ethnicity_by_mode = r_trips>>group_by(_.primary_mode)>>count(_.trip_taker_race_ethnicity)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4cbeb867-916b-48aa-9949-d769fcd5af5a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Trip Taker Race and Ethnicity by Mode

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Trip Taker Race and Ethnicity by Mode

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5e895499-2142-432d-8402-c035f81326b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(race_ethnicity_by_mode)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\"),\n", + " y=alt.Y(\"n\"),\n", + " color=alt.Color(\"trip_taker_race_ethnicity\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=race_ethnicity_by_mode[\"trip_taker_race_ethnicity\"].unique().tolist())\n", + " ),\n", + " tooltip=race_ethnicity_by_mode.columns.tolist())\n", + " \n", + " .properties(\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da3bbe59-9b06-471f-beb1-4762b8939600", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "500ad806-51ed-47e9-88a5-fc8a41edd7aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Transit Mode Splits

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Transit Mode Splits

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "d8cd6354-0433-4134-ad6e-9df92122cbd2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count.\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "261d7f48-f7c3-4ea7-a526-6f813ca16ede", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_agency_count = (r_trips\n", + "# >>filter(_.primary_mode==\"public_transit\")\n", + "# >>group_by(_.primary_mode, _.transit_submode, _.transit_agency)\n", + "# >>summarize(n =_.activity_id.nunique())\n", + "# >>arrange(-_.n))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "e4516beb-8277-4fd4-aed1-23af65fc4a28", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_agency_count" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "727511b8-3141-40e4-b654-30aaae8200d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Most common transit mode combinations

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Most common transit mode combinations

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "f3c31371-899f-4474-85b6-86e78e89209b", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_modes = (r_trips\n", + "# >>filter(_.primary_mode ==\"public_transit\")\n", + "# >>count(_.transit_submode)>>arrange(-_.n))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "657ff2f3-0d0d-419d-b3d5-512212897fb1", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_modes.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "80daf7a1-b947-469c-b3b0-09d0302714b7", + "metadata": {}, + "outputs": [], + "source": [ + "##### unnesting the transit submode and agencies to get counts. " + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "edd81120-9756-4edb-b713-1586a9fd5021", + "metadata": {}, + "outputs": [], + "source": [ + "agencies_test, mode_test = _utils.get_tranist_agency_counts(r_trips, \"primary_mode\", \"transit_submode\", \"transit_agency\", \"activity_id\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "2025391f-537e-432a-a2f4-d2d6ce57af04", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primary_modetransit_submodetransit_agencynagency_countn_modes_taken
0public_transitbusAC TRANSIT278811
12public_transitbus, busSan Francisco Municipal Transportation Agency, AC TRANSIT63222
8public_transitbus, busAC TRANSIT, San Francisco Municipal Transportation Agency51622
6public_transitbus, busAC TRANSIT, AC TRANSIT26312
75public_transitbus, light_railAC TRANSIT, San Francisco Municipal Transportation Agency25322
.....................
128public_transitrail, busCaltrain, SamTrans122
130public_transitrail, bus, busAmtrak, AC TRANSIT, AC TRANSIT123
133public_transitrail, light_rail, busCaltrain, San Francisco Municipal Transportation Agency, AC TRANSIT133
136public_transitrail, subway, bus, busCaltrain, Bay Area Rapid Transit, San Francisco Municipal Transportation Agency, AC TRANSIT144
141public_transitsubway, bus, bus, busBay Area Rapid Transit, San Francisco Municipal Transportation Agency, SolTrans, AC TRANSIT144
\n", + "

144 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " primary_mode transit_submode \\\n", + "0 public_transit bus \n", + "12 public_transit bus, bus \n", + "8 public_transit bus, bus \n", + "6 public_transit bus, bus \n", + "75 public_transit bus, light_rail \n", + ".. ... ... \n", + "128 public_transit rail, bus \n", + "130 public_transit rail, bus, bus \n", + "133 public_transit rail, light_rail, bus \n", + "136 public_transit rail, subway, bus, bus \n", + "141 public_transit subway, bus, bus, bus \n", + "\n", + " transit_agency \\\n", + "0 AC TRANSIT \n", + "12 San Francisco Municipal Transportation Agency, AC TRANSIT \n", + "8 AC TRANSIT, San Francisco Municipal Transportation Agency \n", + "6 AC TRANSIT, AC TRANSIT \n", + "75 AC TRANSIT, San Francisco Municipal Transportation Agency \n", + ".. ... \n", + "128 Caltrain, SamTrans \n", + "130 Amtrak, AC TRANSIT, AC TRANSIT \n", + "133 Caltrain, San Francisco Municipal Transportation Agency, AC TRANSIT \n", + "136 Caltrain, Bay Area Rapid Transit, San Francisco Municipal Transportation Agency, AC TRANSIT \n", + "141 Bay Area Rapid Transit, San Francisco Municipal Transportation Agency, SolTrans, AC TRANSIT \n", + "\n", + " n agency_count n_modes_taken \n", + "0 2788 1 1 \n", + "12 632 2 2 \n", + "8 516 2 2 \n", + "6 263 1 2 \n", + "75 253 2 2 \n", + ".. ... ... ... 
\n", + "128 1 2 2 \n", + "130 1 2 3 \n", + "133 1 3 3 \n", + "136 1 4 4 \n", + "141 1 4 4 \n", + "\n", + "[144 rows x 6 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agencies_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "589d2a23-b528-4de9-b6a0-a10f88a6da5c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "505f47b1-229a-4cd1-90b8-b83b4ccdd6c6", + "metadata": {}, + "outputs": [], + "source": [ + "modes_count = agencies_test>>group_by(_.n_modes_taken)>>summarize(n_trips = _.n.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "97746912-39f0-4c85-80d3-a00ed47922a8", + "metadata": {}, + "outputs": [], + "source": [ + "# modes_count" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "5353a2e4-762d-4a27-8f32-267c2c55bb90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart((modes_count))\n", + " .mark_bar(size=60)\n", + " .encode(\n", + " x=alt.X(\"n_modes_taken\", title =\"Number of Modes Taken per Trip\"),\n", + " y=alt.Y(\"n_trips\", title = \"Number of Trips\"),\n", + " color=alt.Color(\"n_trips\", title = \"Number of Trips\",\n", + " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n", + " tooltip=modes_count.columns.tolist())\n", + " \n", + " .properties(title = \"How Many Modes are Taken Per Trip\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "fb8565b9-989a-459a-bede-433de30252e7", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_agency_count>>group_by(_.agency_count)>>count(_.n_modes_taken)>>arrange(-_.n)\n", + "agency_mode_trips = agencies_test>>group_by(_.agency_count, _.n_modes_taken)>>summarize(ntrips=_.n.sum())>>arrange(-_.ntrips)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "b03dbe0a-a759-4fa9-849b-b31b318f86cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart((agency_mode_trips))\n", + " .mark_circle(size=100)\n", + " .encode(\n", + " x=alt.X(\"agency_count\", title =\"Number of Agencies\"),\n", + " y=alt.Y(\"n_modes_taken\", title = \"Number of Modes Taken\"),\n", + " color=alt.Color(\"ntrips\", title = \"Number of Trips\",\n", + " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n", + " tooltip=agency_mode_trips.columns.tolist())\n", + " \n", + " .properties(title = \"How Many Modes are Taken Per Trip\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91273d26-1fd1-42a1-b829-a77398c606fc", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "6bd200d7-c0b9-4649-adf1-e025d216328b", + "metadata": {}, + "outputs": [], + "source": [ + "##### Getting columns for each agency and counts " + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "e8f5f457-ce9f-4bfb-99f6-82e6eb7d11f0", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "agency_list = _utils.get_list_of_agencies(agencies_test, \"transit_agency\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "f303acae-da1b-44a5-98b2-2f8f5b98f56a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Agencies Identified in Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'Ac Transit',\n", + " 'Amtrak',\n", + " 'Bay Area Rapid Transit',\n", + " 'Caltrain',\n", + " 'Fairfield And Suisun Transit',\n", + " 'Marin Transit',\n", + " 'Nan',\n", + " 'Sacramento Regional Transit',\n", + " 'Samtrans',\n", + " 'San Francisco Bay Ferry',\n", + " 'San Francisco Municipal Transportation Agency',\n", + " 'San Joaquin Regional Transit District (Rtd)',\n", + " 'Santa Cruz Metro',\n", + " 'Soltrans',\n", + " 'The S',\n", + " 'Tri Delta Transit',\n", + " 'Vta'}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"

Agencies Identified in Trips Data

\")) \n", + "\n", + "(agency_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d737409e-38b2-4aa9-8cac-4ad3b8e22294", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d5ebb21-1aaf-4743-aa0a-feb58ed44da2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2de92f7b-2e28-4388-880f-9e34b8b603ea", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "89479d3a-eeb5-491a-a064-49a77194109e", + "metadata": {}, + "source": [ + "##### Trying ChatGPT approach" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "bb062bcd-4322-4970-9115-7ea91ba7041c", + "metadata": {}, + "outputs": [], + "source": [ + "df = _utils.get_dummies_by_agency(agencies_test, \"transit_agency\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a011752-5211-40b4-a108-a8036fdbacce", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "97bc34c4-636f-46ca-a257-95e4af242584", + "metadata": {}, + "outputs": [], + "source": [ + "##### Identifying trips with one agency" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "46856b31-47ac-408c-903e-19d8b04283f7", + "metadata": {}, + "outputs": [], + "source": [ + "cols_to_keep = [\"transit_submode\",\"unique_agencies\", \"n\",\"n_modes_taken\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "2603e7e0-389a-49fb-93e9-5dc1d52c6012", + "metadata": {}, + "outputs": [], + "source": [ + "# chart = (alt.Chart(df)\n", + "# .mark_circle(size=100)\n", + "# .encode(\n", + "# x=alt.X(\"n_modes_taken\", title=\"Number of Modes taken\"),\n", + "# y=alt.Y(\"n\", title=\"Number of Trips\"),\n", + "# color = alt.Color(\"agency_count\", title=\"Number of Unique Agencies\",\n", + "# scale=alt.Scale(\n", + "# 
range=cp.CALITP_DIVERGING_COLORS,\n", + "# domain=df[\"agency_count\"].unique().tolist())\n", + "# ),\n", + "# tooltip=cols_to_keep)\n", + "# .properties(title = (\"Transit Trips Agency Breakdown\"), width=500,\n", + "# height=300)\n", + "# )\n", + "# chart " + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "149ed63c-436c-45fe-a526-5b1bf9f3d6fb", + "metadata": {}, + "outputs": [], + "source": [ + "# (df>>filter(_.agency_count==1)>>arrange(-_.n))" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "7228c540-3033-4023-8145-91f8aee23eeb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (alt.Chart((df>>filter(_.agency_count==1)))\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"unique_agencies\", title=\"Agency\"),\n", + " y=alt.Y(\"n\", title=\"Number of Modes taken\"),\n", + " color = alt.Color(\"n_modes_taken\", title=\"Number of Trips\",\n", + " scale=alt.Scale(\n", + " range=cp.CALITP_SEQUENTIAL_COLORS,)),\n", + " tooltip=cols_to_keep)\n", + " .properties(title = (\"Transit Trips With Only One Agency\"), width=800,\n", + " height=500)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "60df338f-801f-49a1-a26f-0be5e03bddf3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Most Common Agency Combination

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
transit_agency
0AC TRANSIT
12San Francisco Municipal Transportation Agency, AC TRANSIT
8AC TRANSIT, San Francisco Municipal Transportation Agency
6AC TRANSIT, AC TRANSIT
75AC TRANSIT, San Francisco Municipal Transportation Agency
\n", + "
" + ], + "text/plain": [ + " transit_agency\n", + "0 AC TRANSIT\n", + "12 San Francisco Municipal Transportation Agency, AC TRANSIT\n", + "8 AC TRANSIT, San Francisco Municipal Transportation Agency\n", + "6 AC TRANSIT, AC TRANSIT\n", + "75 AC TRANSIT, San Francisco Municipal Transportation Agency" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"

Most Common Agency Combination

\")) \n", + "\n", + "(df>>arrange(-_.n)>>select(_.transit_agency)).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "786394ef-be26-4e0d-94e7-35386d6d67c0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd43ab95-0391-407b-b6b0-185034ba9528", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "8af03d60-4537-44d3-b82f-d857625e47c3", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jovyan/data-analyses/sb125_analyses/corridor_study/_utils.py:102: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n", + " df_agencies['n_trips'] = df_agencies[list(df_agencies.columns)].sum(axis=1)\n" + ] + } + ], + "source": [ + "df_agencies = _utils.get_agencies_occurances(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "6951b590-cef8-44bd-b27d-0b18893609c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agencyn_trips
0Sacramento Regional Transit6
1SamTrans19
2nan1
3The S3
4San Francisco Municipal Transportation Agency175
5Caltrain20
6Marin Transit1
7Tri Delta Transit1
8Santa Cruz Metro1
9VTA16
10Fairfield and Suisun Transit1
11San Joaquin Regional Transit District (RTD)0
12SolTrans29
13San Francisco Bay Ferry3
14AC TRANSIT111
15Bay Area Rapid Transit37
16Amtrak10
\n", + "
" + ], + "text/plain": [ + " agency n_trips\n", + "0 Sacramento Regional Transit 6\n", + "1 SamTrans 19\n", + "2 nan 1\n", + "3 The S 3\n", + "4 San Francisco Municipal Transportation Agency 175\n", + "5 Caltrain 20\n", + "6 Marin Transit 1\n", + "7 Tri Delta Transit 1\n", + "8 Santa Cruz Metro 1\n", + "9 VTA 16\n", + "10 Fairfield and Suisun Transit 1\n", + "11 San Joaquin Regional Transit District (RTD) 0\n", + "12 SolTrans 29\n", + "13 San Francisco Bay Ferry 3\n", + "14 AC TRANSIT 111\n", + "15 Bay Area Rapid Transit 37\n", + "16 Amtrak 10" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_agencies" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "f98c76b9-b83f-42c3-aa12-702ade72ac38", + "metadata": {}, + "outputs": [], + "source": [ + "tooltip_cols = [\"agency\", \"n_trips\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "f1267faf-e28c-4208-ae79-43b31f67f705", + "metadata": {}, + "outputs": [], + "source": [ + "# df_agencies" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "bf160b81-1bc2-4374-9529-6400252d5e4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (alt.Chart(df_agencies)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"agency\", title = \"Agency Name\"),\n", + " y=alt.Y(\"n_trips\", title= \"Number of boardings reported for trips (One person taking two AC Transit trips will count as 2)\"),\n", + " color=alt.Color(\"n_trips\", scale=alt.Scale(range = cp.CALITP_SEQUENTIAL_COLORS)),\n", + " tooltip = tooltip_cols)\n", + " .properties(title = \"Number of Times an Agency was used for Trip taking over Bay Bridge\",\n", + " width=800,\n", + " height=500)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b5b0cb2-465f-4e1f-83f4-7ae31396d012", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a59f170f-b2e0-41b3-ae7a-1ae4eca12596", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "16b45917-adc3-44d0-8d76-15ccd4083d26", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Trips by Resident Type

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Trips by Resident Type

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "5163926e-82bd-4c25-9486-45229d09a0d1", + "metadata": {}, + "outputs": [], + "source": [ + "trip_by_res_type = (r_trips\n", + " >>group_by(_.primary_mode,_.trip_taker_resident_type)\n", + " >>summarize(number_trips = _.activity_id.nunique())\n", + " >>arrange(_.primary_mode))" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "636e7771-0ba7-42e9-af1d-e3e998ff8599", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primary_modetrip_taker_resident_typenumber_trips
0auto_passengercore367739
1auto_passengervisitor23552
2commercialNaN11897
3on_demand_autocore24372
4on_demand_autovisitor2581
5private_autocore232486
6private_autovisitor6204
7public_transitcore5986
8public_transitvisitor47
\n", + "
" + ], + "text/plain": [ + " primary_mode trip_taker_resident_type number_trips\n", + "0 auto_passenger core 367739\n", + "1 auto_passenger visitor 23552\n", + "2 commercial NaN 11897\n", + "3 on_demand_auto core 24372\n", + "4 on_demand_auto visitor 2581\n", + "5 private_auto core 232486\n", + "6 private_auto visitor 6204\n", + "7 public_transit core 5986\n", + "8 public_transit visitor 47" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trip_by_res_type" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "90c5d683-91c5-4663-accd-4e45027a3e49", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(trip_by_res_type)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\", title = \"Mode\"),\n", + " y=alt.Y(\"number_trips\", title = \"Number of Trips\"),\n", + " color=alt.Color(\"trip_taker_resident_type\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=trip_by_res_type[\"trip_taker_resident_type\"].unique().tolist())\n", + " ),\n", + " tooltip=trip_by_res_type.columns.tolist())\n", + " .properties(title = \"Trips by Resident Type\",\n", + " width=800,\n", + " height=500)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7b51fbb-8b87-4863-bcd9-50ed5047d7d6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "878c5c8e-18c3-456a-b641-67ad010f5101", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Unique Household Ids

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Unique Household Ids

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "2a31bf27-ccc4-4ce3-8b6a-31dbd14caaf2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_taker_household_idn
24868627897
262322NaN16384
110604819421956364075181528
82299609777131291778859627
74914555246788117181473022
.........
26231299884893668902201881
26231699913423094393305601
26231799924909034298666651
26231899925996610099430061
26231999935884836945191521
\n", + "

262323 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " trip_taker_household_id n\n", + "248686 27897\n", + "262322 NaN 16384\n", + "110604 8194219563640751815 28\n", + "82299 6097771312917788596 27\n", + "74914 5552467881171814730 22\n", + "... ... ...\n", + "262312 9988489366890220188 1\n", + "262316 9991342309439330560 1\n", + "262317 9992490903429866665 1\n", + "262318 9992599661009943006 1\n", + "262319 9993588483694519152 1\n", + "\n", + "[262323 rows x 2 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>count(_.trip_taker_household_id)>>arrange(-_.n)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "eb9780a3-b996-45af-9cc3-b41629ec0e55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Checking one household id

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Checking one household id

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "6fc3be54-b8dc-4cf8-893e-565ffa02934d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "28\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
origin_building_usetrip_duration_minutestrip_taker_languageorigin_trct_2020trip_taker_household_idtrip_taker_sextrip_taker_home_bgrp_2020origin_st_2020trip_taker_home_st_2020trip_taker_race_ethnicitytransit_agencytransit_routetrip_taker_resident_typevehicle_typetrip_taker_ageactivity_iddestination_bgrp_2020destination_building_usetrip_taker_wfhdestination_st_2020destination_cty_2020trip_taker_work_bgrp_2020destination_trct_2020trip_taker_household_incomeorigin_bgrp_2020trip_taker_commute_modetrip_taker_available_vehiclesprimary_modeprevious_trip_purposetrip_taker_building_typetransit_submodetrip_taker_work_st_2020trip_taker_household_sizeorigin_cty_2020destination_land_usetrip_taker_industrytrip_taker_tenurevehicle_fuel_typetrip_taker_home_trct_2020trip_taker_work_trct_2020trip_taker_educationtrip_start_timetrip_taker_individual_incometrip_taker_employment_statustrip_purposeorigin_land_usetrip_taker_work_cty_2020trip_distance_milestrip_taker_school_grade_attendingtrip_taker_home_cty_2020trip_taker_person_idtrip_end_time
594856single_family11indo_european179.03 (San Francisco, CA)8194219563640751815male1 (Tract 179.03, San Francisco, CA)CaliforniaCaliforniawhite_not_hispanic_or_latinoNaNNaNcoreunknown_vehicle_type27.078783471048378956383 (Tract 615.06, San Francisco, CA)retailremoteCaliforniaSan Francisco1 (Tract 179.03, San Francisco, CA)615.06 (San Francisco, CA)1200990.01 (Tract 179.03, San Francisco, CA)worked_from_homethree_plusprivate_autohomesingle_familyNaNCalifornia10.0San Franciscomixed_usenaics54renterother_non_bev179.03 (San Francisco, CA)179.03 (San Francisco, CA)bachelors_degree11:44:00218735.0employedshopsingle_familySan Francisco4.0not_attending_schoolSan Francisco1440969234057495981111:55:07
\n", + "
" + ], + "text/plain": [ + " origin_building_use trip_duration_minutes trip_taker_language \\\n", + "594856 single_family 11 indo_european \n", + "\n", + " origin_trct_2020 trip_taker_household_id trip_taker_sex \\\n", + "594856 179.03 (San Francisco, CA) 8194219563640751815 male \n", + "\n", + " trip_taker_home_bgrp_2020 origin_st_2020 \\\n", + "594856 1 (Tract 179.03, San Francisco, CA) California \n", + "\n", + " trip_taker_home_st_2020 trip_taker_race_ethnicity transit_agency \\\n", + "594856 California white_not_hispanic_or_latino NaN \n", + "\n", + " transit_route trip_taker_resident_type vehicle_type \\\n", + "594856 NaN core unknown_vehicle_type \n", + "\n", + " trip_taker_age activity_id \\\n", + "594856 27.0 7878347104837895638 \n", + "\n", + " destination_bgrp_2020 destination_building_use \\\n", + "594856 3 (Tract 615.06, San Francisco, CA) retail \n", + "\n", + " trip_taker_wfh destination_st_2020 destination_cty_2020 \\\n", + "594856 remote California San Francisco \n", + "\n", + " trip_taker_work_bgrp_2020 destination_trct_2020 \\\n", + "594856 1 (Tract 179.03, San Francisco, CA) 615.06 (San Francisco, CA) \n", + "\n", + " trip_taker_household_income origin_bgrp_2020 \\\n", + "594856 1200990.0 1 (Tract 179.03, San Francisco, CA) \n", + "\n", + " trip_taker_commute_mode trip_taker_available_vehicles primary_mode \\\n", + "594856 worked_from_home three_plus private_auto \n", + "\n", + " previous_trip_purpose trip_taker_building_type transit_submode \\\n", + "594856 home single_family NaN \n", + "\n", + " trip_taker_work_st_2020 trip_taker_household_size origin_cty_2020 \\\n", + "594856 California 10.0 San Francisco \n", + "\n", + " destination_land_use trip_taker_industry trip_taker_tenure \\\n", + "594856 mixed_use naics54 renter \n", + "\n", + " vehicle_fuel_type trip_taker_home_trct_2020 \\\n", + "594856 other_non_bev 179.03 (San Francisco, CA) \n", + "\n", + " trip_taker_work_trct_2020 trip_taker_education trip_start_time \\\n", + "594856 179.03 
(San Francisco, CA) bachelors_degree 11:44:00 \n", + "\n", + " trip_taker_individual_income trip_taker_employment_status \\\n", + "594856 218735.0 employed \n", + "\n", + " trip_purpose origin_land_use trip_taker_work_cty_2020 \\\n", + "594856 shop single_family San Francisco \n", + "\n", + " trip_distance_miles trip_taker_school_grade_attending \\\n", + "594856 4.0 not_attending_school \n", + "\n", + " trip_taker_home_cty_2020 trip_taker_person_id trip_end_time \n", + "594856 San Francisco 14409692340574959811 11:55:07 " + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(len(r_trips>>filter(_.trip_taker_household_id == 8194219563640751815)))\n", + "(r_trips>>filter(_.trip_taker_household_id == 8194219563640751815)).sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "4eccfdf8-429e-43df-899e-f7d77e051831", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_taker_person_idtrip_taker_agetrip_taker_sextrip_taker_household_sizen
0132518174655286855422.0female10.06
1231033106690090267930.0female10.06
2474683141297534907034.0female10.02
31006625502873496796228.0male10.04
41105847776148723085432.0male10.02
51289494630026574219339.0male10.02
61440969234057495981127.0male10.06
\n", + "
" + ], + "text/plain": [ + " trip_taker_person_id trip_taker_age trip_taker_sex \\\n", + "0 1325181746552868554 22.0 female \n", + "1 2310331066900902679 30.0 female \n", + "2 4746831412975349070 34.0 female \n", + "3 10066255028734967962 28.0 male \n", + "4 11058477761487230854 32.0 male \n", + "5 12894946300265742193 39.0 male \n", + "6 14409692340574959811 27.0 male \n", + "\n", + " trip_taker_household_size n \n", + "0 10.0 6 \n", + "1 10.0 6 \n", + "2 10.0 2 \n", + "3 10.0 4 \n", + "4 10.0 2 \n", + "5 10.0 2 \n", + "6 10.0 6 " + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## checking one household id\n", + "r_trips>>filter(_.trip_taker_household_id == 8194219563640751815)>>count(_.trip_taker_person_id, _.trip_taker_age,\n", + " _.trip_taker_sex, _.trip_taker_household_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c929c77d-0173-4a7d-be49-762349daf5cb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab226b47-4366-4fb9-aaae-e0aca760f9b3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "cd041141-2a65-45e8-a6ba-9b41cbffecbd", + "metadata": {}, + "source": [ + "#### Adding in Replica's People Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "dea31a60-e986-41e7-b31b-1e5403c9e041", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_483/1951040339.py:1: DtypeWarning: Columns (25) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " r_ppl = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_ppl}\"))\n" + ] + } + ], + "source": [ + "r_ppl = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_ppl}\"))" + ] + }, + { + "cell_type": "markdown", + "id": "2cf7ed4b-7524-4e9a-858a-857fcf70950c", + "metadata": {}, + "source": [ + "replica people dataframe sample" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "42bc1220-b404-4aca-8214-43992c55163b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
person_idwork_st_2020household_incomeresident_typelanguageavailable_vehiclesrace_ethnicityeducationwork_bgrp_2020wfhhome_cty_2020home_trct_2020tenureindustrywork_trct_2020school_grade_attendingbuilding_typecommute_modeemployment_statuswork_cty_2020individual_incomehome_st_2020sexhousehold_sizehome_bgrp_2020household_idage
23720416410964223051134474California120704.0coreenglishthree_pluswhite_not_hispanic_or_latinosome_college3 (Tract 3211.01, Contra Costa, CA)employed_not_workingContra Costa3211.01 (Contra Costa, CA)ownernaics8129103211.01 (Contra Costa, CA)not_attending_schoolsingle_familyprivate_autoemployedContra Costa33528.0Californiamale4.04 (Tract 3211.01, Contra Costa, CA)50362323525519732730.0
\n", + "
" + ], + "text/plain": [ + " person_id work_st_2020 household_income resident_type \\\n", + "237204 16410964223051134474 California 120704.0 core \n", + "\n", + " language available_vehicles race_ethnicity \\\n", + "237204 english three_plus white_not_hispanic_or_latino \n", + "\n", + " education work_bgrp_2020 \\\n", + "237204 some_college 3 (Tract 3211.01, Contra Costa, CA) \n", + "\n", + " wfh home_cty_2020 home_trct_2020 tenure \\\n", + "237204 employed_not_working Contra Costa 3211.01 (Contra Costa, CA) owner \n", + "\n", + " industry work_trct_2020 school_grade_attending \\\n", + "237204 naics812910 3211.01 (Contra Costa, CA) not_attending_school \n", + "\n", + " building_type commute_mode employment_status work_cty_2020 \\\n", + "237204 single_family private_auto employed Contra Costa \n", + "\n", + " individual_income home_st_2020 sex household_size \\\n", + "237204 33528.0 California male 4.0 \n", + "\n", + " home_bgrp_2020 household_id age \n", + "237204 4 (Tract 3211.01, Contra Costa, CA) 503623235255197327 30.0 " + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_ppl.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "d2a1647f-ed67-4c33-a089-b72375323a7d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Number of Travelers by Resident Type: Replica People Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Number of Travelers by Resident Type: Replica People Data

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "570daf6d-06b8-46e5-9664-a71c5a093073", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resident_type_unique_ids
0core278158
1visitor23034
\n", + "
" + ], + "text/plain": [ + " resident_type _unique_ids\n", + "0 core 278158\n", + "1 visitor 23034" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_ppl>>group_by(_.resident_type)>>summarize(_unique_ids = _.person_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "1483249f-602c-4805-bcc4-d55e018022ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Number of Travelers by Resident Type: Replica Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Number of Travelers by Resident Type: Replica Trips Data

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "5c1894f0-bd42-4014-89d4-6e8d43809319", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_taker_resident_type_unique_ids
0core282789
1visitor26806
2NaN1
\n", + "
" + ], + "text/plain": [ + " trip_taker_resident_type _unique_ids\n", + "0 core 282789\n", + "1 visitor 26806\n", + "2 NaN 1" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>group_by(_.trip_taker_resident_type)>>summarize(_unique_ids = _.trip_taker_person_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "2f4b67bf-78ab-4aa8-bbb0-e382a8f78c9a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Traveler Demographics: Replica People Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Fitered for Core Residents" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
commute_modesexn_pplavg_h_incomeavg_p_incomeavg_age
0auto_passengerfemale10908175717.98276570255.62605442.234507
1auto_passengermale11605177241.39491682072.29168542.185523
2bikingfemale234248565.14102682592.41880336.085470
3bikingmale527219563.083491120152.85958338.426945
4other_travel_modefemale36176126129.72100321413.68293955.161599
5other_travel_modemale26290116775.54910633563.07881354.328262
6private_autofemale45312186499.65940676492.00534142.626302
7private_automale59762190643.201265100712.38691842.872093
8public_transitfemale22008193540.92134784074.07211041.921937
9public_transitmale26480207838.001699125445.94248542.259290
10walkingfemale1332160144.72597663737.95645638.041291
11walkingmale1494177570.705489100413.45515437.495315
12worked_from_homefemale17409226232.99965597016.43885341.219829
13worked_from_homemale18621253921.717631137849.97368641.092208
\n", + "
" + ], + "text/plain": [ + " commute_mode sex n_ppl avg_h_income avg_p_income avg_age\n", + "0 auto_passenger female 10908 175717.982765 70255.626054 42.234507\n", + "1 auto_passenger male 11605 177241.394916 82072.291685 42.185523\n", + "2 biking female 234 248565.141026 82592.418803 36.085470\n", + "3 biking male 527 219563.083491 120152.859583 38.426945\n", + "4 other_travel_mode female 36176 126129.721003 21413.682939 55.161599\n", + "5 other_travel_mode male 26290 116775.549106 33563.078813 54.328262\n", + "6 private_auto female 45312 186499.659406 76492.005341 42.626302\n", + "7 private_auto male 59762 190643.201265 100712.386918 42.872093\n", + "8 public_transit female 22008 193540.921347 84074.072110 41.921937\n", + "9 public_transit male 26480 207838.001699 125445.942485 42.259290\n", + "10 walking female 1332 160144.725976 63737.956456 38.041291\n", + "11 walking male 1494 177570.705489 100413.455154 37.495315\n", + "12 worked_from_home female 17409 226232.999655 97016.438853 41.219829\n", + "13 worked_from_home male 18621 253921.717631 137849.973686 41.092208" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"

Traveler Demographics: Replica People Data

\")) \n", + "display(HTML(\"Fitered for Core Residents\")) \n", + "(r_ppl\n", + " >>filter(_.home_bgrp_2020!=\"Visitor (no home location)\")\n", + " >>group_by(_.commute_mode, _.sex)\n", + " >>summarize(\n", + " n_ppl = _.person_id.nunique(),\n", + " avg_h_income = _.household_income.mean(),\n", + " avg_p_income = _.individual_income.mean(),\n", + " avg_age = _.age.mean())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0891b62-c968-4ea0-bd9b-753d299a5054", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "2e4bd058-9c4d-4989-a5fc-db6cf6130bf2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Traveler Demographics: Replica Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Fitered for Core Residents" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primary_modetrip_taker_sexn_pplavg_h_incomeavg_p_incomeavg_age
0auto_passengerfemale99350173666.66222865104.39849045.064633
1auto_passengermale105359187590.98544297296.50937943.991868
2commercialNaN1NaNNaNNaN
3on_demand_autofemale9051186226.83318979990.39510242.900399
4on_demand_automale10159202467.780725111915.39296842.436605
5private_autofemale68268174867.93725162299.06475245.316785
6private_automale75223182301.95222890322.37346644.265320
7public_transitfemale2256186720.49964382496.55492239.104494
8public_transitmale2535205334.441232114041.91483338.312382
\n", + "
" + ], + "text/plain": [ + " primary_mode trip_taker_sex n_ppl avg_h_income avg_p_income \\\n", + "0 auto_passenger female 99350 173666.662228 65104.398490 \n", + "1 auto_passenger male 105359 187590.985442 97296.509379 \n", + "2 commercial NaN 1 NaN NaN \n", + "3 on_demand_auto female 9051 186226.833189 79990.395102 \n", + "4 on_demand_auto male 10159 202467.780725 111915.392968 \n", + "5 private_auto female 68268 174867.937251 62299.064752 \n", + "6 private_auto male 75223 182301.952228 90322.373466 \n", + "7 public_transit female 2256 186720.499643 82496.554922 \n", + "8 public_transit male 2535 205334.441232 114041.914833 \n", + "\n", + " avg_age \n", + "0 45.064633 \n", + "1 43.991868 \n", + "2 NaN \n", + "3 42.900399 \n", + "4 42.436605 \n", + "5 45.316785 \n", + "6 44.265320 \n", + "7 39.104494 \n", + "8 38.312382 " + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"

Traveler Demographics: Replica Trips Data

\")) \n", + "display(HTML(\"Fitered for Core Residents\")) \n", + "(r_trips\n", + " >>filter(_.trip_taker_home_bgrp_2020!=\"Visitor (no home location)\")\n", + " >>group_by(_.primary_mode, _.trip_taker_sex)\n", + " >>summarize(\n", + " n_ppl = _.trip_taker_person_id.nunique(),\n", + " avg_h_income = _.trip_taker_household_income.mean(),\n", + " avg_p_income = _.trip_taker_individual_income.mean(),\n", + " avg_age = _.trip_taker_age.mean())\n", + ")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "7b637cdd-9560-4d31-bfeb-bffa6cbb55d2", + "metadata": {}, + "outputs": [], + "source": [ + "replica_people_demographics = (r_ppl\n", + " >>group_by(_.commute_mode, _.sex)\n", + " >>summarize(\n", + " n_ppl = _.person_id.nunique(),\n", + " avg_h_income = _.household_income.mean(),\n", + " avg_p_income = _.individual_income.mean(),\n", + " avg_age = _.age.mean())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "bcaa0855-faf6-4eb0-aa8e-48cb3348baeb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
commute_modesexn_pplavg_h_incomeavg_p_incomeavg_age
0auto_passengerfemale10908175717.98276570255.62605442.234507
1auto_passengermale11605177241.39491682072.29168542.185523
2bikingfemale234248565.14102682592.41880336.085470
3bikingmale527219563.083491120152.85958338.426945
4other_travel_modefemale36176126129.72100321413.68293955.161599
5other_travel_modemale26290116775.54910633563.07881354.328262
6private_autofemale45312186499.65940676492.00534142.626302
7private_automale59762190643.201265100712.38691842.872093
8public_transitfemale22008193540.92134784074.07211041.921937
9public_transitmale26480207838.001699125445.94248542.259290
10walkingfemale1332160144.72597663737.95645638.041291
11walkingmale1494177570.705489100413.45515437.495315
12worked_from_homefemale17409226232.99965597016.43885341.219829
13worked_from_homemale18621253921.717631137849.97368641.092208
14NaNNaN23034NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " commute_mode sex n_ppl avg_h_income avg_p_income avg_age\n", + "0 auto_passenger female 10908 175717.982765 70255.626054 42.234507\n", + "1 auto_passenger male 11605 177241.394916 82072.291685 42.185523\n", + "2 biking female 234 248565.141026 82592.418803 36.085470\n", + "3 biking male 527 219563.083491 120152.859583 38.426945\n", + "4 other_travel_mode female 36176 126129.721003 21413.682939 55.161599\n", + "5 other_travel_mode male 26290 116775.549106 33563.078813 54.328262\n", + "6 private_auto female 45312 186499.659406 76492.005341 42.626302\n", + "7 private_auto male 59762 190643.201265 100712.386918 42.872093\n", + "8 public_transit female 22008 193540.921347 84074.072110 41.921937\n", + "9 public_transit male 26480 207838.001699 125445.942485 42.259290\n", + "10 walking female 1332 160144.725976 63737.956456 38.041291\n", + "11 walking male 1494 177570.705489 100413.455154 37.495315\n", + "12 worked_from_home female 17409 226232.999655 97016.438853 41.219829\n", + "13 worked_from_home male 18621 253921.717631 137849.973686 41.092208\n", + "14 NaN NaN 23034 NaN NaN NaN" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "replica_people_demographics" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "7f7c1066-c820-43d3-a68e-0dcdfc1dcdce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(replica_people_demographics)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"commute_mode\", title = \"Mode\"),\n", + " y=alt.Y(\"n_ppl\", title = \"Number of People\"),\n", + " color=alt.Color(\"commute_mode\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n", + " tooltip=replica_people_demographics.columns.tolist())\n", + " .properties(title = \"Trips by Resident Type\",\n", + " width=800,\n", + " height=500)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1dba8b5-7356-441f-8e6c-0f98e0f80c4c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "97f56b20-f299-43f5-bad4-2a42362962e2", + "metadata": { + "tags": [] + }, + "source": [ + "### Read in Streetlight Data: 2022" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "6f3cf136-7531-4b62-b050-169b7bd1c4ec", + "metadata": {}, + "outputs": [], + "source": [ + "streetlight = \"streetlight_bay_bridge_corridor_study_corridor_study.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "47943ff0-2949-409b-a9ad-ca5efea3b52d", + "metadata": {}, + "outputs": [], + "source": [ + "sl_data = to_snakecase(pd.read_csv(f\"{GCS_PATH}{streetlight}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "28fc95f8-5898-486f-81f3-f0ac7885431c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Streetlight Data Sample

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Streetlight Data Sample

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "3398c3fa-856d-49a0-8d2d-7994f3f50df1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
data_periodsmode_of_travelzone_idzone_nameroad_classificationline_zone_length__miles_zone_is_pass_throughzone_direction__degrees_zone_cardinal_directionzone_is_bi_directionday_typeday_partaverage_daily_segment_traffic__stl_volume_avg_segment_speed__mph_avg_segment_travel_time__sec_free_flow_speed__mph_vehicle_miles_of_travel__stl_volume_travel_time_indexcongested_segment_85th_speed_percentile
1411Jan 01, 2022 - Dec 31, 2022All Vehicles CVD Plus - StL All Vehicles Volume1130705657San Francisco – Oakland Bay Bridge / 52527662 / 1Motorway0.104yes40EASTno2: Weekend Day (Sa-Su)01: 12am (12am-1am)302961765.986314.441.08False70
\n", + "
" + ], + "text/plain": [ + " data_periods \\\n", + "1411 Jan 01, 2022 - Dec 31, 2022 \n", + "\n", + " mode_of_travel zone_id \\\n", + "1411 All Vehicles CVD Plus - StL All Vehicles Volume 1130705657 \n", + "\n", + " zone_name road_classification \\\n", + "1411 San Francisco – Oakland Bay Bridge / 52527662 / 1 Motorway \n", + "\n", + " line_zone_length__miles_ zone_is_pass_through zone_direction__degrees_ \\\n", + "1411 0.104 yes 40 \n", + "\n", + " zone_cardinal_direction zone_is_bi_direction day_type \\\n", + "1411 EAST no 2: Weekend Day (Sa-Su) \n", + "\n", + " day_part average_daily_segment_traffic__stl_volume_ \\\n", + "1411 01: 12am (12am-1am) 3029 \n", + "\n", + " avg_segment_speed__mph_ avg_segment_travel_time__sec_ \\\n", + "1411 61 7 \n", + "\n", + " free_flow_speed__mph_ vehicle_miles_of_travel__stl_volume_ \\\n", + "1411 65.986 314.44 \n", + "\n", + " travel_time_index congested_segment _85th_speed_percentile \n", + "1411 1.08 False 70 " + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sl_data.sample()" + ] + }, + { + "cell_type": "markdown", + "id": "3a3d1c17-1d5b-4ddf-aac3-78b8156ceca4", + "metadata": {}, + "source": [ + "#### Data Explorations: Streetlight" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "da10c3eb-31bd-437e-b546-82472d66f964", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Length of the Streetlight Data" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "1710" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"Length of the Streetlight Data\")) \n", + "\n", + "len(sl_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "5e403ce9-33bc-4453-9323-df46fad1f38a", + "metadata": {}, + "outputs": [], + "source": [ + "# sl_data>>count(_.zone_id, _.zone_name)" + ] + 
}, + { + "cell_type": "code", + "execution_count": 80, + "id": "cd1ce590-2019-43f0-ba09-786520479ad8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
day_partn
000: All Day (12am-12am)3
101: 12am (12am-1am)3
202: Early AM (12am-6am)3
303: 1am (1am-2am)3
404: 2am (2am-3am)3
505: 3am (3am-4am)3
606: 4am (4am-5am)3
707: 5am (5am-6am)3
808: 6am (6am-7am)3
909: Peak AM (6am-10am)3
1010: 7am (7am-8am)3
1111: 8am (8am-9am)3
1212: 9am (9am-10am)3
1313: 10am (10am-11am)3
1414: Mid-Day (10am-4pm)3
1515: 11am (11am-12noon)3
1616: 12pm (12noon-1pm)3
1717: 1pm (1pm-2pm)3
1818: 2pm (2pm-3pm)3
1919: 3pm (3pm-4pm)3
2020: 4pm (4pm-5pm)3
2121: Peak PM (4pm-8pm)3
2222: 5pm (5pm-6pm)3
2323: 6pm (6pm-7pm)3
2424: 7pm (7pm-8pm)3
2525: 8pm (8pm-9pm)3
2626: Late PM (8pm-12am)3
2727: 9pm (9pm-10pm)3
2828: 10pm (10pm-11pm)3
2929: 11pm (11pm-12am)3
\n", + "
" + ], + "text/plain": [ + " day_part n\n", + "0 00: All Day (12am-12am) 3\n", + "1 01: 12am (12am-1am) 3\n", + "2 02: Early AM (12am-6am) 3\n", + "3 03: 1am (1am-2am) 3\n", + "4 04: 2am (2am-3am) 3\n", + "5 05: 3am (3am-4am) 3\n", + "6 06: 4am (4am-5am) 3\n", + "7 07: 5am (5am-6am) 3\n", + "8 08: 6am (6am-7am) 3\n", + "9 09: Peak AM (6am-10am) 3\n", + "10 10: 7am (7am-8am) 3\n", + "11 11: 8am (8am-9am) 3\n", + "12 12: 9am (9am-10am) 3\n", + "13 13: 10am (10am-11am) 3\n", + "14 14: Mid-Day (10am-4pm) 3\n", + "15 15: 11am (11am-12noon) 3\n", + "16 16: 12pm (12noon-1pm) 3\n", + "17 17: 1pm (1pm-2pm) 3\n", + "18 18: 2pm (2pm-3pm) 3\n", + "19 19: 3pm (3pm-4pm) 3\n", + "20 20: 4pm (4pm-5pm) 3\n", + "21 21: Peak PM (4pm-8pm) 3\n", + "22 22: 5pm (5pm-6pm) 3\n", + "23 23: 6pm (6pm-7pm) 3\n", + "24 24: 7pm (7pm-8pm) 3\n", + "25 25: 8pm (8pm-9pm) 3\n", + "26 26: Late PM (8pm-12am) 3\n", + "27 27: 9pm (9pm-10pm) 3\n", + "28 28: 10pm (10pm-11pm) 3\n", + "29 29: 11pm (11pm-12am) 3" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sl_data>>filter(_.zone_id==1133975975)>>count(_.day_part)" + ] + }, + { + "cell_type": "markdown", + "id": "5d362f46-cfe6-4d13-b3ea-67f491a66897", + "metadata": {}, + "source": [ + "* The data here shows that each segment is broken out into hour time slots and then also aggregated to peak time periods. 
Moving forward, we will look at the `All Day` value of `day_part` and the `All Days` value of `day_type`." + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "57c890a6-c667-4917-a090-b85f1aadcad7", + "metadata": {}, + "outputs": [], + "source": [ + "sl_data_single_day = sl_data>>filter(_.day_part == '00: All Day (12am-12am)', _.day_type == '0: All Days (M-Su)')" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "0a059c82-c7c8-43b9-92a6-178b42101828", + "metadata": {}, + "outputs": [], + "source": [ + "# len(sl_data_single_day)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "67213733-9905-42bd-912e-cf943cd66b40", + "metadata": {}, + "outputs": [], + "source": [ + "# sl_data_single_day>>count(_.zone_id, _.zone_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "2e497c77-91ef-490f-a288-abe4e2c740dc", + "metadata": {}, + "outputs": [], + "source": [ + "# sl_data_single_day>>filter(_.zone_id==1133975975)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "28292f62-b2ab-46cb-ac08-b728facdfdc0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
data_periodsmode_of_travelzone_idzone_nameroad_classificationline_zone_length__miles_zone_is_pass_throughzone_direction__degrees_zone_cardinal_directionzone_is_bi_directionday_typeday_partaverage_daily_segment_traffic__stl_volume_avg_segment_speed__mph_avg_segment_travel_time__sec_free_flow_speed__mph_vehicle_miles_of_travel__stl_volume_travel_time_indexcongested_segment_85th_speed_percentile
1440Jan 01, 2022 - Dec 31, 2022All Vehicles CVD Plus - StL All Vehicles Volume1132861884I 80 / 236348365 / 1Motorway0.006yes85EASTno0: All Days (M-Su)00: All Day (12am-12am)12858163270.532725.21.12False72
360Jan 01, 2022 - Dec 31, 2022All Vehicles CVD Plus - StL All Vehicles Volume1036056766San Francisco – Oakland Bay Bridge / 52721870 / 1Motorway0.073yes37EASTno0: All Days (M-Su)00: All Day (12am-12am)15849351567.26711532.21.32True66
270Jan 01, 2022 - Dec 31, 2022All Vehicles CVD Plus - StL All Vehicles Volume1032819756San Francisco – Oakland Bay Bridge / 11415208 / 1Motorway0.109yes220WESTno0: All Days (M-Su)00: All Day (12am-12am)132245491068.59614450.01.41True67
\n", + "
" + ], + "text/plain": [ + " data_periods \\\n", + "1440 Jan 01, 2022 - Dec 31, 2022 \n", + "360 Jan 01, 2022 - Dec 31, 2022 \n", + "270 Jan 01, 2022 - Dec 31, 2022 \n", + "\n", + " mode_of_travel zone_id \\\n", + "1440 All Vehicles CVD Plus - StL All Vehicles Volume 1132861884 \n", + "360 All Vehicles CVD Plus - StL All Vehicles Volume 1036056766 \n", + "270 All Vehicles CVD Plus - StL All Vehicles Volume 1032819756 \n", + "\n", + " zone_name road_classification \\\n", + "1440 I 80 / 236348365 / 1 Motorway \n", + "360 San Francisco – Oakland Bay Bridge / 52721870 / 1 Motorway \n", + "270 San Francisco – Oakland Bay Bridge / 11415208 / 1 Motorway \n", + "\n", + " line_zone_length__miles_ zone_is_pass_through zone_direction__degrees_ \\\n", + "1440 0.006 yes 85 \n", + "360 0.073 yes 37 \n", + "270 0.109 yes 220 \n", + "\n", + " zone_cardinal_direction zone_is_bi_direction day_type \\\n", + "1440 EAST no 0: All Days (M-Su) \n", + "360 EAST no 0: All Days (M-Su) \n", + "270 WEST no 0: All Days (M-Su) \n", + "\n", + " day_part average_daily_segment_traffic__stl_volume_ \\\n", + "1440 00: All Day (12am-12am) 128581 \n", + "360 00: All Day (12am-12am) 158493 \n", + "270 00: All Day (12am-12am) 132245 \n", + "\n", + " avg_segment_speed__mph_ avg_segment_travel_time__sec_ \\\n", + "1440 63 2 \n", + "360 51 5 \n", + "270 49 10 \n", + "\n", + " free_flow_speed__mph_ vehicle_miles_of_travel__stl_volume_ \\\n", + "1440 70.532 725.2 \n", + "360 67.267 11532.2 \n", + "270 68.596 14450.0 \n", + "\n", + " travel_time_index congested_segment _85th_speed_percentile \n", + "1440 1.12 False 72 \n", + "360 1.32 True 66 \n", + "270 1.41 True 67 " + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sl_data_single_day.sample(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "14211b7a-38f9-4b55-b669-67fdae44aa80", + "metadata": {}, + "outputs": [], + "source": [ + "# 
sl_data_single_day>>group_by(_.zone_cardinal_direction, _.day_type, _.day_part)>>summarize(avg_speed= _.avg_segment_speed__mph_.mean(),\n", + "# avg_travel_time = _.avg_segment_travel_time__sec_.mean(),\n", + "# avg_volume = _.average_daily_segment_traffic__stl_volume_.mean(),\n", + "# sum_volume = _.average_daily_segment_traffic__stl_volume_.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "778d58f0-25b7-4cf7-b60a-10e773055f48", + "metadata": {}, + "outputs": [], + "source": [ + "sl_peaks = (sl_data>>filter(\n", + " _.day_part != \"01: 12am (12am-1am)\",\n", + " _.day_part != \"03: 1am (1am-2am)\",\n", + " _.day_part != \"04: 2am (2am-3am)\",\n", + " _.day_part != \"05: 3am (3am-4am)\",\n", + " _.day_part != \"06: 4am (4am-5am)\",\n", + " _.day_part != \"07: 5am (5am-6am)\",\n", + " _.day_part != \"08: 6am (6am-7am)\",\n", + " _.day_part != \"10: 7am (7am-8am)\",\n", + " _.day_part != \"11: 8am (8am-9am)\",\n", + " _.day_part != \"12: 9am (9am-10am)\",\n", + " _.day_part != \"13: 10am (10am-11am)\",\n", + " _.day_part != \"15: 11am (11am-12noon)\",\n", + " _.day_part != \"16: 12pm (12noon-1pm)\",\n", + " _.day_part != \"17: 1pm (1pm-2pm)\",\n", + " _.day_part != \"18: 2pm (2pm-3pm)\",\n", + " _.day_part != \"19: 3pm (3pm-4pm)\",\n", + " _.day_part != \"20: 4pm (4pm-5pm)\",\n", + " _.day_part != \"22: 5pm (5pm-6pm)\",\n", + " _.day_part != \"23: 6pm (6pm-7pm)\",\n", + " _.day_part != \"24: 7pm (7pm-8pm)\",\n", + " _.day_part != \"25: 8pm (8pm-9pm)\",\n", + " _.day_part != \"27: 9pm (9pm-10pm)\",\n", + " _.day_part != \"28: 10pm (10pm-11pm)\",\n", + " _.day_part !=\"29: 11pm (11pm-12am)\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f2fa572-8bb8-4b9f-a299-48df2b838c31", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "29eab191-4eaa-445c-b444-49048135ef3b", + "metadata": {}, + "outputs": [], + "source": [ + "sl_hourly = 
(sl_data>>filter(_.day_part != \"00: All Day (12am-12am)\",\n", + " _.day_part != \"02: Early AM (12am-6am)\",\n", + " _.day_part != \"09: Peak AM (6am-10am)\",\n", + " _.day_part != \"14: Mid-Day (10am-4pm)\",\n", + " _.day_part != \"21: Peak PM (4pm-8pm)\", _.day_part !=\"26: Late PM (8pm-12am)\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cd2a638-cce1-4c94-8444-4aa4844bf7f0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "896b63a9-b881-418a-ba40-dbea63f81614", + "metadata": {}, + "outputs": [], + "source": [ + "sl_peaks_agg = sl_peaks>>group_by(_.zone_cardinal_direction,\n", + " _.day_type, _.day_part)>>summarize(avg_speed= _.avg_segment_speed__mph_.mean(),\n", + " avg_travel_time = _.avg_segment_travel_time__sec_.mean(),\n", + " avg_volume = _.average_daily_segment_traffic__stl_volume_.mean(),\n", + " sum_volume = _.average_daily_segment_traffic__stl_volume_.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "cac29684-36ac-488b-b2d1-044ab5ef86c0", + "metadata": {}, + "outputs": [], + "source": [ + "# sl_peaks_agg.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "eedc968c-8306-449a-a2e3-774719d8e256", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(sl_peaks_agg)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"day_part\"),\n", + " y=alt.Y(\"avg_speed\"),\n", + " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=sl_peaks_agg[\"zone_cardinal_direction\"].unique().tolist())\n", + " ),\n", + " tooltip=sl_peaks_agg.columns.tolist(),\n", + " )\n", + " .properties(title = \"Average Speed by Direction by Day Part\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "637afffc-560f-4485-985a-d6ee73dd65a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(sl_peaks_agg)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"day_part\"),\n", + " y=alt.Y(\"sum_volume\"),\n", + " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=sl_peaks_agg[\"zone_cardinal_direction\"].unique().tolist())\n", + " ),\n", + " tooltip=sl_peaks_agg.columns.tolist(),\n", + " )\n", + " .properties(title = \"Total Volume by Direction by Day Part\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dade438-083b-446d-b1f7-d1542a3e60e6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d954475-9f22-418b-bb31-0ab9799a459e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "b2eaa07a-2d05-4ed5-a405-a474500a643e", + "metadata": {}, + "outputs": [], + "source": [ + "sl_hourly_agg = sl_hourly>>filter(_.day_type==\"0: All Days (M-Su)\")>>group_by(_.zone_cardinal_direction,\n", + " _.day_type, _.day_part)>>summarize(avg_speed= _.avg_segment_speed__mph_.mean(),\n", + " avg_travel_time = _.avg_segment_travel_time__sec_.mean(),\n", + " avg_volume = _.average_daily_segment_traffic__stl_volume_.mean(),\n", + " sum_volume = _.average_daily_segment_traffic__stl_volume_.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "1ab3835d-5f8e-4721-9c29-7bd107bad334", + "metadata": {}, + "outputs": [], + "source": [ + "# sl_hourly_agg.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10efa809-ec77-4127-aa62-a838dd005c7d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": 
"8cc145e6-b3d9-4b31-9a0d-270431ebf5fb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(sl_hourly_agg)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"day_part\"),\n", + " y=alt.Y(\"avg_speed\"),\n", + " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=sl_hourly_agg[\"zone_cardinal_direction\"].unique().tolist())\n", + " ),\n", + " tooltip=sl_hourly_agg.columns.tolist(),\n", + " )\n", + " .properties(title = \"Average Speed by Hour\", \n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "b54b867c-d073-4280-bfd3-2fa5bd387016", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(sl_hourly_agg)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"day_part\"),\n", + " y=alt.Y(\"sum_volume\"),\n", + " color=alt.Color(\"zone_cardinal_direction\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=sl_hourly_agg[\"zone_cardinal_direction\"].unique().tolist())\n", + " ),\n", + " tooltip=sl_hourly_agg.columns.tolist(),\n", + " )\n", + " .properties(title = \"Total Volume by Hour\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd1987e0-957e-4b30-a940-86f3c9c0fd39", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76157cad-aeda-4ba4-9f98-77c677e6b6c6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfe9e847-b78f-4e81-b735-708df21764a0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "006dfcc5-bf3f-4a33-a654-601fbb232b71", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecded8d2-70ae-4a9a-8592-cc5e9b9cd8df", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d007800-e20a-4991-b99d-f6cf74521b05", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbc819b1-0dfc-46be-90bd-b136263dc22c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, 
+ "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sb125_analyses/corridor_study/data_downloads_skirball.ipynb b/sb125_analyses/corridor_study/data_downloads_skirball.ipynb new file mode 100644 index 000000000..268ec1842 --- /dev/null +++ b/sb125_analyses/corridor_study/data_downloads_skirball.ipynb @@ -0,0 +1,4459 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "82a53f1d-2622-4cb0-bfdd-36ceec652215", + "metadata": {}, + "source": [ + "# SB1 Big Data Downloads: I-405 Skirball\n", + "An analysis of the corridor study data downloaded from Streetlight and Replica" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "45233485-2055-499a-a89e-fc154fd56e63", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_516/2202862553.py:12: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n", + " from IPython.core.display import display\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from siuba import *\n", + "import ast\n", + "\n", + "\n", + "from calitp_data_analysis.sql import to_snakecase\n", + "\n", + "import altair as alt\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "\n", + "from IPython.display import Markdown, HTML, display_html, display\n", + "from IPython.core.display import display\n", + "\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "9e5db6b9-2add-42e7-b820-1b075dc3bbcc", + "metadata": {}, + "outputs": [], + "source": [ + "from dla_utils import _dla_utils" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "c0ea35c1-246d-4356-ad1c-83fe9cc437ff", + "metadata": {}, + "outputs": [], + "source": [ + "import _utils" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "651aae92-5188-4676-8e5f-3c040f77077c", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', 500)\n", + "pd.set_option('display.max_colwidth', 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "af402da0-0b03-4c71-a1af-19f97f67cef1", + "metadata": {}, + "outputs": [], + "source": [ + "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/sb125/corridor_study_data/\"" + ] + }, + { + "cell_type": "markdown", + "id": "71c1deda-8e83-45d8-a0f5-20b36b7051c0", + "metadata": {}, + "source": [ + "### Read in Replica Data Spring 2023" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0e5c1b44-c981-4a9a-b6ea-e1cb2a7a279e", + "metadata": {}, + "outputs": [], + "source": [ + "replica_trips = \"replica-405_skirball-05_24_24-trips_dataset.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9f14685-0953-4b1d-a636-882230fef1d5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cb3dea26-4e6d-4dc7-acc8-27b899a0a537", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_516/2643890524.py:1: DtypeWarning: Columns (18,19,20,25,26,28,29,30,31,33,36,38,39,40,41,42,43) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))\n" + ] + } + ], + "source": [ + "r_trips = to_snakecase(pd.read_csv(f\"{GCS_PATH}{replica_trips}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ca697494-b872-4de8-afd9-c538a455364c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Sample of data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Sample of data

\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4713b1a5-ccac-4e1b-8552-850285cad53e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
activity_idorigin_bgrp_2020origin_trct_2020origin_cty_2020origin_st_2020destination_bgrp_2020destination_trct_2020destination_cty_2020destination_st_2020primary_modetrip_purposeprevious_trip_purposetrip_start_timetrip_end_timetrip_duration_minutestrip_distance_milesvehicle_typevehicle_fuel_typetransit_submodetransit_agencytransit_routeorigin_land_useorigin_building_usedestination_land_usedestination_building_usetrip_taker_person_idtrip_taker_household_idtrip_taker_agetrip_taker_sextrip_taker_race_ethnicitytrip_taker_employment_statustrip_taker_wfhtrip_taker_individual_incometrip_taker_commute_modetrip_taker_household_sizetrip_taker_household_incometrip_taker_available_vehiclestrip_taker_resident_typetrip_taker_industrytrip_taker_building_typetrip_taker_school_grade_attendingtrip_taker_educationtrip_taker_tenuretrip_taker_languagetrip_taker_home_bgrp_2020trip_taker_home_trct_2020trip_taker_home_cty_2020trip_taker_home_st_2020trip_taker_work_bgrp_2020trip_taker_work_trct_2020trip_taker_work_cty_2020trip_taker_work_st_2020
25701393028965716263972642 (Tract 2169.02, Los Angeles, CA)2169.02 (Los Angeles, CA)Los Angeles County, CACalifornia1 (Tract 1281.02, Los Angeles, CA)1281.02 (Los Angeles, CA)Los Angeles County, CACaliforniaprivate_autohomesocial12:17:0012:50:003318.90unknown_vehicle_typeother_non_bevNaNNaNNaNsingle_familysingle_familymulti_familymulti_family7579849294298285797960720388126959707633.00malewhite_not_hispanic_or_latinoemployedin_person59,218.00private_auto3.00184,761.00twocorenaics23several_unitsnot_attending_schoolsome_collegerenterenglish1 (Tract 1281.02, Los Angeles, CA)1281.02 (Los Angeles, CA)Los Angeles County, CACalifornia1 (Tract 2171.01, Los Angeles, CA)2171.01 (Los Angeles, CA)Los Angeles County, CACalifornia
366384100447911236551818572 (Tract 5033.01, Los Angeles, CA)5033.01 (Los Angeles, CA)Los Angeles County, CACalifornia2 (Tract 2622, Los Angeles, CA)2622 (Los Angeles, CA)Los Angeles County, CACaliforniaauto_passengerhomesocial12:55:1613:57:556235.70unknown_vehicle_typeunknown_fuel_typeNaNNaNNaNsingle_familysingle_familysingle_familysingle_family13487676720272194221444466938720191336927.00femaleblack_not_hispanic_or_latinoemployedin_person94,986.00private_auto2.0094,986.00three_pluscorenaics61single_familynot_attending_schooladvanced_degreeownerenglish2 (Tract 2622, Los Angeles, CA)2622 (Los Angeles, CA)Los Angeles County, CACalifornia2 (Tract 1397.01, Los Angeles, CA)1397.01 (Los Angeles, CA)Los Angeles County, CACalifornia
40408780250587477891553621 (Tract 2623.03, Los Angeles, CA)2623.03 (Los Angeles, CA)Los Angeles County, CACalifornia2 (Tract 1041.03, Los Angeles, CA)1041.03 (Los Angeles, CA)Los Angeles County, CACaliforniaprivate_autohomesocial13:39:1614:19:594021.00unknown_vehicle_typeother_non_bevNaNNaNNaNsingle_familysingle_familysingle_familysingle_family15874717675890836160513982063538191552052.00femalehispanic_or_latino_originnot_in_labor_forceunemployed_under_16_not_in_labor_force0.00other_travel_mode13.00115,058.00three_pluscorenot_workingsingle_familynot_attending_schoolk_12ownerspanish2 (Tract 1041.03, Los Angeles, CA)1041.03 (Los Angeles, CA)Los Angeles County, CACaliforniaDoes not have work/school locationDoes not have work/school locationDoes not have work/school locationDoes not have work/school location
21386672814583455230966031 (Tract 1375.01, Los Angeles, CA)1375.01 (Los Angeles, CA)Los Angeles County, CACalifornia3 (Tract 7014.02, Los Angeles, CA)7014.02 (Los Angeles, CA)Los Angeles County, CACaliforniaauto_passengerworkhome06:32:0007:14:104221.40unknown_vehicle_typeunknown_fuel_typeNaNNaNNaNsingle_familysingle_familymixed_usehealthcare6116471627154071969775774233016839213434.00malewhite_not_hispanic_or_latinoemployedin_person129,138.00private_auto2.00129,138.00twocorenaics622110single_familynot_attending_schooladvanced_degreerenterindo_european1 (Tract 1375.01, Los Angeles, CA)1375.01 (Los Angeles, CA)Los Angeles County, CACalifornia3 (Tract 7014.02, Los Angeles, CA)7014.02 (Los Angeles, CA)Los Angeles County, CACalifornia
403783167821612298218887063 (Tract 7028.01, Los Angeles, CA)7028.01 (Los Angeles, CA)Los Angeles County, CACalifornia2 (Tract 1066.04, Los Angeles, CA)1066.04 (Los Angeles, CA)Los Angeles County, CACaliforniaauto_passengerhomeshop16:58:0018:02:016423.40unknown_vehicle_typeunknown_fuel_typeNaNNaNNaNofficeofficesingle_familysingle_family14388114560565293720478844609635940937188.00malehispanic_or_latino_originnot_in_labor_forceunemployed_under_16_not_in_labor_force26,495.00other_travel_mode1.0026,495.00zerocorenot_workingsingle_familynot_attending_schoolk_12renterspanish2 (Tract 1066.04, Los Angeles, CA)1066.04 (Los Angeles, CA)Los Angeles County, CACaliforniaDoes not have work/school locationDoes not have work/school locationDoes not have work/school locationDoes not have work/school location
\n", + "
" + ], + "text/plain": [ + " activity_id origin_bgrp_2020 \\\n", + "257013 9302896571626397264 2 (Tract 2169.02, Los Angeles, CA) \n", + "366384 10044791123655181857 2 (Tract 5033.01, Los Angeles, CA) \n", + "404087 8025058747789155362 1 (Tract 2623.03, Los Angeles, CA) \n", + "213866 7281458345523096603 1 (Tract 1375.01, Los Angeles, CA) \n", + "403783 16782161229821888706 3 (Tract 7028.01, Los Angeles, CA) \n", + "\n", + " origin_trct_2020 origin_cty_2020 origin_st_2020 \\\n", + "257013 2169.02 (Los Angeles, CA) Los Angeles County, CA California \n", + "366384 5033.01 (Los Angeles, CA) Los Angeles County, CA California \n", + "404087 2623.03 (Los Angeles, CA) Los Angeles County, CA California \n", + "213866 1375.01 (Los Angeles, CA) Los Angeles County, CA California \n", + "403783 7028.01 (Los Angeles, CA) Los Angeles County, CA California \n", + "\n", + " destination_bgrp_2020 destination_trct_2020 \\\n", + "257013 1 (Tract 1281.02, Los Angeles, CA) 1281.02 (Los Angeles, CA) \n", + "366384 2 (Tract 2622, Los Angeles, CA) 2622 (Los Angeles, CA) \n", + "404087 2 (Tract 1041.03, Los Angeles, CA) 1041.03 (Los Angeles, CA) \n", + "213866 3 (Tract 7014.02, Los Angeles, CA) 7014.02 (Los Angeles, CA) \n", + "403783 2 (Tract 1066.04, Los Angeles, CA) 1066.04 (Los Angeles, CA) \n", + "\n", + " destination_cty_2020 destination_st_2020 primary_mode \\\n", + "257013 Los Angeles County, CA California private_auto \n", + "366384 Los Angeles County, CA California auto_passenger \n", + "404087 Los Angeles County, CA California private_auto \n", + "213866 Los Angeles County, CA California auto_passenger \n", + "403783 Los Angeles County, CA California auto_passenger \n", + "\n", + " trip_purpose previous_trip_purpose trip_start_time trip_end_time \\\n", + "257013 home social 12:17:00 12:50:00 \n", + "366384 home social 12:55:16 13:57:55 \n", + "404087 home social 13:39:16 14:19:59 \n", + "213866 work home 06:32:00 07:14:10 \n", + "403783 home shop 16:58:00 18:02:01 \n", + "\n", + 
" trip_duration_minutes trip_distance_miles vehicle_type \\\n", + "257013 33 18.90 unknown_vehicle_type \n", + "366384 62 35.70 unknown_vehicle_type \n", + "404087 40 21.00 unknown_vehicle_type \n", + "213866 42 21.40 unknown_vehicle_type \n", + "403783 64 23.40 unknown_vehicle_type \n", + "\n", + " vehicle_fuel_type transit_submode transit_agency transit_route \\\n", + "257013 other_non_bev NaN NaN NaN \n", + "366384 unknown_fuel_type NaN NaN NaN \n", + "404087 other_non_bev NaN NaN NaN \n", + "213866 unknown_fuel_type NaN NaN NaN \n", + "403783 unknown_fuel_type NaN NaN NaN \n", + "\n", + " origin_land_use origin_building_use destination_land_use \\\n", + "257013 single_family single_family multi_family \n", + "366384 single_family single_family single_family \n", + "404087 single_family single_family single_family \n", + "213866 single_family single_family mixed_use \n", + "403783 office office single_family \n", + "\n", + " destination_building_use trip_taker_person_id trip_taker_household_id \\\n", + "257013 multi_family 7579849294298285797 9607203881269597076 \n", + "366384 single_family 1348767672027219422 14444669387201913369 \n", + "404087 single_family 15874717675890836160 5139820635381915520 \n", + "213866 healthcare 6116471627154071969 7757742330168392134 \n", + "403783 single_family 14388114560565293720 4788446096359409371 \n", + "\n", + " trip_taker_age trip_taker_sex trip_taker_race_ethnicity \\\n", + "257013 33.00 male white_not_hispanic_or_latino \n", + "366384 27.00 female black_not_hispanic_or_latino \n", + "404087 52.00 female hispanic_or_latino_origin \n", + "213866 34.00 male white_not_hispanic_or_latino \n", + "403783 88.00 male hispanic_or_latino_origin \n", + "\n", + " trip_taker_employment_status trip_taker_wfh \\\n", + "257013 employed in_person \n", + "366384 employed in_person \n", + "404087 not_in_labor_force unemployed_under_16_not_in_labor_force \n", + "213866 employed in_person \n", + "403783 not_in_labor_force 
unemployed_under_16_not_in_labor_force \n", + "\n", + " trip_taker_individual_income trip_taker_commute_mode \\\n", + "257013 59,218.00 private_auto \n", + "366384 94,986.00 private_auto \n", + "404087 0.00 other_travel_mode \n", + "213866 129,138.00 private_auto \n", + "403783 26,495.00 other_travel_mode \n", + "\n", + " trip_taker_household_size trip_taker_household_income \\\n", + "257013 3.00 184,761.00 \n", + "366384 2.00 94,986.00 \n", + "404087 13.00 115,058.00 \n", + "213866 2.00 129,138.00 \n", + "403783 1.00 26,495.00 \n", + "\n", + " trip_taker_available_vehicles trip_taker_resident_type \\\n", + "257013 two core \n", + "366384 three_plus core \n", + "404087 three_plus core \n", + "213866 two core \n", + "403783 zero core \n", + "\n", + " trip_taker_industry trip_taker_building_type \\\n", + "257013 naics23 several_units \n", + "366384 naics61 single_family \n", + "404087 not_working single_family \n", + "213866 naics622110 single_family \n", + "403783 not_working single_family \n", + "\n", + " trip_taker_school_grade_attending trip_taker_education \\\n", + "257013 not_attending_school some_college \n", + "366384 not_attending_school advanced_degree \n", + "404087 not_attending_school k_12 \n", + "213866 not_attending_school advanced_degree \n", + "403783 not_attending_school k_12 \n", + "\n", + " trip_taker_tenure trip_taker_language \\\n", + "257013 renter english \n", + "366384 owner english \n", + "404087 owner spanish \n", + "213866 renter indo_european \n", + "403783 renter spanish \n", + "\n", + " trip_taker_home_bgrp_2020 trip_taker_home_trct_2020 \\\n", + "257013 1 (Tract 1281.02, Los Angeles, CA) 1281.02 (Los Angeles, CA) \n", + "366384 2 (Tract 2622, Los Angeles, CA) 2622 (Los Angeles, CA) \n", + "404087 2 (Tract 1041.03, Los Angeles, CA) 1041.03 (Los Angeles, CA) \n", + "213866 1 (Tract 1375.01, Los Angeles, CA) 1375.01 (Los Angeles, CA) \n", + "403783 2 (Tract 1066.04, Los Angeles, CA) 1066.04 (Los Angeles, CA) \n", + "\n", + " 
trip_taker_home_cty_2020 trip_taker_home_st_2020 \\\n", + "257013 Los Angeles County, CA California \n", + "366384 Los Angeles County, CA California \n", + "404087 Los Angeles County, CA California \n", + "213866 Los Angeles County, CA California \n", + "403783 Los Angeles County, CA California \n", + "\n", + " trip_taker_work_bgrp_2020 \\\n", + "257013 1 (Tract 2171.01, Los Angeles, CA) \n", + "366384 2 (Tract 1397.01, Los Angeles, CA) \n", + "404087 Does not have work/school location \n", + "213866 3 (Tract 7014.02, Los Angeles, CA) \n", + "403783 Does not have work/school location \n", + "\n", + " trip_taker_work_trct_2020 \\\n", + "257013 2171.01 (Los Angeles, CA) \n", + "366384 1397.01 (Los Angeles, CA) \n", + "404087 Does not have work/school location \n", + "213866 7014.02 (Los Angeles, CA) \n", + "403783 Does not have work/school location \n", + "\n", + " trip_taker_work_cty_2020 trip_taker_work_st_2020 \n", + "257013 Los Angeles County, CA California \n", + "366384 Los Angeles County, CA California \n", + "404087 Does not have work/school location Does not have work/school location \n", + "213866 Los Angeles County, CA California \n", + "403783 Does not have work/school location Does not have work/school location " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips.sample(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2199852-9b6e-46ca-86fd-abe71148d13f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "75945ef2-eab7-469a-baad-ee563d70c309", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Columns in Replica Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Columns in Replica Trips Data

\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1f3e4cbb-d211-40b1-b4bb-2c7cb43b33a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['activity_id', 'origin_bgrp_2020', 'origin_trct_2020',\n", + " 'origin_cty_2020', 'origin_st_2020', 'destination_bgrp_2020',\n", + " 'destination_trct_2020', 'destination_cty_2020', 'destination_st_2020',\n", + " 'primary_mode', 'trip_purpose', 'previous_trip_purpose',\n", + " 'trip_start_time', 'trip_end_time', 'trip_duration_minutes',\n", + " 'trip_distance_miles', 'vehicle_type', 'vehicle_fuel_type',\n", + " 'transit_submode', 'transit_agency', 'transit_route', 'origin_land_use',\n", + " 'origin_building_use', 'destination_land_use',\n", + " 'destination_building_use', 'trip_taker_person_id',\n", + " 'trip_taker_household_id', 'trip_taker_age', 'trip_taker_sex',\n", + " 'trip_taker_race_ethnicity', 'trip_taker_employment_status',\n", + " 'trip_taker_wfh', 'trip_taker_individual_income',\n", + " 'trip_taker_commute_mode', 'trip_taker_household_size',\n", + " 'trip_taker_household_income', 'trip_taker_available_vehicles',\n", + " 'trip_taker_resident_type', 'trip_taker_industry',\n", + " 'trip_taker_building_type', 'trip_taker_school_grade_attending',\n", + " 'trip_taker_education', 'trip_taker_tenure', 'trip_taker_language',\n", + " 'trip_taker_home_bgrp_2020', 'trip_taker_home_trct_2020',\n", + " 'trip_taker_home_cty_2020', 'trip_taker_home_st_2020',\n", + " 'trip_taker_work_bgrp_2020', 'trip_taker_work_trct_2020',\n", + " 'trip_taker_work_cty_2020', 'trip_taker_work_st_2020'],\n", + " dtype='object')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e60fb5c7-a04b-48f3-8282-ca98a54dadd2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "92f9a5f4-08f1-43fc-ae01-c6bd404891d6", + 
"metadata": { + "tags": [] + }, + "source": [ + "#### Data Exploration: Replica Trips\n", + "* Summarizing the data that we exported from Replica\n", + "* Existing visualizations in replica: \n", + " * Primary Mode\n", + " * Trip Purpose\n", + " * Starting hour \n", + " * Trip Duration/Distance\n", + " * Origin Destination\n", + " * Vehicle Fuel type\n", + " * Transit Routes/Stops/Sub mode/Agency\n", + " * Household Income\n", + " * Race and Ethnicity\n", + " * Private Auto Availability\n", + " * Age\n", + " * Employment/School Status\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c191620c-cef4-446a-bf6b-e17222f47ab6", + "metadata": {}, + "outputs": [], + "source": [ + "# def basic_bar_chart(df, x_col, y_col, color_col):\n", + "\n", + "# chart = (alt.Chart(df)\n", + "# .mark_bar()\n", + "# .encode(\n", + "# x=alt.X(x_col, title=labeling(x_col)),\n", + "# y=alt.Y(y_col, title=labeling(y_col)),\n", + "# color = (alt.Color(color_col,\n", + "# scale=alt.Scale(\n", + "# range=cp.CALITP_CATEGORY_BRIGHT_COLORS),\n", + "# legend=alt.Legend(title=(labeling(color_col)), symbolLimit=10)\n", + "# )),\n", + "# tooltip=[alt.Tooltip(x_col, title=labeling(x_col)),\n", + "# alt.Tooltip(y_col, title=labeling(y_col))]\n", + "# )\n", + "# )\n", + "\n", + "# chart=styleguide.preset_chart_config(chart)\n", + "# chart = add_tooltip(chart, labeling(x_col), labeling(y_col))\n", + "# return chart" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d7f3b44a-cd77-42e9-b0d8-7b9d228b37d5", + "metadata": {}, + "outputs": [], + "source": [ + "trips = r_trips>>group_by(_.primary_mode)>>summarize(avg_trip_time = _.trip_duration_minutes.mean(),\n", + " number_trips = _.activity_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3c77c803-9ecb-4cb4-9d1d-d7e2fa5d4074", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 5 entries, 0 to 4\n", + 
"Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 primary_mode 5 non-null object \n", + " 1 avg_trip_time 5 non-null float64\n", + " 2 number_trips 5 non-null int64 \n", + "dtypes: float64(1), int64(1), object(1)\n", + "memory usage: 248.0+ bytes\n" + ] + } + ], + "source": [ + "trips.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "05893215-e993-44dc-9c4b-1aa7d94815cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(trips)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\"),\n", + " y=alt.Y(\"avg_trip_time\"),\n", + " color=alt.Color(\"avg_trip_time\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)\n", + " ), tooltip=trips.columns.tolist())\n", + " .properties(title = \"Average Trip Time\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "65681a29-7f1b-42ed-9e0e-371a84a29cbd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(trips)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\"),\n", + " y=alt.Y(\"number_trips\"),\n", + " color=alt.Color(\"number_trips\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,)\n", + " ), tooltip=trips.columns.tolist())\n", + " .properties(title=\"Number of Trips\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0d60fe54-dc88-4570-8610-125b37b5917d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Length of Trips dataframe

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Length of Trips dataframe

\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "4f98abae-ae6b-4b4e-9427-b606a557582f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "466756" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(r_trips)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "55c6ff06-afed-45dc-b764-93188c2b9958", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Number of unique activity ids in data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Number of unique activity ids in data

\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "27c00872-5880-4464-84a5-e91423ce7895", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
number_trips
0466756
\n", + "
" + ], + "text/plain": [ + " number_trips\n", + "0 466756" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>summarize(number_trips = _.activity_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a20cbce8-e354-4aa3-99db-666a13576b5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Number of unique trip taker ids

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Number of unique trip taker ids

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "2def0aaf-9182-4393-8049-2dfb95749585", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
number_trips
0281798
\n", + "
" + ], + "text/plain": [ + " number_trips\n", + "0 281798" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>summarize(number_trips = _.trip_taker_person_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "47fecd22-8276-48b8-9c74-d3a94906bbae", + "metadata": {}, + "outputs": [], + "source": [ + "race_ethnicity_by_mode = r_trips>>group_by(_.primary_mode)>>count(_.trip_taker_race_ethnicity)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "4cbeb867-916b-48aa-9949-d769fcd5af5a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Trip Taker Race and Ethnicity by Mode

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Trip Taker Race and Ethnicity by Mode

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "5e895499-2142-432d-8402-c035f81326b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(race_ethnicity_by_mode)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\"),\n", + " y=alt.Y(\"n\"),\n", + " color=alt.Color(\"trip_taker_race_ethnicity\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=race_ethnicity_by_mode[\"trip_taker_race_ethnicity\"].unique().tolist())\n", + " ),\n", + " tooltip=race_ethnicity_by_mode.columns.tolist())\n", + " \n", + " .properties(\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da3bbe59-9b06-471f-beb1-4762b8939600", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "500ad806-51ed-47e9-88a5-fc8a41edd7aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Transit Mode Splits

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Transit Mode Splits

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "d8cd6354-0433-4134-ad6e-9df92122cbd2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"Looking at transit mode, need to alter the transit_agency col and transit_submode to get a better count.\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "261d7f48-f7c3-4ea7-a526-6f813ca16ede", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_agency_count = (r_trips\n", + "# >>filter(_.primary_mode==\"public_transit\")\n", + "# >>group_by(_.primary_mode, _.transit_submode, _.transit_agency)\n", + "# >>summarize(n =_.activity_id.nunique())\n", + "# >>arrange(-_.n))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e4516beb-8277-4fd4-aed1-23af65fc4a28", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_agency_count" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "727511b8-3141-40e4-b654-30aaae8200d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Most common transit mode combinations

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Most common transit mode combinations

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3c31371-899f-4474-85b6-86e78e89209b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "657ff2f3-0d0d-419d-b3d5-512212897fb1", + "metadata": {}, + "outputs": [], + "source": [ + "# ptt_modes.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "80daf7a1-b947-469c-b3b0-09d0302714b7", + "metadata": {}, + "outputs": [], + "source": [ + "##### unnesting the transit submode and agencies to get counts. " + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "edd81120-9756-4edb-b713-1586a9fd5021", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
activity_idorigin_bgrp_2020origin_trct_2020origin_cty_2020origin_st_2020destination_bgrp_2020destination_trct_2020destination_cty_2020destination_st_2020primary_modetrip_purposeprevious_trip_purposetrip_start_timetrip_end_timetrip_duration_minutestrip_distance_milesvehicle_typevehicle_fuel_typetransit_submodetransit_agencytransit_routeorigin_land_useorigin_building_usedestination_land_usedestination_building_usetrip_taker_person_idtrip_taker_household_idtrip_taker_agetrip_taker_sextrip_taker_race_ethnicitytrip_taker_employment_statustrip_taker_wfhtrip_taker_individual_incometrip_taker_commute_modetrip_taker_household_sizetrip_taker_household_incometrip_taker_available_vehiclestrip_taker_resident_typetrip_taker_industrytrip_taker_building_typetrip_taker_school_grade_attendingtrip_taker_educationtrip_taker_tenuretrip_taker_languagetrip_taker_home_bgrp_2020trip_taker_home_trct_2020trip_taker_home_cty_2020trip_taker_home_st_2020trip_taker_work_bgrp_2020trip_taker_work_trct_2020trip_taker_work_cty_2020trip_taker_work_st_2020
6189877069523497630268752 (Tract 1111, Los Angeles, CA)1111 (Los Angeles, CA)Los Angeles County, CACalifornia1 (Tract 2060.51, Los Angeles, CA)2060.51 (Los Angeles, CA)Los Angeles County, CACaliforniapublic_transitworkhome05:35:0007:53:1413829.80unknown_vehicle_typeunknown_fuel_typebus, busLADOTMVN, Metro - Los AngelesCE573, Metro Rapid Linesingle_familysingle_familyretailretail159828405783510681331624396725950587986548.00malehispanic_or_latino_originemployedin_person46,903.00private_auto5.0088,596.00zerocorenaics56single_familynot_attending_schoolhigh_schoolrenterspanish2 (Tract 1111, Los Angeles, CA)1111 (Los Angeles, CA)Los Angeles County, CACalifornia1 (Tract 2060.51, Los Angeles, CA)2060.51 (Los Angeles, CA)Los Angeles County, CACalifornia
\n", + "
" + ], + "text/plain": [ + " activity_id origin_bgrp_2020 \\\n", + "61898 7706952349763026875 2 (Tract 1111, Los Angeles, CA) \n", + "\n", + " origin_trct_2020 origin_cty_2020 origin_st_2020 \\\n", + "61898 1111 (Los Angeles, CA) Los Angeles County, CA California \n", + "\n", + " destination_bgrp_2020 destination_trct_2020 \\\n", + "61898 1 (Tract 2060.51, Los Angeles, CA) 2060.51 (Los Angeles, CA) \n", + "\n", + " destination_cty_2020 destination_st_2020 primary_mode \\\n", + "61898 Los Angeles County, CA California public_transit \n", + "\n", + " trip_purpose previous_trip_purpose trip_start_time trip_end_time \\\n", + "61898 work home 05:35:00 07:53:14 \n", + "\n", + " trip_duration_minutes trip_distance_miles vehicle_type \\\n", + "61898 138 29.80 unknown_vehicle_type \n", + "\n", + " vehicle_fuel_type transit_submode transit_agency \\\n", + "61898 unknown_fuel_type bus, bus LADOTMVN, Metro - Los Angeles \n", + "\n", + " transit_route origin_land_use origin_building_use \\\n", + "61898 CE573, Metro Rapid Line single_family single_family \n", + "\n", + " destination_land_use destination_building_use trip_taker_person_id \\\n", + "61898 retail retail 15982840578351068133 \n", + "\n", + " trip_taker_household_id trip_taker_age trip_taker_sex \\\n", + "61898 16243967259505879865 48.00 male \n", + "\n", + " trip_taker_race_ethnicity trip_taker_employment_status trip_taker_wfh \\\n", + "61898 hispanic_or_latino_origin employed in_person \n", + "\n", + " trip_taker_individual_income trip_taker_commute_mode \\\n", + "61898 46,903.00 private_auto \n", + "\n", + " trip_taker_household_size trip_taker_household_income \\\n", + "61898 5.00 88,596.00 \n", + "\n", + " trip_taker_available_vehicles trip_taker_resident_type \\\n", + "61898 zero core \n", + "\n", + " trip_taker_industry trip_taker_building_type \\\n", + "61898 naics56 single_family \n", + "\n", + " trip_taker_school_grade_attending trip_taker_education \\\n", + "61898 not_attending_school high_school \n", + 
"\n", + " trip_taker_tenure trip_taker_language trip_taker_home_bgrp_2020 \\\n", + "61898 renter spanish 2 (Tract 1111, Los Angeles, CA) \n", + "\n", + " trip_taker_home_trct_2020 trip_taker_home_cty_2020 \\\n", + "61898 1111 (Los Angeles, CA) Los Angeles County, CA \n", + "\n", + " trip_taker_home_st_2020 trip_taker_work_bgrp_2020 \\\n", + "61898 California 1 (Tract 2060.51, Los Angeles, CA) \n", + "\n", + " trip_taker_work_trct_2020 trip_taker_work_cty_2020 \\\n", + "61898 2060.51 (Los Angeles, CA) Los Angeles County, CA \n", + "\n", + " trip_taker_work_st_2020 \n", + "61898 California " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(r_trips>>filter(_.primary_mode==\"public_transit\")).sample()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2025391f-537e-432a-a2f4-d2d6ce57af04", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1af1d8ca-813c-44bc-ab93-c35bb11f0ea5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "eb27ab6b-6244-406f-b41a-55503a77521a", + "metadata": {}, + "outputs": [], + "source": [ + "agencies_test, mode_test = _utils.get_tranist_agency_counts(r_trips, \"primary_mode\", \"transit_submode\", \"transit_agency\", \"activity_id\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "92ed5077-3524-4c39-8cf4-bf1b7922da2a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primary_modetransit_submodetransit_agencynagency_countn_modes_taken
15public_transitbus, busMetro - Los Angeles, Metro - Los Angeles50112
57public_transitbus, bus, busMetro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles37513
27public_transitbus, bus, busBig Blue Bus, Metro - Los Angeles, Metro - Los Angeles19223
5public_transitbus, busBig Blue Bus, Metro - Los Angeles17122
1public_transitbusMetro - Los Angeles14511
.....................
118public_transitrail, bus, bus, busAmtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles124
119public_transitrail, bus, bus, railAmtrak, Big Blue Bus, Metro - Los Angeles, Amtrak134
120public_transitrail, bus, light_rail, busAmtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles124
121public_transitsubway, busMetro - Los Angeles, Metro - Los Angeles112
122public_transitsubway, bus, busMetro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles113
\n", + "

123 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " primary_mode transit_submode \\\n", + "15 public_transit bus, bus \n", + "57 public_transit bus, bus, bus \n", + "27 public_transit bus, bus, bus \n", + "5 public_transit bus, bus \n", + "1 public_transit bus \n", + ".. ... ... \n", + "118 public_transit rail, bus, bus, bus \n", + "119 public_transit rail, bus, bus, rail \n", + "120 public_transit rail, bus, light_rail, bus \n", + "121 public_transit subway, bus \n", + "122 public_transit subway, bus, bus \n", + "\n", + " transit_agency \\\n", + "15 Metro - Los Angeles, Metro - Los Angeles \n", + "57 Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n", + "27 Big Blue Bus, Metro - Los Angeles, Metro - Los Angeles \n", + "5 Big Blue Bus, Metro - Los Angeles \n", + "1 Metro - Los Angeles \n", + ".. ... \n", + "118 Amtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n", + "119 Amtrak, Big Blue Bus, Metro - Los Angeles, Amtrak \n", + "120 Amtrak, Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n", + "121 Metro - Los Angeles, Metro - Los Angeles \n", + "122 Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles \n", + "\n", + " n agency_count n_modes_taken \n", + "15 501 1 2 \n", + "57 375 1 3 \n", + "27 192 2 3 \n", + "5 171 2 2 \n", + "1 145 1 1 \n", + ".. ... ... ... \n", + "118 1 2 4 \n", + "119 1 3 4 \n", + "120 1 2 4 \n", + "121 1 1 2 \n", + "122 1 1 3 \n", + "\n", + "[123 rows x 6 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agencies_test" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "0cd5c4f6-116a-41a9-b67c-d4146994ee36", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
transit_submodenn_modes_taken
2bus, bus, bus8553
1bus, bus8422
0bus1631
8bus, light_rail, bus1243
3bus, bus, bus, bus854
12light_rail, bus562
13light_rail, bus, bus243
9bus, light_rail, bus, bus184
15light_rail, light_rail, bus123
6bus, bus, subway73
10bus, light_rail, light_rail, bus74
16rail, bus, bus53
4bus, bus, light_rail23
14light_rail, bus, bus, bus24
17rail, bus, bus, bus24
5bus, bus, light_rail, bus14
7bus, light_rail12
11bus, rail12
18rail, bus, bus, rail14
19rail, bus, light_rail, bus14
20subway, bus12
21subway, bus, bus13
\n", + "
" + ], + "text/plain": [ + " transit_submode n n_modes_taken\n", + "2 bus, bus, bus 855 3\n", + "1 bus, bus 842 2\n", + "0 bus 163 1\n", + "8 bus, light_rail, bus 124 3\n", + "3 bus, bus, bus, bus 85 4\n", + "12 light_rail, bus 56 2\n", + "13 light_rail, bus, bus 24 3\n", + "9 bus, light_rail, bus, bus 18 4\n", + "15 light_rail, light_rail, bus 12 3\n", + "6 bus, bus, subway 7 3\n", + "10 bus, light_rail, light_rail, bus 7 4\n", + "16 rail, bus, bus 5 3\n", + "4 bus, bus, light_rail 2 3\n", + "14 light_rail, bus, bus, bus 2 4\n", + "17 rail, bus, bus, bus 2 4\n", + "5 bus, bus, light_rail, bus 1 4\n", + "7 bus, light_rail 1 2\n", + "11 bus, rail 1 2\n", + "18 rail, bus, bus, rail 1 4\n", + "19 rail, bus, light_rail, bus 1 4\n", + "20 subway, bus 1 2\n", + "21 subway, bus, bus 1 3" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mode_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d93bd0e-f30d-4a53-9077-25818eef0cf7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "cb12d509-d786-458c-894a-193c80666bb6", + "metadata": {}, + "outputs": [], + "source": [ + "## This line of code gets all agencies listed, even if agency is named twice. 
\n", + "## Need code that counts the unique agencies\n", + "#ptt_agency_count['agency_count'] = ptt_agency_count.transit_agency.apply(lambda x: len(x.split(\", \")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b628473-3842-44df-bbe0-9e17a1e250f7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "589d2a23-b528-4de9-b6a0-a10f88a6da5c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "505f47b1-229a-4cd1-90b8-b83b4ccdd6c6", + "metadata": {}, + "outputs": [], + "source": [ + "modes_count = agencies_test>>group_by(_.n_modes_taken)>>summarize(n_trips = _.n.sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "97746912-39f0-4c85-80d3-a00ed47922a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_modes_takenn_trips
01163
12901
231030
34117
\n", + "
" + ], + "text/plain": [ + " n_modes_taken n_trips\n", + "0 1 163\n", + "1 2 901\n", + "2 3 1030\n", + "3 4 117" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "modes_count" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "5353a2e4-762d-4a27-8f32-267c2c55bb90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart((modes_count))\n", + " .mark_bar(size=60)\n", + " .encode(\n", + " x=alt.X(\"n_modes_taken\", title =\"Number of Modes Taken per Trip\"),\n", + " y=alt.Y(\"n_trips\", title = \"Number of Trips\"),\n", + " color=alt.Color(\"n_trips\", title = \"Number of Trips\",\n", + " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n", + " tooltip=modes_count.columns.tolist())\n", + " \n", + " .properties(title = \"How Many Modes are Taken Per Trip\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71d43883-c5fe-4dff-9084-41426c399cec", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68cafc15-d68a-49ea-9e03-48bbcb6513d5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "fb8565b9-989a-459a-bede-433de30252e7", + "metadata": {}, + "outputs": [], + "source": [ + "agency_mode_trips = agencies_test>>group_by(_.agency_count, _.n_modes_taken)>>summarize(ntrips=_.n.sum())>>arrange(-_.ntrips)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "b03dbe0a-a759-4fa9-849b-b31b318f86cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart((agency_mode_trips))\n", + " .mark_circle(size=100)\n", + " .encode(\n", + " x=alt.X(\"agency_count\", title =\"Number of Agencies\"),\n", + " y=alt.Y(\"n_modes_taken\", title = \"Number of Modes Taken\"),\n", + " color=alt.Color(\"ntrips\", title = \"Number of Trips\",\n", + " scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS)),\n", + " tooltip=agency_mode_trips.columns.tolist())\n", + " \n", + " .properties(title = \"How Many Modes are Taken Per Trip\",\n", + " width=800,\n", + " height=300)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91273d26-1fd1-42a1-b829-a77398c606fc", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "6bd200d7-c0b9-4649-adf1-e025d216328b", + "metadata": {}, + "outputs": [], + "source": [ + "##### Getting columns for each agency and counts " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebb28116-cc54-43ce-ac77-9dc9e798d58a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c39646f-7503-4c96-a594-5bbe5960d9f0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e8f5f457-ce9f-4bfb-99f6-82e6eb7d11f0", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# ptt_agency_count['transit_agency'].unique().tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c1af5bd-c08e-472e-9b71-aac3c29ceb71", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "3c399936-bc62-491c-b71d-89a861e02fc7", + "metadata": {}, + "outputs": [], + "source": [ + "agency_list = 
_utils.get_list_of_agencies(agencies_test, \"transit_agency\")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "416d16f0-3128-4857-9136-69f0403ce5cc", + "metadata": {}, + "outputs": [], + "source": [ + "# len(agency_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "f303acae-da1b-44a5-98b2-2f8f5b98f56a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Agencies Identified in Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'Amtrak',\n", + " 'Avta',\n", + " 'Beach Cities Transit-City Of Redondo Beach',\n", + " 'Big Blue Bus',\n", + " 'Culver Citybus',\n", + " 'Foothill Transit',\n", + " 'Gtrans',\n", + " 'La Go Bus',\n", + " 'Ladot126',\n", + " 'Ladotdt',\n", + " 'Ladotmvn',\n", + " 'Ladotmvs',\n", + " 'Long Beach Transit',\n", + " 'Metro - Los Angeles',\n", + " 'Metrolink Trains',\n", + " 'Santa Clarita Transit'}" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"

Agencies Identified in Trips Data

\")) \n", + "\n", + "(agency_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b121b63-083b-4898-9eb1-cb919698d9ca", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34eada82-1f1c-4411-8dd4-3bb71b0ddb16", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "71af6e25-52b0-45e9-ba2b-a00a670e3010", + "metadata": {}, + "outputs": [], + "source": [ + "### making a copy of the ptt_agency_count\n", + "# df = r_trips.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bdc0792-219c-4877-880c-cd7d84db2303", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2de92f7b-2e28-4388-880f-9e34b8b603ea", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "89479d3a-eeb5-491a-a064-49a77194109e", + "metadata": {}, + "source": [ + "##### Trying ChatGPT approach" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "bd6f46a5-404f-41eb-9fd8-4600451e6d20", + "metadata": {}, + "outputs": [], + "source": [ + "# agency_indv_count = (df>>filter(_.primary_mode==\"public_transit\")>>select(_.transit_agency))\n", + "# agency_indv_count['transit_agency'] = agency_indv_count['transit_agency'].astype(str)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "33c93185-2e76-4257-840e-c7634f3cae05", + "metadata": {}, + "outputs": [], + "source": [ + "# agency_indv_count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a50d438-a9c1-4ade-8a38-0e17227dac09", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "045be8fb-d8fd-4019-8282-df53602b122b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": 
"efc4eb07-c0f9-4bc2-87bb-cac95f606657", + "metadata": {}, + "outputs": [], + "source": [ + "df = _utils.get_dummies_by_agency(agencies_test, \"transit_agency\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca4b0345-f196-40cc-8b26-f42abb58f199", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "97bc34c4-636f-46ca-a257-95e4af242584", + "metadata": {}, + "outputs": [], + "source": [ + "##### Identifying trips with one agency" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "46856b31-47ac-408c-903e-19d8b04283f7", + "metadata": {}, + "outputs": [], + "source": [ + "cols_to_keep = [\"transit_submode\",\"unique_agencies\", \"n\",\"n_modes_taken\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "2603e7e0-389a-49fb-93e9-5dc1d52c6012", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# chart = (alt.Chart(df)\n", + "# .mark_circle(size=100)\n", + "# .encode(\n", + "# x=alt.X(\"n_modes_taken\", title=\"Number of Modes taken\"),\n", + "# y=alt.Y(\"n\", title=\"Number of Trips\"),\n", + "# color = alt.Color(\"agency_count\", title=\"Number of Unique Agencies\",\n", + "# scale=alt.Scale(\n", + "# range=cp.CALITP_DIVERGING_COLORS,\n", + "# domain=df[\"agency_count\"].unique().tolist())\n", + "# ),\n", + "# tooltip=cols_to_keep)\n", + "# .properties(title = (\"Transit Trips Agency Breakdown\"), width=500,\n", + "# height=300)\n", + "# )\n", + "# chart " + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "149ed63c-436c-45fe-a526-5b1bf9f3d6fb", + "metadata": {}, + "outputs": [], + "source": [ + "# (df>>filter(_.agency_count==1)>>arrange(-_.n))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d400fa0e-2bb1-4d8a-9a63-6865afe33897", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "7228c540-3033-4023-8145-91f8aee23eeb", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (alt.Chart((df>>filter(_.agency_count==1)))\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"unique_agencies\", title=\"Agency\"),\n", + " y=alt.Y(\"n\", title=\"Number of Modes taken\"),\n", + " color = alt.Color(\"n_modes_taken\", title=\"Number of Trips\",\n", + " scale=alt.Scale(\n", + " range=cp.CALITP_SEQUENTIAL_COLORS,)),\n", + " tooltip=cols_to_keep)\n", + " .properties(title = (\"Transit Trips With Only One Agency\"), width=800,\n", + " height=500)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "60df338f-801f-49a1-a26f-0be5e03bddf3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Most Common Agency Combination

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
transit_agency
15Metro - Los Angeles, Metro - Los Angeles
57Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles
27Big Blue Bus, Metro - Los Angeles, Metro - Los Angeles
5Big Blue Bus, Metro - Los Angeles
1Metro - Los Angeles
\n", + "
" + ], + "text/plain": [ + " transit_agency\n", + "15 Metro - Los Angeles, Metro - Los Angeles\n", + "57 Metro - Los Angeles, Metro - Los Angeles, Metro - Los Angeles\n", + "27 Big Blue Bus, Metro - Los Angeles, Metro - Los Angeles\n", + "5 Big Blue Bus, Metro - Los Angeles\n", + "1 Metro - Los Angeles" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"

Most Common Agency Combination

\")) \n", + "\n", + "(df>>arrange(-_.n)>>select(_.transit_agency)).head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "786394ef-be26-4e0d-94e7-35386d6d67c0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd43ab95-0391-407b-b6b0-185034ba9528", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "febd910a-2f1d-4b99-bb3b-76adb11f0b63", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "96b794f2-bcfd-4afb-9ce5-8793c15fe199", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jovyan/data-analyses/sb125_analyses/corridor_study/_utils.py:102: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n", + " df_agencies['n_trips'] = df_agencies[list(df_agencies.columns)].sum(axis=1)\n" + ] + } + ], + "source": [ + "df_agencies = _utils.get_agencies_occurances(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "80cbffde-6686-4921-b543-84e52f97a0c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agencyn_trips
0Foothill Transit1
1LADOTMVS4
2LADOTMVN54
3AVTA4
4Beach Cities Transit-City of Redondo Beach2
5Metro - Los Angeles195
6Long Beach Transit1
7LADOT1264
8Santa Clarita Transit28
9Metrolink Trains1
10LADOTDT3
11Culver CityBus28
12GTrans1
13Amtrak8
14LA Go Bus1
15Big Blue Bus37
\n", + "
" + ], + "text/plain": [ + " agency n_trips\n", + "0 Foothill Transit 1\n", + "1 LADOTMVS 4\n", + "2 LADOTMVN 54\n", + "3 AVTA 4\n", + "4 Beach Cities Transit-City of Redondo Beach 2\n", + "5 Metro - Los Angeles 195\n", + "6 Long Beach Transit 1\n", + "7 LADOT126 4\n", + "8 Santa Clarita Transit 28\n", + "9 Metrolink Trains 1\n", + "10 LADOTDT 3\n", + "11 Culver CityBus 28\n", + "12 GTrans 1\n", + "13 Amtrak 8\n", + "14 LA Go Bus 1\n", + "15 Big Blue Bus 37" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_agencies" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "f98c76b9-b83f-42c3-aa12-702ade72ac38", + "metadata": {}, + "outputs": [], + "source": [ + "tooltip_cols = [\"agency\", \"n_trips\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "bf160b81-1bc2-4374-9529-6400252d5e4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (alt.Chart(df_agencies)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"agency\", title = \"Agency Name\"),\n", + " y=alt.Y(\"n_trips\", title= \"Number of boardings reported for trips (One person taking two LA Metro trips will count as 2)\"),\n", + " color=alt.Color(\"n_trips\", scale=alt.Scale(range = cp.CALITP_SEQUENTIAL_COLORS)),\n", + " tooltip = tooltip_cols)\n", + " .properties(title = \"Number of Times an Agency was used for Trip Taking\",\n", + " width=800,\n", + " height=500)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a59f170f-b2e0-41b3-ae7a-1ae4eca12596", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "16b45917-adc3-44d0-8d76-15ccd4083d26", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Trips by Resident Type

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Trips by Resident Type

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "5163926e-82bd-4c25-9486-45229d09a0d1", + "metadata": {}, + "outputs": [], + "source": [ + "trip_by_res_type = (r_trips\n", + " >>group_by(_.primary_mode,_.trip_taker_resident_type)\n", + " >>summarize(number_trips = _.activity_id.nunique())\n", + " >>arrange(_.primary_mode))" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "636e7771-0ba7-42e9-af1d-e3e998ff8599", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primary_modetrip_taker_resident_typenumber_trips
0auto_passengercore116685
1auto_passengerdonut2
2auto_passengervisitor28818
3commercialNaN10154
4on_demand_autocore3191
5on_demand_autovisitor595
6private_autocore294704
7private_autodonut1
8private_autovisitor10394
9private_autoNaN1
10public_transitcore2155
11public_transitvisitor56
\n", + "
" + ], + "text/plain": [ + " primary_mode trip_taker_resident_type number_trips\n", + "0 auto_passenger core 116685\n", + "1 auto_passenger donut 2\n", + "2 auto_passenger visitor 28818\n", + "3 commercial NaN 10154\n", + "4 on_demand_auto core 3191\n", + "5 on_demand_auto visitor 595\n", + "6 private_auto core 294704\n", + "7 private_auto donut 1\n", + "8 private_auto visitor 10394\n", + "9 private_auto NaN 1\n", + "10 public_transit core 2155\n", + "11 public_transit visitor 56" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trip_by_res_type" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "90c5d683-91c5-4663-accd-4e45027a3e49", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart = (\n", + " alt.Chart(trip_by_res_type)\n", + " .mark_bar()\n", + " .encode(\n", + " x=alt.X(\"primary_mode\", title = \"Mode\"),\n", + " y=alt.Y(\"number_trips\", title = \"Number of Trips\"),\n", + " color=alt.Color(\"trip_taker_resident_type\", scale=alt.Scale(range = cp.CALITP_DIVERGING_COLORS,\n", + " domain=trip_by_res_type[\"trip_taker_resident_type\"].unique().tolist())\n", + " ),\n", + " tooltip=trip_by_res_type.columns.tolist())\n", + " .properties(title = \"Trips by Resident Type\",\n", + " width=800,\n", + " height=500)\n", + " )\n", + "chart " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7b51fbb-8b87-4863-bcd9-50ed5047d7d6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "878c5c8e-18c3-456a-b641-67ad010f5101", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Unique Household Ids

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Unique Household Ids

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "2a31bf27-ccc4-4ce3-8b6a-31dbd14caaf2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_taker_household_idn
16079033634
267303NaN16384
79436914681705355861243316
23626270997282611727185215
30303348816763554965846314
.........
26729699991308544966717651
2672999999309614445944191
26730099995343586234221581
26730199998863480992582371
26730299999027557241477141
\n", + "

267304 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " trip_taker_household_id n\n", + "160790 33634\n", + "267303 NaN 16384\n", + "79436 9146817053558612433 16\n", + "23626 2709972826117271852 15\n", + "30303 3488167635549658463 14\n", + "... ... ...\n", + "267296 9999130854496671765 1\n", + "267299 999930961444594419 1\n", + "267300 9999534358623422158 1\n", + "267301 9999886348099258237 1\n", + "267302 9999902755724147714 1\n", + "\n", + "[267304 rows x 2 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>count(_.trip_taker_household_id)>>arrange(-_.n)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "eb9780a3-b996-45af-9cc3-b41629ec0e55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Checking one household id

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Checking one household id

\")) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "6fc3be54-b8dc-4cf8-893e-565ffa02934d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
activity_idorigin_bgrp_2020origin_trct_2020origin_cty_2020origin_st_2020destination_bgrp_2020destination_trct_2020destination_cty_2020destination_st_2020primary_modetrip_purposeprevious_trip_purposetrip_start_timetrip_end_timetrip_duration_minutestrip_distance_milesvehicle_typevehicle_fuel_typetransit_submodetransit_agencytransit_routeorigin_land_useorigin_building_usedestination_land_usedestination_building_usetrip_taker_person_idtrip_taker_household_idtrip_taker_agetrip_taker_sextrip_taker_race_ethnicitytrip_taker_employment_statustrip_taker_wfhtrip_taker_individual_incometrip_taker_commute_modetrip_taker_household_sizetrip_taker_household_incometrip_taker_available_vehiclestrip_taker_resident_typetrip_taker_industrytrip_taker_building_typetrip_taker_school_grade_attendingtrip_taker_educationtrip_taker_tenuretrip_taker_languagetrip_taker_home_bgrp_2020trip_taker_home_trct_2020trip_taker_home_cty_2020trip_taker_home_st_2020trip_taker_work_bgrp_2020trip_taker_work_trct_2020trip_taker_work_cty_2020trip_taker_work_st_2020
304389117970670008846766731 (Tract 7010, Los Angeles, CA)7010 (Los Angeles, CA)Los Angeles County, CACalifornia1 (Tract 2623.01, Los Angeles, CA)2623.01 (Los Angeles, CA)Los Angeles County, CACaliforniaauto_passengerhomework18:33:0018:55:472210.00unknown_vehicle_typeunknown_fuel_typeNaNNaNNaNofficeofficesingle_familysingle_family8423484308586815884914681705355861243332.00femaleasian_not_hispanic_or_latinoemployedin_person17,261.00private_auto11.00318,660.00three_pluscorenaics812199single_familynot_attending_schoolsome_collegeownerasian_pacific1 (Tract 2623.01, Los Angeles, CA)2623.01 (Los Angeles, CA)Los Angeles County, CACalifornia1 (Tract 7010, Los Angeles, CA)7010 (Los Angeles, CA)Los Angeles County, CACalifornia
\n", + "
" + ], + "text/plain": [ + " activity_id origin_bgrp_2020 \\\n", + "304389 11797067000884676673 1 (Tract 7010, Los Angeles, CA) \n", + "\n", + " origin_trct_2020 origin_cty_2020 origin_st_2020 \\\n", + "304389 7010 (Los Angeles, CA) Los Angeles County, CA California \n", + "\n", + " destination_bgrp_2020 destination_trct_2020 \\\n", + "304389 1 (Tract 2623.01, Los Angeles, CA) 2623.01 (Los Angeles, CA) \n", + "\n", + " destination_cty_2020 destination_st_2020 primary_mode \\\n", + "304389 Los Angeles County, CA California auto_passenger \n", + "\n", + " trip_purpose previous_trip_purpose trip_start_time trip_end_time \\\n", + "304389 home work 18:33:00 18:55:47 \n", + "\n", + " trip_duration_minutes trip_distance_miles vehicle_type \\\n", + "304389 22 10.00 unknown_vehicle_type \n", + "\n", + " vehicle_fuel_type transit_submode transit_agency transit_route \\\n", + "304389 unknown_fuel_type NaN NaN NaN \n", + "\n", + " origin_land_use origin_building_use destination_land_use \\\n", + "304389 office office single_family \n", + "\n", + " destination_building_use trip_taker_person_id trip_taker_household_id \\\n", + "304389 single_family 8423484308586815884 9146817053558612433 \n", + "\n", + " trip_taker_age trip_taker_sex trip_taker_race_ethnicity \\\n", + "304389 32.00 female asian_not_hispanic_or_latino \n", + "\n", + " trip_taker_employment_status trip_taker_wfh \\\n", + "304389 employed in_person \n", + "\n", + " trip_taker_individual_income trip_taker_commute_mode \\\n", + "304389 17,261.00 private_auto \n", + "\n", + " trip_taker_household_size trip_taker_household_income \\\n", + "304389 11.00 318,660.00 \n", + "\n", + " trip_taker_available_vehicles trip_taker_resident_type \\\n", + "304389 three_plus core \n", + "\n", + " trip_taker_industry trip_taker_building_type \\\n", + "304389 naics812199 single_family \n", + "\n", + " trip_taker_school_grade_attending trip_taker_education \\\n", + "304389 not_attending_school some_college \n", + "\n", + " 
trip_taker_tenure trip_taker_language \\\n", + "304389 owner asian_pacific \n", + "\n", + " trip_taker_home_bgrp_2020 trip_taker_home_trct_2020 \\\n", + "304389 1 (Tract 2623.01, Los Angeles, CA) 2623.01 (Los Angeles, CA) \n", + "\n", + " trip_taker_home_cty_2020 trip_taker_home_st_2020 \\\n", + "304389 Los Angeles County, CA California \n", + "\n", + " trip_taker_work_bgrp_2020 trip_taker_work_trct_2020 \\\n", + "304389 1 (Tract 7010, Los Angeles, CA) 7010 (Los Angeles, CA) \n", + "\n", + " trip_taker_work_cty_2020 trip_taker_work_st_2020 \n", + "304389 Los Angeles County, CA California " + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(len(r_trips>>filter(_.trip_taker_household_id == 9146817053558612433)))\n", + "(r_trips>>filter(_.trip_taker_household_id == 9146817053558612433)).sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "4eccfdf8-429e-43df-899e-f7d77e051831", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_taker_person_idtrip_taker_agetrip_taker_sextrip_taker_household_sizen
0450801695230097936725.00female11.005
1473639835695628221238.00female11.002
2842348430858681588432.00female11.002
31239501264766598930521.00female11.001
41467359267154790875118.00female11.004
51654563495774756539849.00female11.001
61668411545259606681653.00male11.001
\n", + "
" + ], + "text/plain": [ + " trip_taker_person_id trip_taker_age trip_taker_sex \\\n", + "0 4508016952300979367 25.00 female \n", + "1 4736398356956282212 38.00 female \n", + "2 8423484308586815884 32.00 female \n", + "3 12395012647665989305 21.00 female \n", + "4 14673592671547908751 18.00 female \n", + "5 16545634957747565398 49.00 female \n", + "6 16684115452596066816 53.00 male \n", + "\n", + " trip_taker_household_size n \n", + "0 11.00 5 \n", + "1 11.00 2 \n", + "2 11.00 2 \n", + "3 11.00 1 \n", + "4 11.00 4 \n", + "5 11.00 1 \n", + "6 11.00 1 " + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## checking one household id\n", + "r_trips>>filter(_.trip_taker_household_id == 9146817053558612433)>>count(_.trip_taker_person_id, _.trip_taker_age,\n", + " _.trip_taker_sex, _.trip_taker_household_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c929c77d-0173-4a7d-be49-762349daf5cb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab226b47-4366-4fb9-aaae-e0aca760f9b3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "1483249f-602c-4805-bcc4-d55e018022ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Number of Travelers by Resident Type: Replica Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(\"

Number of Travelers by Resident Type: Replica Trips Data

\")) " + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "5c1894f0-bd42-4014-89d4-6e8d43809319", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_taker_resident_type_unique_ids
0core246597
1donut2
2visitor35198
3NaN1
\n", + "
" + ], + "text/plain": [ + " trip_taker_resident_type _unique_ids\n", + "0 core 246597\n", + "1 donut 2\n", + "2 visitor 35198\n", + "3 NaN 1" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r_trips>>group_by(_.trip_taker_resident_type)>>summarize(_unique_ids = _.trip_taker_person_id.nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0891b62-c968-4ea0-bd9b-753d299a5054", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "2e4bd058-9c4d-4989-a5fc-db6cf6130bf2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "

Traveler Demographics: Replica Trips Data

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Fitered for Core Residents" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
primary_modetrip_taker_sexn_pplavg_h_incomeavg_p_incomeavg_age
0auto_passengerfemale44266162,899.9057,807.9143.65
1auto_passengermale43836173,439.4687,237.7042.86
2commercialNaN1NaNNaNNaN
3on_demand_autofemale1560181,325.5258,599.1946.58
4on_demand_automale1532181,278.1492,035.7443.23
5private_autofemale90634167,383.1962,989.9243.79
6private_automale99967174,477.1694,470.9343.81
7private_autoNaN1NaNNaNNaN
8public_transitfemale92781,553.4539,058.5742.90
9public_transitmale97997,733.9144,295.9039.82
\n", + "
" + ], + "text/plain": [ + " primary_mode trip_taker_sex n_ppl avg_h_income avg_p_income avg_age\n", + "0 auto_passenger female 44266 162,899.90 57,807.91 43.65\n", + "1 auto_passenger male 43836 173,439.46 87,237.70 42.86\n", + "2 commercial NaN 1 NaN NaN NaN\n", + "3 on_demand_auto female 1560 181,325.52 58,599.19 46.58\n", + "4 on_demand_auto male 1532 181,278.14 92,035.74 43.23\n", + "5 private_auto female 90634 167,383.19 62,989.92 43.79\n", + "6 private_auto male 99967 174,477.16 94,470.93 43.81\n", + "7 private_auto NaN 1 NaN NaN NaN\n", + "8 public_transit female 927 81,553.45 39,058.57 42.90\n", + "9 public_transit male 979 97,733.91 44,295.90 39.82" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(HTML(\"

Traveler Demographics: Replica Trips Data

\")) \n", + "display(HTML(\"Fitered for Core Residents\")) \n", + "(r_trips\n", + " >>filter(_.trip_taker_home_bgrp_2020!=\"Visitor (no home location)\")\n", + " >>group_by(_.primary_mode, _.trip_taker_sex)\n", + " >>summarize(\n", + " n_ppl = _.trip_taker_person_id.nunique(),\n", + " avg_h_income = _.trip_taker_household_income.mean(),\n", + " avg_p_income = _.trip_taker_individual_income.mean(),\n", + " avg_age = _.trip_taker_age.mean())\n", + ")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1dba8b5-7356-441f-8e6c-0f98e0f80c4c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa84cc8c-8b17-41b6-abdf-c8397ef1e706", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "101bbd51-38b9-4791-8119-f0dc3002b6a6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbc819b1-0dfc-46be-90bd-b136263dc22c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}