Skip to content

Commit 5ee0155

Browse files
author
tiffanychu90
committed
shared_utils to calitp_data_analysis for msd_dashboard_metric/
1 parent e56c695 commit 5ee0155

8 files changed

+68
-77
lines changed

msd_dashboard_metric/01_area_population_metrics.ipynb

+20-24
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@
2222
"\n",
2323
"os.environ[\"CALITP_BQ_MAX_BYTES\"] = str(100_000_000_000)\n",
2424
"\n",
25-
"from calitp.tables import tbl\n",
25+
"from calitp_data_analysis.tables import tbls\n",
2626
"from siuba import *\n",
2727
"from IPython.display import Markdown\n",
2828
"\n",
29-
"import shared_utils\n",
3029
"from utils import *"
3130
]
3231
},
@@ -40,22 +39,20 @@
4039
"#create_coverage_data.save_initial_data()\n",
4140
"\n",
4241
"# Read in data from queries\n",
43-
"ca_block_joined = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n",
44-
" 'block_population_joined')\n",
42+
"ca_block_joined = gpd.read_parquet(\n",
43+
" f\"{GCS_FILE_PATH}block_population_joined.parquet\")\n",
4544
"rt_complete = pd.read_parquet(f\"{GCS_FILE_PATH}rt_complete.parquet\")\n",
46-
"all_stops = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n",
47-
" 'all_stops')\n",
48-
"accessible_stops_trips = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n",
49-
" 'accessible_stops_trips')\n",
45+
"all_stops = gpd.read_parquet(f\"{GCS_FILE_PATH}all_stops.parquet\")\n",
46+
"accessible_stops_trips = gpd.read_parquet(\n",
47+
" f\"{GCS_FILE_PATH}accessible_stops_trips.parquet\")\n",
5048
"\n",
5149
"# Read in employment data by tract\n",
5250
"#tract_pop_employ_filtered = create_coverage_data.get_employment_tract_data()\n",
5351
"#shared_utils.utils.geoparquet_gcs_export(tract_pop_employ_filtered, \n",
5452
"# GCS_FILE_PATH, 'tract_pop_employ_filtered')\n",
5553
"\n",
56-
"tract_pop_employ_filtered = shared_utils.utils.download_geoparquet(GCS_FILE_PATH,\n",
57-
" 'tract_pop_employ_filtered'\n",
58-
" )"
54+
"tract_pop_employ_filtered = gpd.read_parquet(\n",
55+
" f\"{GCS_FILE_PATH}tract_pop_employ_filtered.parquet\")"
5956
]
6057
},
6158
{
@@ -98,8 +95,7 @@
9895
"\n",
9996
"for key, value in rename_block_files.items():\n",
10097
" print(key)\n",
101-
" sjoin_blocks[key] = shared_utils.utils.download_geoparquet(\n",
102-
" GCS_FILE_PATH, value)"
98+
" sjoin_blocks[key] = gpd.read_parquet(f\"{GCS_FILE_PATH}{value}.parquet\")"
10399
]
104100
},
105101
{
@@ -256,8 +252,8 @@
256252
}
257253
],
258254
"source": [
259-
"block_level_static = shared_utils.utils.download_geoparquet(\n",
260-
" GCS_FILE_PATH, \"block_level_static\")\n",
255+
"block_level_static = gpd.read_parquet(\n",
256+
" f\"{GCS_FILE_PATH}block_level_static.parquet\")\n",
261257
"\n",
262258
"display(Markdown(f\"### All Stops Static\"))\n",
263259
"\n",
@@ -541,8 +537,8 @@
541537
"\n",
542538
"for t in tract_files:\n",
543539
" print(t)\n",
544-
" sjoin_tracts[t] = shared_utils.utils.download_geoparquet(\n",
545-
" GCS_FILE_PATH, t)"
540+
" sjoin_tracts[t] = gpd.read_parquet(\n",
541+
" f\"{GCS_FILE_PATH}{t}.parquet\")"
546542
]
547543
},
548544
{
@@ -596,8 +592,8 @@
596592
"source": [
597593
"def make_coverage_summary():\n",
598594
" \n",
599-
" tract_df = shared_utils.utils.download_geoparquet(\n",
600-
" GCS_FILE_PATH, \"tract_all_stops\")\n",
595+
" tract_df = gpd.read_parquet(\n",
596+
" f\"{GCS_FILE_PATH}tract_all_stops.parquet\")\n",
601597
" \n",
602598
" ## since employment data is tract-level, only includes tracts < 4 sq km (~60% of jobs)\n",
603599
" employment_summary = (tract_df\n",
@@ -610,11 +606,11 @@
610606
"\n",
611607
" SQ_MI_PER_SQ_M = 3.86e-7\n",
612608
" \n",
613-
" block_level_static = shared_utils.utils.download_geoparquet(\n",
614-
" GCS_FILE_PATH, \"block_level_static\")\n",
609+
" block_level_static = gpd.read_parquet(\n",
610+
" f\"{GCS_FILE_PATH}block_level_static.parquet\")\n",
615611
" \n",
616-
" block_level_accessible = shared_utils.utils.download_geoparquet(\n",
617-
" GCS_FILE_PATH, \"block_level_accessible\")\n",
612+
" block_level_accessible = gpd.read_parquet(\n",
613+
" f\"{GCS_FILE_PATH}block_level_accessible.parquet\")\n",
618614
" \n",
619615
" coverage_summary = (block_level_static\n",
620616
" >> group_by(_.calitp_itp_id)\n",
@@ -688,7 +684,7 @@
688684
"name": "python",
689685
"nbconvert_exporter": "python",
690686
"pygments_lexer": "ipython3",
691-
"version": "3.9.7"
687+
"version": "3.9.13"
692688
}
693689
},
694690
"nbformat": 4,

msd_dashboard_metric/02_coverage_mapping.ipynb

+2-3
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
"\n",
2323
"from siuba import *\n",
2424
"\n",
25-
"from shared_utils import map_utils\n",
26-
"from shared_utils import calitp_color_palette as cp\n",
25+
"from calitp_data_analysis import calitp_color_palette as cp\n",
2726
"from utils import *"
2827
]
2928
},
@@ -515,7 +514,7 @@
515514
"name": "python",
516515
"nbconvert_exporter": "python",
517516
"pygments_lexer": "ipython3",
518-
"version": "3.9.7"
517+
"version": "3.9.13"
519518
}
520519
},
521520
"nbformat": 4,

msd_dashboard_metric/03_accessibility_feeds.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@
4444
"import warehouse_queries\n",
4545
"import create_accessibility_data\n",
4646
"import setup_charts\n",
47-
"from shared_utils import styleguide\n",
48-
"from shared_utils import calitp_color_palette as cp\n",
47+
"from calitp_data_analysis import styleguide\n",
48+
"from calitp_data_analysis import calitp_color_palette as cp\n",
4949
"\n",
5050
"alt.themes.register(\"calitp_theme\", styleguide.calitp_theme)\n",
5151
"# enable\n",
@@ -508,7 +508,7 @@
508508
"name": "python",
509509
"nbconvert_exporter": "python",
510510
"pygments_lexer": "ipython3",
511-
"version": "3.9.7"
511+
"version": "3.9.13"
512512
}
513513
},
514514
"nbformat": 4,

msd_dashboard_metric/06_summary_metrics.ipynb

+11-14
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
"import geopandas as gpd\n",
2121
"\n",
2222
"from siuba import *\n",
23-
"\n",
24-
"import shared_utils\n",
2523
"from utils import *"
2624
]
2725
},
@@ -32,11 +30,11 @@
3230
"metadata": {},
3331
"outputs": [],
3432
"source": [
35-
"ca_block_joined = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n",
36-
" 'block_population_joined')\n",
37-
"tract_pop_employ_filtered = shared_utils.utils.download_geoparquet(GCS_FILE_PATH,\n",
38-
" 'tract_pop_employ_filtered'\n",
39-
" )"
33+
"ca_block_joined = gpd.read_parquet(\n",
34+
" f\"{GCS_FILE_PATH}block_population_joined.parquet\")\n",
35+
"\n",
36+
"tract_pop_employ_filtered = gpd.read_parquet(\n",
37+
" f\"{GCS_FILE_PATH}tract_pop_employ_filtered.parquet\")"
4038
]
4139
},
4240
{
@@ -67,12 +65,11 @@
6765
"\n",
6866
"for key, value in rename_block_files.items():\n",
6967
" print(key)\n",
70-
" sjoin_blocks[key] = shared_utils.utils.download_geoparquet(\n",
71-
" GCS_FILE_PATH, value)\n",
68+
" sjoin_blocks[key] = gpd.read_parquet(f\"{GCS_FILE_PATH}{value}.parquet\")\n",
7269
"\n",
7370
"# This one needs to be read in as df, in a dict, kernel will crash\n",
74-
"block_level_static = shared_utils.utils.download_geoparquet(GCS_FILE_PATH, \n",
75-
" \"block_level_static\")"
71+
"block_level_static = gpd.read_parquet(\n",
72+
" f\"{GCS_FILE_PATH}block_level_static.parquet\")"
7673
]
7774
},
7875
{
@@ -101,8 +98,8 @@
10198
"\n",
10299
"for t in tract_files:\n",
103100
" print(t)\n",
104-
" sjoin_tracts[t] = shared_utils.utils.download_geoparquet(\n",
105-
" GCS_FILE_PATH, t)"
101+
" sjoin_tracts[t] = gpd.read_parquet(\n",
102+
" f\"{GCS_FILE_PATH}{t}.parquet\")"
106103
]
107104
},
108105
{
@@ -282,7 +279,7 @@
282279
"name": "python",
283280
"nbconvert_exporter": "python",
284281
"pygments_lexer": "ipython3",
285-
"version": "3.9.7"
282+
"version": "3.9.13"
286283
}
287284
},
288285
"nbformat": 4,

msd_dashboard_metric/07_fares_v2.ipynb

+5-4
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@
4242
"import create_accessibility_data\n",
4343
"import setup_charts\n",
4444
"import utils\n",
45-
"from shared_utils import geography_utils, styleguide\n",
46-
"from shared_utils import calitp_color_palette as cp\n",
45+
"from calitp_data_analysis import styleguide\n",
46+
"from calitp_data_analysis import calitp_color_palette as cp\n",
47+
"from shared_utils import portfolio_utils\n",
4748
"\n",
4849
"display(Markdown(\n",
4950
" f\"<b>Report updated / data available through: \"\n",
@@ -69,7 +70,7 @@
6970
"metadata": {},
7071
"outputs": [],
7172
"source": [
72-
"feeds_by_date = (geography_utils.aggregate_by_geography(\n",
73+
"feeds_by_date = (portfolio_utils.aggregate_by_geography(\n",
7374
" fares_feeds,\n",
7475
" group_cols = [\"date\"],\n",
7576
" count_cols = [\"feed_key\"]\n",
@@ -200,7 +201,7 @@
200201
"name": "python",
201202
"nbconvert_exporter": "python",
202203
"pygments_lexer": "ipython3",
203-
"version": "3.9.7"
204+
"version": "3.9.13"
204205
}
205206
},
206207
"nbformat": 4,

msd_dashboard_metric/create_coverage_data.py

+19-19
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66

77
os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)
88

9-
from calitp.tables import tbl
9+
from calitp_data_analysis.tables import tbls
1010
from calitp_data_analysis.sql import query_sql
1111
from siuba import *
1212

13-
import utils
14-
import shared_utils
13+
import utils as _utils
14+
from calitp_data_analysis import geography_utils, utils
1515

1616

1717
catalog = intake.open_catalog("./catalog.yml")
@@ -41,7 +41,7 @@ def get_employment_tract_data():
4141
)
4242

4343
tract_pop_employ = tract_pop_employ.to_crs(
44-
shared_utils.geography_utils.CA_NAD83Albers)
44+
geography_utils.CA_NAD83Albers)
4545
tract_pop_employ['area'] = tract_pop_employ.geometry.area
4646

4747

@@ -70,18 +70,18 @@ def get_employment_tract_data():
7070

7171

7272
def save_initial_data():
73-
ca_block_joined = utils.get_ca_block_geo()
74-
shared_utils.utils.geoparquet_gcs_export(ca_block_joined, utils.GCS_FILE_PATH,
73+
ca_block_joined = _utils.get_ca_block_geo()
74+
utils.geoparquet_gcs_export(ca_block_joined, _utils.GCS_FILE_PATH,
7575
'block_population_joined')
7676

77-
all_stops = utils.get_stops_and_trips(filter_accessible = False)
77+
all_stops = _utils.get_stops_and_trips(filter_accessible = False)
7878
all_stops = all_stops.apply(buffer_by_route_type, axis=1)
79-
shared_utils.utils.geoparquet_gcs_export(all_stops, utils.GCS_FILE_PATH,
79+
utils.geoparquet_gcs_export(all_stops, _utils.GCS_FILE_PATH,
8080
'all_stops')
8181

82-
accessible_stops_trips = utils.get_stops_and_trips(filter_accessible = True)
82+
accessible_stops_trips = _utils.get_stops_and_trips(filter_accessible = True)
8383
accessible_stops_trips = accessible_stops_trips.apply(buffer_by_route_type, axis=1)
84-
shared_utils.utils.geoparquet_gcs_export(accessible_stops_trips, utils.GCS_FILE_PATH,
84+
utils.geoparquet_gcs_export(accessible_stops_trips, _utils.GCS_FILE_PATH,
8585
'accessible_stops_trips')
8686

8787

@@ -107,7 +107,7 @@ def save_initial_data():
107107
_.calitp_url_number == _.url_number)
108108
)
109109

110-
rt_complete.to_parquet(f'{utils.GCS_FILE_PATH}rt_complete.parquet')
110+
rt_complete.to_parquet(f'{_utils.GCS_FILE_PATH}rt_complete.parquet')
111111

112112

113113

@@ -223,12 +223,12 @@ def spatial_joins_to_blocks_and_tracts():
223223
Return 2 dictionaries of results.
224224
'''
225225
# Read in parquets from above
226-
ca_block_joined = shared_utils.utils.download_geoparquet(
227-
utils.GCS_FILE_PATH, 'block_population_joined')
228-
all_stops = shared_utils.utils.download_geoparquet(utils.GCS_FILE_PATH, 'all_stops')
229-
accessible_stops_trips = shared_utils.utils.download_geoparquet(
230-
utils.GCS_FILE_PATH, 'accessible_stops_trips')
231-
rt_complete = pd.read_parquet(f"{utils.GCS_FILE_PATH}rt_complete.parquet")
226+
ca_block_joined = gpd.read_parquet(
227+
f"{_utils.GCS_FILE_PATH}block_population_joined.parquet")
228+
all_stops = gpd.read_parquet(f"{_utils.GCS_FILE_PATH}all_stops.parquet")
229+
accessible_stops_trips = gpd.read_parquet(
230+
f"{_utils.GCS_FILE_PATH}accessible_stops_trips.parquet")
231+
rt_complete = pd.read_parquet(f"{_utils.GCS_FILE_PATH}rt_complete.parquet")
232232

233233
# Read in employment data by tract
234234
tract_pop_employ_filtered = get_employment_tract_data()
@@ -265,8 +265,8 @@ def spatial_joins_to_blocks_and_tracts():
265265
for key, value in sjoin_blocks.items():
266266
print(key)
267267
new_name = rename_block_files[key]
268-
shared_utils.utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{new_name}")
268+
utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{new_name}")
269269

270270
for key, value in sjoin_tracts.items():
271271
print(key)
272-
shared_utils.utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{key}")
272+
utils.geoparquet_gcs_export(value, GCS_FILE_PATH, f"{key}")

msd_dashboard_metric/setup_charts.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import altair as alt
22
import pandas as pd
33

4-
from shared_utils import styleguide
5-
from shared_utils import calitp_color_palette as cp
4+
from calitp_data_analysis import styleguide
5+
from calitp_data_analysis import calitp_color_palette as cp
66

77
AXIS_DATE_FORMAT ="%-m/%-d/%y"
88

msd_dashboard_metric/utils.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,8 @@
33
import geopandas as gpd
44
import datetime as dt
55

6-
import shared_utils
7-
8-
import calitp
9-
from calitp.tables import tbl
6+
from calitp_data_analysis.tables import tbls
7+
from calitp_data_analysis import geography_utils
108
from siuba import *
119

1210
import requests
@@ -30,20 +28,20 @@ def get_ca_block_group_geo():
3028
stanford_shorelines = catalog.stanford_shorelines.read()
3129
ca_shoreline = stanford_shorelines >> filter(_.STFIPS == '06')
3230
ca_block_geo = ca_block_geo.clip(ca_shoreline)
33-
ca_block_geo = ca_block_geo.to_crs(shared_utils.geography_utils.CA_NAD83Albers)
31+
ca_block_geo = ca_block_geo.to_crs(geography_utils.CA_NAD83Albers)
3432

3533
return ca_block_geo
3634

3735
# Use this one, move to TIGER file
3836
def get_ca_block_geo():
3937
# Bring in block geometry
40-
ca_blocks = gpd.read_parquet(f'{utils.GCS_FILE_PATH}2020_tiger_block_geo.parquet')
38+
ca_blocks = gpd.read_parquet(f'{GCS_FILE_PATH}2020_tiger_block_geo.parquet')
4139
ca_blocks = (ca_blocks >> filter(_.ALAND20 > 10) ## remove water
4240
>> select(_.county == _.COUNTYFP20,
4341
_.tract == _.TRACTCE20,
4442
_.block == _.BLOCKCE20,
4543
_.geo_id == _.GEOID20, _.geometry))
46-
ca_blocks = ca_blocks.to_crs(shared_utils.geography_utils.CA_NAD83Albers)
44+
ca_blocks = ca_blocks.to_crs(geography_utils.CA_NAD83Albers)
4745

4846
# Bring in block population
4947
ca_block_pop = catalog.ca_block_population.read()
@@ -105,7 +103,7 @@ def get_stops_and_trips(filter_accessible):
105103
geometry=gpd.points_from_xy(stops_trips.stop_lon,
106104
stops_trips.stop_lat),
107105
crs = 'EPSG:4326')
108-
.to_crs(shared_utils.geography_utils.CA_NAD83Albers)
106+
.to_crs(geography_utils.CA_NAD83Albers)
109107
)
110108

111109
return stops_trips

0 commit comments

Comments
 (0)