Skip to content

Commit d3cffd6

Browse files
erikamov and Erika Pacheco authored
Create NTD external tables for 2022 API data (#3465)
[#3403] Co-authored-by: Erika Pacheco <[email protected]>
1 parent 62a61fd commit d3cffd6

34 files changed

+2318
-0
lines changed
Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,58 @@
1+
operator: operators.ExternalTable
2+
bucket: gs://calitp-ntd-api-products
3+
source_objects:
4+
- "breakdowns/2022/*.jsonl.gz"
5+
source_format: NEWLINE_DELIMITED_JSON
6+
use_bq_client: true
7+
hive_options:
8+
mode: CUSTOM
9+
require_partition_filter: false
10+
source_uri_prefix: "breakdowns/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
11+
destination_project_dataset_table: "external_ntd__annual_reporting.2022__breakdowns"
12+
prefix_bucket: false
13+
post_hook: SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__breakdowns LIMIT 1;
14+
schema_fields:
15+
- name: agency
16+
type: STRING
17+
- name: agency_voms
18+
type: NUMERIC
19+
- name: city
20+
type: STRING
21+
- name: major_mechanical_failures
22+
type: NUMERIC
23+
- name: mode
24+
type: STRING
25+
- name: mode_name
26+
type: STRING
27+
- name: mode_voms
28+
type: NUMERIC
29+
- name: ntd_id
30+
type: NUMERIC
31+
- name: organization_type
32+
type: STRING
33+
- name: other_mechanical_failures
34+
type: NUMERIC
35+
- name: primary_uza_population
36+
type: NUMERIC
37+
- name: report_year
38+
type: NUMERIC
39+
- name: reporter_type
40+
type: STRING
41+
- name: state
42+
type: STRING
43+
- name: total_mechanical_failures
44+
type: NUMERIC
45+
- name: train_miles
46+
type: NUMERIC
47+
- name: train_revenue_miles
48+
type: NUMERIC
49+
- name: type_of_service
50+
type: STRING
51+
- name: uace_code
52+
type: STRING
53+
- name: uza_name
54+
type: STRING
55+
- name: vehicle_passenger_car_miles
56+
type: NUMERIC
57+
- name: vehicle_passenger_car_revenue
58+
type: NUMERIC
Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
operator: operators.ExternalTable
2+
bucket: gs://calitp-ntd-api-products
3+
source_objects:
4+
- "breakdowns_by_agency/2022/*.jsonl.gz"
5+
source_format: NEWLINE_DELIMITED_JSON
6+
use_bq_client: true
7+
hive_options:
8+
mode: CUSTOM
9+
require_partition_filter: false
10+
source_uri_prefix: "breakdowns_by_agency/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
11+
destination_project_dataset_table: "external_ntd__annual_reporting.2022__breakdowns_by_agency"
12+
prefix_bucket: false
13+
post_hook: SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__breakdowns_by_agency LIMIT 1;
14+
schema_fields:
15+
- name: count_major_mechanical_failures_questionable
16+
type: NUMERIC
17+
- name: count_other_mechanical_failures_questionable
18+
type: NUMERIC
19+
- name: count_total_mechanical_failures_questionable
20+
type: NUMERIC
21+
- name: count_train_miles_questionable
22+
type: NUMERIC
23+
- name: count_train_revenue_miles_questionable
24+
type: NUMERIC
25+
- name: count_vehicle_passenger_car_miles_questionable
26+
type: NUMERIC
27+
- name: max_agency
28+
type: STRING
29+
- name: max_agency_voms
30+
type: NUMERIC
31+
- name: max_city
32+
type: STRING
33+
- name: max_organization_type
34+
type: STRING
35+
- name: max_primary_uza_population
36+
type: NUMERIC
37+
- name: max_reporter_type
38+
type: STRING
39+
- name: max_state
40+
type: STRING
41+
- name: max_uace_code
42+
type: STRING
43+
- name: max_uza_name
44+
type: STRING
45+
- name: ntd_id
46+
type: NUMERIC
47+
- name: report_year
48+
type: NUMERIC
49+
- name: sum_major_mechanical_failures
50+
type: NUMERIC
51+
- name: sum_other_mechanical_failures
52+
type: NUMERIC
53+
- name: sum_total_mechanical_failures
54+
type: NUMERIC
55+
- name: sum_train_miles
56+
type: NUMERIC
57+
- name: sum_train_revenue_miles
58+
type: NUMERIC
59+
- name: sum_vehicle_passenger_car_miles
60+
type: NUMERIC
61+
- name: sum_vehicle_passenger_car_revenue
62+
type: NUMERIC
Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
operator: operators.ExternalTable
2+
bucket: gs://calitp-ntd-api-products
3+
source_objects:
4+
- "capital_expenses_by_capital_use/2022/*.jsonl.gz"
5+
source_format: NEWLINE_DELIMITED_JSON
6+
use_bq_client: true
7+
hive_options:
8+
mode: CUSTOM
9+
require_partition_filter: false
10+
source_uri_prefix: "capital_expenses_by_capital_use/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
11+
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_by_capital_use"
12+
prefix_bucket: false
13+
post_hook: SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_by_capital_use LIMIT 1;
14+
schema_fields:
15+
- name: administrative_buildings
16+
type: STRING
17+
- name: agency
18+
type: STRING
19+
- name: agency_voms
20+
type: NUMERIC
21+
- name: city
22+
type: STRING
23+
- name: communication_information
24+
type: STRING
25+
- name: fare_collection_equipment
26+
type: STRING
27+
- name: form_type
28+
type: STRING
29+
- name: guideway
30+
type: STRING
31+
- name: maintenance_buildings
32+
type: STRING
33+
- name: mode_name
34+
type: STRING
35+
- name: mode_voms
36+
type: NUMERIC
37+
- name: modecd
38+
type: STRING
39+
- name: ntd_id
40+
type: NUMERIC
41+
- name: organization_type
42+
type: STRING
43+
- name: other
44+
type: STRING
45+
- name: other_vehicles
46+
type: STRING
47+
- name: passenger_vehicles
48+
type: STRING
49+
- name: reduced_reporter
50+
type: STRING
51+
- name: report_year
52+
type: NUMERIC
53+
- name: reporter_type
54+
type: STRING
55+
- name: state
56+
type: STRING
57+
- name: stations
58+
type: STRING
59+
- name: total
60+
type: NUMERIC
61+
- name: typeofservicecd
62+
type: STRING
Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,76 @@
1+
operator: operators.ExternalTable
2+
bucket: gs://calitp-ntd-api-products
3+
source_objects:
4+
- "capital_expenses_by_mode/2022/*.jsonl.gz"
5+
source_format: NEWLINE_DELIMITED_JSON
6+
use_bq_client: true
7+
hive_options:
8+
mode: CUSTOM
9+
require_partition_filter: false
10+
source_uri_prefix: "capital_expenses_by_mode/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
11+
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_by_mode"
12+
prefix_bucket: false
13+
post_hook: SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_by_mode LIMIT 1;
14+
schema_fields:
15+
- name: count_administrative_buildings_q
16+
type: NUMERIC
17+
- name: count_communication_information_q
18+
type: NUMERIC
19+
- name: count_fare_collection_equipment_q
20+
type: NUMERIC
21+
- name: count_maintenance_buildings_q
22+
type: NUMERIC
23+
- name: count_other_q
24+
type: NUMERIC
25+
- name: count_other_vehicles_q
26+
type: NUMERIC
27+
- name: count_passenger_vehicles_q
28+
type: NUMERIC
29+
- name: count_reduced_reporter_q
30+
type: NUMERIC
31+
- name: count_stations_q
32+
type: NUMERIC
33+
- name: max_agency
34+
type: STRING
35+
- name: max_agency_voms
36+
type: STRING
37+
- name: max_city
38+
type: STRING
39+
- name: max_mode_name
40+
type: STRING
41+
- name: max_organization_type
42+
type: STRING
43+
- name: max_reporter_type
44+
type: STRING
45+
- name: max_state
46+
type: STRING
47+
- name: modecd
48+
type: STRING
49+
- name: ntd_id
50+
type: NUMERIC
51+
- name: report_year
52+
type: NUMERIC
53+
- name: sum_administrative_buildings
54+
type: NUMERIC
55+
- name: sum_communication_information
56+
type: NUMERIC
57+
- name: sum_fare_collection_equipment
58+
type: NUMERIC
59+
- name: sum_guideway
60+
type: NUMERIC
61+
- name: sum_maintenance_buildings
62+
type: NUMERIC
63+
- name: sum_other
64+
type: NUMERIC
65+
- name: sum_other_vehicles
66+
type: NUMERIC
67+
- name: sum_passenger_vehicles
68+
type: NUMERIC
69+
- name: sum_reduced_reporter
70+
type: NUMERIC
71+
- name: sum_stations
72+
type: NUMERIC
73+
- name: sum_total
74+
type: NUMERIC
75+
- name: typeofservicecd
76+
type: STRING
Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
operator: operators.ExternalTable
2+
bucket: gs://calitp-ntd-api-products
3+
source_objects:
4+
- "capital_expenses_for_existing_service/2022/*.jsonl.gz"
5+
source_format: NEWLINE_DELIMITED_JSON
6+
use_bq_client: true
7+
hive_options:
8+
mode: CUSTOM
9+
require_partition_filter: false
10+
source_uri_prefix: "capital_expenses_for_existing_service/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
11+
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_for_existing_service"
12+
prefix_bucket: false
13+
post_hook: SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_for_existing_service LIMIT 1;
14+
schema_fields:
15+
- name: form_type
16+
type: STRING
17+
- name: max_agency
18+
type: STRING
19+
- name: max_agency_voms
20+
type: STRING
21+
- name: max_city
22+
type: STRING
23+
- name: max_organization_type
24+
type: STRING
25+
- name: max_primary_uza_population
26+
type: STRING
27+
- name: max_reporter_type
28+
type: STRING
29+
- name: max_state
30+
type: STRING
31+
- name: max_uace_code
32+
type: STRING
33+
- name: max_uza_name
34+
type: STRING
35+
- name: ntd_id
36+
type: NUMERIC
37+
- name: report_year
38+
type: NUMERIC
39+
- name: sum_administrative_buildings
40+
type: NUMERIC
41+
- name: sum_communication_information
42+
type: NUMERIC
43+
- name: sum_fare_collection_equipment
44+
type: NUMERIC
45+
- name: sum_guideway
46+
type: NUMERIC
47+
- name: sum_maintenance_buildings
48+
type: NUMERIC
49+
- name: sum_other
50+
type: NUMERIC
51+
- name: sum_other_vehicles
52+
type: NUMERIC
53+
- name: sum_passenger_vehicles
54+
type: NUMERIC
55+
- name: sum_reduced_reporter
56+
type: NUMERIC
57+
- name: sum_stations
58+
type: NUMERIC
59+
- name: sum_total
60+
type: NUMERIC
Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
operator: operators.ExternalTable
2+
bucket: gs://calitp-ntd-api-products
3+
source_objects:
4+
- "capital_expenses_for_expansion_of_service/2022/*.jsonl.gz"
5+
source_format: NEWLINE_DELIMITED_JSON
6+
use_bq_client: true
7+
hive_options:
8+
mode: CUSTOM
9+
require_partition_filter: false
10+
source_uri_prefix: "capital_expenses_for_expansion_of_service/2022/{dt:DATE}/{execution_ts:TIMESTAMP}"
11+
destination_project_dataset_table: "external_ntd__annual_reporting.2022__capital_expenses_for_expansion_of_service"
12+
prefix_bucket: false
13+
post_hook: SELECT * FROM `{{ get_project_id() }}`.external_ntd__annual_reporting.2022__capital_expenses_for_expansion_of_service LIMIT 1;
14+
schema_fields:
15+
- name: form_type
16+
type: STRING
17+
- name: max_agency
18+
type: STRING
19+
- name: max_agency_voms
20+
type: STRING
21+
- name: max_city
22+
type: STRING
23+
- name: max_organization_type
24+
type: STRING
25+
- name: max_primary_uza_population
26+
type: NUMERIC
27+
- name: max_reporter_type
28+
type: STRING
29+
- name: max_state
30+
type: STRING
31+
- name: max_uace_code
32+
type: STRING
33+
- name: max_uza_name
34+
type: STRING
35+
- name: ntd_id
36+
type: NUMERIC
37+
- name: report_year
38+
type: NUMERIC
39+
- name: sum_administrative_buildings
40+
type: NUMERIC
41+
- name: sum_communication_information
42+
type: NUMERIC
43+
- name: sum_fare_collection_equipment
44+
type: NUMERIC
45+
- name: sum_guideway
46+
type: NUMERIC
47+
- name: sum_maintenance_buildings
48+
type: NUMERIC
49+
- name: sum_other
50+
type: NUMERIC
51+
- name: sum_other_vehicles
52+
type: NUMERIC
53+
- name: sum_passenger_vehicles
54+
type: NUMERIC
55+
- name: sum_reduced_reporter
56+
type: NUMERIC
57+
- name: sum_stations
58+
type: NUMERIC
59+
- name: sum_total
60+
type: NUMERIC

0 commit comments

Comments
 (0)