Skip to content

remove --routes-without-agency-id, handle feeds with 0 agencies #65

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ const {
'trips-without-shape-id': {
type: 'boolean',
},
'routes-without-agency-id': {
type: 'boolean',
},
'stops-without-level-id': {
type: 'boolean',
},
Expand Down Expand Up @@ -98,7 +95,6 @@ Options:
Default: google-extended
--trips-without-shape-id Don't require trips.txt items to have a shape_id.
Default if shapes.txt has not been provided.
--routes-without-agency-id Don't require routes.txt items to have an agency_id.
--stops-without-level-id Don't require stops.txt items to have a level_id.
Default if levels.txt has not been provided.
--stops-location-index Create a spatial index on stops.stop_loc for efficient
Expand Down Expand Up @@ -179,7 +175,6 @@ const opt = {
ignoreUnsupportedFiles: !!flags['ignore-unsupported'],
routeTypesScheme: flags['route-types-scheme'] || 'google-extended',
tripsWithoutShapeId: !!flags['trips-without-shape-id'],
routesWithoutAgencyId: !!flags['routes-without-agency-id'],
stopsLocationIndex: !!flags['stops-location-index'],
statsByRouteIdAndDate: flags['stats-by-route-date'] || 'none',
statsByAgencyIdAndRouteIdAndStopAndHour: flags['stats-by-agency-route-stop-hour'] || 'none',
Expand Down
28 changes: 27 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const {Stringifier} = require('csv-stringify')
const formatters = require('./lib')
const getDependencies = require('./lib/deps')
const pkg = require('./package.json')
const {DEFAULT_AGENCY_ID} = require('./lib/agency')

const convertGtfsToSql = async function* (files, opt = {}) {
opt = {
Expand All @@ -17,14 +18,15 @@ const convertGtfsToSql = async function* (files, opt = {}) {
ignoreUnsupportedFiles: false,
routeTypesScheme: 'google-extended',
tripsWithoutShapeId: !files.some(f => f.name === 'shapes'),
routesWithoutAgencyId: false,
stopsWithoutLevelId: !files.some(f => f.name === 'levels'),
stopsLocationIndex: false,
lowerCaseLanguageCodes: false,
statsByRouteIdAndDate: 'none',
statsByAgencyIdAndRouteIdAndStopAndHour: 'none',
statsActiveTripsByHour: 'none',
schema: 'public',
// todo: find something more helpful than falling back to Etc/GMT!
defaultTimezone: new Intl.DateTimeFormat().resolvedOptions().timeZone || 'Etc/GMT',
postgraphile: false,
postgraphilePassword: process.env.POSTGRAPHILE_PGPASSWORD || null,
postgrest: false,
Expand Down Expand Up @@ -209,6 +211,30 @@ LANGUAGE sql;
const nrOfRowsByName = new Map()
const workingState = {
nrOfRowsByName,
insertDefaultAgency: false,
onlyAgencyId: null,
}

// The GTFS spec allows routes.agency_id to be empty/null if there is exactly one agency in the feed.
// It seems that GTFS has allowed this at least since 2016:
// https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
// However, because we have to use left join instead of an inner join in tables referencing `agency`, this prevents the PostgreSQL query planner from doing some filter pushdowns, e.g.
// - when querying `arrivals_departures` by route, stop, date and t_departure/t_arrival
// todo: add tests: 0 agencies (implicit single agency), 1 agency
{
let agencies = 0
for await (const agency of await readCsv('agency')) {
workingState.onlyAgencyId = agency.agency_id
if (++agencies >= 2) {
workingState.onlyAgencyId = null
break
}
}
// We insert a mock agency in order to use an inner join in tables referencing `agency`.
if (agencies === 0) {
workingState.insertDefaultAgency = true
workingState.onlyAgencyId = DEFAULT_AGENCY_ID
}
}

for (const name of order) {
Expand Down
25 changes: 24 additions & 1 deletion lib/agency.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict'

const DEFAULT_AGENCY_ID = 'default-agency'

// https://gtfs.org/schedule/reference/#agencytxt
const beforeAll = (opt) => `\
CREATE TABLE "${opt.schema}".agency (
Expand Down Expand Up @@ -39,11 +41,32 @@ const formatAgencyRow = (a) => {
]
}

/**
 * Builds the SQL emitted after all agency rows have been written.
 *
 * The returned string always begins with the `\.` line, followed by a newline.
 * If `workingState.insertDefaultAgency` is set, it is followed by an INSERT
 * statement that creates a placeholder agency with the ID `DEFAULT_AGENCY_ID`.
 *
 * @param {object} opt - Options; `opt.schema` and `opt.defaultTimezone` are read.
 * @param {object} workingState - Shared state; `insertDefaultAgency` is read.
 * @returns {string} The SQL to append.
 */
const afterAll = (opt, workingState) => {
	// Formats a value as a SQL string literal. Embedded single quotes are doubled,
	// because backslash escapes are not recognised in plain '...' literals when
	// standard_conforming_strings is on.
	const quoteLiteral = (value) => `'${String(value).replace(/'/g, `''`)}'`

	let sql = `\
\\.
`

	if (workingState.insertDefaultAgency) {
		sql += `\
INSERT INTO "${opt.schema}".agency (
agency_id,
agency_name,
agency_url,
agency_timezone
) VALUES (
${quoteLiteral(DEFAULT_AGENCY_ID)},
'implicit default agency, the CSV file doesn''t contain one',
'http://example.org',
${quoteLiteral(opt.defaultTimezone)}
);
`
	}

	return sql
}

module.exports = {
DEFAULT_AGENCY_ID,
beforeAll,
formatRow: formatAgencyRow,
afterAll,
Expand Down
3 changes: 1 addition & 2 deletions lib/deps.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
const getDependencies = (opt, files) => {
const {
tripsWithoutShapeId,
routesWithoutAgencyId,
stopsWithoutLevelId,
} = opt
return {
Expand All @@ -27,7 +26,7 @@ const getDependencies = (opt, files) => {
'frequencies',
],
routes: [
...(routesWithoutAgencyId ? [] : ['agency']),
'agency',
],
trips: [
'routes',
Expand Down
25 changes: 10 additions & 15 deletions lib/routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -292,21 +292,16 @@ COPY "${opt.schema}".routes (
}

const formatRoutesRow = (r, opt, workingState) => {
const agency_id = r.agency_id || null
if (agency_id === null && !opt.routesWithoutAgencyId) {
// The GTFS spec allows routes.agency_id to be empty/null if there is exactly one agency in the feed.
// It seems that GTFS has allowed this at least since 2016:
// https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
if (workingState.nrOfRowsByName.get('agency') !== 1) {
// todo: throw special error indicating an error in the input data
throw new DataError(
'routes',
'agency_id must not be empty/null',
[
'The GTFS spec allows routes.agency_id to be empty/null only if there is exactly one agency in the feed.'
],
)
}
// The GTFS spec allows routes.agency_id to be empty/null if there is exactly one agency in the feed. In this case, we fall back to the ID of the feed's only agency (or of the implicit default agency if the feed contains none).
const agency_id = r.agency_id || workingState.onlyAgencyId
if (!agency_id) {
throw new DataError(
'routes',
'agency_id must not be empty/null',
[
'The GTFS spec allows routes.agency_id to be empty/null only if there is exactly one agency in the feed.'
],
)
}

return [
Expand Down
20 changes: 4 additions & 16 deletions lib/stop_times.js
Original file line number Diff line number Diff line change
Expand Up @@ -225,14 +225,8 @@ WITH stop_times_based AS NOT MATERIALIZED (
LEFT JOIN "${opt.schema}".stops stations ON stops.parent_station = stations.stop_id
JOIN "${opt.schema}".trips ON s.trip_id = trips.trip_id
JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
LEFT JOIN "${opt.schema}".agency ON (
-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
-- It seems that GTFS has allowed this at least since 2016:
-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
routes.agency_id IS NULL -- match first (and only) agency
OR routes.agency_id = agency.agency_id -- match by ID
)
-- todo: what if the route is missing (LEFT JOIN), does this work?
JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
JOIN "${opt.schema}".service_days ON trips.service_id = service_days.service_id
)
-- todo: this slows down slightly
Expand Down Expand Up @@ -465,14 +459,8 @@ WITH stop_times_based AS NOT MATERIALIZED (
) AS to_wheelchair_boarding
FROM "${opt.schema}".trips
LEFT JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
LEFT JOIN "${opt.schema}".agency ON (
-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
-- It seems that GTFS has allowed this at least since 2016:
-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
routes.agency_id IS NULL -- match first (and only) agency
OR routes.agency_id = agency.agency_id -- match by ID
)
-- todo: what if the route is missing (LEFT JOIN), does this work?
JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
LEFT JOIN "${opt.schema}".stop_times ON trips.trip_id = stop_times.trip_id
LEFT JOIN "${opt.schema}".stops from_stops ON stop_times.stop_id = from_stops.stop_id
LEFT JOIN "${opt.schema}".stops from_stations ON from_stops.parent_station = from_stations.stop_id
Expand Down
1 change: 0 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ Options:
Default: google-extended
--trips-without-shape-id Don't require trips.txt items to have a shape_id.
Default if shapes.txt has not been provided.
--routes-without-agency-id Don't require routes.txt items to have an agency_id.
--stops-without-level-id Don't require stops.txt items to have a level_id.
Default if levels.txt has not been provided.
--stops-location-index Create a spatial index on stops.stop_loc for efficient
Expand Down
Loading