Skip to content

Commit d7dfb74

Browse files
committed
removed conveyal update report for dotw
1 parent 6bcc337 commit d7dfb74

File tree

1 file changed

+14
-23
lines changed

1 file changed

+14
-23
lines changed

conveyal_update/evaluate_feeds.py

Lines changed: 14 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -77,13 +77,13 @@ def get_undefined_feeds(feeds_on_target: pd.DataFrame) -> pd.DataFrame:
7777

7878
def report_unavailable_feeds(feeds, fname):
7979
undefined = feeds.loc[
80-
feeds["valid_date_other_than_service_date"] | ~feeds["usable_schedule_feed_exists"]
80+
feeds["valid_date_other_than_service_date"] | feeds["no_schedule_feed_found"]
8181
].copy()
8282
if undefined.empty:
8383
print('no undefined service feeds')
8484
else:
8585
print('these feeds have no service defined on target date, nor are their services captured in other feeds:')
86-
print(undefined.loc[~undefined["usable_schedule_feed_exists"], "gtfs_dataset_name"].drop_duplicates())
86+
print(undefined.loc[undefined["no_schedule_feed_found"], "gtfs_dataset_name"].drop_duplicates())
8787
print('these feeds have defined service, but only in a feed defined on a prior day')
8888
print(undefined.loc[undefined["valid_date_other_than_service_date"], "gtfs_dataset_name"].drop_duplicates())
8989
print(f'saving detailed csv to {fname}')
@@ -102,63 +102,54 @@ def get_old_feeds(undefined_feeds_base64_urls: pd.Series, target_date: dt.date |
102102
SELECT
103103
`mart_gtfs.dim_schedule_feeds`.base64_url AS base64_url,
104104
`mart_gtfs.dim_schedule_feeds`.key as feed_key,
105-
`mart_gtfs.dim_calendar`.{day_of_the_week} AS target_day_of_the_week,
106-
MAX(`mart_gtfs.dim_schedule_feeds`._valid_to) AS valid_feed_date,
105+
MAX(`mart_gtfs.dim_schedule_feeds`._valid_to) AS valid_feed_date
107106
from `mart_gtfs.dim_schedule_feeds`
108107
LEFT JOIN `mart_gtfs.dim_calendar`
109108
ON `mart_gtfs.dim_schedule_feeds`.key = `mart_gtfs.dim_calendar`.feed_key
110109
WHERE `mart_gtfs.dim_schedule_feeds`.base64_url IN {base_64_urls_str}
111-
AND `mart_gtfs.dim_schedule_feeds`._valid_to <= '{target_date}'
112110
AND `mart_gtfs.dim_schedule_feeds`._valid_to >= '{max_lookback_date}'
113-
AND `mart_gtfs.dim_calendar`.start_date <= '{target_date}'
111+
AND `mart_gtfs.dim_schedule_feeds`._valid_to <= '{target_date}'
112+
AND `mart_gtfs.dim_calendar`.{day_of_the_week} = 1
113+
AND `mart_gtfs.dim_calendar`.start_date <= '{target_date}'
114114
AND `mart_gtfs.dim_calendar`.end_date >= '{target_date}'
115115
GROUP BY
116116
`mart_gtfs.dim_schedule_feeds`.base64_url,
117-
`mart_gtfs.dim_schedule_feeds`.key,
118-
`mart_gtfs.dim_calendar`.{day_of_the_week}
119-
ORDER BY target_day_of_the_week DESC
117+
`mart_gtfs.dim_schedule_feeds`.key
120118
LIMIT 1000
121119
"""
122120
response = query_sql(
123121
query
124122
)
125123
response_grouped = response.groupby("base64_url")
126124
feed_info_by_url = response_grouped[["valid_feed_date", "feed_key"]].first()
127-
print(feed_info_by_url)
128-
feed_info_by_url["valid_feed_date"] = feed_info_by_url["valid_feed_date"].dt.date - dt.timedelta(days=1)
125+
feed_info_by_url["date_processed"] = feed_info_by_url["valid_feed_date"].dt.date - dt.timedelta(days=1)
129126
# we have the day the feed becomes invalid, so the day we are interested in where the feed *is* valid is the day after
130-
feed_info_by_url["no_operations_on_target_day_of_the_week"] = ~(response_grouped["target_day_of_the_week"].any())
131-
return feed_info_by_url
127+
return feed_info_by_url.drop("valid_feed_date", axis=1)
132128

133129
def merge_old_feeds(df_all_feeds: pd.DataFrame, df_undefined_feeds: pd.DataFrame, target_date: dt.date, max_lookback_timedelta: dt.timedelta) -> pd.DataFrame:
134130
feed_search_result = get_old_feeds(
135131
df_undefined_feeds["base64_url"],
136132
target_date,
137133
max_lookback_timedelta
138134
)
135+
print(feed_search_result)
139136
feeds_merged = df_all_feeds.merge(
140137
feed_search_result,
141138
how="left",
142139
left_on="base64_url",
143140
right_index=True,
144141
validate="many_to_one"
145142
)
143+
print(list(feeds_merged.columns))
146144
feeds_merged["feed_key"] = feeds_merged["feed_key_y"].fillna(feeds_merged["feed_key_x"])
147145
feeds_merged["no_schedule_feed_found"] = (
148146
(feeds_merged["base64_url"].isin(df_undefined_feeds["base64_url"])) & (~feeds_merged["base64_url"].isin(feed_search_result.index))
149-
)
150-
feeds_merged["no_operations_on_target_date_but_valid_feed_exists"] = (feeds_merged["no_operations_on_target_day_of_the_week"].fillna(False))
151-
feeds_merged["usable_schedule_feed_exists"] = (
152-
~(feeds_merged["no_schedule_feed_found"] | feeds_merged["no_operations_on_target_date_but_valid_feed_exists"])
153-
)
154-
feeds_merged["date"] = feeds_merged.loc[
155-
~feeds_merged["no_operations_on_target_date_but_valid_feed_exists"], "valid_feed_date"
156-
]
157-
feeds_merged["date"] = feeds_merged["date"].fillna(target_date)
147+
).fillna(False)
148+
feeds_merged["date"] = feeds_merged["date_processed"].fillna(target_date)
158149
feeds_merged["valid_date_other_than_service_date"] = feeds_merged["date"] != target_date
159150

160151
return feeds_merged.drop(
161-
["valid_feed_date", "no_operations_on_target_day_of_the_week", "feed_key_x", "feed_key_y"], axis=1
152+
["date_processed", "feed_key_x", "feed_key_y"], axis=1
162153
)
163154

164155
if __name__ == '__main__':

0 commit comments

Comments (0)