From 79da27876dbdb36e77ca112eb79aac63abb2f1c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Balcerek?= Date: Fri, 2 Aug 2019 13:54:10 +0200 Subject: [PATCH] Issue 22 | Add workaround for table using table from another DAG (#23) * Fix typo in variable name dag_progess -> dag_progress * Add a workaround for table referencing another from different DAG This replaces node's parent to link to root, instead of failing to lookup table from another DAG Also references issue created on Github * Document the current state of the /issues/22 https://github.com/Wikia/discreETLy/issues/22 --- dashboard/model/tables_data.py | 12 +++++++----- dashboard/plugins/tables/README.md | 2 -- dashboard/plugins/tables/tables.yaml.template | 7 ------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/dashboard/model/tables_data.py b/dashboard/model/tables_data.py index a3f8779..ebf7d7e 100644 --- a/dashboard/model/tables_data.py +++ b/dashboard/model/tables_data.py @@ -1,5 +1,6 @@ import csv from concurrent.futures import ThreadPoolExecutor +from typing import Dict from dashboard.models import Period, Table from dashboard.utils import clean_dag_id, handle_resource, simple_state @@ -43,7 +44,6 @@ class DagTableProgress: class TableDataProvider: - @staticmethod def _format_table_id(table): id = table['db'] + '.' + table['name'] @@ -75,8 +75,8 @@ def get_tables(self): def history(self, table): return self.__get_detailed_view_data(self.tables[table], DETAILED_CHART_DAYS_NUM) - def get_tables_by_dag(self, dag_name): - return [table for table in self.tables.values() if table.dag_id == dag_name] + def get_tables_by_dag(self, dag_name) -> Dict[str, Table]: + return {id: table for id, table in self.tables.items() if table.dag_id == dag_name} def get_tables_graph(self, dag_id, execution_date): name_without_version = clean_dag_id(dag_id) @@ -89,7 +89,7 @@ def get_tables_graph(self, dag_id, execution_date): state=self.airflow.get_dag_state(dag_id, execution_date)) # tables - for table in dag_tables: + for table in dag_tables.values(): yield GraphVertex( id=table.id, name=table.name + (' ({})'.format(table.period.name) if table.period else ''), @@ -98,7 +98,9 @@ def get_tables_graph(self, dag_id, execution_date): dag_progress[table.task_id].end_date, dag_progress[table.task_id].duration ), - parent=table.get_parent() + # workaround for this entire method not being able to reference table managed by other DAG in table.uses + # see https://github.com/Wikia/discreETLy/issues/22 + parent='main' if table.uses is None or table.uses not in dag_tables.keys() else table.uses ) @handle_resource('influx') diff --git a/dashboard/plugins/tables/README.md b/dashboard/plugins/tables/README.md index 7cdf94c..13f27bf 100644 --- a/dashboard/plugins/tables/README.md +++ b/dashboard/plugins/tables/README.md @@ -24,8 +24,6 @@ Additionally you may want to provide cadence for how often the table is updated Update `id` incrementally starting from 1 for each table redeclared with another cadence. This makes specifying dependency on a particular cadence possible, e.g.: `uses: dbname.frequently_updated.daily`. -If several DAGs use the same table, it needs to be redeclared with each `dag_id` for the table to be supported by - `Tables managed by DAG` view. See [example file](tables.yaml.template) for more details on the data structure. diff --git a/dashboard/plugins/tables/tables.yaml.template b/dashboard/plugins/tables/tables.yaml.template index 235ce67..1fbf272 100644 --- a/dashboard/plugins/tables/tables.yaml.template +++ b/dashboard/plugins/tables/tables.yaml.template @@ -23,10 +23,3 @@ dag_id: my_dag task_id: rollup_table_insert uses: dbname.frequently_updated_table.daily - -# redeclaring popular_raw_table for use with another DAG -- name: popular_raw_table - db: dbname - dag_id: another_dag - task_id: popular_raw_table_sensor -