|
2 | 2 | import json
|
3 | 3 | from fal import FalDbt
|
4 | 4 |
|
5 |
| -from column_lineage import ColumnLineage |
6 |
| -from utils import _preprocess_sql, _produce_json |
| 5 | +# from column_lineage import ColumnLineage |
| 6 | +from lineagex.ColumnLineage import ColumnLineage |
| 7 | +from utils import dbt_preprocess_sql, dbt_produce_json, dbt_find_column |
7 | 8 | from typing import List
|
8 |
| -#from itertools import islice |
| 9 | +# from itertools import islice |
9 | 10 | # for key, value in islice(manifest['nodes'].items(), 3):
|
10 | 11 |
|
11 | 12 |
|
@@ -33,34 +34,35 @@ def _run_lineage(self) -> None:
|
33 | 34 | :return: the output_dict object will be the final output with each model name being key
|
34 | 35 | """
|
35 | 36 | self.part_tables = self._get_part_tables()
|
36 |
| - #key = 'model.mimic.age_histogram_test' |
37 |
| - #value = self.manifest['nodes'][key] |
| 37 | + # key = 'model.mimic.age_histogram_test' |
| 38 | + # value = self.manifest['nodes'][key] |
38 | 39 | for key, value in self.manifest["nodes"].items():
|
39 |
| - #for key, value in islice(self.manifest['nodes'].items(), 3): |
40 |
| - print(key) |
| 40 | + # for key, value in islice(self.manifest['nodes'].items(), 3): |
| 41 | + print(key, " completed") |
41 | 42 | table_name = value["schema"] + "." + value["name"]
|
42 | 43 | self.output_dict[key] = {}
|
43 |
| - ret_sql = _preprocess_sql(value) |
| 44 | + ret_sql = dbt_preprocess_sql(value) |
44 | 45 | # self.output_dict[key]["sql"] = value["compiled_code"].replace('\n', '')
|
45 |
| - #self.output_dict[key]["sql"] = ret_sql |
| 46 | + # self.output_dict[key]["sql"] = ret_sql |
46 | 47 | ret_fal = self.faldbt.execute_sql(
|
47 | 48 | "EXPLAIN (VERBOSE TRUE, FORMAT JSON, COSTS FALSE) {}".format(ret_sql)
|
48 | 49 | )
|
49 | 50 | plan = json.loads(ret_fal.iloc[0]["QUERY PLAN"][1:-1])
|
50 |
| - #col_names_new = self.table_cols_df[self.table_cols_df["table"] == table_name] |
51 |
| - #print(self.table_cols_df, col_names) |
| 51 | + # col_names_new = self.table_cols_df[self.table_cols_df["table"] == table_name] |
| 52 | + # print(self.table_cols_df, col_names) |
| 53 | + cols = dbt_find_column(table_name=table_name, engine=self.faldbt) |
52 | 54 | col_lineage = ColumnLineage(
|
53 | 55 | plan=plan["Plan"],
|
54 | 56 | sql=ret_sql,
|
55 |
| - table_name=table_name, |
56 |
| - faldbt=self.faldbt, |
| 57 | + columns=cols, |
| 58 | + conn=self.faldbt, |
57 | 59 | part_tables=self.part_tables,
|
58 | 60 | )
|
59 | 61 | self.output_dict[key]["tables"] = col_lineage.table_list
|
60 | 62 | self.output_dict[key]["columns"] = col_lineage.column_dict
|
61 | 63 | self.output_dict[key]["table_name"] = table_name
|
62 |
| - #self.output_dict[key]["plan"] = plan["Plan"] |
63 |
| - _produce_json(self.output_dict, self.faldbt) |
| 64 | + # self.output_dict[key]["plan"] = plan["Plan"] |
| 65 | + dbt_produce_json(self.output_dict, self.faldbt) |
64 | 66 |
|
65 | 67 | def _get_part_tables(self) -> dict:
|
66 | 68 | """
|
|
0 commit comments