-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(powerbi): Report to Dashboard lineage #12451
Changes from 15 commits
c2e0dac
cad0b37
7c92a0e
0fa4114
a0327ac
6479072
43072d5
18a539e
3c33063
00adc98
11bc937
97d91d9
6f0f29b
6db9ef6
843133e
1d131e3
a1f77b8
bf21f7d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -286,11 +286,13 @@ class CreatedFrom(Enum): | |
id: str | ||
title: str | ||
embedUrl: str | ||
dataset: Optional["PowerBIDataset"] | ||
dataset_id: Optional[str] | ||
report: Optional[Report] | ||
report_id: Optional[str] | ||
createdFrom: CreatedFrom | ||
|
||
dataset: Optional["PowerBIDataset"] | ||
report: Optional[Report] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are report_id and report set at the same time? if not, it might be worth putting a comment that says the latter is filled in at a later time There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ids are set on a first traversal and objects in a later one |
||
|
||
def get_urn_part(self): | ||
return f"charts.{self.id}" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -337,41 +337,6 @@ def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]: | |
-tiles), there is no information available on pagination | ||
|
||
""" | ||
|
||
def new_dataset_or_report(tile_instance: Any) -> dict: | ||
""" | ||
Find out which is the data source for tile. It is either REPORT or DATASET | ||
""" | ||
report_fields = { | ||
Constant.REPORT: ( | ||
self.get_report( | ||
workspace=workspace, | ||
report_id=tile_instance.get(Constant.REPORT_ID), | ||
) | ||
if tile_instance.get(Constant.REPORT_ID) is not None | ||
else None | ||
), | ||
Constant.CREATED_FROM: Tile.CreatedFrom.UNKNOWN, | ||
} | ||
|
||
# reportId and datasetId are exclusive in tile_instance | ||
# if datasetId is present that means tile is created from dataset | ||
# if reportId is present that means tile is created from report | ||
# if both i.e. reportId and datasetId are not present then tile is created from some visualization | ||
if tile_instance.get(Constant.REPORT_ID) is not None: | ||
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.REPORT | ||
elif tile_instance.get(Constant.DATASET_ID) is not None: | ||
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.DATASET | ||
else: | ||
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.VISUALIZATION | ||
|
||
title: Optional[str] = tile_instance.get(Constant.TITLE) | ||
_id: Optional[str] = tile_instance.get(Constant.ID) | ||
created_from: Any = report_fields[Constant.CREATED_FROM] | ||
logger.info(f"Tile {title}({_id}) is created from {created_from}") | ||
|
||
return report_fields | ||
|
||
tile_list_endpoint: str = self.get_tiles_endpoint( | ||
workspace, dashboard_id=dashboard.id | ||
) | ||
|
@@ -393,8 +358,18 @@ def new_dataset_or_report(tile_instance: Any) -> dict: | |
title=instance.get(Constant.TITLE), | ||
embedUrl=instance.get(Constant.EMBED_URL), | ||
dataset_id=instance.get(Constant.DATASET_ID), | ||
report_id=instance.get(Constant.REPORT_ID), | ||
dataset=None, | ||
**new_dataset_or_report(instance), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for cleaning this up are there any entries in |
||
report=None, | ||
createdFrom=( | ||
# In the past we considered that only one of the two report_id or dataset_id would be present | ||
# but we have seen cases where both are present. If both are present, we prioritize the report. | ||
Tile.CreatedFrom.REPORT | ||
if instance.get(Constant.REPORT_ID) | ||
else Tile.CreatedFrom.DATASET | ||
if instance.get(Constant.DATASET_ID) | ||
else Tile.CreatedFrom.VISUALIZATION | ||
), | ||
) | ||
for instance in tile_dict | ||
if instance is not None | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
seems worrying that these lines weren't covered by the tests? as per the codecov report
also, to make sure I understand - the lineage is tile -> dashboard -> report?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this may be a sort of false positive, because my update on
default_mock_response.json
was specifically tailored to force this code pathLineage is represented with a contains relationship. Any PowerBI Dashboard having a PowerBI Tile which references a parent PowerBI Report is modelled as follows:
This was confirmed with users to match their expectations; this is how is modelled in PowerBI Lineage.