
Commit 8a92827

Merge branch 'master' into feat/update-mlflow-ui

2 parents: cdc23a8 + a20f660

17 files changed: +909 -85 lines


.github/pr-labeler-config.yml

Lines changed: 23 additions & 13 deletions

@@ -1,22 +1,32 @@
 ingestion:
-  - "metadata-ingestion/**/*"
-  - "metadata-ingestion-modules/**/*"
-  - "metadata-integration/**/*"
+  - changed-files:
+      - any-glob-to-any-file:
+          - "metadata-ingestion/**/*"
+          - "metadata-ingestion-modules/**/*"
+          - "metadata-integration/**/*"
 
 devops:
-  - "docker/**/*"
-  - ".github/**/*"
-  - "perf-test/**/*"
-  - "metadata-service/**/*"
+  - changed-files:
+      - any-glob-to-any-file:
+          - "docker/**/*"
+          - ".github/**/*"
+          - "perf-test/**/*"
+          - "metadata-service/**/*"
 
 product:
-  - "datahub-web-react/**/*"
-  - "datahub-frontend/**/*"
-  - "datahub-graphql-core/**/*"
-  - "metadata-io/**/*"
+  - changed-files:
+      - any-glob-to-any-file:
+          - "datahub-web-react/**/*"
+          - "datahub-frontend/**/*"
+          - "datahub-graphql-core/**/*"
+          - "metadata-io/**/*"
 
 docs:
-  - "docs/**/*"
+  - changed-files:
+      - any-glob-to-any-file:
+          - "docs/**/*"
 
 smoke_test:
-  - "smoke-test/**/*"
+  - changed-files:
+      - any-glob-to-any-file:
+          - "smoke-test/**/*"
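The new shape follows the actions/labeler v5 config schema, where each label lists changed-files matchers instead of bare globs. As a rough illustration of the any-glob-to-any-file semantics, a hedged Python sketch (not DataHub code; fnmatch only approximates the action's minimatch-style globbing):

from fnmatch import fnmatch

def label_applies(globs, changed_files):
    # A label applies when any glob matches any changed file. Note that
    # fnmatch's "*" crosses "/" boundaries, so this is only an approximation.
    return any(fnmatch(path, glob) for glob in globs for path in changed_files)

print(label_applies(["metadata-ingestion/**/*"], ["metadata-ingestion/src/app.py"]))  # True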

.github/workflows/build-and-test.yml

Lines changed: 1 addition & 1 deletion

@@ -63,7 +63,7 @@ jobs:
           sudo apt-get remove 'dotnet-*' azure-cli || true
           sudo rm -rf /usr/local/lib/android/ || true
           sudo docker image prune -a -f || true
-      - uses: szenius/set-timezone@v1.2
+      - uses: szenius/set-timezone@v2.0
         with:
           timezoneLinux: ${{ matrix.timezone }}
       - name: Check out the repo

.github/workflows/pr-labeler.yml

Lines changed: 7 additions & 2 deletions

@@ -3,6 +3,10 @@ on:
   pull_request_target:
     types: [opened, reopened]
 
+permissions:
+  contents: read
+  pull-requests: write
+
 jobs:
   triage:
     permissions:
@@ -25,7 +29,6 @@ jobs:
           "chriscollins3456",
           "david-leifker",
           "shirshanka",
-          "sid-acryl",
           "swaroopjagadish",
           "treff7es",
           "yoonhyejin",
@@ -46,7 +49,9 @@ jobs:
           "kevinkarchacryl",
           "sgomezvillamor",
           "acrylJonny",
-          "chakru-r"
+          "chakru-r",
+          "brock-acryl",
+          "mminichino"
         ]'),
         github.actor
       )

.github/workflows/python-build-pages.yml

Lines changed: 1 addition & 1 deletion

@@ -37,7 +37,7 @@ jobs:
         with:
           distribution: "zulu"
           java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
       - uses: acryldata/sane-checkout-action@v3
       - uses: actions/setup-python@v5
         with:

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java

Lines changed: 0 additions & 3 deletions

@@ -77,9 +77,6 @@ public class EntityTypeUrnMapper {
           .put(
               Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME,
               "urn:li:entityType:datahub.businessAttribute")
-          .put(
-              Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME,
-              "urn:li:entityType:datahub.dataProcessInstance")
           .build();
 
   private static final Map<String, String> ENTITY_TYPE_URN_TO_NAME =
datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapperTest.java

Lines changed: 20 additions & 0 deletions

@@ -0,0 +1,20 @@
+package com.linkedin.datahub.graphql.types.entitytype;
+
+import static org.testng.Assert.*;
+
+import com.linkedin.datahub.graphql.generated.EntityType;
+import com.linkedin.metadata.Constants;
+import org.testng.annotations.Test;
+
+public class EntityTypeMapperTest {
+
+  @Test
+  public void testGetType() throws Exception {
+    assertEquals(EntityTypeMapper.getType(Constants.DATASET_ENTITY_NAME), EntityType.DATASET);
+  }
+
+  @Test
+  public void testGetName() throws Exception {
+    assertEquals(EntityTypeMapper.getName(EntityType.DATASET), Constants.DATASET_ENTITY_NAME);
+  }
+}
datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapperTest.java

Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+package com.linkedin.datahub.graphql.types.entitytype;
+
+import static org.testng.Assert.*;
+
+import com.linkedin.datahub.graphql.generated.EntityType;
+import com.linkedin.metadata.Constants;
+import org.testng.annotations.Test;
+
+public class EntityTypeUrnMapperTest {
+
+  @Test
+  public void testGetName() throws Exception {
+    assertEquals(
+        EntityTypeUrnMapper.getName("urn:li:entityType:datahub.dataset"),
+        Constants.DATASET_ENTITY_NAME);
+  }
+
+  @Test
+  public void testGetEntityType() throws Exception {
+    assertEquals(
+        EntityTypeUrnMapper.getEntityType("urn:li:entityType:datahub.dataset"), EntityType.DATASET);
+  }
+
+  @Test
+  public void testGetEntityTypeUrn() throws Exception {
+    assertEquals(
+        EntityTypeUrnMapper.getEntityTypeUrn(Constants.DATASET_ENTITY_NAME),
+        "urn:li:entityType:datahub.dataset");
+  }
+}

metadata-ingestion/src/datahub/cli/delete_cli.py

Lines changed: 16 additions & 2 deletions

@@ -265,6 +265,11 @@ def undo_by_filter(
     type=str,
     help="Urn of the entity to delete, for single entity deletion",
 )
+@click.option(
+    "--urn-file",
+    required=False,
+    help="Path of file with urns (one per line) to be deleted",
+)
 @click.option(
     "-a",
     "--aspect",
@@ -353,6 +358,7 @@ def undo_by_filter(
 @telemetry.with_telemetry()
 def by_filter(
     urn: Optional[str],
+    urn_file: Optional[str],
     aspect: Optional[str],
     force: bool,
     soft: bool,
@@ -373,6 +379,7 @@ def by_filter(
     # Validate the cli arguments.
     _validate_user_urn_and_filters(
         urn=urn,
+        urn_file=urn_file,
         entity_type=entity_type,
         platform=platform,
         env=env,
@@ -429,6 +436,12 @@ def by_filter(
                 batch_size=batch_size,
             )
         )
+    elif urn_file:
+        with open(urn_file, "r") as r:
+            urns = []
+            for line in r.readlines():
+                urn = line.strip().strip('"')
+                urns.append(urn)
     else:
         urns = list(
             graph.get_urns_by_filter(
@@ -537,6 +550,7 @@ def process_urn(urn):
 
 def _validate_user_urn_and_filters(
     urn: Optional[str],
+    urn_file: Optional[str],
     entity_type: Optional[str],
     platform: Optional[str],
     env: Optional[str],
@@ -549,9 +563,9 @@ def _validate_user_urn_and_filters(
         raise click.UsageError(
             "You cannot provide both an urn and a filter rule (entity-type / platform / env / query)."
         )
-    elif not urn and not (entity_type or platform or env or query):
+    elif not urn and not urn_file and not (entity_type or platform or env or query):
        raise click.UsageError(
-            "You must provide either an urn or at least one filter (entity-type / platform / env / query) in order to delete entities."
+            "You must provide either an urn or urn_file or at least one filter (entity-type / platform / env / query) in order to delete entities."
        )
     elif query:
         logger.warning(
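With the new --urn-file option, a file of newline-separated URNs (surrounding double quotes tolerated, per the strip('"') above) can drive a bulk delete. A minimal sketch of the parsing; the invocation in the trailing comment is an assumed illustration, not taken from the diff:

# Illustrative input; unlike the diff's loop, this sketch also skips blank lines.
content = '"urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)"\n'

urns = [line.strip().strip('"') for line in content.splitlines() if line.strip()]
print(urns)  # ['urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)']

# The CLI equivalent would then be roughly:
#   datahub delete --urn-file urns.txt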

metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py

Lines changed: 4 additions & 0 deletions

@@ -167,6 +167,10 @@ class PlatformDetail(ConfigModel):
         description="The database that all assets produced by this connector belong to. "
         "For destinations, this defaults to the fivetran log config's database.",
     )
+    include_schema_in_urn: bool = pydantic.Field(
+        default=True,
+        description="Include schema in the dataset URN. In some cases, the schema is not relevant to the dataset URN and Fivetran sets it to the source and destination table names in the connector.",
+    )
 
 
 class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):

metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py

Lines changed: 15 additions & 5 deletions

@@ -119,21 +119,31 @@ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> Dict[str, s
         )
 
         for lineage in connector.lineage:
+            source_table = (
+                lineage.source_table
+                if source_details.include_schema_in_urn
+                else lineage.source_table.split(".", 1)[1]
+            )
             input_dataset_urn = DatasetUrn.create_from_ids(
                 platform_id=source_details.platform,
                 table_name=(
-                    f"{source_details.database.lower()}.{lineage.source_table}"
+                    f"{source_details.database.lower()}.{source_table}"
                     if source_details.database
-                    else lineage.source_table
+                    else source_table
                 ),
                 env=source_details.env,
                 platform_instance=source_details.platform_instance,
             )
             input_dataset_urn_list.append(input_dataset_urn)
 
+            destination_table = (
+                lineage.destination_table
+                if destination_details.include_schema_in_urn
+                else lineage.destination_table.split(".", 1)[1]
+            )
             output_dataset_urn = DatasetUrn.create_from_ids(
                 platform_id=destination_details.platform,
-                table_name=f"{destination_details.database.lower()}.{lineage.destination_table}",
+                table_name=f"{destination_details.database.lower()}.{destination_table}",
                 env=destination_details.env,
                 platform_instance=destination_details.platform_instance,
             )
@@ -176,12 +186,12 @@ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> Dict[str, s
             **{
                 f"source.{k}": str(v)
                 for k, v in source_details.dict().items()
-                if v is not None
+                if v is not None and not isinstance(v, bool)
             },
             **{
                 f"destination.{k}": str(v)
                 for k, v in destination_details.dict().items()
-                if v is not None
+                if v is not None and not isinstance(v, bool)
             },
         )

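A minimal sketch of what include_schema_in_urn: false does to a Fivetran table reference (the values below are hypothetical, not from the diff):

# Fivetran reports tables as "<schema>.<table>"; with the flag disabled,
# the schema prefix is dropped before the dataset URN is built.
table_ref = "public.orders"      # hypothetical lineage.source_table value
include_schema_in_urn = False    # the new per-platform-detail flag

name = table_ref if include_schema_in_urn else table_ref.split(".", 1)[1]
print(name)  # -> "orders"; with a database configured, the URN name is "<db>.orders"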
metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py

Lines changed: 2 additions & 1 deletion

@@ -88,6 +88,7 @@ def __init__(self, config: GCSSourceConfig, ctx: PipelineContext):
         super().__init__(config, ctx)
         self.config = config
         self.report = GCSSourceReport()
+        self.platform: str = PLATFORM_GCS
         self.s3_source = self.create_equivalent_s3_source(ctx)
 
     @classmethod
@@ -135,7 +136,7 @@ def create_equivalent_s3_path_specs(self):
 
     def create_equivalent_s3_source(self, ctx: PipelineContext) -> S3Source:
         config = self.create_equivalent_s3_config()
-        return self.s3_source_overrides(S3Source(config, ctx))
+        return self.s3_source_overrides(S3Source(config, PipelineContext(ctx.run_id)))
 
     def s3_source_overrides(self, source: S3Source) -> S3Source:
         source.source_config.platform = PLATFORM_GCS
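The second hunk stops handing the outer pipeline's context to the embedded S3Source and passes a fresh one sharing only the run id, presumably so the S3 overrides cannot leak state back into the GCS pipeline. A minimal sketch of that isolation pattern (assuming the usual PipelineContext import path):

from datahub.ingestion.api.common import PipelineContext

def isolated_context(ctx: PipelineContext) -> PipelineContext:
    # Share only the run id with the embedded source; any other context
    # state stays with the outer pipeline.
    return PipelineContext(ctx.run_id)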
