Skip to content

Commit 180087f

Browse files
authored
fix(ingest): remove duplicate mcps,more typing (#12557)
1 parent 140b71f commit 180087f

File tree

3 files changed

+21
-303
lines changed

3 files changed

+21
-303
lines changed

metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py

+10
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from typing import Callable, Dict, Iterable, List, MutableMapping, Optional
77

88
from datahub.ingestion.api.report import SupportsAsObj
9+
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
910
from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
1011
from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
1112
from datahub.ingestion.source.snowflake.snowflake_query import (
@@ -100,6 +101,9 @@ class SnowflakeTable(BaseTable):
100101
def is_hybrid(self) -> bool:
101102
return self.type is not None and self.type == "HYBRID TABLE"
102103

104+
def get_subtype(self) -> DatasetSubTypes:
105+
return DatasetSubTypes.TABLE
106+
103107

104108
@dataclass
105109
class SnowflakeView(BaseView):
@@ -109,6 +113,9 @@ class SnowflakeView(BaseView):
109113
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
110114
is_secure: bool = False
111115

116+
def get_subtype(self) -> DatasetSubTypes:
117+
return DatasetSubTypes.VIEW
118+
112119

113120
@dataclass
114121
class SnowflakeSchema:
@@ -154,6 +161,9 @@ class SnowflakeStream:
154161
column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
155162
last_altered: Optional[datetime] = None
156163

164+
def get_subtype(self) -> DatasetSubTypes:
165+
return DatasetSubTypes.SNOWFLAKE_STREAM
166+
157167

158168
class _SnowflakeTagCache:
159169
def __init__(self) -> None:

metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py

+11 -14
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
2222
from datahub.ingestion.source.common.subtypes import (
2323
DatasetContainerSubTypes,
24-
DatasetSubTypes,
2524
)
2625
from datahub.ingestion.source.snowflake.constants import (
2726
GENERIC_PERMISSION_ERROR_KEY,
@@ -467,7 +466,13 @@ def _process_schema(
467466
context=f"{db_name}.{schema_name}",
468467
)
469468

470-
def _process_tags(self, snowflake_schema, schema_name, db_name, domain):
469+
def _process_tags(
470+
self,
471+
snowflake_schema: SnowflakeSchema,
472+
schema_name: str,
473+
db_name: str,
474+
domain: str,
475+
) -> None:
471476
snowflake_schema.tags = self.tag_extractor.get_tags_on_object(
472477
schema_name=schema_name, db_name=db_name, domain=domain
473478
)
@@ -837,15 +842,7 @@ def gen_dataset_workunits(
837842
if dpi_aspect:
838843
yield dpi_aspect
839844

840-
subTypes = SubTypes(
841-
typeNames=(
842-
[DatasetSubTypes.SNOWFLAKE_STREAM]
843-
if isinstance(table, SnowflakeStream)
844-
else [DatasetSubTypes.VIEW]
845-
if isinstance(table, SnowflakeView)
846-
else [DatasetSubTypes.TABLE]
847-
)
848-
)
845+
subTypes = SubTypes(typeNames=[table.get_subtype()])
849846

850847
yield MetadataChangeProposalWrapper(
851848
entityUrn=dataset_urn, aspect=subTypes
@@ -932,9 +929,9 @@ def get_dataset_properties(
932929
"OWNER_ROLE_TYPE": table.owner_role_type,
933930
"TABLE_NAME": table.table_name,
934931
"BASE_TABLES": table.base_tables,
935-
"STALE_AFTER": table.stale_after.isoformat()
936-
if table.stale_after
937-
else None,
932+
"STALE_AFTER": (
933+
table.stale_after.isoformat() if table.stale_after else None
934+
),
938935
}.items()
939936
if v
940937
}

metadata-ingestion/tests/integration/snowflake/snowflake_golden.json

-289
Original file line number | Diff line number | Diff line change
@@ -4240,295 +4240,6 @@
42404240
"lastRunId": "no-run-id-provided"
42414241
}
42424242
},
4243-
{
4244-
"entityType": "dataset",
4245-
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.stream_1,PROD)",
4246-
"changeType": "UPSERT",
4247-
"aspectName": "status",
4248-
"aspect": {
4249-
"json": {
4250-
"removed": false
4251-
}
4252-
},
4253-
"systemMetadata": {
4254-
"lastObserved": 1615443388097,
4255-
"runId": "snowflake-2025_01_28-00_01_52-5vkne0",
4256-
"lastRunId": "no-run-id-provided"
4257-
}
4258-
},
4259-
{
4260-
"entityType": "dataset",
4261-
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.stream_1,PROD)",
4262-
"changeType": "UPSERT",
4263-
"aspectName": "schemaMetadata",
4264-
"aspect": {
4265-
"json": {
4266-
"schemaName": "test_db.test_schema.stream_1",
4267-
"platform": "urn:li:dataPlatform:snowflake",
4268-
"version": 0,
4269-
"created": {
4270-
"time": 0,
4271-
"actor": "urn:li:corpuser:unknown"
4272-
},
4273-
"lastModified": {
4274-
"time": 0,
4275-
"actor": "urn:li:corpuser:unknown"
4276-
},
4277-
"hash": "",
4278-
"platformSchema": {
4279-
"com.linkedin.schema.MySqlDDL": {
4280-
"tableSchema": ""
4281-
}
4282-
},
4283-
"fields": [
4284-
{
4285-
"fieldPath": "col_1",
4286-
"nullable": false,
4287-
"description": "Comment for column",
4288-
"type": {
4289-
"type": {
4290-
"com.linkedin.schema.NumberType": {}
4291-
}
4292-
},
4293-
"nativeDataType": "NUMBER(38,0)",
4294-
"recursive": false,
4295-
"isPartOfKey": false
4296-
},
4297-
{
4298-
"fieldPath": "col_2",
4299-
"nullable": false,
4300-
"description": "Comment for column",
4301-
"type": {
4302-
"type": {
4303-
"com.linkedin.schema.StringType": {}
4304-
}
4305-
},
4306-
"nativeDataType": "VARCHAR(255)",
4307-
"recursive": false,
4308-
"isPartOfKey": false
4309-
},
4310-
{
4311-
"fieldPath": "col_3",
4312-
"nullable": false,
4313-
"description": "Comment for column",
4314-
"type": {
4315-
"type": {
4316-
"com.linkedin.schema.StringType": {}
4317-
}
4318-
},
4319-
"nativeDataType": "VARCHAR(255)",
4320-
"recursive": false,
4321-
"isPartOfKey": false
4322-
},
4323-
{
4324-
"fieldPath": "col_4",
4325-
"nullable": false,
4326-
"description": "Comment for column",
4327-
"type": {
4328-
"type": {
4329-
"com.linkedin.schema.StringType": {}
4330-
}
4331-
},
4332-
"nativeDataType": "VARCHAR(255)",
4333-
"recursive": false,
4334-
"isPartOfKey": false
4335-
},
4336-
{
4337-
"fieldPath": "col_5",
4338-
"nullable": false,
4339-
"description": "Comment for column",
4340-
"type": {
4341-
"type": {
4342-
"com.linkedin.schema.StringType": {}
4343-
}
4344-
},
4345-
"nativeDataType": "VARCHAR(255)",
4346-
"recursive": false,
4347-
"isPartOfKey": false
4348-
},
4349-
{
4350-
"fieldPath": "col_6",
4351-
"nullable": false,
4352-
"description": "Comment for column",
4353-
"type": {
4354-
"type": {
4355-
"com.linkedin.schema.StringType": {}
4356-
}
4357-
},
4358-
"nativeDataType": "VARCHAR(255)",
4359-
"recursive": false,
4360-
"isPartOfKey": false
4361-
},
4362-
{
4363-
"fieldPath": "col_7",
4364-
"nullable": false,
4365-
"description": "Comment for column",
4366-
"type": {
4367-
"type": {
4368-
"com.linkedin.schema.StringType": {}
4369-
}
4370-
},
4371-
"nativeDataType": "VARCHAR(255)",
4372-
"recursive": false,
4373-
"isPartOfKey": false
4374-
},
4375-
{
4376-
"fieldPath": "col_8",
4377-
"nullable": false,
4378-
"description": "Comment for column",
4379-
"type": {
4380-
"type": {
4381-
"com.linkedin.schema.StringType": {}
4382-
}
4383-
},
4384-
"nativeDataType": "VARCHAR(255)",
4385-
"recursive": false,
4386-
"isPartOfKey": false
4387-
},
4388-
{
4389-
"fieldPath": "col_9",
4390-
"nullable": false,
4391-
"description": "Comment for column",
4392-
"type": {
4393-
"type": {
4394-
"com.linkedin.schema.StringType": {}
4395-
}
4396-
},
4397-
"nativeDataType": "VARCHAR(255)",
4398-
"recursive": false,
4399-
"isPartOfKey": false
4400-
},
4401-
{
4402-
"fieldPath": "col_10",
4403-
"nullable": false,
4404-
"description": "Comment for column",
4405-
"type": {
4406-
"type": {
4407-
"com.linkedin.schema.StringType": {}
4408-
}
4409-
},
4410-
"nativeDataType": "VARCHAR(255)",
4411-
"recursive": false,
4412-
"isPartOfKey": false
4413-
},
4414-
{
4415-
"fieldPath": "metadata$action",
4416-
"nullable": false,
4417-
"description": "Type of DML operation (INSERT/DELETE)",
4418-
"type": {
4419-
"type": {
4420-
"com.linkedin.schema.StringType": {}
4421-
}
4422-
},
4423-
"nativeDataType": "VARCHAR(10)",
4424-
"recursive": false,
4425-
"isPartOfKey": false
4426-
},
4427-
{
4428-
"fieldPath": "metadata$isupdate",
4429-
"nullable": false,
4430-
"description": "Whether row is from UPDATE operation",
4431-
"type": {
4432-
"type": {
4433-
"com.linkedin.schema.BooleanType": {}
4434-
}
4435-
},
4436-
"nativeDataType": "BOOLEAN",
4437-
"recursive": false,
4438-
"isPartOfKey": false
4439-
},
4440-
{
4441-
"fieldPath": "metadata$row_id",
4442-
"nullable": false,
4443-
"description": "Unique row identifier",
4444-
"type": {
4445-
"type": {
4446-
"com.linkedin.schema.NumberType": {}
4447-
}
4448-
},
4449-
"nativeDataType": "NUMBER(38,0)",
4450-
"recursive": false,
4451-
"isPartOfKey": false
4452-
}
4453-
]
4454-
}
4455-
},
4456-
"systemMetadata": {
4457-
"lastObserved": 1615443388097,
4458-
"runId": "snowflake-2025_01_28-00_01_52-5vkne0",
4459-
"lastRunId": "no-run-id-provided"
4460-
}
4461-
},
4462-
{
4463-
"entityType": "dataset",
4464-
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.stream_1,PROD)",
4465-
"changeType": "UPSERT",
4466-
"aspectName": "datasetProperties",
4467-
"aspect": {
4468-
"json": {
4469-
"customProperties": {
4470-
"SOURCE_TYPE": "Table",
4471-
"TYPE": "DELTA",
4472-
"STALE": "false",
4473-
"MODE": "DEFAULT",
4474-
"OWNER_ROLE_TYPE": "ROLE",
4475-
"TABLE_NAME": "TEST_DB.TEST_SCHEMA.TABLE_1",
4476-
"BASE_TABLES": "TEST_DB.TEST_SCHEMA.TABLE_1",
4477-
"STALE_AFTER": "2021-06-22T00:00:00+00:00"
4478-
},
4479-
"externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/STREAM_1/",
4480-
"name": "STREAM_1",
4481-
"qualifiedName": "TEST_DB.TEST_SCHEMA.STREAM_1",
4482-
"description": "Comment for Stream 1",
4483-
"created": {
4484-
"time": 1623110400000
4485-
},
4486-
"lastModified": {
4487-
"time": 1623110400000
4488-
},
4489-
"tags": []
4490-
}
4491-
},
4492-
"systemMetadata": {
4493-
"lastObserved": 1615443388097,
4494-
"runId": "snowflake-2025_01_28-00_01_52-5vkne0",
4495-
"lastRunId": "no-run-id-provided"
4496-
}
4497-
},
4498-
{
4499-
"entityType": "dataset",
4500-
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.stream_1,PROD)",
4501-
"changeType": "UPSERT",
4502-
"aspectName": "container",
4503-
"aspect": {
4504-
"json": {
4505-
"container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
4506-
}
4507-
},
4508-
"systemMetadata": {
4509-
"lastObserved": 1615443388097,
4510-
"runId": "snowflake-2025_01_28-00_01_52-5vkne0",
4511-
"lastRunId": "no-run-id-provided"
4512-
}
4513-
},
4514-
{
4515-
"entityType": "dataset",
4516-
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.stream_1,PROD)",
4517-
"changeType": "UPSERT",
4518-
"aspectName": "subTypes",
4519-
"aspect": {
4520-
"json": {
4521-
"typeNames": [
4522-
"Snowflake Stream"
4523-
]
4524-
}
4525-
},
4526-
"systemMetadata": {
4527-
"lastObserved": 1615443388097,
4528-
"runId": "snowflake-2025_01_28-00_01_52-5vkne0",
4529-
"lastRunId": "no-run-id-provided"
4530-
}
4531-
},
45324243
{
45334244
"entityType": "dataset",
45344245
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.stream_1,PROD)",

0 commit comments

Comments
 (0)