Skip to content

Commit 4448fc2

Browse files
authored
feat(sdk): add scaffolding for sdk v2 (#12554)
1 parent a98d4c2 commit 4448fc2

22 files changed

+2902
-1
lines changed

metadata-ingestion/setup.cfg

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ warn_unused_configs = yes
1515
disallow_untyped_defs = no
1616

1717
# try to be a bit more strict in certain areas of the codebase
18+
[mypy-datahub]
19+
# Only for datahub's __init__.py - allow implicit reexport
20+
implicit_reexport = yes
1821
[mypy-datahub.*]
1922
ignore_missing_imports = no
2023
implicit_reexport = no
@@ -54,7 +57,7 @@ addopts = --cov=src --cov-report= --cov-config setup.cfg --strict-markers -p no:
5457
markers =
5558
slow: marks tests that are slow to run, including all docker-based tests (deselect with '-m not slow')
5659
integration: marks all integration tests, across all batches (deselect with '-m "not integration"')
57-
integration_batch_0: mark tests to run in batch 0 of integration tests. This is done mainly for parallelisation in CI. Batch 0 is the default batch.
60+
integration_batch_0: mark tests to run in batch 0 of integration tests. This is done mainly for parallelization in CI. Batch 0 is the default batch.
5861
integration_batch_1: mark tests to run in batch 1 of integration tests
5962
integration_batch_2: mark tests to run in batch 2 of integration tests
6063
testpaths =
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from datahub.configuration.common import MetaError
2+
3+
# TODO: Move all other error types to this file.
4+
5+
6+
class SdkUsageError(MetaError):
    """Base class for the SDK usage errors defined in this module.

    The more specific errors below (already-exists, not-found, multiple-found,
    schema-field key errors) all derive from this class, so catching
    `SdkUsageError` catches every one of them.
    """
8+
9+
10+
class AlreadyExistsError(SdkUsageError):
    """`SdkUsageError` subclass; going by its name, signals that an item already exists.

    NOTE(review): the raise sites are not in this file - confirm exact usage there.
    """
12+
13+
14+
class ItemNotFoundError(SdkUsageError):
    """`SdkUsageError` subclass; going by its name, signals that a requested item was not found.

    NOTE(review): the raise sites are not in this file - confirm exact usage there.
    """
16+
17+
18+
class MultipleItemsFoundError(SdkUsageError):
    """`SdkUsageError` subclass; going by its name, signals that a lookup matched several items.

    NOTE(review): the raise sites are not in this file - confirm exact usage there.
    """
20+
21+
22+
class SchemaFieldKeyError(SdkUsageError, KeyError):
    """SDK usage error that is also a `KeyError`.

    Because it inherits from both bases, it is caught by handlers for
    `SdkUsageError` as well as by plain `except KeyError` handlers.
    """
24+
25+
26+
class IngestionAttributionWarning(Warning):
    """Warning category for ingestion-attribution issues (meaning inferred from the name).

    NOTE(review): the emit sites are not in this file - confirm exact usage there.
    """
28+
29+
30+
class MultipleSubtypesWarning(Warning):
    """Warning category for entities with multiple subtypes (meaning inferred from the name).

    NOTE(review): the emit sites are not in this file - confirm exact usage there.
    """
32+
33+
34+
class ExperimentalWarning(Warning):
    """Warning category used to flag experimental, not-yet-stable APIs."""
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import warnings
2+
3+
import datahub.metadata.schema_classes as models
4+
from datahub.errors import ExperimentalWarning, SdkUsageError
5+
from datahub.ingestion.graph.config import DatahubClientConfig
6+
from datahub.metadata.urns import (
7+
ChartUrn,
8+
ContainerUrn,
9+
CorpGroupUrn,
10+
CorpUserUrn,
11+
DashboardUrn,
12+
DataPlatformInstanceUrn,
13+
DataPlatformUrn,
14+
DatasetUrn,
15+
DomainUrn,
16+
GlossaryTermUrn,
17+
SchemaFieldUrn,
18+
TagUrn,
19+
)
20+
from datahub.sdk.container import Container
21+
from datahub.sdk.dataset import Dataset
22+
from datahub.sdk.main_client import DataHubClient
23+
24+
# Runs when this package is imported: flag that `datahub.sdk` is still experimental.
warnings.warn(
    "The new datahub SDK (e.g. datahub.sdk.*) is experimental. "
    "Our typical backwards-compatibility and stability guarantees do not apply to this code. "
    "When it's promoted to stable, the import path will change "
    "from `from datahub.sdk import ...` to `from datahub import ...`.",
    category=ExperimentalWarning,
    stacklevel=2,
)
# Remove the helper names so they are not left behind as attributes of this package.
del warnings, ExperimentalWarning
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from typing import Dict, List, Type
2+
3+
from datahub.sdk._entity import Entity
4+
from datahub.sdk.container import Container
5+
from datahub.sdk.dataset import Dataset
6+
7+
# TODO: Is there a better way to declare this?
8+
# The Entity subclasses registered with the SDK.
ENTITY_CLASSES_LIST: List[Type[Entity]] = [Container, Dataset]

# Lookup table from each class's URN entity type (`get_urn_type().ENTITY_TYPE`)
# to the Entity subclass itself.
ENTITY_CLASSES: Dict[str, Type[Entity]] = {
    entity_cls.get_urn_type().ENTITY_TYPE: entity_cls
    for entity_cls in ENTITY_CLASSES_LIST
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from __future__ import annotations
2+
3+
import contextlib
4+
from typing import Iterator
5+
6+
from datahub.utilities.str_enum import StrEnum
7+
8+
9+
class KnownAttribution(StrEnum):
10+
INGESTION = "INGESTION"
11+
INGESTION_ALTERNATE = "INGESTION_ALTERNATE"
12+
13+
UI = "UI"
14+
SDK = "SDK"
15+
16+
PROPAGATION = "PROPAGATION"
17+
18+
def is_ingestion(self) -> bool:
19+
return self in (
20+
KnownAttribution.INGESTION,
21+
KnownAttribution.INGESTION_ALTERNATE,
22+
)
23+
24+
25+
# Module-wide default attribution; starts as SDK and is rebound by
# set_default_attribution() via a `global` statement.
_default_attribution: KnownAttribution = KnownAttribution.SDK
26+
27+
28+
def get_default_attribution() -> KnownAttribution:
    """Return the current module-wide default attribution."""
    return _default_attribution
30+
31+
32+
def set_default_attribution(attribution: KnownAttribution) -> None:
    """Replace the module-wide default attribution with `attribution`."""
    # Rebinds the module-level variable rather than creating a local one.
    global _default_attribution
    _default_attribution = attribution
35+
36+
37+
@contextlib.contextmanager
def change_default_attribution(attribution: KnownAttribution) -> Iterator[None]:
    """Context manager that temporarily switches the default attribution.

    The default in effect on entry is captured first and is restored on
    exit, including when the `with` body raises.
    """
    previous = get_default_attribution()
    try:
        set_default_attribution(attribution)
        yield
    finally:
        # Always put back whatever was active before entering the block.
        set_default_attribution(previous)
45+
46+
47+
def is_ingestion_attribution() -> bool:
    """Return True when the current default attribution is an ingestion one."""
    current = get_default_attribution()
    return current.is_ingestion()
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import abc
2+
from typing import List, Optional, Type, Union
3+
4+
from typing_extensions import Self
5+
6+
import datahub.metadata.schema_classes as models
7+
from datahub.emitter.mce_builder import Aspect as AspectTypeVar
8+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
9+
from datahub.errors import SdkUsageError
10+
from datahub.metadata.urns import Urn
11+
from datahub.utilities.urns._urn_base import _SpecificUrn
12+
13+
14+
class Entity:
    """Base class for SDK entities: one URN plus a bag of aspects keyed by aspect name.

    Not meant to be instantiated directly; subclasses must implement
    `get_urn_type`. An instance keeps the aspects it was loaded with
    (`_prev_aspects`) separately from its current aspects (`_aspects`).
    """

    __slots__ = ("_urn", "_prev_aspects", "_aspects")

    def __init__(self, /, urn: Urn):
        """Create an entity for `urn` with no aspects set.

        Raises:
            SdkUsageError: if `Entity` itself, rather than a subclass, is instantiated.
        """
        # This method is not meant for direct usage.
        if type(self) is Entity:
            raise SdkUsageError(f"{Entity.__name__} cannot be instantiated directly.")

        # Sanity check (skipped under `python -O`): the urn must be of the
        # urn type declared by the concrete subclass.
        assert isinstance(urn, self.get_urn_type())
        self._urn: _SpecificUrn = urn

        # prev_aspects is None means this was created from scratch
        self._prev_aspects: Optional[models.AspectBag] = None
        self._aspects: models.AspectBag = {}

    @classmethod
    def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
        """Build an entity for `urn` and populate it from `current_aspects`."""
        # If an init method from a subclass adds required fields, it also needs to override this method.
        # An alternative approach would call cls.__new__() to bypass the init method, but it's a bit
        # too hacky for my taste.
        entity = cls(urn=urn)
        return entity._init_from_graph(current_aspects)

    def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
        """Record `current_aspects` as the previous state and load copies of them.

        Each aspect is round-tripped through `to_obj()` / `from_obj()` so that
        `_aspects` holds separate objects from the ones in `current_aspects`.
        Returns `self`.
        """
        self._prev_aspects = current_aspects
        aspect: models._Aspect
        for aspect_name, aspect in (current_aspects or {}).items():  # type: ignore
            aspect_copy = type(aspect).from_obj(aspect.to_obj())
            self._aspects[aspect_name] = aspect_copy  # type: ignore
        return self

    # NOTE: this class does not use ABCMeta, so `abstractmethod` is not enforced
    # at instantiation time; direct instantiation is blocked in `__init__` instead.
    @classmethod
    @abc.abstractmethod
    def get_urn_type(cls) -> Type[_SpecificUrn]:
        """Return the urn class that instances of this entity type must use."""
        ...

    @property
    def urn(self) -> _SpecificUrn:
        """The urn this entity was created with."""
        return self._urn

    def _get_aspect(
        self,
        aspect_type: Type[AspectTypeVar],
        /,
    ) -> Optional[AspectTypeVar]:
        """Return the stored aspect of `aspect_type`, or None if it is not set."""
        return self._aspects.get(aspect_type.ASPECT_NAME)  # type: ignore

    def _set_aspect(self, value: AspectTypeVar, /) -> None:
        """Store `value` under its own aspect name, replacing any existing one."""
        self._aspects[value.ASPECT_NAME] = value  # type: ignore

    def _setdefault_aspect(self, default_aspect: AspectTypeVar, /) -> AspectTypeVar:
        """Return the stored aspect of this type, storing `default_aspect` first if absent."""
        # Similar semantics to dict.setdefault: only a *missing* aspect is
        # replaced. Compare against None explicitly so that an existing aspect
        # that happens to evaluate as falsy is never silently overwritten.
        existing_aspect = self._get_aspect(type(default_aspect))
        if existing_aspect is not None:
            return existing_aspect
        self._set_aspect(default_aspect)
        return default_aspect

    def _as_mcps(
        self,
        change_type: Union[str, models.ChangeTypeClass] = models.ChangeTypeClass.UPSERT,
    ) -> List[MetadataChangeProposalWrapper]:
        """Return one MetadataChangeProposalWrapper per stored aspect.

        Every proposal targets this entity's urn and carries `change_type`.
        """
        urn_str = str(self.urn)

        mcps = []
        for aspect in self._aspects.values():
            assert isinstance(aspect, models._Aspect)
            mcps.append(
                MetadataChangeProposalWrapper(
                    entityUrn=urn_str,
                    aspect=aspect,
                    changeType=change_type,
                )
            )
        return mcps

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}('{self.urn}')"

0 commit comments

Comments
 (0)