From 5309ae00141875cca6be64e587a94efa854f448e Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 22 Jan 2025 16:21:46 -0800
Subject: [PATCH] feat(sdk): improve docs + code clarity (#12422)

---
 .../src/datahub/emitter/enum_helpers.py              |  6 ++++--
 .../src/datahub/emitter/mce_builder.py               |  4 ++++
 .../src/datahub/ingestion/api/decorators.py          |  2 ++
 metadata-ingestion/src/datahub/ingestion/api/sink.py | 12 ++++++++++++
 .../src/datahub/ingestion/api/source.py              |  7 +++++--
 .../src/datahub/utilities/urns/_urn_base.py          |  8 ++++++--
 6 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/metadata-ingestion/src/datahub/emitter/enum_helpers.py b/metadata-ingestion/src/datahub/emitter/enum_helpers.py
index 89949ab3717ff..89916ffd899c3 100644
--- a/metadata-ingestion/src/datahub/emitter/enum_helpers.py
+++ b/metadata-ingestion/src/datahub/emitter/enum_helpers.py
@@ -1,11 +1,13 @@
 from typing import List, Type
 
+from typing_extensions import LiteralString
 
-def get_enum_options(_class: Type[object]) -> List[str]:
+
+def get_enum_options(class_: Type[object]) -> List[LiteralString]:
     """Get the valid values for an enum in the datahub.metadata.schema_classes module."""
 
     return [
         value
-        for name, value in vars(_class).items()
+        for name, value in vars(class_).items()
         if not callable(value) and not name.startswith("_")
     ]
diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py
index f5da90a86c9ef..9fa060266a7ab 100644
--- a/metadata-ingestion/src/datahub/emitter/mce_builder.py
+++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py
@@ -440,6 +440,10 @@ def can_add_aspect_to_snapshot(
 
 
 def can_add_aspect(mce: MetadataChangeEventClass, AspectType: Type[Aspect]) -> bool:
+    # TODO: This is specific to snapshot types. We have a more general method
+    # in `entity_supports_aspect`, which should be used instead. This method
+    # should be deprecated, and all usages should be replaced.
+
     SnapshotType = type(mce.proposedSnapshot)
 
     return can_add_aspect_to_snapshot(SnapshotType, AspectType)
diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
index d32c0b85ceef4..b521dc5e9e7f5 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
@@ -25,6 +25,8 @@ def wrapper(cls: Type) -> Type:
             # add the create method only if it has not been overridden from the base Source.create method
             cls.create = classmethod(default_create)
 
+        # TODO: Once we're on Python 3.10, we should call abc.update_abstractmethods here.
+
         return cls
 
     return wrapper
diff --git a/metadata-ingestion/src/datahub/ingestion/api/sink.py b/metadata-ingestion/src/datahub/ingestion/api/sink.py
index 655e6bb22fa8d..44148c751df82 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/sink.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/sink.py
@@ -110,6 +110,10 @@ def __init__(self, ctx: PipelineContext, config: SinkConfig):
         self.__post_init__()
 
     def __post_init__(self) -> None:
+        """Hook called after the sink's main initialization is complete.
+
+        Sink subclasses can override this method to customize initialization.
+        """
         pass
 
     @classmethod
@@ -117,9 +121,17 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> "Self":
         return cls(ctx, cls.get_config_class().parse_obj(config_dict))
 
     def handle_work_unit_start(self, workunit: WorkUnit) -> None:
+        """Called at the start of each new workunit.
+
+        This method is deprecated and will be removed in a future release.
+        """
         pass
 
     def handle_work_unit_end(self, workunit: WorkUnit) -> None:
+        """Called at the end of each workunit.
+
+        This method is deprecated and will be removed in a future release.
+        """
         pass
 
     @abstractmethod
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py
index b04ffdb325893..e401d6e976b6d 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source.py
@@ -404,8 +404,11 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> Self:
         # Technically, this method should be abstract. However, the @config_class
         # decorator automatically generates a create method at runtime if one is
         # not defined. Python still treats the class as abstract because it thinks
-        # the create method is missing. To avoid the class becoming abstract, we
-        # can't make this method abstract.
+        # the create method is missing.
+        #
+        # Once we're on Python 3.10, we can use the abc.update_abstractmethods(cls)
+        # method in the config_class decorator. That would allow us to make this
+        # method abstract.
         raise NotImplementedError('sources must implement "create"')
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
diff --git a/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py b/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py
index e8e22cd85ac9f..55144ebf61b19 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/_urn_base.py
@@ -6,6 +6,7 @@
 from deprecated import deprecated
 from typing_extensions import Self
 
+from datahub._codegen.aspect import _Aspect
 from datahub.utilities.urns.error import InvalidUrnError
 
 URN_TYPES: Dict[str, Type["_SpecificUrn"]] = {}
@@ -270,7 +271,7 @@ def make_form_urn(form: str) -> str:
 
 
 class _SpecificUrn(Urn):
-    ENTITY_TYPE: str = ""
+    ENTITY_TYPE: ClassVar[str] = ""
 
     def __init_subclass__(cls) -> None:
         # Validate the subclass.
@@ -286,7 +287,10 @@ def __init_subclass__(cls) -> None:
         return super().__init_subclass__()
 
     @classmethod
-    def underlying_key_aspect_type(cls) -> Type:
+    def underlying_key_aspect_type(cls) -> Type[_Aspect]:
+        raise NotImplementedError()
+
+    def to_key_aspect(self) -> _Aspect:
         raise NotImplementedError()
 
     @classmethod