Skip to content

Commit

Permalink
feat(urn-validation): Add UrnValidation PDL annotation
Browse files Browse the repository at this point in the history
* Apply UrnValidation logic to StructuredPropertyDefinition valueType
  • Loading branch information
david-leifker committed Feb 7, 2025
1 parent 65376ee commit 2c53b38
Show file tree
Hide file tree
Showing 31 changed files with 885 additions and 189 deletions.
22 changes: 22 additions & 0 deletions docs/modeling/extending-the-metadata-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ The Aspect has four key components: its properties, the @Aspect annotation, the
the case of DashboardInfo, the `charts` field is an Array of Urns. The @Relationship annotation cannot be applied
directly to an array of Urns. That’s why you see the use of an Annotation override (`"/*":`) to apply the @Relationship
annotation to the Urn directly. Read more about overrides in the annotation docs further down on this page.
- **@UrnValidation**: This annotation can enforce constraints on Urn fields, including entity type restrictions and existence.

After you create your Aspect, you need to attach to all the entities that it applies to.

Expand Down Expand Up @@ -494,6 +495,27 @@ This annotation says that when we ingest an Entity with an Ownership Aspect, Dat
between that entity and the CorpUser or CorpGroup who owns it. This will be queryable using the Relationships resource
in both the forward and inverse directions.

#### @UrnValidation

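This annotation can be applied to Urn fields inside an aspect. Depending on its attributes, the annotation performs one or more of the following:
- Enforce that the URN exists (`exist`; defaults to `true` when the attribute is omitted)
- Enforce stricter URN validation (`strict`; defaults to `true` when the attribute is omitted)
- Restrict the URN to specific entity types (`entityTypes`; a list of entity type names)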

##### Example

Using this example from StructuredPropertyDefinition, we are enforcing that the valueType URN must exist,
it must follow stricter Urn encoding logic, and it can only be of entity type `dataType`.

```
@UrnValidation = {
"exist": true,
"strict": true,
"entityTypes": [ "dataType" ],
}
valueType: Urn
```

#### Annotating Collections & Annotation Overrides

You will not always be able to apply annotations to a primitive field directly. This may be because the field is wrapped
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class AspectSpec {
private final Map<String, TimeseriesFieldSpec> _timeseriesFieldSpecs;
private final Map<String, TimeseriesFieldCollectionSpec> _timeseriesFieldCollectionSpecs;
private final Map<String, SearchableRefFieldSpec> _searchableRefFieldSpecs;
private final Map<String, UrnValidationFieldSpec> _urnValidationFieldSpecs;

// Classpath & Pegasus-specific: Temporary.
private final RecordDataSchema _schema;
Expand All @@ -39,6 +40,7 @@ public AspectSpec(
@Nonnull final List<TimeseriesFieldSpec> timeseriesFieldSpecs,
@Nonnull final List<TimeseriesFieldCollectionSpec> timeseriesFieldCollectionSpecs,
@Nonnull final List<SearchableRefFieldSpec> searchableRefFieldSpecs,
@Nonnull final List<UrnValidationFieldSpec> urnValidationFieldSpecs,
final RecordDataSchema schema,
final Class<RecordTemplate> aspectClass) {
_aspectAnnotation = aspectAnnotation;
Expand Down Expand Up @@ -76,6 +78,11 @@ public AspectSpec(
spec -> spec.getTimeseriesFieldCollectionAnnotation().getCollectionName(),
spec -> spec,
(val1, val2) -> val1));
_urnValidationFieldSpecs =
urnValidationFieldSpecs.stream()
.collect(
Collectors.toMap(
spec -> spec.getPath().toString(), spec -> spec, (val1, val2) -> val1));
_schema = schema;
_aspectClass = aspectClass;
}
Expand Down Expand Up @@ -112,6 +119,10 @@ public Map<String, TimeseriesFieldSpec> getTimeseriesFieldSpecMap() {
return _timeseriesFieldSpecs;
}

public Map<String, UrnValidationFieldSpec> getUrnValidationFieldSpecMap() {
return _urnValidationFieldSpecs;

Check warning on line 123 in entity-registry/src/main/java/com/linkedin/metadata/models/AspectSpec.java

View check run for this annotation

Codecov / codecov/patch

entity-registry/src/main/java/com/linkedin/metadata/models/AspectSpec.java#L123

Added line #L123 was not covered by tests
}

/**
 * Returns the timeseries field collection specs held by this aspect spec.
 *
 * <p>The map held by this instance is returned directly; no copy is made.
 * NOTE(review): the key is presumably the collection name from the annotation - confirm against
 * the constructor.
 */
public Map<String, TimeseriesFieldCollectionSpec> getTimeseriesFieldCollectionSpecMap() {
return _timeseriesFieldCollectionSpecs;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.linkedin.metadata.models.annotation.SearchableRefAnnotation;
import com.linkedin.metadata.models.annotation.TimeseriesFieldAnnotation;
import com.linkedin.metadata.models.annotation.TimeseriesFieldCollectionAnnotation;
import com.linkedin.metadata.models.annotation.UrnValidationAnnotation;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
Expand Down Expand Up @@ -48,6 +49,8 @@ public class EntitySpecBuilder {
new PegasusSchemaAnnotationHandlerImpl(TimeseriesFieldAnnotation.ANNOTATION_NAME);
public static SchemaAnnotationHandler _timeseriesFieldCollectionHandler =
new PegasusSchemaAnnotationHandlerImpl(TimeseriesFieldCollectionAnnotation.ANNOTATION_NAME);
// Annotation handler registered under the @UrnValidation annotation name; it is handed to
// SchemaAnnotationProcessor when an aspect schema is processed for UrnValidation field specs.
public static SchemaAnnotationHandler _urnValidationAnnotationHandler =
new PegasusSchemaAnnotationHandlerImpl(UrnValidationAnnotation.ANNOTATION_NAME);

private final AnnotationExtractionMode _extractionMode;
private final Set<String> _entityNames = new HashSet<>();
Expand Down Expand Up @@ -226,6 +229,7 @@ public AspectSpec buildAspectSpec(
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList(),

Check warning on line 232 in entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java

View check run for this annotation

Codecov / codecov/patch

entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java#L232

Added line #L232 was not covered by tests
aspectRecordSchema,
aspectClass);
}
Expand Down Expand Up @@ -299,6 +303,18 @@ public AspectSpec buildAspectSpec(
new DataSchemaRichContextTraverser(timeseriesFieldSpecExtractor);
timeseriesFieldSpecTraverser.traverse(processedTimeseriesFieldResult.getResultSchema());

// Extract UrnValidation field specs: resolve the @UrnValidation annotations on the aspect
// schema, then walk the resolved schema so the extractor can collect one spec per annotated path.
final SchemaAnnotationProcessor.SchemaAnnotationProcessResult processedUrnValidationResult =
    SchemaAnnotationProcessor.process(
        Collections.singletonList(_urnValidationAnnotationHandler),
        aspectRecordSchema,
        new SchemaAnnotationProcessor.AnnotationProcessOption());
final UrnValidationFieldSpecExtractor urnValidationFieldSpecExtractor =
    new UrnValidationFieldSpecExtractor();
final DataSchemaRichContextTraverser urnValidationFieldSpecTraverser =
    new DataSchemaRichContextTraverser(urnValidationFieldSpecExtractor);
urnValidationFieldSpecTraverser.traverse(processedUrnValidationResult.getResultSchema());

return new AspectSpec(
aspectAnnotation,
searchableFieldSpecExtractor.getSpecs(),
Expand All @@ -307,6 +323,7 @@ public AspectSpec buildAspectSpec(
timeseriesFieldSpecExtractor.getTimeseriesFieldSpecs(),
timeseriesFieldSpecExtractor.getTimeseriesFieldCollectionSpecs(),
searchableRefFieldSpecExtractor.getSpecs(),
urnValidationFieldSpecExtractor.getUrnValidationFieldSpecs(),
aspectRecordSchema,
aspectClass);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.linkedin.metadata.models;

import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.PathSpec;
import com.linkedin.metadata.models.annotation.UrnValidationAnnotation;
import javax.annotation.Nonnull;
import lombok.Value;

/**
 * Immutable description of one field that carries an {@code @UrnValidation} annotation.
 *
 * <p>Lombok's {@code @Value} generates the constructor and getters; the constructor takes the
 * fields in declaration order (path, annotation, schema).
 */
@Value
public class UrnValidationFieldSpec {
/** Path of the annotated field. */
@Nonnull PathSpec path;
/** Parsed {@code @UrnValidation} annotation declared for the field. */
@Nonnull UrnValidationAnnotation urnValidationAnnotation;
/** Pegasus schema recorded for the field at {@link #path}. */
@Nonnull DataSchema pegasusSchema;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package com.linkedin.metadata.models;

import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.DataSchemaTraverse;
import com.linkedin.data.schema.PathSpec;
import com.linkedin.data.schema.annotation.SchemaVisitor;
import com.linkedin.data.schema.annotation.SchemaVisitorTraversalResult;
import com.linkedin.data.schema.annotation.TraverserContext;
import com.linkedin.metadata.models.annotation.UrnValidationAnnotation;
import java.util.ArrayList;
import java.util.List;
import lombok.Getter;

@Getter
public class UrnValidationFieldSpecExtractor implements SchemaVisitor {
private final List<UrnValidationFieldSpec> urnValidationFieldSpecs = new ArrayList<>();

@Override
public void callbackOnContext(TraverserContext context, DataSchemaTraverse.Order order) {
if (context.getEnclosingField() == null) {
return;
}

if (DataSchemaTraverse.Order.PRE_ORDER.equals(order)) {
final DataSchema currentSchema = context.getCurrentSchema().getDereferencedDataSchema();
final PathSpec path = new PathSpec(context.getSchemaPathSpec());

// Check for @UrnValidation annotation in primary properties
final Object urnValidationAnnotationObj =
context.getEnclosingField().getProperties().get(UrnValidationAnnotation.ANNOTATION_NAME);

// Check if it's either explicitly annotated with @UrnValidation
if (urnValidationAnnotationObj != null) {
addUrnValidationFieldSpec(currentSchema, path, urnValidationAnnotationObj);
}
}
}

private void addUrnValidationFieldSpec(
DataSchema currentSchema, PathSpec path, Object annotationObj) {
UrnValidationAnnotation annotation =
UrnValidationAnnotation.fromPegasusAnnotationObject(
annotationObj, FieldSpecUtils.getSchemaFieldName(path), path.toString());

urnValidationFieldSpecs.add(new UrnValidationFieldSpec(path, annotation, currentSchema));
}

@Override
public VisitorContext getInitialVisitorContext() {
return null;
}

@Override
public SchemaVisitorTraversalResult getSchemaVisitorTraversalResult() {
return new SchemaVisitorTraversalResult();

Check warning on line 55 in entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpecExtractor.java

View check run for this annotation

Codecov / codecov/patch

entity-registry/src/main/java/com/linkedin/metadata/models/UrnValidationFieldSpecExtractor.java#L55

Added line #L55 was not covered by tests
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package com.linkedin.metadata.models.annotation;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import lombok.experimental.UtilityClass;
Expand All @@ -13,4 +16,23 @@ <T> Optional<T> getField(final Map fieldMap, final String fieldName, final Class
}
return Optional.empty();
}

<T> List<T> getFieldList(
final Map<String, ?> fieldMap, final String fieldName, final Class<T> itemType) {
Object value = fieldMap.get(fieldName);
if (!(value instanceof List<?>)) {
return Collections.emptyList();

Check warning on line 24 in entity-registry/src/main/java/com/linkedin/metadata/models/annotation/AnnotationUtils.java

View check run for this annotation

Codecov / codecov/patch

entity-registry/src/main/java/com/linkedin/metadata/models/annotation/AnnotationUtils.java#L24

Added line #L24 was not covered by tests
}

List<?> list = (List<?>) value;
List<T> result = new ArrayList<>();

for (Object item : list) {
if (itemType.isInstance(item)) {
result.add(itemType.cast(item));
}
}

return Collections.unmodifiableList(result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.linkedin.metadata.models.annotation;

import com.linkedin.metadata.models.ModelValidationException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nonnull;
import lombok.Value;

@Value
public class UrnValidationAnnotation {
public static final String ANNOTATION_NAME = "UrnValidation";
boolean exist;
boolean strict;
List<String> entityTypes;

@Nonnull
public static UrnValidationAnnotation fromPegasusAnnotationObject(
@Nonnull final Object annotationObj,
@Nonnull final String schemaFieldName,
@Nonnull final String context) {
if (!Map.class.isAssignableFrom(annotationObj.getClass())) {
throw new ModelValidationException(
String.format(

Check warning on line 24 in entity-registry/src/main/java/com/linkedin/metadata/models/annotation/UrnValidationAnnotation.java

View check run for this annotation

Codecov / codecov/patch

entity-registry/src/main/java/com/linkedin/metadata/models/annotation/UrnValidationAnnotation.java#L23-L24

Added lines #L23 - L24 were not covered by tests
"Failed to validate @%s annotation declared at %s: Invalid value type provided (Expected Map)",
ANNOTATION_NAME, context));
}

Map<String, ?> map = (Map<String, ?>) annotationObj;
final Optional<Boolean> exist = AnnotationUtils.getField(map, "exist", Boolean.class);
final Optional<Boolean> strict = AnnotationUtils.getField(map, "strict", Boolean.class);
final List<String> entityTypes = AnnotationUtils.getFieldList(map, "entityTypes", String.class);

return new UrnValidationAnnotation(exist.orElse(true), strict.orElse(true), entityTypes);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ private EntityRegistry getBaseEntityRegistry() {
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList(),
Collections.emptyList(),
(RecordDataSchema) DataSchemaFactory.getInstance().getAspectSchema("datasetKey").get(),
DataSchemaFactory.getInstance().getAspectClass("datasetKey").get());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.linkedin.metadata.aspect.SystemAspect;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.utils.EntityApiUtils;
import com.linkedin.mxe.GenericAspect;
import com.linkedin.mxe.SystemMetadata;
import java.sql.Timestamp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.aspect.plugins.hooks.MutationHook;
import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection;
import com.linkedin.metadata.entity.validation.ValidationException;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.util.Pair;
Expand Down Expand Up @@ -243,7 +244,7 @@ public AspectsBatchImpl build() {
ValidationExceptionCollection exceptions =
AspectsBatch.validateProposed(this.nonRepeatedItems, this.retrieverContext);
if (!exceptions.isEmpty()) {
throw new IllegalArgumentException("Failed to validate MCP due to: " + exceptions);
throw new ValidationException("Failed to validate MCP due to: " + exceptions);
}

return new AspectsBatchImpl(this.items, this.nonRepeatedItems, this.retrieverContext);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
import com.linkedin.metadata.aspect.batch.MCPItem;
import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate;
import com.linkedin.metadata.entity.AspectUtils;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.EntityAspect;
import com.linkedin.metadata.entity.validation.ValidationApiUtils;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.utils.EntityApiUtils;
import com.linkedin.metadata.utils.EntityKeyUtils;
import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.metadata.utils.SystemMetadataUtils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
import com.linkedin.metadata.aspect.SystemAspect;
import com.linkedin.metadata.aspect.batch.BatchItem;
import com.linkedin.metadata.aspect.batch.ChangeMCP;
import com.linkedin.metadata.entity.EntityApiUtils;
import com.linkedin.metadata.entity.EntityAspect;
import com.linkedin.metadata.entity.validation.ValidationApiUtils;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.utils.EntityApiUtils;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.mxe.SystemMetadata;
import java.util.Objects;
Expand Down
Loading

0 comments on commit 2c53b38

Please sign in to comment.