From abb64433fc7a1d5e7c852ee65675c8abebd8fda1 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Mon, 30 Dec 2024 13:32:28 -0800 Subject: [PATCH 001/249] fix(tests): fixing QueryPropertiesMapperTest.java (#12241) --- .../types/common/mappers/QueryPropertiesMapperTest.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java index 756115cf2054a9..a0251adca78f9d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapperTest.java @@ -55,10 +55,6 @@ public void testMapWithRequiredFields() throws Exception { assertEquals(result.getLastModified().getTime().longValue(), 2000L); assertEquals(result.getLastModified().getActor(), userUrn.toString()); - // Verify createdOn resolved stamp - assertEquals(result.getCreatedOn().getTime().longValue(), 1000L); - assertEquals(result.getCreatedOn().getActor().getUrn(), userUrn.toString()); - // Verify optional fields are null assertNull(result.getName()); assertNull(result.getDescription()); From ee54f1fb6181c85a85194dde7cc86ab7af5745b7 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 30 Dec 2024 17:44:09 -0600 Subject: [PATCH 002/249] feat(delete): delete logic non-strict monotonically increasing version (#12242) --- .../linkedin/metadata/entity/AspectDao.java | 11 ++ .../metadata/entity/EntityServiceImpl.java | 18 ++- .../entity/cassandra/CassandraAspectDao.java | 33 ++++-- .../metadata/entity/ebean/EbeanAspectDao.java | 53 ++++++--- .../metadata/entity/EntityServiceTest.java | 105 ++++++++++++++++++ 5 files changed, 188 insertions(+), 32 deletions(-) diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index 7a8c5c76c31c3a..0d5bdd9ff64286 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -6,6 +6,7 @@ import com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.utils.metrics.MetricUtils; +import com.linkedin.util.Pair; import java.sql.Timestamp; import java.util.List; import java.util.Map; @@ -155,6 +156,16 @@ default Map getNextVersions( long getMaxVersion(@Nonnull final String urn, @Nonnull final String aspectName); + /** + * Return the min/max version for the given URN & aspect + * + * @param urn the urn + * @param aspectName the aspect + * @return the range of versions, if they do not exist -1 is returned + */ + @Nonnull + Pair getVersionRange(@Nonnull final String urn, @Nonnull final String aspectName); + void setWritable(boolean canWrite); @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 8ae09111204cab..75f16ae4d981d2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -2229,8 +2229,9 @@ public Set exists( } /** Does not emit MCL */ + @VisibleForTesting @Nullable - private RollbackResult deleteAspectWithoutMCL( + RollbackResult deleteAspectWithoutMCL( @Nonnull OperationContext opContext, String urn, String aspectName, @@ -2288,11 +2289,14 @@ private RollbackResult deleteAspectWithoutMCL( // 4. 
Fetch all preceding aspects, that match List aspectsToDelete = new ArrayList<>(); - long maxVersion = aspectDao.getMaxVersion(urn, aspectName); + Pair versionRange = aspectDao.getVersionRange(urn, aspectName); + long minVersion = Math.max(0, versionRange.getFirst()); + long maxVersion = Math.max(0, versionRange.getSecond()); + EntityAspect.EntitySystemAspect survivingAspect = null; - String previousMetadata = null; + boolean filterMatch = true; - while (maxVersion > 0 && filterMatch) { + while (maxVersion > minVersion && filterMatch) { EntityAspect.EntitySystemAspect candidateAspect = (EntityAspect.EntitySystemAspect) EntityUtils.toSystemAspect( @@ -2305,11 +2309,13 @@ private RollbackResult deleteAspectWithoutMCL( previousSysMetadata != null && filterMatch(previousSysMetadata, conditions); if (filterMatch) { aspectsToDelete.add(candidateAspect.getEntityAspect()); - maxVersion = maxVersion - 1; + } else if (candidateAspect == null) { + // potential gap + filterMatch = true; } else { survivingAspect = candidateAspect; - previousMetadata = survivingAspect.getMetadataRaw(); } + maxVersion = maxVersion - 1; } // Delete validation hooks diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index 4d177d50ea44de..c5a6615ac4face 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -36,6 +36,7 @@ import com.linkedin.metadata.query.ExtraInfo; import com.linkedin.metadata.query.ExtraInfoArray; import com.linkedin.metadata.query.ListResultMetadata; +import com.linkedin.util.Pair; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; @@ -110,7 +111,14 @@ public Map> getLatestAspects( @Override public long getMaxVersion(@Nonnull final String urn, 
@Nonnull final String aspectName) { validateConnection(); - Map result = getMaxVersions(urn, ImmutableSet.of(aspectName)); + Map> result = getVersionRanges(urn, ImmutableSet.of(aspectName)); + return result.get(aspectName).getSecond(); + } + + @Override + @Nonnull + public Pair getVersionRange(@Nonnull String urn, @Nonnull String aspectName) { + Map> result = getVersionRanges(urn, ImmutableSet.of(aspectName)); return result.get(aspectName); } @@ -148,15 +156,17 @@ public boolean checkIfAspectExists(@Nonnull String aspectName) { return rs.one() != null; } - private Map getMaxVersions( + private Map> getVersionRanges( @Nonnull final String urn, @Nonnull final Set aspectNames) { SimpleStatement ss = selectFrom(CassandraAspect.TABLE_NAME) .selectors( Selector.column(CassandraAspect.URN_COLUMN), Selector.column(CassandraAspect.ASPECT_COLUMN), + Selector.function("min", Selector.column(CassandraAspect.VERSION_COLUMN)) + .as("min_version"), Selector.function("max", Selector.column(CassandraAspect.VERSION_COLUMN)) - .as(CassandraAspect.VERSION_COLUMN)) + .as("max_version")) .whereColumn(CassandraAspect.URN_COLUMN) .isEqualTo(literal(urn)) .whereColumn(CassandraAspect.ASPECT_COLUMN) @@ -168,21 +178,21 @@ private Map getMaxVersions( .build(); ResultSet rs = _cqlSession.execute(ss); - Map aspectVersions = + Map> aspectVersionRanges = rs.all().stream() .collect( Collectors.toMap( row -> row.getString(CassandraAspect.ASPECT_COLUMN), - row -> row.getLong(CassandraAspect.VERSION_COLUMN))); + row -> Pair.of(row.getLong("min_version"), row.getLong("max_version")))); - // For each requested aspect that didn't come back from DB, add a version -1 + // For each requested aspect that didn't come back from DB, add a version range of (-1, -1) for (String aspect : aspectNames) { - if (!aspectVersions.containsKey(aspect)) { - aspectVersions.put(aspect, -1L); + if (!aspectVersionRanges.containsKey(aspect)) { + aspectVersionRanges.put(aspect, Pair.of(-1L, -1L)); } } - return aspectVersions; + 
return aspectVersionRanges; } @Override @@ -551,11 +561,12 @@ public Map> getNextVersions(Map> u Map> result = new HashMap<>(); for (Map.Entry> aspectNames : urnAspectMap.entrySet()) { - Map maxVersions = getMaxVersions(aspectNames.getKey(), aspectNames.getValue()); + Map> maxVersions = + getVersionRanges(aspectNames.getKey(), aspectNames.getValue()); Map nextVersions = new HashMap<>(); for (String aspectName : aspectNames.getValue()) { - long latestVersion = maxVersions.get(aspectName); + long latestVersion = maxVersions.get(aspectName).getSecond(); long nextVal = latestVersion < 0 ? ASPECT_LATEST_VERSION : latestVersion + 1L; nextVersions.put(aspectName, nextVal); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index ea580a97c51886..ad8333407a2760 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -38,6 +38,8 @@ import io.ebean.Query; import io.ebean.RawSql; import io.ebean.RawSqlBuilder; +import io.ebean.SqlQuery; +import io.ebean.SqlRow; import io.ebean.Transaction; import io.ebean.TxScope; import io.ebean.annotation.TxIsolation; @@ -247,10 +249,18 @@ private void saveEbeanAspect( @Nonnull final EbeanAspectV2 ebeanAspect, final boolean insert) { validateConnection(); - if (insert) { - _server.insert(ebeanAspect, txContext.tx()); + if (txContext != null && txContext.tx() != null) { + if (insert) { + _server.insert(ebeanAspect, txContext.tx()); + } else { + _server.update(ebeanAspect, txContext.tx()); + } } else { - _server.update(ebeanAspect, txContext.tx()); + if (insert) { + _server.insert(ebeanAspect); + } else { + _server.update(ebeanAspect); + } } } @@ -864,20 +874,33 @@ public T runInTransactionWithRetryUnlocked( } @Override - public long getMaxVersion(@Nonnull final String urn, @Nonnull final 
String aspectName) { + @Nonnull + public Pair getVersionRange( + @Nonnull final String urn, @Nonnull final String aspectName) { validateConnection(); - final List result = - _server - .find(EbeanAspectV2.class) - .where() - .eq(EbeanAspectV2.URN_COLUMN, urn.toString()) - .eq(EbeanAspectV2.ASPECT_COLUMN, aspectName) - .orderBy() - .desc(EbeanAspectV2.VERSION_COLUMN) - .setMaxRows(1) - .findIds(); - return result.isEmpty() ? -1 : result.get(0).getVersion(); + // Use SQL aggregation to get both min and max in a single query + SqlQuery query = + _server.sqlQuery( + "SELECT MIN(version) as min_version, MAX(version) as max_version " + + "FROM metadata_aspect_v2 " + + "WHERE urn = :urn AND aspect = :aspect"); + + query.setParameter("urn", urn); + query.setParameter("aspect", aspectName); + + SqlRow result = query.findOne(); + + if (result == null) { + return Pair.of(-1L, -1L); + } + + return Pair.of(result.getLong("min_version"), result.getLong("max_version")); + } + + @Override + public long getMaxVersion(@Nonnull final String urn, @Nonnull final String aspectName) { + return getVersionRange(urn, aspectName).getSecond(); } /** diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index c00632e5cf5424..6eda210baf7d4a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -84,6 +84,8 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import jakarta.annotation.Nonnull; +import java.sql.Timestamp; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -2670,6 +2672,109 @@ public void testPatchAddNonExistent() throws Exception { "Expected all tags"); } + @Test + public void 
testDeleteUrnWithRunIdFilterNonMatch() throws Exception { + Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:deleteWithFilterNonMatch"); + + // Create aspects with different run IDs + SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); + metadata1.setRunId("run-123"); + + SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(); + metadata2.setRunId("run-456"); // Different run ID + + String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); + + // First ingest the aspect that should survive (run-456) + CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("first@test.com"); + List> firstPair = new ArrayList<>(); + firstPair.add(getAspectRecordPair(writeAspect1, CorpUserInfo.class)); + _entityServiceImpl.ingestAspects(opContext, entityUrn, firstPair, TEST_AUDIT_STAMP, metadata2); + + // Then ingest the aspect that should be deleted (run-123) + CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("second@test.com"); + List> secondPair = new ArrayList<>(); + secondPair.add(getAspectRecordPair(writeAspect2, CorpUserInfo.class)); + _entityServiceImpl.ingestAspects(opContext, entityUrn, secondPair, TEST_AUDIT_STAMP, metadata1); + + // When we try to delete with runId=run-123, the version with runId=run-456 should survive + RollbackResult result = + _entityServiceImpl.deleteAspectWithoutMCL( + opContext, + entityUrn.toString(), + aspectName, + Collections.singletonMap("runId", "run-123"), + true); + + // The aspect with run-456 should still exist + RecordTemplate survivingAspect = + _entityServiceImpl.getLatestAspect(opContext, entityUrn, aspectName); + assertTrue(DataTemplateUtil.areEqual(writeAspect1, survivingAspect)); + + // Verify the RollbackResult details + assertNotNull(result); + assertEquals(result.getUrn(), entityUrn); + assertEquals(result.getEntityName(), "corpuser"); + assertEquals(result.getAspectName(), aspectName); + } + + @Test + public void 
testDeleteUrnWithRunIdFilterNonMatchVersionGap() throws Exception { + Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:deleteWithFilterNonMatch"); + String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); + + // Metadata that should be preserved (run-456) + SystemMetadata metadata456 = AspectGenerationUtils.createSystemMetadata(); + metadata456.setRunId("run-456"); // Different run ID + CorpUserInfo writeAspect456 = AspectGenerationUtils.createCorpUserInfo("first@test.com"); + List> firstPair = new ArrayList<>(); + firstPair.add(getAspectRecordPair(writeAspect456, CorpUserInfo.class)); + _entityServiceImpl.ingestAspects( + opContext, entityUrn, firstPair, TEST_AUDIT_STAMP, metadata456); + + // Metadata that should be deleted (run-123) + SystemMetadata metadata123 = AspectGenerationUtils.createSystemMetadata(); + metadata123.setRunId("run-123"); + CorpUserInfo writeAspect123 = AspectGenerationUtils.createCorpUserInfo("second@test.com"); + List> secondPair = new ArrayList<>(); + secondPair.add(getAspectRecordPair(writeAspect123, CorpUserInfo.class)); + _entityServiceImpl.ingestAspects( + opContext, entityUrn, secondPair, TEST_AUDIT_STAMP, metadata123); + + // Then insert another run-123 with version gap + _aspectDao.saveAspect( + null, + entityUrn.toString(), + aspectName, + RecordUtils.toJsonString(writeAspect123), + TEST_AUDIT_STAMP.getActor().toString(), + null, + Timestamp.from(Instant.ofEpochMilli(TEST_AUDIT_STAMP.getTime())), + RecordUtils.toJsonString(metadata123), + 10L, + true); + + // When we try to delete with runId=run-123, the version with runId=run-456 should survive + RollbackResult result = + _entityServiceImpl.deleteAspectWithoutMCL( + opContext, + entityUrn.toString(), + aspectName, + Collections.singletonMap("runId", "run-123"), + true); + + // The aspect with run-456 should still exist + RecordTemplate survivingAspect = + _entityServiceImpl.getLatestAspect(opContext, entityUrn, aspectName); + 
assertTrue(DataTemplateUtil.areEqual(writeAspect456, survivingAspect)); + + // Verify the RollbackResult details + assertNotNull(result); + assertEquals(result.getUrn(), entityUrn); + assertEquals(result.getEntityName(), "corpuser"); + assertEquals(result.getAspectName(), aspectName); + } + @Nonnull protected com.linkedin.entity.Entity createCorpUserEntity(Urn entityUrn, String email) throws Exception { From 96c60580a6f3171314787a65dc072f2debb42771 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 30 Dec 2024 21:04:17 -0600 Subject: [PATCH 003/249] docs(graphql): create graphql best practices (#12229) --- docs-website/sidebars.js | 5 + docs/api/graphql/graphql-best-practices.md | 1022 ++++++++++++++++++++ docs/api/graphql/how-to-set-up-graphql.md | 27 + 3 files changed, 1054 insertions(+) create mode 100644 docs/api/graphql/graphql-best-practices.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index c18d8671318f64..bcb06affedcff0 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -780,6 +780,11 @@ module.exports = { label: "Getting Started With GraphQL", id: "docs/api/graphql/getting-started", }, + { + type: "doc", + label: "GraphQL Best Practices", + id: "docs/api/graphql/graphql-best-practices", + }, { type: "doc", label: "Access Token Management", diff --git a/docs/api/graphql/graphql-best-practices.md b/docs/api/graphql/graphql-best-practices.md new file mode 100644 index 00000000000000..d4c85d52e29f98 --- /dev/null +++ b/docs/api/graphql/graphql-best-practices.md @@ -0,0 +1,1022 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# GraphQL Best Practices + +## Introduction: + +DataHub’s GraphQL API is designed to power the UI. The following guidelines are written with this use-case in mind. 
+ +## General Best Practices + +### Query Optimizations + +> One of GraphQL's biggest advantages over a traditional REST API is its support for **declarative data fetching**. Each component can (and should) query exactly the fields it requires to render, with no superfluous data sent over the network. If instead your root component executes a single, enormous query to obtain data for all of its children, it might query on behalf of components that *aren't even rendered* given the current state. This can result in a delayed response, and it drastically reduces the likelihood that the query's result can be reused by a **server-side response cache**. [[ref](https://www.apolloGraphQL.com/docs/react/data/operation-best-practices#query-only-the-data-you-need-where-you-need-it)] +> +1. Minimize over-fetching by only requesting data needed to be displayed. +2. Limit result counts and use pagination (additionally see section below on `Deep Pagination`). +3. Avoid deeply nested queries and instead break out queries into separate requests for the nested objects. + +### Client-side Caching + +Clients, such as Apollo Client (javascript, python `apollo-client-python`), offer [client-side caching](https://www.apolloGraphQL.com/docs/react/caching/overview) to prevent requests to the service and are able to understand the content of the GraphQL query. This enables more advanced caching vs HTTP response caching. + +### Reuse Pieces of Query Logic with Fragments + +One powerful feature of GraphQL that we recommend you use is [fragments](https://hygraph.com/learn/GraphQL/fragments). Fragments allow you to define pieces of a query that you can reuse across any client-side query that you define. Basically, you can define a set of fields that you want to query, and reuse it in multiple places. + +This technique makes maintaining your GraphQL queries much more doable. 
For example, if you want to request a new field for an entity type across many queries, you’re able to update it in one place if you’re leveraging fragments. + +## Search Query Best Practices + +### Deep Pagination: search* vs scroll* APIs + +`search*` APIs such as [`searchAcrossEntities`](https://datahubproject.io/docs/GraphQL/queries/#searchacrossentities) are designed for minimal pagination (< ~50). They do not perform well for deep pagination requests. Use the equivalent `scroll*` APIs such as [`scrollAcrossEntities`](https://datahubproject.io/docs/GraphQL/queries/#scrollacrossentities) when expecting the need to paginate deeply into the result set. + +Note: that it is impossible to use `search*` for paginating beyond 10k results. + +#### Examples + +In the following examples we demonstrate pagination for both `scroll*` and `search*` requests. This particular request is searching for two entities, Datasets and Charts, that +contain `pet` in the entities' name or title. The results will only include the URN for the entities. + + + +Page 1 Request: + +```graphql +{ + scrollAcrossEntities( + input: { + types: [DATASET, CHART] + count: 2 + query: "*" + orFilters: [ + { and: [{ field: "name", condition: CONTAIN, values: ["pet"] }] }, + { and: [{ field: "title", condition: CONTAIN, values: ["pet"] }] } + ] + } + ) { + nextScrollId + searchResults { + entity { + ... on Dataset { + urn + } + ... 
on Chart { + urn + } + } + } + } +} +``` + +Page 1 Result: + +```json +{ + "data": { + "scrollAcrossEntities": { + "nextScrollId": "eyJzb3J0IjpbMi4wNzk2ODc2LCJ1cm46bGk6ZGF0YXNldDoodXJuOmxpOmRhdGFQbGF0Zm9ybTpzbm93Zmxha2UsbG9uZ190YWlsX2NvbXBhbmlvbnMuYWRvcHRpb24ucGV0X3Byb2ZpbGVzLFBST0QpIl0sInBpdElkIjpudWxsLCJleHBpcmF0aW9uVGltZSI6MH0=", + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.pet_details,PROD)" + } + }, + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)" + } + } + ] + } + }, + "extensions": {} +} +``` + +Page 2 Request: + +```graphql +{ + scrollAcrossEntities( + input: { + scrollId: "eyJzb3J0IjpbMi4wNzk2ODc2LCJ1cm46bGk6ZGF0YXNldDoodXJuOmxpOmRhdGFQbGF0Zm9ybTpzbm93Zmxha2UsbG9uZ190YWlsX2NvbXBhbmlvbnMuYWRvcHRpb24ucGV0X3Byb2ZpbGVzLFBST0QpIl0sInBpdElkIjpudWxsLCJleHBpcmF0aW9uVGltZSI6MH0=" + types: [DATASET, CHART] + count: 2 + query: "*" + orFilters: [ + { and: [{ field: "name", condition: CONTAIN, values: ["pet"] }] }, + { and: [{ field: "title", condition: CONTAIN, values: ["pet"] }] } + ] + } + ) { + nextScrollId + searchResults { + entity { + ... on Dataset { + urn + } + ... 
on Chart { + urn + } + } + } + } +} +``` + +Page 2 Result: + +```json +{ + "data": { + "scrollAcrossEntities": { + "nextScrollId": "eyJzb3J0IjpbMS43MTg3NSwidXJuOmxpOmRhdGFzZXQ6KHVybjpsaTpkYXRhUGxhdGZvcm06c25vd2ZsYWtlLGxvbmdfdGFpbF9jb21wYW5pb25zLmFkb3B0aW9uLnBldHMsUFJPRCkiXSwicGl0SWQiOm51bGwsImV4cGlyYXRpb25UaW1lIjowfQ==", + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.pet_status_history,PROD)" + } + }, + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pets,PROD)" + } + } + ] + } + }, + "extensions": {} +} +``` + + + +Page 1 Request: + +```graphql +{ + searchAcrossEntities( + input: { + types: [DATASET, CHART] + count: 2, + start: 0 + query: "*" + orFilters: [ + { and: [{ field: "name", condition: CONTAIN, values: ["pet"] }] }, + { and: [{ field: "title", condition: CONTAIN, values: ["pet"] }] } + ] + } + ) { + searchResults { + entity { + ... on Dataset { + urn + } + ... on Chart { + urn + } + } + } + } +} +``` + +Page 1 Response: + +```json +{ + "data": { + "searchAcrossEntities": { + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.pet_details,PROD)" + } + }, + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)" + } + } + ] + } + }, + "extensions": {} +} +``` + +Page 2 Request: + +```graphql +{ + searchAcrossEntities( + input: { + types: [DATASET, CHART] + count: 2, + start: 2 + query: "*" + orFilters: [ + { and: [{ field: "name", condition: CONTAIN, values: ["pet"] }] }, + { and: [{ field: "title", condition: CONTAIN, values: ["pet"] }] } + ] + } + ) { + searchResults { + entity { + ... on Dataset { + urn + } + ... 
on Chart { + urn + } + } + } + } +} +``` + +Page 2 Response: + +```json +{ + "data": { + "searchAcrossEntities": { + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.pet_status_history,PROD)" + } + }, + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pets,PROD)" + } + } + ] + } + }, + "extensions": {} +} +``` + + + +### SearchFlags: Highlighting and Aggregation + +When performing queries which accept [`searchFlags`](https://datahubproject.io/docs/GraphQL/inputObjects#searchflags) and highlighting and aggregation is not needed, be sure to disable these flags. + +- skipHighlighting: true +- skipAggregates: true + +As a fallback, if only certain fields require highlighting use `customHighlightingFields` to limit highlighting to the specific fields. + + + + +Example for skipping highlighting and aggregates, typically used for scrolling search requests. + +```graphql +{ + scrollAcrossEntities( + input: {types: [DATASET], count: 2, query: "pet", searchFlags: {skipAggregates: true, skipHighlighting: true}} + ) { + searchResults { + entity { + ... 
on Dataset { + urn + } + } + matchedFields { + name + value + } + } + facets { + displayName + aggregations { + value + count + } + } + } +} +``` + +Response: + +Note that a few `matchedFields` are still returned by default [`urn`, `customProperties`] + +```json +{ + "data": { + "scrollAcrossEntities": { + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.pet_details,PROD)" + }, + "matchedFields": [ + { + "name": "urn", + "value": "" + }, + { + "name": "customProperties", + "value": "" + } + ] + }, + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.pet_details,PROD)" + }, + "matchedFields": [ + { + "name": "urn", + "value": "" + }, + { + "name": "customProperties", + "value": "" + } + ] + } + ], + "facets": [] + } + }, + "extensions": {} +} +``` + + + + + +Custom highlighting can be used for searchAcrossEntities when only a limited number of fields are useful for highlighting. In this example we specifically request highlighting for `description`. + +```graphql +{ + searchAcrossEntities( + input: {types: [DATASET], count: 2, query: "pet", searchFlags: {customHighlightingFields: ["description"]}} + ) { + searchResults { + entity { + ... 
on Dataset { + urn + } + } + matchedFields { + name + value + } + } + } +} +``` + +Response: + +```json +{ + "data": { + "searchAcrossEntities": { + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.pet_details,PROD)" + }, + "matchedFields": [ + { + "name": "urn", + "value": "" + }, + { + "name": "customProperties", + "value": "" + }, + { + "name": "description", + "value": "Table with all pet-related details" + } + ] + }, + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.pet_details,PROD)" + }, + "matchedFields": [ + { + "name": "urn", + "value": "" + }, + { + "name": "customProperties", + "value": "" + } + ] + } + ] + } + }, + "extensions": {} +} +``` + + + + + +### Aggregation + +When aggregation is required with `searchAcrossEntities`, it is possible to set the `count` to 0 to avoid fetching the top search hits, only returning the aggregations. Alternatively [aggregateAcrossEntities](https://datahubproject.io/docs/GraphQL/queries#aggregateacrossentities) provides counts and can provide faster results from server-side caching. + +Request: + +```graphql +{ + searchAcrossEntities( + input: {types: [DATASET], count: 0, query: "pet", searchFlags: {skipHighlighting: true}} + ) { + searchResults { + entity { + ... 
on Dataset { + urn + } + } + matchedFields { + name + value + } + } + facets { + displayName + aggregations { + value + count + } + } + } +} +``` + +Response: + +```json +{ + "data": { + "searchAcrossEntities": { + "searchResults": [], + "facets": [ + { + "displayName": "Container", + "aggregations": [ + { + "value": "urn:li:container:b41c14bc5cb3ccfbb0433c8cbdef2992", + "count": 4 + }, + { + "value": "urn:li:container:701919de0ec93cb338fe9bac0b35403c", + "count": 3 + } + ] + }, + { + "displayName": "Sub Type", + "aggregations": [ + { + "value": "table", + "count": 9 + }, + { + "value": "view", + "count": 6 + }, + { + "value": "explore", + "count": 5 + }, + { + "value": "source", + "count": 4 + }, + { + "value": "incremental", + "count": 1 + } + ] + }, + { + "displayName": "Type", + "aggregations": [ + { + "value": "DATASET", + "count": 24 + } + ] + }, + { + "displayName": "Environment", + "aggregations": [ + { + "value": "PROD", + "count": 24 + } + ] + }, + { + "displayName": "Glossary Term", + "aggregations": [ + { + "value": "urn:li:glossaryTerm:Adoption.DaysInStatus", + "count": 1 + }, + { + "value": "urn:li:glossaryTerm:Ecommerce.HighRisk", + "count": 1 + }, + { + "value": "urn:li:glossaryTerm:Classification.Confidential", + "count": 1 + } + ] + }, + { + "displayName": "Domain", + "aggregations": [ + { + "value": "urn:li:domain:094dc54b-0ebc-40a6-a4cf-e1b75e8b8089", + "count": 6 + }, + { + "value": "urn:li:domain:7d64d0fa-66c3-445c-83db-3a324723daf8", + "count": 2 + } + ] + }, + { + "displayName": "Owned By", + "aggregations": [ + { + "value": "urn:li:corpGroup:Adoption", + "count": 5 + }, + { + "value": "urn:li:corpuser:shannon@longtail.com", + "count": 4 + }, + { + "value": "urn:li:corpuser:admin", + "count": 2 + }, + { + "value": "urn:li:corpGroup:Analytics Engineering", + "count": 2 + }, + { + "value": "urn:li:corpuser:avigdor@longtail.com", + "count": 1 + }, + { + "value": "urn:li:corpuser:prentiss@longtail.com", + "count": 1 + }, + { + "value": 
"urn:li:corpuser:tasha@longtail.com", + "count": 1 + }, + { + "value": "urn:li:corpuser:ricca@longtail.com", + "count": 1 + }, + { + "value": "urn:li:corpuser:emilee@longtail.com", + "count": 1 + } + ] + }, + { + "displayName": "Platform", + "aggregations": [ + { + "value": "urn:li:dataPlatform:looker", + "count": 8 + }, + { + "value": "urn:li:dataPlatform:dbt", + "count": 7 + }, + { + "value": "urn:li:dataPlatform:snowflake", + "count": 7 + }, + { + "value": "urn:li:dataPlatform:s3", + "count": 1 + }, + { + "value": "urn:li:dataPlatform:mongodb", + "count": 1 + } + ] + }, + { + "displayName": "Tag", + "aggregations": [ + { + "value": "urn:li:tag:prod_model", + "count": 3 + }, + { + "value": "urn:li:tag:pii", + "count": 2 + }, + { + "value": "urn:li:tag:business critical", + "count": 2 + }, + { + "value": "urn:li:tag:business_critical", + "count": 2 + }, + { + "value": "urn:li:tag:Tier1", + "count": 1 + }, + { + "value": "urn:li:tag:prod", + "count": 1 + } + ] + }, + { + "displayName": "Type", + "aggregations": [ + { + "value": "DATASET", + "count": 24 + } + ] + } + ] + } + }, + "extensions": {} +} +``` + +### Limit Search Entity Types + +When querying for specific entities, enumerate only the entity types required using `types` , for example [`DATASET` , `CHART`] + +### Limit Results + +Limit search results based on the amount of information being requested. For example, a minimal number of attributes can fetch 1,000 - 2,000 results in a single page, however as the number of attributes increases (especially nested objects) the `count` should be lowered, 20-25 for very complex requests. 
+ +## Lineage Query Best Practices + +There are two primary ways to query lineage: + +### Search Across Lineage + +`searchAcrossLineage` / `scrollAcrossLineage` root query: + +- Recommended for all lineage queries +- Only the shortest path is guaranteed to show up in `paths` +- Supports querying indirect lineage (depth > 1) + - Depending on the fanout of the lineage, 3+ hops may not return data, use 1-hop queries for the fastest response times. + - Specify using a filter with name `"degree"` and values `"1"` , `"2"`, and / or `"3+"` + +The following examples are demonstrated using sample data for `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)`. + +

+ +

+ + + + +The following example queries show UPSTREAM lineage with progressively higher degrees, first with degree `["1"]` and then `["1","2"]`. + +1-Hop Upstreams: + +Request: + +```graphql +{ + searchAcrossLineage( + input: {urn: "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", query: "*", count: 10, start: 0, direction: UPSTREAM, orFilters: [{and: [{field: "degree", condition: EQUAL, values: ["1"]}]}], searchFlags: {skipAggregates: true, skipHighlighting: true}} + ) { + start + count + total + searchResults { + entity { + urn + type + ... on Dataset { + name + } + } + paths { + path { + ... on Dataset { + urn + } + } + } + degree + } + } +} +``` + +Response: + +```json +{ + "data": { + "searchAcrossLineage": { + "start": 0, + "count": 10, + "total": 1, + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)", + "type": "DATASET", + "name": "SampleHdfsDataset" + }, + "paths": [ + { + "path": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" + }, + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)" + } + ] + } + ], + "degree": 1 + } + ] + } + }, + "extensions": {} +} +``` + + + +1-Hop & 2-Hop Upstreams: + +Request: + +```graphql +{ + searchAcrossLineage( + input: {urn: "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", query: "*", count: 10, start: 0, direction: UPSTREAM, orFilters: [{and: [{field: "degree", condition: EQUAL, values: ["1","2"]}]}], searchFlags: {skipAggregates: true, skipHighlighting: true}} + ) { + start + count + total + searchResults { + entity { + urn + type + ... on Dataset { + name + } + } + paths { + path { + ... 
on Dataset { + urn + } + } + } + degree + } + } +} +``` + +```json +{ + "data": { + "searchAcrossLineage": { + "start": 0, + "count": 10, + "total": 2, + "searchResults": [ + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)", + "type": "DATASET", + "name": "SampleHdfsDataset" + }, + "paths": [ + { + "path": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" + }, + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)" + } + ] + } + ], + "degree": 1 + }, + { + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + "type": "DATASET", + "name": "SampleKafkaDataset" + }, + "paths": [ + { + "path": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" + }, + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)" + }, + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)" + } + ] + } + ], + "degree": 2 + } + ] + } + }, + "extensions": {} +} +``` + + + + +### Lineage Subquery + +The previous query requires a root or starting node in the lineage graph. The following request offers a way to request lineage for multiple nodes at once with a few limitations. + +`lineage` query on `EntityWithRelationship` entities: + +- A more direct reflection of the graph index +- 1-hop lineage only +- Multiple URNs +- Should not be requested too many times in a single request. 20 is a tested limit + +The following examples are based on the sample lineage graph shown here: + +

+ +

+ +Example Request: + +```graphql +query getBulkEntityLineageV2($urns: [String!]! = ["urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)"]) { + entities(urns: $urns) { + urn + type + ... on DataJob { + jobId + dataFlow { + flowId + } + properties { + name + } + upstream: lineage(input: {direction: UPSTREAM, start: 0, count: 10}) { + total + relationships { + type + entity { + urn + type + } + } + } + downstream: lineage(input: {direction: DOWNSTREAM, start: 0, count: 10}) { + total + relationships { + type + entity { + urn + type + } + } + } + } + } +} +``` + +Example Response: + +```json +{ + "data": { + "entities": [ + { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", + "type": "DATA_JOB", + "jobId": "task_123", + "dataFlow": { + "flowId": "dag_abc" + }, + "properties": { + "name": "User Creations" + }, + "upstream": { + "total": 1, + "relationships": [ + { + "type": "Consumes", + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)", + "type": "DATASET" + } + } + ] + }, + "downstream": { + "total": 2, + "relationships": [ + { + "type": "DownstreamOf", + "entity": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)", + "type": "DATA_JOB" + } + }, + { + "type": "Produces", + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", + "type": "DATASET" + } + } + ] + } + }, + { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)", + "type": "DATA_JOB", + "jobId": "task_456", + "dataFlow": { + "flowId": "dag_abc" + }, + "properties": { + "name": "User Deletions" + }, + "upstream": { + "total": 2, + "relationships": [ + { + "type": "DownstreamOf", + "entity": { + "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)", + "type": "DATA_JOB" + } + }, + { + "type": "Consumes", + "entity": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)", + "type": "DATASET" + } + } + ] + }, + "downstream": { + "total": 1, + "relationships": [ + { + "type": "Produces", + "entity": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)", + "type": "DATASET" + } + } + ] + } + } + ] + }, + "extensions": {} +} +``` \ No newline at end of file diff --git a/docs/api/graphql/how-to-set-up-graphql.md b/docs/api/graphql/how-to-set-up-graphql.md index 2be2f935b12b10..8a6db7f6c105a2 100644 --- a/docs/api/graphql/how-to-set-up-graphql.md +++ b/docs/api/graphql/how-to-set-up-graphql.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # How To Set Up GraphQL ## Preparing Local Datahub Deployment @@ -29,6 +32,7 @@ DataHub provides a browser-based GraphQL Explorer Tool ([GraphiQL](https://githu This interface allows you to easily craft queries and mutations against real metadata stored in your live DataHub deployment. To experiment with GraphiQL before deploying it in your live DataHub deployment, you can access a demo site provided by DataHub at https://demo.datahubproject.io/api/graphiql. + For instance, you can create a tag by posting the following query: ```json @@ -43,6 +47,29 @@ mutation createTag { For a detailed usage guide, check out [How to use GraphiQL](https://www.gatsbyjs.com/docs/how-to/querying-data/running-queries-with-graphiql/). +To navigate to `GraphiQL` on the demo site or your local instance, select `GraphiQL` from the user profile drop-down menu as +shown below. + + + +

+ +

+![graphiql_link.png](../../../../../Desktop/datahub_graphiql_link.png) +
+ +

+ +

+
+
+ +This link will then display the following interface for exploring GraphQL queries. + +

+ +

+ ### CURL CURL is a command-line tool used for transferring data using various protocols including HTTP, HTTPS, and others. From 7198f711dc3ae580c5b5ca205acd2f5ca6dfddea Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 31 Dec 2024 08:50:44 -0600 Subject: [PATCH 004/249] fix(ci): further consolidate NODE_OPTIONS (#12217) --- .github/workflows/build-and-test.yml | 4 +- .../workflows/contributor-open-pr-comment.yml | 2 +- .github/workflows/docker-unified.yml | 2 - datahub-web-react/build.gradle | 2 +- datahub-web-react/package.json | 12 +- datahub-web-react/src/Mocks.tsx | 1 + datahub-web-react/vite.config.ts | 8 + datahub-web-react/yarn.lock | 141 ++++++++++++++++-- docs/developers.md | 1 + 9 files changed, 147 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 98071b536a336a..784dce0f11b2b5 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -109,8 +109,6 @@ jobs: if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }} run: | ./gradlew :datahub-frontend:build :datahub-web-react:build --parallel - env: - NODE_OPTIONS: "--max-old-space-size=4096" - name: Gradle compile (jdk8) for legacy Spark if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }} run: | @@ -157,4 +155,4 @@ jobs: uses: actions/upload-artifact@v3 with: name: Event File - path: ${{ github.event_path }} \ No newline at end of file + path: ${{ github.event_path }} diff --git a/.github/workflows/contributor-open-pr-comment.yml b/.github/workflows/contributor-open-pr-comment.yml index 2f700290ee0f28..decc7ab27a411d 100644 --- a/.github/workflows/contributor-open-pr-comment.yml +++ b/.github/workflows/contributor-open-pr-comment.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: 
actions/checkout@v4 - name: Get and Format Username (PR only) if: github.event_name == 'pull_request' diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 47c26068347c07..a5200c7e917d81 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -445,8 +445,6 @@ jobs: run: | ./gradlew :datahub-frontend:dist -x test -x yarnTest -x yarnLint --parallel mv ./datahub-frontend/build/distributions/datahub-frontend-*.zip datahub-frontend.zip - env: - NODE_OPTIONS: "--max-old-space-size=4096" - name: Build and push uses: ./.github/actions/docker-custom-build-and-push with: diff --git a/datahub-web-react/build.gradle b/datahub-web-react/build.gradle index bf1aa401e3f560..3dad778a2b3038 100644 --- a/datahub-web-react/build.gradle +++ b/datahub-web-react/build.gradle @@ -79,7 +79,7 @@ task yarnServe(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) { task yarnTest(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) { // Explicitly runs in non-watch mode. - args = ['run', 'test', 'run'] + args = ['run', project.hasProperty('withCoverage') ? 
'test-coverage' : 'test', 'run'] } task yarnLint(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) { diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 2d1d667a89f14a..a608698d7602c4 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -89,10 +89,11 @@ "scripts": { "analyze": "source-map-explorer 'dist/assets/*.js'", "start": "yarn run generate && vite", - "ec2-dev": "yarn run generate && CI=true;export CI;vite", - "build": "yarn run generate && NODE_OPTIONS='--max-old-space-size=4096 --openssl-legacy-provider' CI=false vite build", - "test": "vitest", - "generate": "graphql-codegen --config codegen.yml", + "ec2-dev": "yarn run generate && CI=true vite", + "build": "yarn run generate && CI=false NODE_OPTIONS='--max-old-space-size=5120 --openssl-legacy-provider' vite build", + "test": "NODE_OPTIONS='--max-old-space-size=5120 --openssl-legacy-provider' vitest", + "test-coverage": "yarn test run --coverage", + "generate": "NODE_OPTIONS='--max-old-space-size=5120 --openssl-legacy-provider' graphql-codegen --config codegen.yml", "lint": "eslint . 
--ext .ts,.tsx --quiet && yarn format-check && yarn type-check", "lint-fix": "eslint '*/**/*.{ts,tsx}' --quiet --fix && yarn format", "format-check": "prettier --check src", @@ -100,7 +101,7 @@ "type-check": "tsc --noEmit", "type-watch": "tsc -w --noEmit", "storybook": "storybook dev -p 6006", - "build-storybook": "storybook build" + "build-storybook": "NODE_OPTIONS='--max-old-space-size=5120 --openssl-legacy-provider' storybook build" }, "browserslist": { "production": [ @@ -135,6 +136,7 @@ "@typescript-eslint/eslint-plugin": "^5.38.1", "@typescript-eslint/parser": "^5.38.1", "@vitejs/plugin-react": "^4.1.1", + "@vitest/coverage-v8": "^0.34.6", "eslint": "^8.2.0", "eslint-config-airbnb": "19.0.4", "eslint-config-airbnb-typescript": "^17.0.0", diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 73a789030ce6fb..2da9e733eb4072 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -1714,6 +1714,7 @@ export const mlModel = { }, tags: [], properties: { + name: 'trust model', description: 'a ml trust model', date: null, version: '1', diff --git a/datahub-web-react/vite.config.ts b/datahub-web-react/vite.config.ts index 2532b24067754d..c43470dee031a8 100644 --- a/datahub-web-react/vite.config.ts +++ b/datahub-web-react/vite.config.ts @@ -68,6 +68,11 @@ export default defineConfig(({ mode }) => { envPrefix: 'REACT_APP_', build: { outDir: 'dist', + target: 'esnext', + minify: 'esbuild', + reportCompressedSize: false, + // Limit number of worker threads to reduce CPU pressure + workers: 3, // default is number of CPU cores }, server: { open: false, @@ -92,8 +97,11 @@ export default defineConfig(({ mode }) => { css: true, // reporters: ['verbose'], coverage: { + enabled: true, + provider: 'v8', reporter: ['text', 'json', 'html'], include: ['src/**/*'], + reportsDirectory: '../build/coverage-reports/datahub-web-react/', exclude: [], }, }, diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock 
index dc7260efd183fd..f16e8aa506e2cc 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -20,6 +20,14 @@ "@jridgewell/gen-mapping" "^0.3.0" "@jridgewell/trace-mapping" "^0.3.9" +"@ampproject/remapping@^2.2.1": + version "2.3.0" + resolved "https://registry.yarnpkg.com/@ampproject/remapping/-/remapping-2.3.0.tgz#ed441b6fa600072520ce18b43d2c8cc8caecc7f4" + integrity sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw== + dependencies: + "@jridgewell/gen-mapping" "^0.3.5" + "@jridgewell/trace-mapping" "^0.3.24" + "@analytics/amplitude@0.0.3": version "0.0.3" resolved "https://registry.yarnpkg.com/@analytics/amplitude/-/amplitude-0.0.3.tgz#15ccb76094d6f1003979a4f3aa5d3263781bd776" @@ -1017,6 +1025,11 @@ "@babel/helper-validator-identifier" "^7.22.20" to-fast-properties "^2.0.0" +"@bcoe/v8-coverage@^0.2.3": + version "0.2.3" + resolved "https://registry.yarnpkg.com/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz#75a2e8b51cb758a7553d6804a5932d7aace75c39" + integrity sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw== + "@ctrl/tinycolor@^3.3.1", "@ctrl/tinycolor@^3.4.0": version "3.4.0" resolved "https://registry.yarnpkg.com/@ctrl/tinycolor/-/tinycolor-3.4.0.tgz#c3c5ae543c897caa9c2a68630bed355be5f9990f" @@ -1941,6 +1954,11 @@ resolved "https://registry.yarnpkg.com/@icons/material/-/material-0.2.4.tgz#e90c9f71768b3736e76d7dd6783fc6c2afa88bc8" integrity sha512-QPcGmICAPbGLGb6F/yNf/KzKqvFx8z5qx3D1yFqVAjoFmXK35EgyW+cJ57Te3CNsmzblwtzakLGFqHPqrfb4Tw== +"@istanbuljs/schema@^0.1.2": + version "0.1.3" + resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.3.tgz#e45e384e4b8ec16bce2fd903af78450f6bf7ec98" + integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA== + "@jest/schemas@^29.6.3": version "29.6.3" resolved 
"https://registry.yarnpkg.com/@jest/schemas/-/schemas-29.6.3.tgz#430b5ce8a4e0044a7e3819663305a7b3091c8e03" @@ -2009,6 +2027,14 @@ resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz#d7c6e6755c78567a951e04ab52ef0fd26de59f32" integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg== +"@jridgewell/trace-mapping@^0.3.12", "@jridgewell/trace-mapping@^0.3.24", "@jridgewell/trace-mapping@^0.3.25": + version "0.3.25" + resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz#15f190e98895f3fc23276ee14bc76b675c2e50f0" + integrity sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ== + dependencies: + "@jridgewell/resolve-uri" "^3.1.0" + "@jridgewell/sourcemap-codec" "^1.4.14" + "@jridgewell/trace-mapping@^0.3.17", "@jridgewell/trace-mapping@^0.3.9": version "0.3.18" resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.18.tgz#25783b2086daf6ff1dcb53c9249ae480e4dd4cd6" @@ -2017,14 +2043,6 @@ "@jridgewell/resolve-uri" "3.1.0" "@jridgewell/sourcemap-codec" "1.4.14" -"@jridgewell/trace-mapping@^0.3.24", "@jridgewell/trace-mapping@^0.3.25": - version "0.3.25" - resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz#15f190e98895f3fc23276ee14bc76b675c2e50f0" - integrity sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ== - dependencies: - "@jridgewell/resolve-uri" "^3.1.0" - "@jridgewell/sourcemap-codec" "^1.4.14" - "@linaria/core@3.0.0-beta.13": version "3.0.0-beta.13" resolved "https://registry.yarnpkg.com/@linaria/core/-/core-3.0.0-beta.13.tgz#049c5be5faa67e341e413a0f6b641d5d78d91056" @@ -3974,6 +3992,11 @@ "@types/react" "*" hoist-non-react-statics "^3.3.0" +"@types/istanbul-lib-coverage@^2.0.1": + version "2.0.6" + resolved 
"https://registry.yarnpkg.com/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz#7739c232a1fee9b4d3ce8985f314c0c6d33549d7" + integrity sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w== + "@types/js-cookie@^2.2.6": version "2.2.6" resolved "https://registry.yarnpkg.com/@types/js-cookie/-/js-cookie-2.2.6.tgz#f1a1cb35aff47bc5cfb05cb0c441ca91e914c26f" @@ -4714,6 +4737,23 @@ "@types/babel__core" "^7.20.3" react-refresh "^0.14.0" +"@vitest/coverage-v8@^0.34.6": + version "0.34.6" + resolved "https://registry.yarnpkg.com/@vitest/coverage-v8/-/coverage-v8-0.34.6.tgz#931d9223fa738474e00c08f52b84e0f39cedb6d1" + integrity sha512-fivy/OK2d/EsJFoEoxHFEnNGTg+MmdZBAVK9Ka4qhXR2K3J0DS08vcGVwzDtXSuUMabLv4KtPcpSKkcMXFDViw== + dependencies: + "@ampproject/remapping" "^2.2.1" + "@bcoe/v8-coverage" "^0.2.3" + istanbul-lib-coverage "^3.2.0" + istanbul-lib-report "^3.0.1" + istanbul-lib-source-maps "^4.0.1" + istanbul-reports "^3.1.5" + magic-string "^0.30.1" + picocolors "^1.0.0" + std-env "^3.3.3" + test-exclude "^6.0.0" + v8-to-istanbul "^9.1.0" + "@vitest/expect@0.34.6": version "0.34.6" resolved "https://registry.yarnpkg.com/@vitest/expect/-/expect-0.34.6.tgz#608a7b7a9aa3de0919db99b4cc087340a03ea77e" @@ -7282,6 +7322,18 @@ glob@^7.0.5, glob@^7.1.1, glob@^7.1.3, glob@^7.1.6: once "^1.3.0" path-is-absolute "^1.0.0" +glob@^7.1.4: + version "7.2.3" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b" + integrity sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q== + dependencies: + fs.realpath "^1.0.0" + inflight "^1.0.4" + inherits "2" + minimatch "^3.1.1" + once "^1.3.0" + path-is-absolute "^1.0.0" + globals@^11.1.0: version "11.12.0" resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e" @@ -7612,6 +7664,11 @@ html-encoding-sniffer@^3.0.0: dependencies: whatwg-encoding "^2.0.0" 
+html-escaper@^2.0.0: + version "2.0.2" + resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453" + integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg== + html-void-elements@^1.0.0: version "1.0.5" resolved "https://registry.yarnpkg.com/html-void-elements/-/html-void-elements-1.0.5.tgz#ce9159494e86d95e45795b166c2021c2cfca4483" @@ -8154,6 +8211,37 @@ isomorphic.js@^0.2.4: resolved "https://registry.yarnpkg.com/isomorphic.js/-/isomorphic.js-0.2.5.tgz#13eecf36f2dba53e85d355e11bf9d4208c6f7f88" integrity sha512-PIeMbHqMt4DnUP3MA/Flc0HElYjMXArsw1qwJZcm9sqR8mq3l8NYizFMty0pWwE/tzIGH3EKK5+jes5mAr85yw== +istanbul-lib-coverage@^3.0.0, istanbul-lib-coverage@^3.2.0: + version "3.2.2" + resolved "https://registry.yarnpkg.com/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz#2d166c4b0644d43a39f04bf6c2edd1e585f31756" + integrity sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg== + +istanbul-lib-report@^3.0.0, istanbul-lib-report@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/istanbul-lib-report/-/istanbul-lib-report-3.0.1.tgz#908305bac9a5bd175ac6a74489eafd0fc2445a7d" + integrity sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw== + dependencies: + istanbul-lib-coverage "^3.0.0" + make-dir "^4.0.0" + supports-color "^7.1.0" + +istanbul-lib-source-maps@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.1.tgz#895f3a709fcfba34c6de5a42939022f3e4358551" + integrity sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw== + dependencies: + debug "^4.1.1" + istanbul-lib-coverage "^3.0.0" + source-map "^0.6.1" + +istanbul-reports@^3.1.5: + version "3.1.7" + resolved 
"https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.1.7.tgz#daed12b9e1dca518e15c056e1e537e741280fa0b" + integrity sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g== + dependencies: + html-escaper "^2.0.0" + istanbul-lib-report "^3.0.0" + jake@^10.8.5: version "10.8.7" resolved "https://registry.yarnpkg.com/jake/-/jake-10.8.7.tgz#63a32821177940c33f356e0ba44ff9d34e1c7d8f" @@ -8542,6 +8630,13 @@ make-dir@^2.1.0: pify "^4.0.1" semver "^5.6.0" +make-dir@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-4.0.0.tgz#c3c2307a771277cd9638305f915c29ae741b614e" + integrity sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw== + dependencies: + semver "^7.5.3" + make-error@^1.3.6: version "1.3.6" resolved "https://registry.yarnpkg.com/make-error/-/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2" @@ -8819,7 +8914,7 @@ min-indent@^1.0.0, min-indent@^1.0.1: resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== -minimatch@3.0.5, minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.2, minimatch@^4.2.3, minimatch@^5.0.1: +minimatch@3.0.5, minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2, minimatch@^4.2.3, minimatch@^5.0.1: version "3.0.5" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.5.tgz#4da8f1290ee0f0f8e83d60ca69f8f134068604a3" integrity sha512-tUpxzX0VAzJHjLu0xUfFv1gwVp9ba3IOuRAVH2EGuRW8a5emA2FlACLqiT/lDVtS1W+TGNwqz3sWaNyLgDJWuw== @@ -10891,6 +10986,11 @@ semver@^7.3.7: dependencies: lru-cache "^6.0.0" +semver@^7.5.3, semver@^7.6.2: + version "7.6.3" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143" + integrity 
sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A== + semver@^7.5.4: version "7.5.4" resolved "https://registry.yarnpkg.com/semver/-/semver-7.5.4.tgz#483986ec4ed38e1c6c48c34894a9182dbff68a6e" @@ -10898,11 +10998,6 @@ semver@^7.5.4: dependencies: lru-cache "^6.0.0" -semver@^7.6.2: - version "7.6.3" - resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143" - integrity sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A== - sentence-case@^3.0.4: version "3.0.4" resolved "https://registry.yarnpkg.com/sentence-case/-/sentence-case-3.0.4.tgz#3645a7b8c117c787fde8702056225bb62a45131f" @@ -11381,6 +11476,15 @@ temp@^0.9.4: mkdirp "^0.5.1" rimraf "~2.6.2" +test-exclude@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/test-exclude/-/test-exclude-6.0.0.tgz#04a8698661d805ea6fa293b6cb9e63ac044ef15e" + integrity sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w== + dependencies: + "@istanbuljs/schema" "^0.1.2" + glob "^7.1.4" + minimatch "^3.0.4" + text-table@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4" @@ -11873,6 +11977,15 @@ uuid@^9.0.0: resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.1.tgz#e188d4c8853cc722220392c424cd637f32293f30" integrity sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA== +v8-to-istanbul@^9.1.0: + version "9.3.0" + resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.3.0.tgz#b9572abfa62bd556c16d75fdebc1a411d5ff3175" + integrity sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA== + dependencies: + "@jridgewell/trace-mapping" "^0.3.12" + "@types/istanbul-lib-coverage" "^2.0.1" + convert-source-map "^2.0.0" + value-equal@^1.0.1: version "1.0.1" resolved 
"https://registry.yarnpkg.com/value-equal/-/value-equal-1.0.1.tgz#1e0b794c734c5c0cade179c437d356d931a34d6c" diff --git a/docs/developers.md b/docs/developers.md index 401169490dd4b6..0d398e6232b23d 100644 --- a/docs/developers.md +++ b/docs/developers.md @@ -9,6 +9,7 @@ title: "Local Development" - [Java 17 JDK](https://openjdk.org/projects/jdk/17/) - [Python 3.10](https://www.python.org/downloads/release/python-3100/) - [Docker](https://www.docker.com/) +- [Node 22.x](https://nodejs.org/en/about/previous-releases) - [Docker Compose >=2.20](https://docs.docker.com/compose/) - Docker engine with at least 8GB of memory to run tests. From 4ed128cf28513d97961e551b4235ecf50d04955c Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 31 Dec 2024 21:36:25 +0530 Subject: [PATCH 005/249] chore: cleanup extra lines (#12248) --- smoke-test/tests/cypress/integration_test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index 33c67a923c278d..fa7274158f9e23 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -212,9 +212,6 @@ def test_run_cypress(auth_session): else: record_arg = " " - rest_specs = set(os.listdir("tests/cypress/cypress/e2e")) - cypress_suite1_specs = {"mutations", "search", "views"} - rest_specs.difference_update(set(cypress_suite1_specs)) print(f"test strategy is {test_strategy}") test_spec_arg = "" specs_str = ",".join([f"**/{f}" for f in _get_cypress_tests_batch()]) From f5ecee1501e7646f6dc0a5a13202d32c7b096ba8 Mon Sep 17 00:00:00 2001 From: Jay <159848059+jayacryl@users.noreply.github.com> Date: Tue, 31 Dec 2024 17:56:32 -0500 Subject: [PATCH 006/249] fix(docs-site) hero image typo (#12250) --- docs-website/static/img/hero.png | Bin 380503 -> 380547 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/docs-website/static/img/hero.png b/docs-website/static/img/hero.png index 
44b11c2781143f106c6929cc439d5d21feff18ce..a520a6d8a6b6ff8208291bbd79e0f1df58036267 100644 GIT binary patch delta 97725 zcmXV1Wn7f))1|vXx>-b!lJ1s}5>Su^0Ricf=B7hp=`N*|knZm84yj$bbLoBY`M+QH z!@a+2u9-9EoSADrlPoBUEcP`vHZcMMHZdYXdM0_C03Fg8d4Nuc4q}K7Qiu+6tqw|` zEP$D5q2R`Xk>jDO`%(8J8>ZdIKsF{2gQ>Z4R}doRQX~q|G2sF`>4i&>9L#l@G^)7> zv6;-lqf@-D6Ow(46D9svprQRZr!%~+(@t5w$|)$=iArgKeM#yyn^HkpU1{-%*BjUh z70YH%1Tn1DfQ{F9?p9g%WVL~iMXe{A3J}}`)O!Z6Tqq**D*2PPixpzyY)tORoENHz zmubKyudE%?FvDCX)NP5(k(uf;-9q<>Sl;|FAIXZmcSCyhf!S@uv^rW0 z_nIMAMW$;<)uBb~;$bwvoPsEt?%{nsm|2hPDecpCWDkuOI$RMPLPZp*W9X@?418y7 zyev6^E4G9N6X>%2Ok$3AQxsru3eNw5LZoIT-bBKPGbjbp1CGfg7Pdpk>&MIu8Pa+W zFbo!4k{X%O9&*{SMDX8mD1WWbZxO)b7pzG~iII;1(4K3$F?A<3)kgY{K+ zF_IIzWwcMCQtbV$G^ijhC*T7JFvz3DI=+`zvxRSZuMsYPnb($hKZ>k0G0Q02ERD0g zZs0@mE-2n{a%=6~tyC~m^a*uE+{@Fv7W`zSp@FaM4%{l_vGi#fIbc%HzfXSN>!u6S z8OL2G9;Tqt7<8k%zhs?c_V=1A4zc0Fm>{sx-BhGk1VhyXbk2FFMaumfQ15$R*TBV$ zv~{eV6VJ)>iSz z{4?R&_&HDqi-iUWyK)DB95N>|3)ew|N-z&vLHctw zy1ey3vws0!agaauW+F;D6mofo93dp5tR$%tkoYoASvEZH^Hj(-T&UlHW+%LCPpp@@ ziZ>1inVe)rs?V-AghKB81$_~3-fHZxJ`!|BQ*w6(8g5zCWV&!5Hin=l&p?u{l<(rs zY!zyjoj)(d;NUyLr0ZDa892=5QyjyT?=%tJN|D9AND~wJzV;T>i8+#vNpCMV zqM2V>!Ao6OF8cu3L|oVm#}JEu13Ui=n)|AlnKi94WaK?t@X14!AcVm>c+TY}a_*Z) z*tcVYmT7tr2K+(+C}2jNL@KXaW7JK%o~4*0)MNX(>Rb*eloplz@p-GSp0`g0X*Qv6>n5V=1qV&bV?zs1txBNmSq^6RZ?`Xx7ZRXc&cB(l+ zq7FfUtI?|-@3X!_yvZQ3zsn&*WPJDiB49r-lR&T^+6;Y!=kZIrHTsE5YUr*h$Xe z594`728o|tK3k2{+V*?D?`%(hRI*_kW_59ol3uTJ88CDHmyKz1CWi%8-=S=E_j(rRxb z5=fv|x+Tmy%~KJw@aU`rmLuVi>!Vc71{h`>#HHs^ED*^avjz*$cJlg75fA45JD?_ z+tCqn=_wW-^c(s{^Sa6!f#k>4UsIr?((HAIF0ML{J5Fm@w^L`E{Ao6m@-ob+)?$V-6C&7=CvL3LhxweD($%hg{XZuRdK;KithL|1>ZD z-MMGvLV*ePv)UJ5_wDRNma>*uDl!*RZ*QbA|cz=V%hrcHWVNk^u??GQOA z2O;7_hp#PIn?JF`e5IxmF*I$5eQ8(7g0|X>LuY#@L!p=b+xnlarV9%^<&3c$k3FCR zao6r&y@+iWY6SXrGKxfyfo|_~hNDj55T8U0Ga;_P+f+yDo}1PSU`-wI)!DAhyOiz2 zj_}@?iJ*pgvxi)KAMd6_i8w9Vd{ynC_LZTS8x&m?U4s5W<76&>=o*`6O+*8ImHo(9 z24&<8EZ94`EauxxnkW$4KvF4ZoO^7-Om1e8`JRZi;vNl)rufB^K(?wXwH5z9qW{V7 
zeP3pWx)4Nm$5Oo&IJ?U?oLourKYo}_ro2&qg)EXYiKA~Ux+A58y^W+&dQ?#8fyxa> zhOcGV^oXvjjmiKk*k*5zNI}%3x5D7ItY(#Z*bN*bs9hZ+M5=FWQ=2`lqkWCFou}wk zDe^@6<^C}^AO^&~d~rHU-j%YtbW{Ic(X)}uoL)@n=8}ooNqBx{nT1XmCuiB4WVObI z*JnJf-!y*q!>%frCO}<`5BvT#Fc$uFv(fFi5jmRf!b~roS&qssNXUoz={3xRqb@9| z$2o-tvmc@)lD!MFX*H!R6UINX5l`oGTfEcs_nh|IwFy}tS*O=coF#v&))R56VVy-D z6T=H1muT_cqtrq#!f!GyACBN$vykCIH&sSPsYkL+c@Htlk%&%NWwmuKQG0)2u+^w_C-)gHytqdXdv(~0yfNi` zd1Hd|I<*E@F+M|OVgBI5UlVQ>#_#y?9B(4fO^vD4?{TD8b_!)F`CL!-xHHeA2;6)+ z2zz9Gb1BeklTz-zdaA`={#K`^=n6X#MS&kpGC9ujP(H2Rj!^qt0vXk(yBlV$XV+z> z>W64SohIDW_%->P+jd7)ZcDE{3L?}%LQZKSF-3*-r5lg#SvN1{MKr@hZpBoun#{T- zz6Ld@h6Q5=SM@1 zauGh$*3@KDZ=dd)0+>M%M)eWSo~=XIW8b<5F$>zS^hDoP&{Ou`=zhdoXkCAy#@Jjj zhuo}vw*F#@*Zp03%6KI^lt$$uYD~X%n8GE6YY-Mys;ZA1`@%kUC`$Z$@n;cF0yEp)%ar)>YaJFN)XaDbVr@6-M~UKB`(kSyDFEg1JXzK)xSSp=%6 z2TCB3!Q1h70Kx^M^NYc`+C$q{F~fY_j0ijkABc7bc}O+;s9c`Liy-l0kr%+urxtFw z`(V{L>+KdaUm1m}z)>wKiC8)eYHWk5v@BiQ9yCA`yCU5%PXgYIw_TB8h?!&!do|wc zjEfe9<0^c+PmPb?SZ^9608E+2?qMs%tjqn%jB9ZP-k)-{;CjDI;Npc%II;OGHi3lr zX_<+Z@Nx~NvXAS{T?T}Ll!nH5{|vD5#FKOp}mT@v0J&$mev)kRUR&T_XwZ=@i$#Ga-3aB^) z{$4)}3m=0tGJ>zyds-YvYD}l+l5GvEQ+{foY#=J91Z$#zfUI?4D)iF~E@29LpG(C9 z1gqMVaEHI^>wxDIX~X77^z-ND(7$E7d95Uz%dObVntWmxuw7Y+QIDY!o{0xH1p7ex zqONF=X_tO9wudN56r)(;^Kf^<@}= zIVBb&54$`7_uc6z+SIE8vrrYoPmSUWp-sB!VCt zIS5kPq0{rmL+IKRySV0(R&K9(pr(|T+H;BMu$a+82NVtE;tWpco6UTf-h~?D5nhANl-D z6~*uUc~pbKo#8#qoqK0hXx)Ag6tQZ7AxUmre{Uu1_M?O@0V=#5F|P7glWtW?b?2@wgsVdWVc^5J5w>MoR5EKqwnG^TBMn+%CG zv*^%^usDAI^*X>M~T?{ENZf;n#$Jk!36@zv)6L`O{IQ~{nxt-eC3I* zMEg{!ho9bQ9|;@(66B<1>*guZqEL#DJI_S5)IDbmzKDt~tFcL12Ni!WcoXjuxUne- z;QTZlM9atuLl?NG>=Y@G{-mkPvwaweb>W&USIEWDoeFaM?EJg%UxYCyOhU34t5(1J z)~Y3E+9Ilv&rFyKuK$$a#?7)q(2c?!h)>{&3tA&aDRQ9ffP0N;dmQOzY`7nbqPU;r zge7&nEmG-T9?K5c87*}xA^eBmy+Yf(g%9mAit6zM|QvY6>(QXS3$og1Zah+d9+ViDK`RGvS9hKpt zEuynCFJ*T$Bd?8JHEGVCUM83qzA>LDB=4-o-9kjL%y_`<;m}86PqqlpT*!~Ot8f%T zsm#mGeW^XdD^c^xELG&qDX^#fmd_Ek?J@8iVATm{io2hwZ9I3EQ}~Z2#*)d&?G>vc 
z*Q2-GOy6d0gL(FCw)2roOim^lY;(RpT!X_{%D=i+k(|4(6guQOR_CJCmid>5R|M*o4+i-m!I{HBCiTYTUtmU%+?9Hy6J5guLh}&`RSQQjvwHjLWC|PE+a*?oA$C=q}Y8gcGX^ zX){WUCfhX89^a@&y!D>>5t&tU$Vqlu5O`G*8~m)w_CiBiocrY)_De_0P>VuO#Lbvm z9%ig>F6(1DRK4-K5iQ!s5~!*!6QeEe{^}Q+47sh&Fgvb|mgF>37_B{AjsC{tzkLMu z^1?R8uc-rqM$g>&n+)8qNrlW3ZXNjk9Vh}5^tn*b4z=A0Ewn*>=YjCFe>?BvOFgUW zBKO@knnD5FVCf4%Yv>o03`lXTT+~5?KKJ|g1W5*CpLgJy1Lwt9K2P|pOv#+^o3&oG zP?c;7^$?*RzqhpXxQ>QP4?HZ0hCqFuMI9Xo+N6vWuOH~l!~3y}{+M1t@8|LNo4?l| z2H$DLJmqQ8Y2qz>Mso<>V10x4(mM$xg9lI zs;;OHF|*kRSS^Yws$7`NUrX|6x7x0a88|&@?N;%MXmDePK zHJ{cp+_#F!IHDbTC@?gwJI$Xj)CTGE`y{uN4NzR-j~v%c$WLyhfCwZRaZZ~MwK0)i@A8SW0x7vy4Q zGaE6Ng;EN(p_MQEPB#b-Q|{R)ROkslf;j&u+9%4`Qt1-42b+{v_~2Tgd%Ij%(WQM(iBQi&t#&MU zsa=U#Kp5JgLc}jN5=~e{+K0)E3mJnmr2KGtr`jIO)W9H1^O zyJgNK!Wv^*ohK||;H{6w%w~FtBoYdfFbhqo;HfTtLXdIEbu?dNOqR~!*k&B0N``uz zHh)B054!nr>zO~9+R@`?3N0?;_3LJ?8xlG1$ekI}p}qK=_h27OibfQ*i{d|0m4#2R z)!;vzZoX0tNDyUGEBZYh3*vVI-k01D)h_j2k@W|%BsJN-V zdNfq?KiW*M$7ogT$t3`=93Ic{Z^|<{u?_M%YZA!}bHm548&KZYYizRh$k{fjljXfA zq`VE25jABbw_{*pAm?G>;rv6cs=s9+mt~e2q7E&zocp1nUVM{0 zVScrvL|RRWdwbe6$|34vADmD;sX-*{6!UQ?C0SH4N~J+RW}BfSm2#kz*Gw-5de%Ii zLGt(4;Yv}ChrZ^_IyYguW&j_s?7YsH4&1YKb?)t}_utFtb6C`jx2ljrsoWY1au|bJw z=_z>KR%?KtO&-8ux}l9_j*4Z=&D#Deln}M`u$3G`MdSnEuh0S<;83pZ~?^sh=|`$YWYBq5)saH9vj{()E{OZmrCf- zVbx6EU+F7OBYfd%dqs5Q#+rny84mOf3`YJ^7TErB|0Qt3j(}aZvr>h+1ihb{1ref^ zYAT}}HYiQ7R-JI5gLg#ZsD4(eSa~hvG>ED5id_O#nl+~Oo(a{%i81A;B^0DCi8cyJ zw2J?L$^myY;L|Kj`3E%~4sQ>lO_@X_zw;z&=JgxrxXGUdd^bj=_9ynX0h%%oM+DMC z4d}{vpN|zf8)(2Y>$gFZ-PPidirg%`uBU9Qnr|hp{lj#q3Nq3eUnAq5CG0 z-yVCvsg}3a!nW^~&l;aQRZ4bt(ERZVijDxVT%>RUM4+$`<=50aIAmN(x@;(!9c6ho zFc`z3`e7UDRDrIE_85uJbXaLSD%Q zdDam}=GF$y(6_s5@M$Tyre2m!P;#TcV)#jfK7(9f0?LBoGZ1E*XNH80kVk&B2hlGQ zUDW;gpd~dhpwL$S#ge$sC#omYr)lh6;u;$IPs;CNpMB%wkOu*@kk~70i8zdzpP;YA z-@V>2)qicCUMla({Sw?Vo?&R*bR8znqN@+R8wh+gt~Te0Y}@~UsW4YzPA;{W=D{o; zDMO<}+pjQZ;OD#UEulE9!j)_y5@)>_Lh;{@&Q=Gj`>2~4^KW}vVa(r1tbs$m*Xn~YyFqHeJQalqo7X=jdrs?7W zB?w6wXlqccFXhz^`VEVX`=J_{0uqtR5Aq}W9}3L`qe(5^TE2_jk8Ox|1$@?Jr&O7D 
zB(pEK+zy1uy0<~4_EqsfFB(BXM zV3Q95!kvz0!7#O19|g^!n?)UJ)tBDyZ*?LUNFTWe(FF#}zuG<@s{~)7P#CrR1fqftULQ?7CwwCZ@Ql%N)5(7&sGEvllN0YmtIj;W_tso$YNd zIqyCl3mXt+irGc)=DW6g!lXUUcdeXVyk&7F+Kvu6PFGKaFvs{(@Jvyt5fM{fo1#P^ zUZpgcqA(&prYxJHZ~|lU=<`|P$nYKaESxpZ^UF`xv|_F}e%r(M8wgCi%$*Ulm+E7S zNDfQl#1Q(<+qRcuM)Iyix3{yt7uIbj!I&?ub zeO^NZ{Z>ewNTyBlq;dz&1E)>vsLOGAFl%*5Yg!6dyxXG(gbirx@AB+UdE)t>GPI&s zUwiAyI~;H_*+AYf5xOvOB}bZ({+`e^m>Q_6rgw|Dvn^9-thMGbrziZ-hFBdnx=qM= zBrcNQiZ;*?IFG%DYmw!xO?Q?Y+2HBbue~c%c_e*y@B`{d4Mi=j6?GaF7RT>5M{R4e z)%sZ`qKc0OJU2)3O5OvTOdiC0mkm?9yo<3RtqdiONiuOh(1{uQ#i4hYat6BlS!;3e zf0{z*mBdPZ0L8t<*pe6(#~0r(7<+!i0m36k- zs8NGkX)7;m9Dk!v0l)tOe()#gl%%@r;q19V0wq*3=c>&DmU24~+~2p8_xQaR?LH5R z$$!u;&!5^N>$>4f1V#D+3mL=zLuY?-+6oSmivyq29jP1wm#OL3d&%5zu1yc|Hf(mH zHSBn>I`0c|uoS>tIscYsz)67VGPPkKyx{1^6hPvMk*R+w9EaZ6mX`g8iIo*g5`c}p z#iaQmY6ZdZFAA}JPzRi}JG3n~PPmyErX|v|XFnR;(Ke<@Yjd@L&b3Kb7?_H9H$PFG zH)+Zz=cp9N?W8k`wUY6mh$Y^7lta-YN`|a{#TW9#yKE9tYE2eJpbbcn|H9=~YY9Rn zJW=~BaE7S#ay!#noixK5zxmiK0vH!$DdheKlC>dLN)Z@&JZct&J(90_{YaqmhG@bKtRnYUK{{D&gnrOlUT)sdB6TL@!y zlqS3pp-5MD#qr{2V-UK;IT!**tTu6@?}O7H^jMl^CuM|d)IOzt428Wz=?|_n?quEu zG2$nFAM*T;G0!EVGzUc1W$X=}U%v2E3#J-uREzc0O2C~+v7_h`%O2=oh^fL0BB1RO zx^?fZ1qtM=@6h;s%|aF=FiiTFi<~}}C=_0er_!NJgh`xz*4>7?XXMstQ&KC#3!Y~i zaaWyc`4HqV_Lc6Y193Wu=d#`^L0?~6^CkEKzKuKMT4-YcXpIMwoASagVZLsXxflj9 zPd3KOIR*FE30h4T>PrZD{ya}GE^%!5i7)!bAXA=_#(ydphec@M7@f^L&is%6=$4HW zVwFX!E@eD_-viy(e##%Hxs?t|6gwzny=c8ma-Lh1TzCV_+jv+q&K+ac%CFSU-*)k^ zBdVjMDFbH#(N{FlN8JV;TS@A-jW@#sbL%v#7lD0^(u0dmMANKSxrpV=-8BX*1WjMi zsM6_=>K$&f{!$a-{fyRN_>P9Lg)$fc$-d?)7cM9xmIt5W+R#-^3U8&8&3{U5f$DNC zKW3chOOFZ3G3KgzRjWo+<9@ekE&3NeiH7-J;HJ-Yj|sp2OOdpu_fmWcC!Mhrr4P|< z+MTpc%nK_inTQ1|Fn;W^mHp#ob_003pDsNla6bo8~_Ukvzql=+X_yd8V%gPUqF zo!xkQn*;5t+kLg8>+Z&4;|DE=rB;g7n%5l0@F!lu&iV9)9PiED0Wk+_b2zwYXJfP( z5a=#(w@3P4P(w*GM@)NEsRI-lCNy#-}R!)?Sb;KkH0xgeQ79MR+tY+XE? 
zn#=0MLyWM1aCQ6e3EV}|Tc4!FuRipBlR@k;^A}pK!w!b@9H^bc1xbDWgfB`S%7#o_fV zi`I@kT#PNAN$Rf$I;3NTF9*#kv%8@iF{15w`dT^l{Cm^4dif_=3csXIB zj}cy=kTbt&Y&d;1A+#Sm6Mj(>f2vhY2>JurD$ND-Z%G!As`8=UZ_(8J*@^eO*s;Ln zyIUmv^0}vEX(|NfmPIOcW!Yxhv6Pbio>7OCud!3|oPDDFN9ryiw+Sy^@0~1s(~*dH z^@%LHCsd5xD1JWCdmr)gyVjB$8YxSiDMNSUO^bovx6H%w9##wfco%Ugo5W3}>&1kR z61x<@#_@1WZ}nee-v>d17aU-A$ukXV%~bU3khh-nEF{3`8P&#iM*9M7&bKAj)25Pk zbk4>bq=hOAm#7P9!gxnn{a;MMwe#NOUAdNfLx&)2`PiS&hYtyOOf(ZuIJm1u#7PX0 zRtR@9w<2RXHd8Tq_Kzq z&wuUR5lM#W5oKdQAlPt^zR!f<30%$HJO5#JNso8>_+apLv>Qh>&x^hwPjJR zGfOn%rOuMP&J&Rh@gNJfXnxrHg~4Cd>_rn?!$B!=Ia*VC%lO>!E>j!w!NL<1enrHO zawdhiF5(BSJ`a=$5yKL1HTr{SfO+otbcxicCVHFQelL|dzStH}V8R%1!UU`kWh%OE zu36gO+BDoBqkpeyhsCLPwtKIs`#*aCvW~yOSVg%UIxL*=5(lnuJpJ)VQz-IT{Ke0l_qV;fDzdQEg)YvVH@J=^enUT9y`;v-P}j(>n~ zUxQ|yN8qzH5Gd;v6C-i%iq41RS~O0k{G!U!iJjq&z0{JyGLdShW{e=)6IBh16)&Ei zEz*qQUPS0@(v5!MYbdayR7!hfgyF171=l&yAXu8c|5dOz$snI(_i>2ghkBVf>1T8O z_vU9cEfT^}Py?kqyXU$?;D{B^u&4ireH=2*eT!O+k9S#qE zVyjga#MB})y7)h8Uq0&qV_d~>zy0Z)ka@$$*5{+l;W3`=(gS+6OamDVYsS1Hbd2Uq zEquA(jeT?txX$yTy?xmltXm#3XQzjiaZY2aBB&1H-qgB?caQY!K~cg(`V6EdhG{C!@eeP~^w9RSPXMoTUij!&@;E?^Am+SYowGup z@2f=Rd(U?w5*$P4Bdkmx!6Ui(3S`FCF8>_WrTbq1KhBnXzkD((bo{NYJ6`I=k3^+> z{@a)Brh&pZ;s(o&2=`y5m2x9fM82=u+m9k+jTR*>2=4vHZlRP5%2eb%I2yYZw4u?4jGgw9Fn)0PnBL5ay=H;!Y zN9zL-b?BVF0blMugz@2fPqL?i&(_+< zi{j~`jU%oFtRBH^#sSq8hYLv`$om5;3{15sc+hypZ@us~lVvmyI5SK4D3~VX-!$S{ z`}#o4vscK6m^Oro%AQ1`C5NQBhZ3XJbUj=a!P&C&_+Jtw z6&ZVB8O+5dXC-YLcLw2$&k<$l7(7)qDD2#xJ}3tN?A_^?_(t3UxwGkbW)ip!3q>I* z6$=Upcg=s)aUW?XWyU(ha9j+_LJNc0hnbH^)A; zf1V$@jeoc@LzKl&oen$Fyp;Uwk}v-9VGE+5-!Cia^rX%I_u13|wm)GE4vXfbg+1HM zn4D{sm;yrLS1CGJQ;XxAr2GEDiuWzGbC)L!MXwyhEF$)$6Jcr1G=N43WPwA5X-TS zI#nC*`0)HKLtNI|_p*8tm#4HIHyiywkr`?>AKXvwcd!ZPhwcR>AAx=skkmM^abXHO zrg7}N9S<{X{`Ms%^4XIZTE9kz)iVk8Uqd^=B3Z-?-+KY}DTa_{k5N>v_)f~15auIx zT)X?Yb4RCf1`(ZhtZGOWZi@SyfPZ7?LdJnRR-09>;F-9>_J#+7+ta`0nl&*ulI&(r zthROrN_Q|lPWug1;|9EzX?In;A=!3FvLKe!j&LHujeqjk{5Z?kAoti*UEkrJwRFNk 
zqcqj-0UTn8O+(>ByaJse%iuNlO_w?~orw9Z9!C!xJc;Uhaax>0lLOYG|EGVGR-s&@ z&BW{3MTmG&t3&zWl2`FbK8N91=x{?0x)+)b-BEegF< zI;7z8z4AnsWU~(j(YA-fbh!p&oEI}tKqd_wWt>p5zvR-YgD)7`()Mk7y^9w?KrM}V z2O8vS{l0y!!UGkXjxsn3+$rn7iL5LJ$?nMvtMH{cO>jNVP^P*`Nz96Ir(zNe`DS!!&B>jhC@e(>KqDYT8NFA zD2rdO6KNKqOk_6v!jYjGaF5z!Gp(|f3mxREbt;QJ@$ho```Qy#xn$K@p0Po*-}mkp z>|M$0BMGEmYDHD|!htofp~y2AWLINB@t%A9_aKVj1?o8MDvt4`W>p5|me1S+Z-EfbKdQU5Xv zUFQxL;Bohf-Sk~)pO&s#LHSNJsPe|y|4m<_^EwmyR~CeSTU?)H?N=Xe;?9P}zD_I! zafyqgyNf;AHrqY{Ls=Q?!rSkkXLZlqt-Ha*tUhpLqy2q$Nig0AjBe&}coKM@QW_)4 zhFxu&T0S1kIDR;<^ETPcL=|o8Z=ODb%bN+;jawb`a~a%uCSzJyG@NGFB{+7~#+)tu zvesLT#{`8$CZ6a;jKBO0cJ9@a<0ZoJn$++rzn1RySc!|D zXsPp8#IDUzvD=k1Ieh>x2N0$#TF>)Fg|=Wt|DZGJ;Pv>(t1nW%qo zKH1o&UGV~zQ4e&M7)gukEwXqZ$AKSGS7=F?5-td7bSddkB=Pl z(Trt;%)YglP+KBcht7~r`@=2$Ai_pQ`u&LZdi`+ z($viVVv89?I~6f@f87UF${z@Pk7cTy0cQc6-&gaohcidl6vs8PY3awICsj})vYemS zit&9`CGuI~@|UUjaPE)AaA3;r4Ayw##zU6avZ=)E$Lw$t;wR>n^9P$XgSeePP;l?3D1L-tL?Ya^z~2g6Zz&Z>%SVNlxvtp{0U8Ajna3`4aQpJ8L2GA;d*snCvY$*3@k*6pzJ znyqu<@EE?e={lxr6eFRE6v~NRHlp~OA7SpcoblNdHysKd7F=j=*)id{LomolAJ6C9 z7NfHyUzbx+7;Rubn7Sxhz8{6t6o6+v`;>H&J}9eKKg*b4*!=7?=yEdVE~eoy>N*&e?9^x&0D>0zeTA1adz?4{Z6`^1+BC(8-@WaK;>fQI&45Fz%f} zPRP&!w$1=M3&!gH`8RT2dkJ1K& z(Jc{UYF{F>*Rx)DlUDBhGQH}6#r59XfOe2N(Fp$V>WoY7zOVTP-Uy)Q^!+q+96V5g z|K=d5x)WP8nd@s=M&iBy9~PgDLE)SBV+jNsOfZcw8vJ_retiev6vH&~h{sJ!rc%UW zX!O`gIkArriO3RF3sR<>Iv$!gn0umiFuWGKHW$hcQ9|Bp<9+z^@nJ4Z0xhTgDyDMt zc*RpsndEuvy4-%bOAnnW^W<(2zvp7@=E|K8_RZ#*=Ofez*R}M4Z>bGqLwwRR2}4&H zt)J%`9JuNSY*i^B50r2Kk46OPbwCl)VE8A_*YA`Zjht?Wm$&B9eQKZ&LEyE8W8$wU zx-m)&KKIlX6!QKUa%!NutVQ}3#m45E(=WIUy3`DE}nqsGGc)cDNvBp=;}KmufP@ohJ(H)$5R!Dqva zF<%Kw2wnaVg#lTCCzFGnyG^)fwP!$L!5(^2OQRTTHBpM$Ok1Ar%Bgw_SFN)w)hor=S^dCOqb>x zE4DL1&gZSK6&Lbg*dQ=LxRz%t`Z!X#6X)P8#aX>4bGA{Em(D_f2tdV7VUbw!R6xr~_+h)#e zL~V@!{C`Nk{6?ZTJEg(ouLbAx)EG>+uPuCObXZ6UQ_ZKrcR$F25vNP0&dOe#c3v5M zF>U-rzi|_=cJr7#AjhMjhen&OO6pNP`bJOdc(oNd8L4IyL~kwpr<%Iv`Vg5Dumesv z#$JnYdMY?v_$C+mG)l5Ko;G3oi1n# 
zi6~3baqdgQgvVqywwuF4`ZB+fNn<%{$0w3Xu?X2T=vrc?mR0Befv8Ac$P?+9qZbb z&&7a?$&WO3FsE?lp>zS4$U@0C3Tbn8+<(1M`@k0%wU2P_Q{YE|sQp@tqjetg5e6il zv*|_?mxq_GWW+%LU3W7Tf+{P@#XUAJkxs&oioL5*X)RHyAI>)|U_9cYI?D`^HhI@d z=$7ji5Q6X#p#qm%et=@J`cZ^I&%983ar8E!;EVG+lEYLUL8GOUrMElzJJn~@t;JlZ znd|v3n4rqo2RUVRP`AX*2Dr0%?W&I1Q-sA~J$#t66(bpIrQ~87*QrTol1^!+r2L8Y zqcKST4acU=DvY?yi0=MIwRh^Py|tc8qHFI9uCV%t)1e4MAUDIjY}TAt8<`DZ*z zVir;>4Y=jLxaPt$FEsOZUng|GNiH&z{DWbnbE9DC5a2+&vL^D#&~i9q?O{XgcUOo= zt)9sv?hl*?a-24WDn8<`385JKy^Jdp6;SWQKd!<@84ei-o?>bM=-9LurW(reV0et{ zn7F%O%YKz|hvO!~a&NoWxSQj1ulY{eD)@8@*nJtbe(n^sPA@TlBV$>v?m{QD- zRS=7GI9N<)LRq58ud0>+%7!%zp+p=$s8JW-YJaJ|imfV7DBW}LJ0ZPgb9Lb6J;R+3 zMY#}ZlSO{>^Uhnr7N}r$>Xm7w-SB}ptf|30{R!c%aJQ?UXvfYWbsd|F;)O%bRzNeL)@ln&BLF)V%CUM@cE8Nh_K;+;4(6(kh$RCmRl5=A$ z)JIPE6Q@dhh+(&weOn_F))}ppQ`12Xz;P~gG-J*YVGaqSoFaBh^3rT2GHo>IXjM_N zL>VVjG8`dZ)}GOc?OYim8oqg&`0^f)3n+oZZ#9yv1lVbwTQCL&mpj=V`>M;9FF9fU zY69r)6f%XtMv&RsN_X;H6_mGgR|Ad$LU{~4z}4o_>G;2i?P_bI+x11eF;H>TK%A)= z20R`uud`&d3-b4DOWL{K)+(#%Y0%7xI%0|-MhrW_7JOV}ZKJ8klxEsmB~q|!+M%B> zx@x?&J2WQV&Gw&lz2;JdUbn);`LxpuRx?5lX_`K;TZ(@AEnvY-z8`KTZGmJ&AkV+4 zO1Pi;V)@y)jKok&(O^QY1)OiNbXg$3-PSM6iVStkzfh;klfq`t8t$&5TW#DvF+G@z ziIfwxv_gPI(i(3qCYunyN;|K&60gpC#D~~zES@CrT{5LmcdwQ=p@=L6hF|jx?5d%e zj^@Viez`b_pLunCHl{A+^`sh)F;vs`DwOMAzaRKhgmxmcL+zIV2SO*>;Y(lrF}S;! 
zQw%1Q9jB(ClmWj8Gf({HYb(4{+RUFHKAhd5`$6)KoGn$r=JVr^Ba%TODa;&(tF<=P z!>e%wpwDMDp}~*+^N%JrSqobiDlU)1M{cF~pmo2k6o8TGsKOvv^5#bM$gk*?nNEfL zgNp3_j{?x97BHm?$Xxd}-X?RA!a{b!K-M|g7>RC!y=3geME0dQ84gdMC(MWqF1mFr zsx)Bg@IlLu&dD7SCZ)*iaQ#1xY|}ZVAN3jQ2TwTWT}jKV&X#+Xl7FTi(;36=KtLb+ zr2J22dUQiNbP0lZkIr7as{60I%UfF#-Un5a?4A}ZJPEBpl;Hr2$V;q8Rt}w_ihN=c zu8S+2=Rx1&^KRBx2y>B2JrD|sw9~gVY4q10@wt@rExvM8?IArrxh>W)!kKS7a^sM` zt8PD)ZYXbzx#@+q<<$}yz8YW>=toe54gfk|G`}(sRN%&Lwhl{Rj7f8-px?MEr zqB!Zy@MQ10TKxG%;Ax~J;9I`xU@L)_idxfIfdov1)2khN>{>D|I`r|r`CPPK_&hNO zD`owOVUta^K#?xCP@8k_bvDcPj4e0XIrAWw(g43vd5tUz-uw!~pkE`E+U_3WucA#g zTD^iU3O>yb9ZoTy>{oq%@H#ft;CiP|(`X^Z5LJNuHYQ#^3m*PEKKS2y(q>wL8glMR zK2STyz3|exc8B2r$Coqkq;?NEFYVt_(EPxhCx0DU#a!OLqKW8>(^{AsS%6ZfT? z^x7R6Of@9E8$;lcpJOt@=x?0dTW8xJbNR#8dob&|dY0q>E+qxu%7=!cS9vG)tTnFLFzo&^@OR*V+ZM=gyQ;zJ z1mk;sQDDSW8V$mYs3&<{F9ANah7T+k8?D=TF*=@TJjnX4EndQvb>pLh=N`qfos=S}f$3&>3*`CD4f7hp^tUF2kngSu}Hepiol z*27_2b4yfn;_+d6UM(j#Va{WKsHMBDEIQs;vFB9)wUv^y)^>=+6UBGyvihJaPpJQXTRr%rylXLJZ?cLf>s3^i zWpGaUFf$F7!{4D+Elk0I2=&=FGrxG|{&1~lBfYv3MvExkrcqq@iRYQgF4puODL18~ zoFM(Jq4g1d;x(rUXh%e5&g3h8+~GhMil_~p#InU7;>q{~)fCAP6jESz{yInB!twLz z1NPW)?YV4x6o!WLsci7#=jW{8Y+dEyosQ4Mu*1B0hp6ha0 z+W4=o7UP-Anjr00$iuT5OO4Z9uHzVKYGqDHRq&`W=&5quN(Jn)AoLNTXWZp$PZQ(X zS3RwY<-P|8jB(-jsy{!jWf`|Mi?24JVC#&~9gsz|JU-{bj1Ybe)P-KG|>I;XDR2rXVJ{_{X{B)dqE?X&Q^Z;Q9Q)DD1mh zCUN}L%VC|^_S{{)*HqKf|3}kT_%*@3Z-XEq9ZGjgNHaz^h_uov-Q6+ihA|pONF&`H zf;7@C-QAtJTTDWo-mp{n|W2P&j+-wxrYN?&A={jkECGKezH!02~TqOy`BI z(OF$ow6_eX=H>(q6$}9 zN{aY7(0mpx(0Oq!YiE&n6zwj6*1RkRyD-zO`l!znirvS^efuuRcwg6&8I!mQMCYyW zylh`_|LkLuFek-@TPMpde(aRHFKLy*7Mjq|EAvjuXu&6|dG$O9Bh}YAnx*#03j<#EYQTv;H$!ds>?~JGuwgwOFXvh7) zK?H#DB#>f|mBjrRK=$E{Hgyz8R7O@Zr_Q4J@S?l}$&GuHZadIad?H>4Kpie8YAvn^ zUgLm#l~tq~zg?iyl%}ojQdl~4tvpp&|0Jre0KIo)LCYuqWh=ki zb1P;itdvZriU@1@6ijlvr~NSvSYvk)nBcNj2kj(5%`*nl2QzOW>!1~)@IHi*^c zj_2Tcrsux0LXxHP2G$A6&n1gHI4Nz0xj)|OeAqI|z=QaJMuI^B@!`IkI?3iAch)2# z8Po^vCcamh{&|isqse^NCrKQ!;)i??**Gg+kt&8{5#pTYy@Lz=6 
zQ1h)>UT#-|$c_cjT&?>27L0fn%NR~VYYi3^j})Z!)PD&9%|o&%9#1L0Cc|C$a>Q{@ zpeX)HPc>cn-YL}O{$QuC{Wr993rxE?SF=-mO;9n*0v8Ua|FPWnVA_hD@LKJ}&s&f| zoGV}6bH!uQ_Lq&zV|RG7inseU{6*vAAcfk?>9euU>Lv~7`29ymb4}@(&Ii>J9UEsv z-590Lb7y&#(k$&I9I94L2Tr#|@5^wdWylf@BEyUIQPmR7H4l@IZm0qb_7Ud*J^|$6 ze0|h}eAWgwA4;97Zohm6OPfv8gj*9k7J^gC)zx`tSgWtZX=#o<^lqZlIB`{JNcA|r zj1JME@+yNS&Ng!}07Uc`ahmbk}DI5Kfte>~lmcK*;cRx{4qnjI;tALS@B zXSE{1)*Qmz^yJw#md4z6ieCzL=6WjKJ^>x3cN|fcoQCoHIFt^CnFY|aB0Yw%3{qM7 zpT|N)maX|2!WED8yg3jV+!Zg9C=_GNKfF6mBy|Pt(^&WQ{G6wGnk8Cm_Ftx2XnnZyAR(`;Yv6fFn7PMoU3PrpnZA{Ep$^N|MgJY1N67#XD49t9t@^`T4f1W*w!xQ( zt;oaw1i@hNX>$jAqOG3~TKL+}&Kph0S-&~*pNS`L#|AnMhI`^V(wCX6T!sv7;5shO z#>IR2fWFWoChymgxccT(Z?sGE(I)8>e$_G8KH0Ckr$W<0)V6S9RzN)`A3gc&8^tK% zE=eMOsJZ!^8(UQ`I3m=%xlYivBfJs~TB&TC?w`&OP%0a&6arv`ybq8TET$Onj+r#9 zN;vq~=hEJJ*I~Z%(L4{^r#%euUdBT#$<%ZUXa&5%0L|5PwKOHrT)YQvimK zYEyOJ7jhN13!9C;`&ruU*!Pm=qkxHpFm@nS>@M<4kZMt5LgQlkI8g1pJnZ5#=-*UK z^rkM>7;zG^Yexii`?}VWH5o0cX;;fuitu#;o9y`rGJ{0M8#z(|{oI&Pl*ZEAG>Udu zPbka=PLOoMBO70Z+$^Ed-fdrT8=4W8G@3<(p;TJ(Z zwwF`%9}~Dw`c_-;$J=3`rx}3~G!=i{Lt6LfN2=S&Vteeo7jl>JzOaObc)yI5aerbsVKXfZC zzjmosiLLV--(&ygV+%v^OISC^-|7y=(6Vaftxx#0chKM(G$L>Jzz~-Q9>8<4%K;fO zi*RmM^TOtqcr7=5pYc0+`f>2rQr1x!@%J|gHe^h1oizW^xQrTmZ#e#S_N8d#yRtO~ z_-G%tX%*3JsY~yF?plPc2Ur1OEXCd2V!)os1?CQM_3Hu{v)lkIm8Oq2oQ%RE5-~N^ z)k`%XDNM6vVPpdsK?M!x3~N74n_TbC?#LE?#*86;u{I6jD8H#gjgHzf;(TU&YNe)_ z&}GVCtQH-}Dv(ZmDE$4OR!=2fQT*Dh1Zfk0=>n1j+PO`0MT1DYi%_0X-rg7%4G#9r z@#Rz%P%|xSn z<)SLw0n$rldEI@Fm(Vh1pc!FR)J$neaJOR;?hlNws-iwSz=g~}mSrxvGsXk+dAMrc zkT*>7ejIvYAR8F`Xpf)06no6eZ7AW0R#J2w>qhJ`&Kd;772-ku$Sj4p)((!OYjy}1 zKF$h@@gCF08PJ&^i3P#>yS-~apO=6xqA6YYjkfdKS;9XWfE|fa2g<)q3KYc$ZbZF2 z?6)!~s)vG0ba;h}X0wv|7&B^t0i25ObB?EdWm9!uHw$N()zXRai7em4B%(`>R#Os9 z(?FFHJ3M3*L535lt|4%p1T|_{I1Klg6PcMl-B{0^$T`ksB*>29`?aRJ6N{m&TwJUJ zu*sonn;+g7mw-0m#Dup?!DT88H0njUv(y9&@Yh8{ zrA(_Q7$*fZ$g<8|wt`*s$?(~@9GZZ*_T7ntr--3AP&zQU%a?5+022SDF)H;GG`I1z zIjKrou{v?MYh&Iw0X;IjcYnUsq1?N2DETMzGFg}-V|#q%^K(`H(c0ti%;c#^%Xl64 
zTq&rHuWC7NtTmAT)^m&b6aVo{0gBzX1uq{0@&lcx26_lyvfT-((Kz$(QoFOY}DK(y2#pOls!tI)26CPss3|c{oCKX3IkK z?T@t|-}bBjoNm*Iu@1_mXC@{1t_#9tYO#9Q`nbdMCUf(MsrRSXMiuf zFMJhOGgY5aq?T-+5&r!gCfwkHg0y6fXIxQ8!d+(8pBya{DuMY~=R5rr=&teek~z^B z?lqe)wFDlk^08ur+Zinlzs4Z*b#YmbN%E&-`zBIahW(g_!1eYV$y1?4a3;aWGxM{MRRF)xE0h0QxWXH*F_wVAmcwmmstgWY_q@Yoo(sP88B(GHKmVYny_`A>E z>IG0~2R6y`c4$4_XVxbNCHJ_;lmeZusyP}y0^)EwJ|drU9*w8osGkX?YdM`ivtD^J z`45RaSK=t})?0NU)go8(uIe7H+QB5LE;cdLPA?ZZ*hxV)D-Pem&_+oQDsN?{uF!Z@ z+2^tLV2ib4t$Uo~1IJ~~cY;|09X_)VSw+-Z^_~t59!WN>Qnnrt2M%tWPk2*^_lbh* zPaKO{TR5EbTW>t~(+i0!FMUOa5>-U6(q!759)b?+>&2@=cBQgbJWx-;pa8K*Ce@A!f97AB}dQdwr(um zhMhexJv@7g!R36T)t_u_%N5l9)uC)I--kv3eX&&_@)OrnkG;>${G>(CtoUXci|uHq zvsuh~BI{|7!Zv(l1A{h?M+iL^r$r_inkV&r;=s5zVD+*%Hj2WttBroqhAJR9;o@iyis|#BZw&9!5G52%kKbz}jOTQmLiT?^E8#iH~(Y z0SWHnal+TYz|KX!dti(&@L6*ke+0DL2MxxlWMU7PXevymap7VbRyXr3SdFj8;yfO! zVppb~sl-BP$+eGFN$9} zspeUajzb}mSB$bky-hMmC-o#IIe%mmnOgmch7SV-_fxQJ-;!M9D-RKFTSL5{Sj zh-P$htDlC(2owJuU z3zQZe&>q*lD@HFx&(XJZAL?g|^WNj?Y)bC5IwzYog<_`F+odyI|CL%!yRAyn$s%=nv|z2 zjxtP!07XXiLQmw*zcGuB`~pPRFFA%I*QA*}NvZoQ=A1n% z>nRwkPLc?sk0OWUx8AW#3QL;)VWvGUjfkE*F={#tqE4RY0)CIgnk-`I$ZFp3C<5n7{UmQS5K! 
zcsFj*oqdd?Xew1N;<|7tl3p^S&8LO3*p-WyJ`=!#L_og+Hnxx^@na z0R_C_pAQjpk*2*_NBkf?^6y7dcHaTmS9Z|l$2%|mOJ?|*ZNYYcN+|$m(}*-Y6P817 zWJ>_FHQ;QYNs><*$+77)GmqnOSX_B7F9fNIZWVpol(nl=cAP`jU3-m za}@?>FZ9+`_w#lRC@sT6Ji6DZNeHS07r&qP#YKzK-#3Dlu?9b^%nR$)C}iasvl^&q zgpqAwJ^gM#L3Xvb7@UHlraCP_3F4k$x3O}5m8l%aSoK0q%^3zV3+sZ)Xz9fy7{_;&U zGFQeZzd2v)^)Gqvs~#5%5YAu*laZEz=bn5u-2)mt1eDcM=l$}t&$GHd;mZGhP4zYy zTvP$t=!n`}2R%O-Xq`WU7v&#{yvp-8OcTbAF=kGhc6>-$_Y(;|d9J9nLrKGBn-*Iu zqUC&G^X|?Jko#=F)gKp&vPjX;ZyNx1ZCO1M^pLYk&vz>sa5sXWrOR#$QfZYGiX4E&N@C+ zNFz!_qio%Pz6u;MuK=z9iducLB6d1$l|;REP;Jc%j#3=#fjbkfcF$9-ei9*35V8)` zn~q4KwQZQM;Aa77=6HpOywvV|dEW9~)S(`K$6-$KJ~UM#<~ZwOWKmFsmcbv-O4ejT zI9%55*-_#<#N8V4IQn*f$N;Iu?ZoD0=`sK2NzsKf$XjsTwR^&dw$P}!@#*)I|K_1r zH+)SeZ5J2RMPw~SB~8@+d*6CK9MMP@WoxdpH23cvR5xA_?T!}n7bohmp?f@;;mLa^ z_~hb~1*&}I@S#jcH5?V?0obZ$jZzrMB)~%hq1WJ4X07%_sycw+eErW{0SP(S4cV1s{B zja}rhU$J|_v9|!DMB0HE-B*-Zw4M7wLBzy_d>yv-#fcpiGPlIcfqfL{4>g{o9MxF2 z>if)HfvtA52AhKuH|-`k+aB&ERFyzPaNIKoE;vDOZES*(+YyH?L)vZpIK}D>?>u$s z7DcD*DFzrXT$%;_{z`z9?j5@m9ujc|bJ}aXj!IR&s?7>$jL;js!xumf%$Q*Rm^t_yAU}^tJt$ zJMWF^D}m!y=8k0`CXnQ~ktTRU)jj4FHpb3E(R=1+#~vkp1Nw0Dmmux;kE9%H+I%#X z$v#Tt1;e5PM7?)mxSq1>625GbM60E4FP^;xOaQ^ldXXJmN9~JZqU2vB=++oz2!%I3 zlx$diLSK`sm3C?2Zd}^-d6Ice%LMMlCirx$Iz|ru7_zgVzeU~r*vCU1_xE30(sIqI z)aXmFtcFRPq6Y{G)1?bDdog}I(mJQr%17ga7_)jN$L=l(;{%jdeSk}Bzpg`7BT>@m z?gl3tcHJBm!~;VnADl6z8E5;4b1+l?nP68YlwBCu z5B%`sSKlYwE`^xDh2jSX6i8)Wk}M{mXk%-$2upq{pOdEuda6V{#xc&?@aWodLl2)^ z-mwWmTb^5%hZ#;XYCa+zC}Q0+ov%}(#@XQy@2)OB zc=eA+pivnd+3KRlhVALyzvF(=HOf?mn+#q1&Nj^fIlSYBaPRN>F5_<<4ZhIw+TUO3 z)Oa;rm*tZ25y2E%(7px4%dF-pM3cUAM3a%wJ%*VSgwk|V^Hcbr%Mk!VnJ(qIVKrBd zI!(EiBr!4rh`O%XU+~KW;ae|Rp_6QVf(@tuRoX0{sAU!NfoHowCw`l;I^q|wm3=E6KmV99P>W!q z`f91gjhA-kCIGfr&;ie+ZY`(kecB^)hAk{CB=_`qJxGJ_Xupihjfc5ARlDruq9orh zFU*c6A~|)mm={Fv>LNz1a+k_+$DY~y-WqFRdj=c)s`*)mH4AClynDv7Z_IxxftOpE zz19Tz5Wn7aarIv_ zpeT0Mq(f`OpTjaqO|f%V5t*EAlEJlQ-y{_5IG!su8Bh-x)gHFaHY-%!A!<{XU&#oW 
zj(*8j?B*`nUp*sy{ijbiSD6&YPrkrsM-5U2N@VH1t1dahOp&?zd0MR)WR5Z{goKmYZ5u7aH0}l8Rxe?nCOv&!)^U8*SaIAl40Q+C}lybmF+4ikrPE zO^Z8L>Ex|KL?QTtWSSyG7I=kZHRNw#oy!h- z2&qL~lkLARx&^*DYjqm>3(r&zchCL^HD?R=4Q8|c z&ZHmtcBzLBSnKaAb16#-IY9jkR0n-^*Z;%($J!Lz^J}umh0o6%ED5X@`KZp27>O^u(ho@!G+U6+bm(n8HxGUwQ5>v~M?9 zH%zf|Bh^!x(hh%SyLSDCaV3b7oxHMw*Kn_PdxQj2YE51xkY>==kyHKendqA}a$|WD zSRV0Em6Azo?kM?Uv@Be3C%on6U!3F>O&vZ52Gz~@=FGafPd(y;vH=B zPX^j}BLo_;c7?Sf(t0Uj5m-djBzk3C`0(`a>FT4Q2 zT2&hrZ&rtC^AczhMe#pk-VD=o`9|M5CHFWTVXWV=PVd|PGz+ZV)M|;30t5#-o#^Q)zPq4P zNevE0^+O@-1uLe)nmZ?oKXgY-8SNMU=~Si8;)BsohcMr1? zi|Kd|($(uhnP>PEwH9mEgtEuBS<5IeEZ16U6BTKSh$D-@fD_(<;X;_g|Bfuu4^hPB z;#4$n`kVIms?cESQG}2oTKAWJ{Ek>uuo#1i^%x4jt>~fTa^_wsj=mx|yB$&i&e~%e zqP!w@6!AJqh1K*RlO@0gh+#oKzuo%r`zOy`Z~y8JM$w^SK-oL7F~b5WaS)@z&c`LEIs(Uk3x-PJDb zf=e~g4iGx?ov3IQp*;!3eHp!LKZ+lOq~fMu)JR&t$TonQOs&SA|MIwU37o8nN_dgm zJ#Pz@GE=5D*#-udku;zGM;SK+IIgaXj42g-L#fnXXNt_ri?{5BzF2R@L0{sd+_Joq zFAur0iR&Z{^qeaCsr4Gm+i2-`w?2jm5sg&%Q~t2ta47F6`8BEJ+V3_l6Bgja_D`M_ zy3|tY2V!iVAs3928H;%6|CdrgvJ<&pm7^A-{eZt&Y~wENRL*zln%8QKwyK^~#dS7t z?=%)vub$TGJmA(scVGR_n=2k~y>Sc@+HGgASnGA(gTDgvC3Ok}vz`Gm%zU=#o_2OE zb1%2dd+t#aaKd7E9G*9!L@jkRq}D&EBKGZ!05up9n(0>NO7jMFty==iVwIoIKb^~% zMC@gkq>$9c#ZZ3yRW{iW;7=vPIn4o05`~yd>_?-O8TluNl_AE1jIKD)m;W}oUp;0J zm%CR{`L=IT*i(iS)@EMKb8BC)&|f7AleSE0d@{)vfEQJDke9%&|MK%cEd!)(FE|E# zndqgIZ(xIRj^&gK5INuEG?No_#M$vF5}r%s!4fWP zF43P^WhH5be-w*91t7FdMmYM6>n~U=E`&Y1V)}f;9>@uA8{N#89-+T7dxEKo&R~Dn z&}!vw8Ham8Nr)xdQYRnN|C{t3?oa-S&m^X8RDWyJRvag2?|+PTm^#rLzKFy4S(s!c zs__}!UH#OZNL-+*NGcZDJ?EFfh(0*FySL&=CH-sn7OqSeyHtraELL9DS6!wsjC15~ zfv+bF28bp5DgqUC(P`*@Sk|7?{hwLGZTjj2#Yn9;3+d&D1^tnTkc^4TC;E%8f5S3IEX;r(AZ+)bEmT3+?~TF&!}KxBrJC$T6LCgQ@yEZuF_n`eW85 zV~$)KKmXQG+9m14;Pulb1We(AMG()zjxWVn>r?77_GZhb$^z349W=gvY!grdaVPEpwgW%$z_&=Ogw6U zZx}KC$=X2J(fp*AY@r~LqP45y!GGeDuKrKcl-gCH%aK=W5slON#w?dCh`On zAjj8OnyZAnudAC+pcDCuI)!%?ig~b&j>Vq`UE{00#t8s`DO@IS7?&&e(`4X!*t1KO z1cPm<3$lbBT** zWcojLuLl$o@@ypor(F()0{^L;omz<(#n*p+0yeW9P=Wgt<27hdGpGp+FK-R7*K)%~ 
za^7)2!ChKL7bLgR4@j3qzmu9e!dfKk;}outP~qG5un1V&5aCqLp5TDfiJ7lQYZo!E z|H@UD2D%>JVU7BoZZN&w1~0b7Nn~E912!<}eT~F6~IE-4&U9quN=wn!N9Q zjDTU_EmhHj2e$=TwuMm4?vPE%u1P~pHp7)5Rx9@1 z7-YjIm^_zeR^uu+J7E&yqBX_KSlHSaY3J`;IGHiRQfd5mMSn`ZFw0U@#Kj7uhC73a zRkUaf?;i89e4|_qgdeqU^Y`%n+xge&UHVevuoU3o*<=VRsejI_%Rb~k6>I|;k0RqB z3S!M3TZU5Az~n|?)%bMZ_?^)#1!n+mY;(RC!fJ-OPvL}oqJgc*rsz#qFf7>^;jhgl zQC7)c+hiGUw}yrN)y4{kDJ%qK5xdMl>n}o$nAig*E5G_2oN_dpzX-NI)>@`m-RH#@ z+RCu}R~GdC?kzsH|JRET3JKb&35Cyu+V(#AT$PwLA|)OW^Uw9YpYE6BZP<%OW6?Jd zW-qYI?q3CtSEX>h#xxZ7$s3*&oM<^~(B53A8y@}hZ`}C4+yQ=t!*P?WBv-GUmRlQx zW}^!OfmHhd%+Yx-S6GC*&&Fb_Ju!1%50JSus+CpB@@sC`>jbHM%;BceX&)KC(8t6n zvUkl-Y(Jmq`;CPef>ajrUu3-fR~Xy1BFuvvOO_g{ak7Xma;p3997aWhY0YhQe(9e2 zYlAR1|0v{*xOx@{hrBJiG%&^1+tp2lr4|2N zI4r?`WxMMpbU5i<84>L^&~-Jz;Dk$42utRM5!}{_f=bs=qxMThin%7xvXn!!7YyX{;4|6Zr-;FD}5rQ@fH|GquIvxmiYY}-Os6Y3eQ@U zT)kh0W-Y5@hsG^MTBs7vTZ}2>CJ|(!HevGHCCGp#INZGgTgNEL5DftEju zkLzv6O~g*;5KIj2e<@a-yfNr$is*-T=DP>i7-Rl{a&u{Dd0zZ4bt1TC*DXL^o>_v1 z!0e^6i&HuDBh}%>f!7c2-<#kMqTnon?cFzCe`xKy-~zTxIDw(!aF&p)(wvlcF|8Qx zkW79PdRdquT%At+1U1QO<7&;5xzA?EZ_>PsqE+l>f<9nyc1>UQaQEhWBnV5rt;Y{? 
zADQRh6aM$4_|Kghrj0~;YCtZC__B+pRFAk5S#J;X_W#_02aemxV|;Ik^ZH}zhFIO; zaL(;#T`~P$&oJp9A3gDRKSsm0Krde={w;jJLAUPl@yt;G-^RiviIJo64g%b!~eXzF)y1!KNK(= zDD6AQb559+jBac-vMg;al`xQL5?6qJcs(kgRv$~kas?r}eCIPv39f79BoJ${j^cXM z`}U_ED19wNBV$Gz7WZDSUDJ2<_64d2H_}yB)l~NOC$GqAklA12$wawJyfL}Yrt6Tu zakYXxGu7sP8=~P@udr4h&)ox+F5J)k#NW!69GLTsE@PJUu`@%$#eyxCrncfsvm6Er zhMA7;_6$$NX!v2H{=to6i3{Lg=lj^L9k zvg*o`Gnx?9u;%CQW+t0o?~ImwMPw!Ur3sMv5C(B}?yG{$bwVdHx%`Oyl17lM6wqRqFmHVC!w(Fip_qv+WaYQ00-78Qw^%rBg576}9}==u(d_C(ea4af zcYRQj%V>BPKC8mKTNe9wcn6AZ8cf(Udi7NaKhOyG^l>{;D6qVTpNjF0T6vq8{9dbD z_+0g2%hTG?)URD8Ny&sE8I)M&hX)H-T`H$h3stq~>liUS@A|GMH*HOKg{<8N0%`<| zs|IWcAzSn-eRkksnSY6gF2P-n^F>Fm+*9>;u}4|4Llge|r29*Y6Abq?57-GTzyZId zw;wolo-wzOM|h*Iw-O9kduBgprj#oB_efYP(h7`2CZX7{@n8>VRmfc!55{%{5>>-6 zo3A#PLb^(xp$2GEVWAO2jfk`nnu=!j5RcY0SyLR63~9;)1P4$rGT%j5z^n~WJ;2V0 z*xCz;2a+#PJ@l3I_^(OCfmn9V&e_w|A>mPtKN+ z==Y*{EHfZ78X!bkgWA=DR5u7J`ww*UDW~(Wl!#*P4X|B;O5cQ?d=A-xJm{F4!eA;&(s@n@O_+hevF8 zJ5FGU>dy^ePDwQhvjz7xhVBRo-k0KJye1~2`aN^URqyX)3#601@WW&_F80AnI=+nD{FLTZJASi?Ikm$|6Cs>yVMwtODKs=t z=D#9$2%~n{2W}}Y6vS6`*yQtQm3m`+CZVvXp{ZGfJf!p~MT%Hv3^~tYO>St))Wpyi zSTl8)5a@K;*#YGhi9JU4u&PH}7BrQFC5>HVHl>#%d(z=9htyZ+hE*X0Z?&BjasCzJ zgrqsX4P)<7RcjmBis8B`>cQD8Ea@RrA%_7D$S^`{Ud7Vnf>R(2?PzozZBh&52?Lral!R-X%F(;8kYr^Ch$DnONev2W7Z84TjH-Y5iQ z7w2sKJMOgPc+R!*oQw9^+qpOYxO{ko!F5pd8rNMa027N(a>r(edpQg?`WlN|u|!d9 z$J)U<0xj4KP5sa89agKaxEg}+yKzKKBI*YY>I%x;*Wa#}=88f}4NEu*qgxlL~dpj)Y6wfO(v1D#T<ZAi3Nc*Rqc^ z7;ZL8!hbr2XZkCo2Y@bwbosGTQ`2VX#dsZK&iavRdplMCT6c;oUC6>mFt%M`1Jljh z;EKbc5d*hDR9lubX<~{* zbJ%%fvVLWt08_P|MW{t($Ex7@P?5*bt0y^t&ej%dw5KLINE=}*RDh+K6w|ZDCWJdK zI*a1mmrVtXO_73GsV065kMHD@5_xbT7MZ;K$VO&&W?N881E|uuSiXQmw4|V$s zlhu5^`l~>$`bs>zQfcPy-IQ*L41@J?ABY$m@)Us6K6=bWd8ZEU+!HIZaL=B0C$C#0 zhTp27T__2Z6>u?(3Zh2vz@6n^TuqWsBOe*(!b4F`4nsn5lI^9`49ub1X;T`mqYZ|A zPZZjrz_I5e%AtVZN~Sx7T9@@GUA3CssEW30LHmdlHMh+yj_i12o{%^HL-o*=WkwSa z8;tDx1Wee=Hx(=Z9l~|CDH;2^__E_>kl0oz27=atkZuYT8+~?Q?dpGps$Zq8vH7^% zDUY!1Jezz?*DyLK;VA8ZwB59v8Z8#Jp49vI#3wzVc~_nIYD{pFm^!uc25!MXPu0*? 
z6>qAJ>%HCnanOgD%pk+9sRAR^fo4lMOecYngc6$AZd-D;dte#`=GyPV*v`X6q`4}P zZ?mVetsAV2*q`%WB|Mx5)h?~@xq%+bqV)d$m`p9Fo@>Wic_r&@vs;zrl!+5-9klrIN3Z zS@M2n5SMVC2=s(a%ac}p%JGuKiK0K}yNo_`&OPKG$J&m)*yI7}j8%Ls14E@l5X#Pz2j917gQw|+Q@?9sS$kI{-|4!z`D)$woi*j_C29I_iylK!EBiPxI-&_p5Z#( z0V7MO_IZswRIrTUfPv!X@KUb$d+#w(cv{#OTC@<^gb#b5FxpOEppf9>FsY!UA$g|v zx%6E901Fmu2S?V({Zs{0-nu-jn#$+DuA(P(|IOwthb5lXJMg7l#=!}?J<*qMShYu$ z-`(-^MoVPw+S(@EiXbd&Q(#V#S4iMQ#MNAB%6)#LQxQcbGX@lav3o zZ|S~A+mN(KT>RQUPCRZ&ljiy(fb1l)dt-XeegyQ3%X(z4fV5_5la{V$?ZcOte~uw? zps;xtg16&Da!B=4c+^H?swf$y+n1-%Q2Rrn#V;ddhjZ;X^{}(_7S}EPer@{^fbY|U z_SepyE*w@ZWbB$w-Glmpo2vjc4%FItac8S7N`qQSM91a7TesL_1qYlA5#I(@eMQp*nGKM2(DwR1zkJ_8G;RstsOlgRc>1fBPgfWReXS(fT~e~LLUUze>pNua zVqj`Kw|qiE7$kglL3K#nW}<1EDP|xcCR8Q2%rap7VWF7={Ch29?D&bXZh1&T;Z>r~ z8yiWkJ;K7@Q5LePT8{!N*zhye8%xsv&|D=NK=x8S?JzQ9{Tw3z41O2M zR6mRdjWuEN(g!iJv@O&R{J#E6Vt8@c$s&jm;u@BFL7pk(gB~-Ck-oBBvR?`MM@{1x zTHLTsVY%Cl+E%6~uhctAf0WM6kMBfeF8ic)n57vC4Dyl3)Gi;16UL-Vg|sI4y7+*0 zN=vanuFu?iv8fS(4d?t}kB+0Jrye6okifRkaFX`=F`qm*q@Tp;%BJ2+Z;IXi0eO_t z0a5Ke9xy$Q(v?D4F$Tgrtvf(>TsPeaH*Pc5e9vq8fRmPn?u$>0NZFeLBbtiLc#M`- z=RX;I%j{$Z4&yg^(NE-8++(dA`{JrnI3=W#(}X#7tqqjNCFDz={9T z6<4k4MWe1jJ`fk^@1ak3=Wmx?LjbD$&}YvkI49M*Us@hp9+NQP`@$tMf&I_ zr0aA+YIP}c%yq|phZn~X+*x=|5)CAovW;$;KnfAy=w|b%@J)y0ayj33luyUAFvQ01 z2lpZU_jcz73Cke zBgZ^OGt^`9V9!6@sLE}=&A$`N*YP&%Kz-M~oQ4_39P&C;lst=sO|b$=9sK;U#(OfQY^28Hq^ z{4NDDld&$z^cVInU6G7RB(?P6YlYHVPFY+5bd${=^fx{R{L1wAx+XK#o*p^LVY}Ig zP*KntqB6;Tkwb>U`^u-QkfVtkm~`Itpn#yv*CF=5;hW|4o&CfW!{C%J8VFr3GRW2}KF;x!IGhw!TRN^Ukpx`*fWhY)b+(!66FX;lhcoG*J2<^B1^^`h&DxF zk@tM^KGqzmn$i^*4>SRqt`gl2au84kqdCsI&gaPvGX}^NXwXFeV3yiO26sf_>w(aG zJp}#xPO_r=ESi{575$EWZ7+cCUwli1K$|7V(;kAOyz&N^y^PS2nrhj`^EvCg0ba_J zcqcsEcxuN5V1$xp73D(S^t$T7Z>CB;r9N{@?+1|Y-3M+Ev z(?=0@0j&SxG9l3EeD!zWQMC&Q5jVYY)92@h6vC{DoG;qjF-bpV-R%C=m+6hP({uQ5 zfcUQ?H=uB^x*P4B-viZlosw9Y*AlIo+Oju{C&~1&3}c;4jV1jsbiOlg{>fo;3?ATh zE4s7fevC3aN)~+ZhnBbPGk884jCae9$RShE3ebH~1Z$>nr%WLALzO4!F;!ZJZuU1*vP2ee 
zQy;Px5kg5+o&O(6R~Z*Y`?V=WKw3e%S!oHSyIZ9ij|Y~AA?4sMT#^5~%H}rR z8qxmOew6Zi4DCHKkUXxG)wZ>oWzx2V+38(FYo+L zr0y*A;K1TxjRpO`ag^^))|}#(+>cMjqCXR@T?AZEe{jvI2q`FjqfNY>$wgoPeD1Tm zP`dulw*_5u?TIL|8#R@hmRz`@<}t_5gG1=C%4cco&rDd_uF&2Etmn7JScvi{=i4~K zR7v)6P(c&p#Pf#&{QG72&JQ!1?H-#D@!=b#z4CW^QvIhw10cE_L=l3=l3>WH5%ubHH zKg)~NP@uyPHg!*$;&$G%RcZvk-TYTqSbLrCv%V*c`>ALrnQc<R9f8+ZWLc05+P|g1a81x#V9+v**vl_l6;lG6 zLI%!L9;{jI`6B7l3Z?s7K3dQ>QApkT@;?L|Y6L&|8id*%1pVXWX6ZnlUJ;i;8t@gH zlO_&NF0NmdTWfE;iQ$PxGiqL)lcZ-lfA9m|WSp=pK>uX3V@pJzR#um>Q~y>7sKhRL zK>nwV)J^sH`B)cS{CP;Po8T^ftz8h)u43s)%_c3W@hZrWF@FC=UOtTgi7R^>Z$k%i zxxA{&&&$(n)c2RKN%$9@yz_+D>SQ`puX*R#dOe0$S29)&bx7C`#I&(4`dpP%sBARl z1a+o*nr#}zV=59@^uJRBKT*l`%BcxnQLy`+XPiC<$o&wOu^j_$yL>60{@CWBj*B|J z#G&MSY0|az#dbrUuZ2d&b_?;YGlxB~o$iPPa6{*hGd6v?M~GDxz&NJTL{1JA0ca8WCM`QltDX zDHp^}o=;mvFYgwtBiJH@*WwK5fo}o=8F? zR*SWB%u$lN*O|0>=ivPvTwjQ-Gfg{p-oKG26Q|La;-XWhf2_h|bPuQ>dpu4iD{w_$ zz6y`;8~)mOyS8DI@GOSR%oCLz5610Tj_~~$BtIC~K5ceX@MhkJ;}XXox{7cJ`KEAx zx+x;4S5tYsi5}hwD}*haO4x8Ld9Vpzq%dHO{H^z*yU@gN4Vd+NdNu+;Iu;Kn1ZU4U zE;eBNhmAJ|J`x=N0U~wfgOUSWUycx8?-f1aUmqLlF??BxL4bWlwk6*|_%n9pR74_4 zKPJp>&XN{bC%;S!dlhusLRSkTGqFK9k$A7wpDTrmY4VkZUKf{#$F}B9u&L$<`r_p- z1OpTsEn6>$f6wJ;kbMf%jg;Ed_IGC7@w?zzGkT-_y?duy1n~Sa%`RKg;2Z?nLZVYP zw9?z;qK#Bzr&;6MFjqXm&tb?u4Ow~0Bj>^H;E)q)&2@2+@SmWaQTCC!L#0QT5W8RN~R}z*0a;vqE)Ng{aRkez-s)2#&U)N zwnP>3TU?4Y8W_UNo|356`3)m4+Ie@7y9Y(tBRwJEz?&uu7#VsyzQv1YPv1FDa#sh~6tV=4D0h0LeWp}WJ;W>QhiN-17Y@qlP zW}A<-wsI~Gs{#QWk4>PB1<*w*TAN8bcD45aJ6}1CfDgI`)*!EKr;t($Z>D8_&5EQR zdgJvqW(Q*S{^*O{ir-qJ+PEyXeoKprKE;q55?GR4OJuDOwby|)Oeb=8*7v{j&>e`1 zUeE~4*p%k{-n94+yOcXz>*{(e7Ut7Im9zWLXlFjrsc*~KE5));fD|FiDS_*=r9_>H zzVT$`f*9#%;(HM}=HbaO_wHTIv^d!-7Sp;(Ek>lTRV~*WKI@U;$27wIjC>joLZ}(q zNy^bQ{2%dvUQS>ePw1hM|hL!r;TRzN?QjtDQF$&~o)VexLXwoz4ZnbcFa3*A; zj?GFkF1o0PNs{5qoNqqnpilCrr<(9P{m;y2$eU@ozQi2DEfnX(LAdcA(=- zs11qMoEs$TK)sLm=PJuh&)(uPcg?mFTfElMjs1`z6LShb|1bSK|H4%FuPmxd>)Gkl zQD|UgtZR_akjsN`MsCJob(BaL>lYeI*t{2M>+DL0wNFV{tNdqOOx$iZD~eO=a$Q*B?!0QfDM>o9eOo(pMIqbrMxO 
z=Uq6Qc9^_AXh=q@ITOC{RoKZ~4Lgt?1{9X)ItrHWtmj~~A^431?? z_N<*sCbNf!#X=W4;(Pvj`e9s&0*#=M*}Ry^qQZmN3jFVlmU|ybK6cR4i1!jC<%|nm ziM3@@q}MfBz;@UYSV<(YEqn3tg^?l|mybqLH4 zTPGGA7OX1xhZ&)_jC8O>8+u@i(I0Ul6P-mFC#*{Zp68~InisjyTBjql>@FNh< z*yr6Ja=Sv}2UF~miqV7=AE%hKw=oH=031B0dQFn*Ux^jRtGBO(S4!(wLVu&}8MjE_ zKEyk6;jxGWmsNRD?JfKT*2E_W*?0}Gtcyzs@ss8=aZNW@a2Dp!4z1w0 zf>AMORllGK#rW#XBnA4e4SM2S5M$iMvn`)x;G^1hh8W7hdk?v{=6MY!YI*-z*AZHyU6Q)(-O0QtVLaM;Ma# zJhv5n)>&P@sGIDb8B-Bq>bAZ6y;c080zOETo%f29F2CMVM9M~B5T$1>qmcrB7yf*w zy>w2C$LAX{uAOxAf(%x3G5PxfL?yzIssRFLne8{AMXzF6^`BP>ic?`3(R|_#KZ_hZ zX(FwDN$1*E6e?Pdy)XSd7U!$<%Y!Pik*7krh%^yXT=sd{YcU7r%#EL2?x#^bv|y4Z z@|tu>glsDgv@}xTf0nYc>}3E_|DakZ{>{LF=Qh{~PYZZ7fR_fXRyG90NCq$n1T53T z&(?HzVgpH(2bdIAv zPVO%%A?UiN_a<~3DJOE^EbSgF+NP4OnZHb6uMQl35`zZbRq7e^#uT+_<+<>*zQOvc zKo~F;7_!#6a4ByWhYHXO!{Xi0I0$k9SL2l!KJ6zN2bCvKuBp74a@WX>|#hN@Q2aN?qqtD>e;}+i#Yz3$NDUn-TO*f&T*(X*aKjUaj}}vj;D`s44AY@-Tf0f z>_AjFjl)E-@2`X=jLZcX4n%*#Km0;@r8wKeiAS|v4>9#oL_;Qkp88TJ${vaQ$RQA$ z^JSNcy6e3oDKgPnE%gmLkpj&Y4__ds4{d}tJO}x21_udeXi(r+5 zjwazys{*40=t=3Um)Q{~J#d$G%O4&`tz5X9oM{LIoezHX2)=|)-IISlgv+r;t zC1kd>v!q#4i2a;baafd`3T;yL9+~bqGqXC9xIbGkpm8KCI>A$aE{JDFU)whQC(b3$ z^unk24$M{`(m7!9P}f^-6-&2;>8ViMGPeF!2*4KfR=Ld^u4!=QVD)txA3T=Fi&RWe z4|}UlF);EY4bSXeAxfN7v-S#OD}ke)&p49I!kZyxxWXbChMsT$I=?)$&UTT@;Nnou z4;{Q5s$*@^i8G3SE%HzOB1#_W_mUza_I+CXoag%Qb^^dG6#<{xv)&`Qw= za#bn6?r-Y_SX)0|7VlnTCdf&#&gv(w*73FtkG5WB6p&VMT{o7 zx}2rojLTFFW_|*Pm5tl1ke4Z}bl8);X zz2?Mna`jwj#}O~zM1QslC7w7>i!l9d!Ov7n-8_e$FMr$wDFR;lCCj@o-#3mCH{ zNa>~4JyIBD&brF-**gS_ssElQXEE+vvuJ2Q+evx9n=yFPpkYCelmRvUB>5V4o8&9k zOXY(sol#ydXr#v<)?M|!{E0?kdnC3lTxfeX4;$rou?>9B2NBR(@ zrHc`bi8tl&t9Ip<%8p4ah_tmD641c-?(ePHIdX)s;~#7HV=js*VaO}R^wN?mDeaMr zVu#A5nVo`bkMv#N)`-PM`kM6HZQ8cqN3OS|zKkMRTj>q$QN(<7!M^!1a@iR3P|b1M zy+*ZgjW`YB&p1)dhYG2dXO>UZl&W*)_@%Tu`+9F?uLtqqWqa0a*@-%d=0Lb%iTT-# zklw>$)jPebvd^j{7FnCuL-7fveW^p<#jNaIMQKYq{*8P;W>lfK$#>Q@oS!dt6X(f( zr{y9rP}mpz@4i^Ke4Plic&2w|3JAP*+W%zG{~rqr&5$5&cosca{J!ap(B;h2A$jCj 
zalmUPr`4e0J!>ng_2xpZYT)yh(&^3GvvB3jC~9C@_A~kC6TIy`*$*ycMK1N5hX=K0 z#ED~YjsKG>W{`%y_L+J9^q{D`)7;jjN%C!6n8sFovIk;Sc0CJlB^0`yF#6LzrXNx&_BxhJ;$WoXd3X?Z|pz8=A2nx1<0(1Cw!E*YOT+g9n zw%z-=0@19F#yR4lW2mjAtnzG5Ba3*tdf*lM&XKw<_Q~#4>~w=`0pNh1H^4MQ!=TbnFKSt>D& zXnV?I>Q+K*C7(&tfSsy^eU{sJUbHy9%5(>5$~abX#-@h>-e(cKeIXd%6zg&y=>38@ zyvs{ih`U|c>XrS(y(VttX-Ju*o7LN)W3HUXNw^3?nrv zZGI=@(>LLOM{TIghvp4z3cN?BuBM$2Rxz&g?eMVAH~nbT3{Y}WYHrG-3>ItnuYX+N zsw^n2X%BC+7Ln3&qiCYwE1R)pFgeO}P35CEv}F;hPE%F&&zNFYJt$@c=QWRY!XFfc zrS!F^ltqu8#LHcY`i~vYDl2DUfJ6XMmQOBckgFl}LS$OC)$D(51jR%&zZui;3`TnA zq4dJ7S^7`Fb)7Q*)zrBcEg4MXp8y3Siveb_6`$F4pt7+1`Wuc%Y=cutH>q8JnItLw zi4WYsY3&GjA!~)}26;0TO#OGerAA5Y&LVE8B0hpVe*LxO^`$Lac|WM>Od|YQ8876& zmcFr`9N*(=a#26Wi9astI<&Rz8zC6K5vNUCJhcKa4_xzW?$gOZu4|Qe4FyDkSX019 zRxrbgBJ6-en+V@R9!jZ&jIC@#Ymv1P3S{H}t}e!;_|ygdk8I<8&tlz|1;xy>4E%{3 z?gm5j=<4k_8hRAltAEc02dDVhCppSg)bcIsJ*Dmac20fIUxUskgT zZW0TMQPuWOukB($5uY3sL*^(rBF~i5o(*sO&0u>IqQ}A zQvur@>-&vge#Qsnann5SBfiiGK5XS5CZEe;8baTNaJ`{NCjn2tkEGB^JQ6=h8p3o! zYAkK|$)QpBb1R|}%ukfks6?Z*xYnIW#=$caL4A=h(`q2lY>^>iw ztT&5iW~nr;J=>5PDc%Vl$-y!sS1um4yiLG4xgM?ii&sk`g+}tcXqU? 
z@|u5=Y<~sjH$c68aw93bM3u0!_zSb)F<2EMw06~PX}Zl*W{;}U{w8o%MGl~<`f5}0 za$>EHd3@Utp#PFlZ~Kv5lF3(FRHzaudb{yhGRYV|2`|o8yOq=8E_m_7O+-Vna1Y5_ zAVXD6l=Rs>oxjZgUBz7DJhTivKO`ta7TFf|%304pYG{FF3<|G*utf=gvMg=CCxP=j zZ6<{`sMgP^NP(j-gqB&r*FdV`Hi`7t`i86N^Ozb43_v+`lbkx@4RV#|^R8T~I3G!l z-F@D3f*ARno5{B~(lV4!uYikhx+>*?GhQoxLJ_z!~(lVyx;WclNzk%66Tabjg=yV~t0V4Q>dxB~lf@@S$4 zFB{KavNa`f^t7LNO0QP)axfkE+uS?DTg-;^5-*L!78)-rqW@bxH<&s&`#hzWRDRH> zSB}>5OoQKxbU=agRkhno_Qgl>`u32hCkH9M`FMqWz3c{6&}9ZH-nEEg)>P9xxuc|I z#-dA~#|ynqLlOK6KyuJVojC3#9enT&T7-Ss@O&Sa$KB3l@O8Fx2&<>uN2o41P>^u5 zSg!RJ6O4aSK5T&9p(|YLaa2Z;Iv&5}F6L;hZ5*-J_vi{SO;^Ndy+78?5Zc>fQW(i9 zj0cg(hwvGDrS-FL`9__1<@*-f!mg(!tH0jLZ3iQpj2kcj&~MNbOY^;YnhM41;>p_lk+~BCo=Hy z^r1$&Va(MIF>3h&|NmKfDm-|Qmh9$Rj+2mZN-&Ct=*jEm3+Qt&nP7TUDlPwWs>;pU z^B6e**grk>kr-r&D{=D)$&AZb!d-S3KpVCu?HF<7i!}rh7S{jtzTUPQZAJL}6wx1p zMy4@t(i03{4wi+pz0KBi{j&%}mnF0plr}o4hKQB8~d|U|r zypb;&$AbD=CBmOHM&xvZPWy3eT&OTcuBLv_31p~1)CdeL zIIbnIp9!0;1%^egBV@ia(QgGDrPd3Ub$Og7|N8h&8U2hdI9YecpYlX_ZvSl999;@v zp%R(muBX4vN=dHwwMAhDx<7~<#m!3N5MO?-kLz=Rs=o%&F-5w%2Wr{pR`0rf;c?C% zoG*}~jouq?-YZJi*uIOh8@Qv;(U$h0<>1O-UV#2kd1EuLFe}!PMJZGG$t&3uX={Zz zBaF@+xBR#Q`wiTO?VvubGzB)a_RWvU{kZrr_x{J8ddHfOV!Cw=U_3uWp9^BI;81PM z4Pjqs#PEKaXSTx~+d`)oxIVUx7(3$2_-`f0N0TsmnU0)yH_g#g`g9n*ad_Z#zvhR! 
zdQRvV0PQ(r+0BNBp;Q$PahpOp}qF%CmP(jgZqm%!u)UcVS-p=&V#*b!-KYa ztsg#5*T36%jwu1aLa)Zu>OcCSr`FTOk^8=+Y<}ce-`xlfJ&Zx4M`2vf3Z&0nv_)O_ zjGK~GkC+#He2b=|EGM{@%%onRgA>wg6`fYWgt}Fiv!$M!Oa$YEVfv zL6rk<2N?nkd}8nWrv(}NuZw2oYT!G4uH)NHFnwR#^0re;@%DF>&&jpas7by_-qF6l z;6rz>x>UlTKT~bQAO^#wC#!c43tA^65w;U_f+1SyVVWu?Lx(mCGm3(Pq^wXZAI;%0!!XRq^PZ)utH!ac?}>$n0)t8!@2&HDC0zFqDxVuyo~5B zJWU5Ki?2>HtYvFzx!=9sGH;O|6C_nn-a7W73WRo_Z41`>?&cAG`L7}!pmt3>_^o9a zAm@tNj!htW33_b?wyk$^_9iVW+=~By1engjf#P=sD5;CQl7Mww_}`TM)U_ z)kEJj`|wLHRL;sBfg3Q_mwZ`Zeo5?(eVcgb3;uQ5t{m!F{uQiebdi}0+2Qz*C)CmQ zzECg%$3>#W3tR>|u0E^%Y%&5*;*eK`02C}#bhHpm!h%d%6LIyR)0a5%xXjEo+_Kjz z{8q78YHP=QK$FyPpP5z8a8#Usv%kn%T)_6Ua_qor>4*2{5SH&ojt2s|k#Lt^ZtSWw zoim|r;@9|a5ArRqUHH0vx$JNy2otlfq|{xb4*9~6jF_u){D7WPuvpvZDQn&mVB%$Q zirqz4b1>P_C~{9V;iP8X{N>7QZt@7;Z4qInNCAISZkSl9vyNPIt#W7-AdywAR{Ywg zCqffTZ1#h(M4LF%zjLIN*QmHs&;1S`Tz9~kW8NwW7QV9i_~=`1Vih{-o(i7Lg~F4>Xn?6I?_E`C712S2y9hPo`;p#XGf(h^Xybq0T0sZ!aETG(KUk58(!Xg z`i;oft)6~>9EvD>*Z5|0&A*C4IuC3m`cB|W*1%oMv!6$BzErmL4HSIa0gXi9d_uI)W2nVCNidgnyonBnrh;(58({~i zq_cWKkkuMuE~o0^6h^+YW)e*67j^|uU&KCX9H2R7Lc8N8BlZ|@RF(wZsMOV+& zq(v2TJP$AP{!}(teaW1-e(R{}SVB6M@K@{)S3J!JG zmTeSZ!5qG%La!8R$0<^pY%u>`V(UEY%fu3`S0fjG9=<`&dRAcXocu8G)-7(tpJGk@}5zvGV5uwzG=0IZQe zMH?+49fMob?XS?@QEn+ae7lXEUWhI9WVD`O+mKNlb9_2_E^7#k$T9n-!YeduWWYEV z@iC;-4y#8CncYCtroX+4Y>wy~JqQIdTH|{}=7d1?(Ldh~IL}2Nj(l#rqGixMw+|^w-8uB z6$%7QDpFjJpQ-Lm-F3Gu39F4wkvC{Y$iU^pLk;Z&{@BK|SvP@G7E-XOdVqEoN6YSm zhoedezYSMR@ZR3ul_!5k8daw7lmPkS{S4fmQTfjPUv1D;_<`^L>isM1#8V~7*AH8M z_t%KNDqf)VcFH}ZM>6C%Pl}t*(wC}o<(+BwI69q`=%wXhq}Rb@7lHB4$0#YlXfWx5 zK8H|yYaH!cTK;a|(;qCtzFB~RMBj^|(BAiKWT z-G{$>0y(}+JgI>xZb&082}9@^?A=2n<$O^x_p~_rb=BFHC{nUw?I-jcdnw?ktwLiP zRas(r4u=;C?)Z^2U!n3xOWX%QH#^SUG~Qsj+7QPoZXWsBskeeQVD$9f6)F4mWu1TF z<@+05z*!|T$8&rrVW?(?voD&X_-n@E%ERxm?73<%*-(0(2JitbuQ=()z$9Sh{&OW< zLC>o_=G6JNO8${)*K*RrTbh_*jn9T-akoOY0q_DsCUS-HR|sZB_+%dprE15m~je5a9WYPQ+y)B z9S`GOKNI2>^{I`?FdZTD3E-q&>L%=o{zqIl#QLocVG}vq{FspB zPU1I&(~TYcSbvroko8`PH2R2^lv=B?r%%kzM!tO%#;BW9diWdWOGcF!Vd3WK`Q=B~ 
z_4jnGr4A-G`kbB2Pl+LOm$&Wnn4aVh&?gms2x<7sc}}IK)p&y8o>6~Vc)CX*twP2fn-}%2n!Tu z(G0o-BFl6aZD0L}qH!AAfzjk0Rh?2fg6Iq%PTVgn2rWne3Q_aX1`1zItd26fwyfM}+dvKY{gpR1w`pAQI=zqA*TV7Zm58>grGUc3< z0{*bGbTfp>myixVV#)@Jk%;o(Clyv7JX5#fs%uL*S`>U+H#47A^>4J9l)g8SyGb~r zi?#kA+4+D2wZxq8aGAO-P!q_dDW{1+(s<=QOSZ$tk$-D@5M}B`=-TDKXO*aB`bSWCSc_T(r;5ETwU)KpKW&LI;(*D{~a#v9@O=z_l6LOeKG5 zX{lPdW?aVhLyr!SgqI!S=zpR!pyZ+-lzfG2A12CA^!`4646Bx>9}&21Xm$f^gAf~Z zf9VP*O++2EGLCto9?j8?)l{a+1$pS?u!l#aZwKQ;C^NcX9MC6CQ@r~ZH_$=&5+a%XZn zeGT&p4q>!vzLDz`@1z>W+%$8R)#x{#VfP68(7wjjk}9#p=iD|MP8a^mU>OBi>tXAq zgSNG7)pmT^IW)jZ-?b^kZM%rm&gO%^n7sBsJ->$Wc*N&KJflm2EB!A(3;4yh3DF8) zuIMR~T!-YCFMnl1-d-t%ZDIUI&cHA_wL3hsBu ze?nUCP@T+s0=Dd#CfH_%mK&5}^Vmr;R}?P!HmZhcnus9U>#rN7^B(ww9JR|oIQW)# z>{t4?pn>3*@~PA6#|VHepuAS8g8^^$FBgd4h#)b7Td`;@8A1vIQ zR9Bx;<4JpB;Q#h(K~;ohF$_t`5h7$^=>wgp;dH~QL$H7U(12Xm$%S$+4?XU{p{=nt zN`)z!gjCQh>YkYY1#4olln4R8e!79Js)MfpjYy$nOK#rYp2AFZf5r>P?#cX)4q;mU z_<^KL$#5C4RE8Hcju z7Y^SofF2ZAbAZ0Qtyef%g&l$sIC#;d>9ApS0=6(q8=O^2Qwbf%D`ag~1r=KqXw_f1 zO)vQ&?05BAh@f%DGw8j%O`(#E;b+2Pdrth>EHk->OMvJP6uYBi856b!P>ZwC(u+ePX>(OeN4&rM^U;)%ntn zq8Ue(z%c+?N@PjYP|GzE){3KwhY6fE*c;~4YPcBreZ<(5YCks5p12ma%r{((;GhnAYi%2FCB;y&2=zw7$5#=(?=!-Pl$&#f zWjFHEjME~rPND`ST90o>$6~bT6RpzL$#+j8fe4mX!phLn#)O9jj^j9$7mil9KHO*^ zNFy3(uTyyu%SDXpQyjCmJT}5qhAonZ) zI#z{i59WChn!~Xs8|u1t{Ld0Eff%~KpuBq>3iXjo&lR%$JPlY>Xu0xYBVWdFmSdZx ze$&-7$%=1%Wt%;^dWYOeY_8uC-E3G0+~a`Y6h;HSdN!{OKE0N*%c}NGB?Q6_K#i!? zlC!tY;sfItfAr3kic_BEW6=- zs2cpiu-)3x%B1_L#>XiXPt05N8hHmN?&b?fr;IKy+;xaqYh0bK$+So%gP4Qp(CDc! 
zk&!OdLBF(-=kZhtbB7bun-u8!RHouT135(b*Gd|MsPu_2jm zR#uGlQo`gXf90u#BV%^f-NoIRQaL$r^>aI|Y8_UF&JBo$g1@AHpN|}Cc51C6$&V8=Y?K?eErl7mO<$sa0`z zx~LxYa$c>CQ4gxJ3NB_-Q+3> zCGMYmCe!MQA@y}xfjB?*+vlv_w`o1T^Bfo9yo2cUZzBa z-#z7#JOy40i?ahBOHKcH6QLiu?z;+*-&9zwmSy+M2N>!ZhSk)y`3ng%9Hf99zvB*tPA54i^!rYkZ#SBt0wN=hHs%(r&K#_-chIGC+E}WY+TTZb7T=i zJ@{tO<5$OOvYnKShfHWjSK8dS_vtK=RI=h9-*mzLO#2gXEt(%IFx;1g zY&ky*!0QhXSTWBo+(f%6_fX8vnhk#IJjO5@^*tUL0B5S+eJ|rm@8YNh63J}CAsuJ+ zN(BYKLCBs;Wl1wyu{U>IAUVyA>R>}|0CjFE$pn|w3`{kh^GXRqnwCCLqI}R{!=piI zk-x`+tD=CzxtUt)2>4~R#}6{L{1%o*7q0uX-D3?V`{0RJXH&&*<2YuHtmkNa`#YVI z`;WXUAYGwo4SRT)Dq2ow5PHHDMQ24ee8kOb9h$jgyw9ciw6FrpyUFiNFQhN@Z^o9o zWoI%ySw=@NW|6rqV{+^*(eyygxrTL)$%}>i1eYnClRW<5%pZUq}1(U=`WLa?>y&LtK=RiOH z!5~Oy_FlcjKLoCFncv>50zdXttYj%#YU9mIxKtc^rvetV%zK6VxK{HQ#k?;08!i?2 zUDq;#g*<~q0x%H$ec{;W`Z6!JFACZYh%&FWamH`c5lrZlsFT~~4_t4IEZHV+PcyA= zoiXi6Imi=%8&v5;9|mNdT5aN`hx07qDLB72HV)OjaXPoXX~fYO(R1F5d~^Ff_uDOi zFmY0S;p{JX5I&tNa~zZ&UPD<_d`hlq>?W7EVxBzY7PHqP@!mWxFtAnhC0JD*)sA4Oo ziU9bzO3*jDEK*#!WHr;PcbOY;waIV6uqs`X}8 zL#)w1_F)WVyA<-UFqfYZrHoIVFfCVq^a&H>`k`cri$N&`ps(p3tmx@N3O!x(Vd zl)FrlpU}u{SGC%1e#`>~v|^{XT+*}7ripv42UG@i(ce9n4XlJPW3 zNy>9nXLz{p5c`Pv+~n#Nw5?zz<2VVJ4H`j`lIQz!JcT37#Q}pBwzWJ8b&FxQ7z#Yi zZ+Kl56~?R6!wV_ixu&?(1;PFiyG0C}ST7Xvb`+!>#s&fFYF{c|dRS*^W1N=qomD%4$Bj@iD^w_)%7DWMU?9fZq7 zFza?zz}$NW%o+%e2j;w;d3kl25wv+?CQif8)2}d-f#mWqlFy)wR|$5pVjQRgU@PyI zec4(6BGb_aRimUeQ)8SIn6lD=0{o~_K9WJb>}eG^tn zNmJCu?6rYY#64Ed<`E3Z`MAk#b49D&2M74Uz#(KnIN?S6YdydB`Pj2T2bvuW?r%zU zzCh_IUv8(73`?rx5OAI}m?$bSrZr3W$Tfn??LQE_Ii1N^x>Dcs=ITE#7yYVkDz`Er z>*4kE)j9$_vS>2sb)ECDjRke@Y>cjacFv9dP&~D$WQn}M_TKj7Uh_o1=tkIeM4p&WS0f z89$YX)c0KBR>|jo3@ju@6gVd_P5EXf<|EZJF==tcI5u{0S^I_F*i^lPzS-(XXUUSQ zN)l{WATBM*(&MXA%bVR)ZD~(W-mviSLd#7l9+w{PPBHolVYQCZQtPKGJ~GqD7=$E; zVQ=%@KF~hZP%?bfX5m+!eLd65%pLd{pXnz#xZJu}eFH;2OOvzw=TzIKWm+#*$d`H{ zAUDJY%n#<&Z_`g!E&Ehi1Ok~FFoEtTI8e>H2~rlhj16f!o4%q!bBYT6C-EQOB4vn5(~Q+$nNc*)W~Tfa z1=$k+{;3gC;?b#?EBdwleLH}b<0U_>9)!osn$Y;hwJW*2`;Z-?$y!FV`C)GQI0f5$i&vt~vMzrO@8O9o>Y#sQThZzhlCw|q 
zBht9s3!8kpNM(~fO3a%t+0ozWtPLV>ijlhQ^e-272`pK%*hM&{jL145aKCeRKHhjH z!FcnYtPFoU2vYLFsh^&nx(0d=0W=4%{k2oUEC5~DK00153g_C$iXN00boIu$Gej?F zTp^y2{u$+Qk^lAA4&1`A{QJB~#z!&9lCijDYoKW>DSG^Ede8ZEcjPMy-6AxrUq6Dw zzSgJ*Nx8_(m7OKI*Jlb7p>Wq@8VM(yQ2m`l1+|BV$tIvLrgayz39`!P0jhZUa=n{_ zgK`o)W1vZ!Dme^`KYFZ0oOuypNUkIxj?FWG&+99`?%I@$Iddl`dRcuO7n5VSc9ztr4xiF>ugk#iahRopV?%t5!RWrP-xO>c6pqnF> z&lPygHL+0g`Zm%cPXsV!0^)xS3BjQJU92hZHKH`CM!uZTd4e_$i-s6{-3SRYq-2!d zo#9Huca@T+dQ%?kAtNWYF~)ZkwXxI~WjB%!&f7ivdvja=F#O!|a~&0_WWb7>MG&8( z-NMj^=*F69=4R)HaY?TlSA1tb5Sl3wt&PKhFnd!GxmhXwI~E-(z$^QWX}mYsDk)sy zRcdRhjZu@713~9b!Z+E<2X_KhhCnxgcZ9vh_Nzr#>KxnNBwnP|4CpCaxdgie_z9NK z)JB$Ju+k+o?6XQHX}voOZVehx@++<9*=i)2g`qfs_9#V_YD0Jyk$QfFcL8^o3H9cK z_9|O^(in5H5P+Nmjusk^q8v(;Db!|sL8a1@IeU9=0w-)_T8$NL zp_7hcu=(XjagI}?$NapXw ziAywBT&FH-^Wv4jCA~NHg5K0GM&q|H>g5WtJ`Oj8AjzCdRcpRn9#5BL>U=Z5iYwYd zXdZse0Hss;Qj8IEQ7v@RS-Z$RfA-q#$bwD2KL_xw^U759c+1uGRK#ikXPjp>>{$^A zApy;C5%L`gVL~g4AzwayKVJu5{~0aKvgSpA2@R;V*xBoTknYhayMVpf`8RE3Geg}W z`7sX1KB#XylAWo*bTiSZg?{$YrR6VL7L!Ymu5l}`UJR8lX(DvSQq||qS0x_T7=G_G zA^;f81tRnSQ66y19QG7XxQ0O8dEJR_4e%?gr$%4;tzdrhuW~_&!+w~#@Bw~lJacvR zV~{Aw4HKgrBiZWNw~6w|4L!I)R;~rf5iF6rMSl}A<(GD7#(}!^BJ5zuc_9u2aP+n{L?Jr2G%=z~6j^I~Fv>&VC9>j9Odj;wHzHs%yLF&J}#C z5{jo+Lbs;3ZCDGjxYVT+#Y%iz7RP?x@@FtH_#K(ZE2E6wrw&z8u2ZConRge!1^r8% zYOby?IA1IgNCFBa^vu(`3-b=Sdy*eRCDaDED$a@v{UotaAU5uV@PSe#&fUO%~0G&C8Jci?KD7(t~oE zgvIZ#Ka~}}ffj5)f)*lBW(wtXAxZc5#FCx4Rofq=gqZYAY+C9A4}gHRoVN#Ju@|7 zGA7%$ZEJEBDtXF%qYp9pq5Ric;7!;e_Z zjgs>CSU6orUrRr|H4(mld)R0~i@?tqE_mMAV`W5Y`zu%lmXy2qH-7tv=~ld>qQmnV zN_Z_s+2MpdVv#%|czYQS{P4mt3XzZFli>}^uB$Sy!#b+6GZBJj`0-N6c0(5dVM&x% zS5?KB_MuTWA9|FL$=|t9UH?EiQph{;5~itZ9^ljx`O%k^5rJHYTFG?R3PvO(tkGFw zLT|KzX$yiv=?u)CF3gdtZy z{+zI_OvlhXVK;Hy`2U#&QKE$;)hnhk`SAP9KwB9)&UzP^GI2A9H6s2Ln9dp}V0EHB z6RelwA*1-IP+3=8-S%W%pInR}nv~(}Ql*LJC6omzLx?yc=sW4H5-8o&yqHX;2X}7k z?!I$)JYv#DCKhZr9R)^@8V7`&xX39Mr781{>$r$sIKuM{epog=6xo}^p^=w%jjn<2=S~& z>;OFa6iW`2FRnzBrY30|-0iau)p?$`=?Y`OO!!p?y`#Bw8>(3+c4)%bPPCjKKTF{z 
zyC!k`%>peApiWkYj*Ce!lVt-1{Y&fdOr0#`%UbE!lw|?&jtob1owd`aisnjn$ubB` zAs4u@TPr3=)CIZ`P)+^4r`i;={)w9C>~%+*731eUot4tj(UFppB2`Wgjzr{Vd&!*1 zQxI2}WW_ii{57(B+S{?Y`~O>2k_0Zy(~@>h&jDAC72nj(l;T^ppF0)?6R#FR8BSk}_7kF9-@L{AP6s!1zX*8^$EtejWC?Ha4@0$Ytz;OLS zzBl!qArvKdSaTuE_P)%14M}B0K0Ol6>Za$2ll^~dNMj{0u9trk8y#vEPt7)p^%|vZ zbe%t5TLP{XS{);Hhdc17vP^~hNl{{*KcU2Bipl+~jnDoLl2n@Y&e}m~k#-YUq#T>L zzjv+wqI!*#^!f-e^p*KxbkBbPkEyf~-xkHdmJac zBN#~grZ9IkVlvsJC_ER$C@2_iZWH4Xr1u(YQ}VR7l8D?NuX|0eug1IsiB?hH$SJkf z|JLm-%DInZ#%H{~YcqOLFI4LO_Ve@m6cPSJY}Wd-O)|wp|xIIwqo`_K0s*Ptw@R; z)oK8p_@IsVmmkkR5F92hZSg7@U-rnC3oK_q%R0uu_nX%F}t4gj}-dXbt=>p z$=r3>Xpq_xKl@)-2je>$o8ZZNRi9Z1ob;V{@MODmp$l))TO7Ccdr3L@e$nzrt&!gB z$Zvq+M_DpTyFOPnp1}Gf)u?q0};Yx2s2yx?we6-fbV(%tjGMgIhEZEcgZ0=h7ER% zbg77s>3=t+<5F9%oHuJYt~*MgQ}@{>1a`uN9HAf~3zXQh%O)Q~b_rMo@&1+aqm6hN zf8%2NLCR0s3>*~-`(L-0QS9tR&7@?L-}Eyrh~i{CN^vhvVHRO`Q4Ht6f8NLS+i$0Y z#;M`^S<7ky_z%A(qGgTE>j>N|B3y%J6iL1oS~ugEVmv0ri$4JJTrU!w7O^~**)7IZp3#6q=kY7Hw)d^EULJoi(MF9@nTxJ z)Uj{02yf&|8P$-PrIQHl^zy}J-{Z_X;s3Y97km6$T)ABN zW1S2oEpOiRl_99czCw82z0-`mfIs;Wuk|N07OZBA7_o)6i| z1T>JZSpX3bnpU9l*Q&pua^ZfCuGCU23nM};qrMg{GZ^0eGOW1zMZ3AFEsV_ekRhRNOS06gH!OUfo6O zNRQINI_H+ZBq6m9!OTI#DahEvIF2aa0GnepBBuH_H_TK1NqQTrCv_WMEi*P#rS$}> zPxkAyxjx;n!my!x@8hVf`PeViAC5O84t)+E?X7)3z|*6P6J+Gv+kM624Kuy_RUoBh zC&nVvY{zs;^l?M$K3#se-JQ__U-e_UO0U{7UXcmN?g{8B;X5;P~?TxR(H!U z%k+h1#>AHqTS%S(x!NEw=et}1qo}coG+NDhG7G5(o8R%pGk_Cm@OMd{`FtNgO>g)& z(5f3+TSC>T7*XBbrZkT-$CL=Zz>G`8-rqQ{>v+(ubq_D~Ck`Ix!+?*_txpai|9PaI zo2J{%#FDM8M%wA@Bq~Sd3R3ySXV;j8a$XayVPtn|mNCgA^N>dn6fXeMqe0QKl|XE= zbl%l|(-DEALyVsj)Ko|+E1tE_*6k?pPkNAKx;GQU((jR0=FAr*;4ZrEGdl@8Eu8Z- z*8nKgg{zCWCrjz3y9kPOc?S_0)thew=ceeSpTzns{g^i3d`JOI%UyIa-R1l z7_vi>p7}+cI%$pD#I=g)z=0y;j**9R>)>%d9MD`c19Y61v-0!O5=;>VrTw73pu+hq zRSs&ZSMrz*MT|I)`3Rf-%4HLzE*sBW6**a1Y3?y*;t8EI;Owb2ADf56O2LOOR$tk* zT7Stg?waORgTL<@6(=p~=<0^;AzmC);;A=EOhJAG3;Q#RT zV-Oa!$abs(ZKqwOlE%daNPO2m@3U=+m7YdCD!D0Ghp;(y(y8P*P@#~kK)LT{rcpG1Nj+3buJEWbPhJX+ 
zJX|}964z$10zoVY2g#{H{I}wfh?vUcxLhs|zm5A8bv^DO%h@(JZ`9S5Lu7QM{z7a? zB2ZEy^^4VG$2^$n{!p#4)MeD@MeuFTWfcV(8F`v)ylJ=YfMAv3SwQDQJdG6_z9CwjC6F6 zmp?z}GBpBs4a1QPCRKgtGM)P!UkO&D1ZF37VXyf1i^)7j;%8}ap_vZF`5qFodkmO{*#a75Lj!RE|C zBadC^!g16%P z>{`{&MTlFsPrM

X{7}X7I`AKcWITQ4~my>*E=W^C-|1WwZ~E@@s3MI8*K3B#V9TG^%c8m$5WIMXc&HZFS6u`g%$QBMzR+u9tSWi6~jYgAJDqiV{`%oFLVCS3AI ziOsfcL`^?9g6qrIbAi6F-PnMi-xv!^mdhkNuaSdGoQ>eKehM1g4!>K-MX7Dg zV_zPs(4Y}ja7L7+;f**1sO+O;EC_>4=wIEJVc!v!=`H$CqOm;MP)3Diubo{#WzNWl zQ|1a2)W_ZfIy+`7dZYoVf}GU6oWFD$se2a_x8+3IuY;Ofmm8~flq^MEPCZ7fwzIT< zG(TW4n{S>!6mAqh`@((x{8<*xIlPrWvmLBx{aN<=H+l#{t(9@6R%}S{h$=^C0yP6R zms`>?I?ke`c_KD3sAbrWL_mv4;O%yf%Uz#v^x#4oFlod&{y{TSON4VaCy>tGaYW{# z%2vU`+4TpPQ#TR`sWdZ^Ak$S{Y34ms%}EX1cNChTxfxo1V?7|@Ml;G!Nlg)IkSVTT zd@q~Gij&Jz>j9PBMVLa}k!X}Kef|1)G9)R`<}y%yoOW8d8VGhuzx=?bqG<#1HgNfqQhPUN1@5o9+7&q5Gqnu@oBp6qYicguaC103M;#Zv}^;jw$p>jXh&==(f3H5 zxP^9aaNpVp71`Gs!`e<9MvDzwAISg9@-Q%()}g^<8M{JBH^}SJ58r9ycYbEHz0jR7 z99e>MlK3alsu_Q_7RWT&q~*ft4JP?mQ0mxnSsk_^TVf@euOpZ$>i&$iU>B>XxEQcB zl0hpngqbMypiUg4O89ogwUI{B|6plD?S4xbmpU&q5k422{9Tn4yY9Pa8pfD8ZA1EP zcuHE@;JaWGryBI=CCpx;wZB;YGJT7wLcw$<%TrY!MU@m1mqGShLJ_<2-U+?ubA z7xsRbzOVK!qhs-d&Ge1EjhaaGxn?VGAS2)CfE)2*!ZrUo7DoCnbR4S3LbZF#q;e*! z-F}%$qDy@9O6ZZx?ajFZtTF5$)P5SV7kACb3G*Bt`D|P<>32hg&1r$_Wxyr`)#J2i zy*i&{o}vB}!*7(lE3x*V;G5-3aZK)8hn?6l+(#@|Oogw;HDBxf)DL>+|DFV#HJJmD zGL?-DvR1D;|2l-`8*^iT0k!DUtK`b#57x<&ZMF*mDgzb0iPXUwT2`XTliuW{K^Auz5rI9TA+X?Id zXTj;|e>v11Prs8|BW<-|K&0KdW0=n;5S~)o@F5V_K@vqM$r%WPO;fFg+OwqaA@ug; zIdvJVdx<~vvH%n`-2A0v@b_3GMW~2{<0z*3{YX|1iC6HnJO*|pwOFNrxAI&}>|&RG8Gh=UxZg@rqeZXJTy9 zszk!pqV=v8BEV!cbo_BEeAI)0T?Nj|@a)_K{4o`tl=Uaw!>wK2kENTU|K%g0ZbLCv zG6NOJ-7A=?w97-D=kv)raUyPQJg^%5ge$9bl&kY(Y-nVC!YfytmVE`a!SBM6)_~Eb zJ-Pdf+rS!(tW;T;)#aJ6tc_ohPMF>f(5J9pUf z@^a`a9xnbs=Wpoo20~HNzLbotV2VtcLY!qS+YpwA<#1TLQ2E*ey2kI!MmtmE3hB%? 
z#1!&Yx;>Jrt@|KJ`*2A6S6d`P5K-JX`>Cq+e^1cuAR^~{P88sD6Aoq>8q^Prl3IEy zCjrjtu901p-3hUe9w~v10J)>pzDKTj^i)mzEI2y4hu!zD#GQ=ZGzgSuwijeMKeRZ6o+Dl*3(-2exJoP}0+gBU zDEYy$cmiyJ9bT^a>;2p|XE+%GUZ|eWFS!WJ+}_5J(1`eh>oI%*+auJsh_?p2MRUwd z({?!SpU-T_(Otf_mMSrqs70=gq*6!5#N=DPhLPdp<4Trv;I>xal@#WJY6OofgCN4V zq_GD^G~WvprQNb^u$nT+nz;UT1DIWp12%S^bLr<||GlNF9^7#kNi*2=g@;dy(L{GY z6u6KTYzM1;66srRDjHWT76^V06?nK~@{{{X9m5y)?+QgiPQuI0X5<;|AxiugCt$(2 z;j9+py#zuQBu(!qzP=DK>7x+V$^sP>3Kq-%+}g)xEW+FAUO^z+Uz}(IkSRt4)0Bj~ zW}QwJ7k%5$`S{sjc0sKQuc|GNvx@dC2nc^PbC+~zrTmSBR#hdatdJ+ZOxt;I;J zz@zW@`~5|}{e=w`pLfiYn(?>RvQ9dm(o>SP%TsK=+NSWgKGSlZ+=<6pjuxCF^`lc& zO~IVY@;MnUBl;*wQE6xZ5`5}0=%`cLSE0@RWry#2*iy*R@Zlm6j_C~t^YkP6?^(N` z|1E5IBQXK7I!08`wshexbZR@!olc$SrK4>I#k?j635RXbshVA#h7hnq?%9heMI#Xk zyIA7!YMg^Pv<02?KPlp24Kmtxpm{wIc26ff1+CjR($dyxb^zzbP-8v)UdxJ7b2g&<;K>VHcpw=T}V?~mo24L%(0XY1}2JQvMO z>j}-pG)d&mTlb1iDN*v;^^45PRwq=5xFNoLiG~|*G9C7|3O#CfsD^_ORp~xLpM}i3 zf&I$>JMabqumC&KSR-l$%9#4ASVdIlUz&$b&+VL*186`4t0zV6&DD!U%~d(UkHttj z4FXyuy=Af|6R|(T78b?hWIK!@LecGs@f1oQ#0oULVdhpsL^6W^#AkGrv0|l_==YG8 zFG*_31Gz4TZ$>bj=O%{jS0TnE6F4dP&tS7=789fDwSi!|yOC+%H*cSwU36s8s%5`# zy29nv*lCp5GwN}uo9Z-~uOvUzky23e8VR&wU~Fh+Wo2!? 
ziRQJx06HG>%F6KH-e{GS(YV~kEZqNHb)>M69oAb8-3hkgHt|2QDe&(j^7(8r{qmSu z=GrZoU+PfY-7dc0S%5z8#^=o*{92qHV3t|;)vR3Wv)t5HblB%4JZ7mOjZg3qVp*QduP{2OV`NNyR6=Zc!%pkCO@; zN#ZDF$NX2W?ZVt-w#<1u3Fs3DI$CC@CJ;i>Wc(Q?eStwmfezuSxh4ykk~IAM1Q}b9 zJjUL9WyE5+%gs-1HHj`}^H!Cw zY1})oB7rbbZK5n>?g1n)X2N68&p|MajP$Q^t2JBE=(LsO{f&Ho`0VSHd7p+a_d3&g zGqD8Vd_m{>YKl?CRfW*?mf<++Nb$e^n@FEH7Du3E7yXnXDrnuIlrifRDF$zIv%lSe z5D1YZ48BA&g4<2As>EiBpOyD8HWAS*ea=dWf|+dRXHO)ebupWIRlZUiZn0i_dwJNs z5O{Zhuf*=(Pwwi8je%I|GUF6@C_KFi!u1V|Z{ml;)O-G7&F2Ae9~GUiN(UHTbc$+r z*?yfAIEB}#Hlh7>`_F85%_+=Eq?6`#J5^BJfIh*=wVH zwRi6G_`28?G3E1)yY%*#;7Ep@=kiZ}9ezXJ&EaN{7zrnh3iU7I5Y@(MnIND4c_Z)m z!$E%N!y(>35f=SnQWwW<0M+jbMrIGP!OWg~WY0}4=Ax*NeoOh837^4tcZF;PJjgd+ z_LPAlGYP6E!+$EXyEGZ=CvI@w(4WE=lQB+0SKvNgIf@)lx|MVAW#9%Ym0d$sfP9f} z(Qd&?WOwOk5}yt6srB&Qj}{`{B#B@WZ?aJPccst*+J$-C%6rQ7Hi{~{iRM#U z%*u6dTtvOr-yav#i?0?J6?bYN)YjZL<%~_VdCS9^a+X9?@OCsx*w<4TvzK}HnSe|3EuObAHdwQ zscL}-yv`Px%T}um_hr!QMTRc1Cb{m=;j`*d2J(9Ik?aPl&RZesSmUvH9kE`r35H6n zo*foSPMM6Pre|`Ay&qOL`!3qg6E!)jzu4esD%HFdO@she2FIWn5H4-#_*#IrZxNly zuKjZ1<(1djWv={xy#^QaqhCv76*Jl4yF-r1jUBJ%<=YMoGS5iuI)y&{OZ_ceO?ss4 z8nMDqa*|0Xcy;tWo`xGomcurJ6DuD^Z`Dol? zu=Ywj1=J~SXVf<&G|IGj-Q)$)TasO(ehCmyGU zUkoPhNEhVa2kvh-#Ad|~_BFxpK6fpD*P^idSij>soJ~ZIOEwjPthM}jA(Y#0lr#gY zHSxP)<=v>Qjvm=+xc8)zziwq=kAH=hU=z4Z1f<(fn^vwO5ybyXgy2*NS-`wJ6KEj} z392j1ZJKd3CmcB@B_e+i@xD|d1*wEz#9us%<9H#?Z6BSU4V>zIc&>RI+B}Y%2?X6J z3Vvne+C|rKUpUsB>H0EoPk^`9u^O95>V6-Q9pkD3W#_f;x@Ld)M)iQ8?=$s@VrUS^ zdt-|59KCHrZM5cxXwCO44)zU}KV1+Dfls7SG<1HmOnBDFZ>vMY6f~k8%w7sACiKF? 
zZRCTVBIVH)Vb?X^DM4fsSNxUY7pY(VRF2u zfXIU6?8H}(0?S{S%?3W(I)Y8*iO33O^Rg#lA-j9#q$J$Y*))nl2ct|=@+5!|`2!wz z%$g&aCz{i$W1Q zv(I(G*y}!;Mi=(q74dGht)dkybZquMij&>8ZW|0xbpPj>y+n0Wt!%adm-)4yHr@4b zFg$`S5llGHBt^iz1NK*TK%dY@HvOiYom^1C^&rU3xaC-_g$QY5%^mZXJMjwdnEQDh zqu9d|@#}(?)el=MRxO5HB^rZ|+}|k~8B7IcnBj$A6n-^*OV>c!%I zdyFwMHiqw0iBHK$>3#Qswg+Do|DHs!H)jkyvs*blV@opqJQDK)Y9QI2k@xckSsA@4 zZvWDmQo#TCc`o+XCGP`nQu?GfR|x*N@3*Cg9Ys5>Hi@9si#LY3X2<`%OhUp>T28JI z(1ZS39M8yMOx;$n$=F+5ff=-8yqZ^0;at)~E=mYc0(A1V&R**P_xF$6&JYqxlAz98 z4c@DwuBAu;S-|<*vB@nMCpbHTE8bP>8g6B!1yb`Y@^Km*y z4i(fibF?gp-YW{@1@+Ll?s>Yjn;pLBcX@Y5i%U^O|MEI4@)V~XGJ(|9uz#}2Dn=Q^ z8-kYUVM9UGd)Fi%0ZF+%^EPjO{9IB7$K@o70-V$g0&9YKS7*FMS7RZxDH$UR7@5QG zfNe^@-fJ6T`3bC;+5g_+Ywrrp-jnRqMBw4Y%I<}e)t;b6Ks>V4B!kx?g4!d-mH5*39(WF$F%p)2i!Rdc!9gi1JO!ZH@4s&HPGz38AdE+x)fWaq+u( z@3jKZfmTrW(E9zwb~XWQO$wsQYSF`x==TToaqtx!x(mGo%^&ml--KEnEaG$ zqW5vQ6fc0G@V5&nlu7ZmM?SOpU-(@!n8t60_iXA(T-5nJnwUogBNKFfUPmOjPk{mw zAQ^{cM8xp{EDN2Fu)5P#eNucxkefif}wc@#Hp+=;BgL+EfZ&Aq`s)1eDOj+ZY;ulU;&=p+c;CTh{ z{{rkdrFnV0tb^2Uhj{pMY_g{75%g+uQyFPkgQaWH zbnCLQe5gOwhh(@jEX?;loWJzYACHxi*}HPj^2>Fmg==^3Ze}Aj$Prp*fQ4uL4P+(= zSJn3K>r%brS18`rX#5k z;21Biyq0-fb-g1U)^?T@H;`%&5TkkEFv0F_8FlpUs8Sot_aa_#r!yVVWDZHnU?CEv z>xi4#r-7JW5X@7iQ}1DWPlSG<_-ox?BZG11Wn%98>&~_o~C zQ?JZLt1zGY&f{#7^VNWv(S(p=BMUl*G#NQ17u=VY<Cx_Soz}u zmleHwCDjr3X4mhdM!N93wSv?HIJIpGSgjAnx=f_mkzpvg%EIe9rlLuWjre4~__k-*%Z{n^%5L#mRJe49Do3OJd_*86+A3sI&kp&C6BG-Hn^%@^=dz!R{l%*|ra z_jN0EBC?J}?yw{-LLf!qjl3Npm(t?m>@;*s{tNs7F^o!6hS=x5ac3W9G`~*)&&am# z*Bf+ChX-m?Ri>_F`*GI+DcAd@mjC?8-JYB}2Y8KL7|3yLYz5DVupSzx4 z#?#q$vEn3c;B#S)tmw6~2i|G#M8rH}QPXMInV9nvX(?Tsg{eBd`CKrt7+6LduFS4e zHl6<>MWg3|r{?BrUO`SXNoeEEjoR!4enZ76-b}LW?LpNeA?@ad5Ohc5 z0>L#k7CO$8sqWiwSs}kvFT2;93~o{nb2^H7Yj-lT(z?h0(NQ2Qb93`YEn5cNbBMdy z5iVr<@=zcl;ZD{C=8Eu|!Y^yFcpi%|kD7_O(zvE5WhCD^-rSBC&L}&NWM>@%v3X*? 
z^!lUOJJ>tT6Kfja;cAW=v+eUK7Y23Yx#$6|Q?bVyxKs(D*k#O!$=Q~Rxg7yhb!2*c z5<3+UPQaZAjvvmEN+6RRI~F4=_UL+&^#T`s+!r0kOdHPgrnd-5Dy_5w!$OHkXN5F; zWIANBa2`9Yt-cCaVH#+=hjD;$R_(U2c|RJ*XdSBaWe2O-205~ z9{;S_3le;qZ;ivlnRmB$`+k^-s-TVZUqPLs*W4#AMBw-o946r51YAwY4$YTo@pFND z_Fg8m>G2-X`(?KlgTl)=?TLJ9?uj*eYAR5%!_V6Sbjq6*CZ_5ytSTYQJlqB_BQ!Q* z;oBqFe7Cm(Yi~;geAoR1);e>T^vqp*1B`>u->s%MmxC)eZ0iva^OWhfMurFmsMDvI zj0>Yf6;`sHJS}NR093f(t@1&2+)^bGGdn>`liki5PnesI1y7A6*6lY72 zQJ?gd18Z@K)04HL0zp$n#hLfbBj0}oGu_U+q9h(tx>ZaIV%j_J?*WLPwVmtU>i*Ia z1ugbrI2QfHk#eZTc)irS1(21LF?@5(xdp~gBn}ZcGc8v@`MEigsMiUPmW%K6`onI9 ztcKXw%8PokRY4Zxhk&_OawOflkexIX5nA=-(NgHRDvLoWzbF12FA=C_g&gFu#)n;* zo=6$EEo~{RURpt1rg*zCPKB@V;^Hjy597%~vQmX1)!FbvHQR8TEH9pphdR@#5jrn^ z_kI2QO!y4|svyq*Mz2x{X0o#EItuVVPPOB)yDr{6DZo3e|IQ5*4jeM0NFUUg_mV-# z%i=VC>ApNd!R2{%UAVVu=A(Ced1|>FS3!sYhCz!boG1zHfPDZ!fP$mj>v+Aw;I@cC{@m2RG=DzyxRh#SUF4L8oRF=&WP><-GcX2Kn!X&`#Z?Pg&S79DHY@6YB zuU@WOhtu^uVdU;Sg`DNE!0K?oIw|&%_MPhXK|im6$z)cyN>&~uQz0kZnifs%df!2HI+XIZhOkno1lwYm*$9tr~ z#WbTb#Q56Zpubq?aZyH#9cf*&l55cD0GSlm%{o_FeJaAW|KN3c6OaF_KZ`|M@6W_9 zM&vi$9~ad^oXWL)6EnFdFRRlINj1$2^i;2nE-^dnHxMo;JQds9^it7mopczR3_p6; zK5W=_-}x6GzSK^^yYBhw_ayAA{{I&?0^7K)Ew-&a^N~7wnlQD>*DjrM(g31l^`7+) z@D72!h0>~cm@WHjp;$Z0i299OsKob>sEl>3*e|t7={1rdiD&)_(eQEf@lH0vZu92&HVs7z1sH7a!%8w{x&02$xs0y3Of_jxgA-t z)N~VP-r{Ala6aFCEfh2802n&MhYV6MF*n236{5~|Wgkwc&c@R7rH9(N56DBLpQR7J1$Tsfg;#Mj{IqwF}1^7H00&49F2BZBAJhi$#^$d=$l zcFKf%*3tRwyi3c;SxvZs2e*ji^XCZMvhh2RV zZ}Oc_(G9pg4bF4N_SgZc^Z90-o%&g1b^&vv87@M8g!Rt&Kr6=QZk3R_h(iwejFhjos|xhndO+6t$IaDm|2_Esgo60q3Jg{+t;$*>)vOzZNWxp-D zell}(3=5Jv{rv;fOkWV|n2o7f@>c<0whq90y%hlq3okNsl!H{?(D576nKvG;dS$QH zVMb8Ysba6?jgBp?k*6ut?ruI74@;m=+e6BNcJHW5nZqnK?##wav`C3-0B@nRfkn)e z1L=i@sN?&QSyQgf0MW1OHjNx5y7{CUUVb?ePM^Sv5BbNeJA zMtP?9SU}8XQSwaMgh;ty5B|3ZLY1ue2}NxuLztlr#Bu5WO^?CzV}!&*d-UB8)VFt zZI4G zSP87D-1K+fLEn}cw^169kpi|`EiccV#-6@%?q0wzDcfeNwPQ zuFz}x#@K3%%iz<~80;W;9mZ(rT;I5L)N$d7=IPa2X(5a9+T?3N*F`q?0Yhdswo_r} zb6PnCY|Bzx$!3e24I)^u%*eWv*gR=@#k`AWBB>EL)}0>~QFZ_?Yw8v2pLD5qz&Ovo 
zWo-q?i*fiB8S#JIY{50fZoBo-EKroPRPgv>CeAQ#{QO)#Hzicbt4d&eO-r4h^qdhG z!@vxloNmV%;PG$;7T=!_Mb0h(?vPgM6}=Ye3WMXL2<8_j2CYIyp-su4XI}WqFGoTOg$_W;;v8(53%B)W52&9_F_Z zE_Gx!br$YPukL+^ExE!eXedL;m{U0|_q!yoEa~!HTEIg_JI4j-Pl@d%w1}nPu7f2wPH5hBf8B0mztlkAf25K^Z=xR;sLfEmP`c zZ(GBbTznCaG$=lO7UYupC|CbI8!R<>B1Ep%TVkf`m#z0vgr&7XNjg@InWC#VvJNHRMQXu(Gz?soh41s~xJWW9ytLj(s84gFxV zs8?g>;o+@*YQOnGhmoqVOt8ZsdT3l6NY*}jzAs%ouMyU+O4FqXM`jtt1)jo#;nahV zt1~iUW(Xb-*-*c;;13fdQ?7Vp@rQFp%rGd$5gls(-1DlY!5QtDi7HD=&5g?93nYM7 zr~wI$1<9n8#9e;gpt;f6*fCTQJmL;MT@?c|LuSb)RW9PRRR;9HXOU=cJ^s?y+ihCL z65aiVLy6c?TE<8bU)?LFfVH>atpc=p?$9m1!`biMHeNMrGwZ|}D{&4{X#Q8!^cQY; z_G?!`Rj*7Wo)Qu!yq`(DhgZ(v^xl5}Vve03J{pZ=1}4JKsM50i*4<$}OXejaHt{xkTCE4F8r^=8}cKPMIDfsa?r4~)*c{nN|0bUW}g(w=#e=ZOr6jF!tO zRvjEX=CBr`l%eH`+E6(@f29^dV0?_SH5Kvl}~9OrZv7yGM~Z_k)UDbJ~3jt>w$ysaDVWejGmG_t9Hkd%b4N z=A<|v%u~s>bS+Hs*z6|<*7AlTCgl6P`H6X|;s;goJ!80+fL2a=0*Swom&R0H64Yn1oTV@TeA z%6zgqLnDk|?Z8MajNlTWnwa%7LCzsx*+h#jdQFHTZ{6>V-VB`rg^MYW%rc5!=8jrIps@=P1xfF`r=+Rv+ha6T|H zGY)bIApb>fP75KW>&FL8db&~yt4D{?r&s9bR~R*Mu*zi6NzYqT9g0Xm^LVhz-LDY* zI6}ItO5y1fyEi!TBE>yOFS2sZ^`-@hZw8$t@EU_M|CoLtmLZB!WQV9aLMX1_& zqcE4&oC2z{_Kty3mrwB4D@fLU5F^9G#+7^L@BCN3c20Rqf$EojJf{0HNn_SYd2r3d z_bcZOlDkJ*Y)aerrO&_Y=x_zT&umqbtfu8*VWNMT22iIz-T__@bf&R2z12?T8fdGwb_kv5uzMC za?0mb`^$#^{b7#i0YPYd2g+y=bCz*nCv7C_z-Q*O9j!%?iZ_r7kw&pWl^!)AI<0I# zRv=nKWh?{^vp=N7uyf zlbX0vle?cuvO=wE-S5V-V;(*hY5S|i&Zm`)TFpHe@n>g8$L1k^LjP6RaM-sJBK)hU zr>%|i4i3P7$fXo)D_Z7_?pDK%eY;-)Qg)8S!SIsqE1#OE53o-r1xsw#1We^Avu|Xp zkOK>i?d@GwEGWIMAsL7&nZkee6 z`-LKeMsxL7a3wmE082OukMYs40Uj7kwLQZOAETg@n%h(c_qL9&X#g|nsEq=QMn0^1 zJ)@B*lhAONP%i{!9(lsdw(f_AuuTlh4mIu3CWE`A8l?ZqW&8sO=%17iPIP##GlJrT z%VZs-rNY}5OZ}*8AH6WS=qou(QOd<+~h@BKN*aN{xJJvd!*kRFR$embvSX|1hzYQ?2vX z9<@!BxBJyfOOgmqfw1Q|PeQW;>hou(9_@`sP~cwqnUhC1a!wfvay5XJ6T}=lN{WxU z{J2T-UscY!jd!hOMesJCa^x(nC^DMaVfLybD6{yJ>+p-N;WTQdgQ#39J5^xJAzfF#`sY(4xf6#gebCUEgNJJI}%Be8tok9Av?aO!P(+>`Zo(y z{J%KdQrr^+Wqv0k*syO!C~Bic4kkr3u2K7p@~%^F0N}_>?mh#%V%9*(^ViJ95!Iuv 
zb(s%j>$==5{)_i2G3Y||(NrO#>CT(&^4Rm3d^L{+DQPAG!n=JpTD$J^vQtFN2mSwM zr~T`hx4cy`-u1?$3GI=Y`kOs_=-(O^@&nzI666Ze+AzP(h}^-Cr2gEo!R-|F3!lEb z*JcJj@NonJYD%c(@KSoFmmr@#d8U8a_PxKH_r*gm>UVn!ts?kqvBXgf1?SS%Q8hIf z=;t7HWa`eKy7mUf2^+{B_&65@C~F|C@U%wS2DvNe?;7>d$9Rr_A>LWOrK&Wu~#jI z$70s`+FXuFkqdbyXmN+=#b;X}G^lNa$QTEpe*fEQ*-p_oZ02pfCSxCqaJ&h?>+lA$ zX4|H1>#Np$e8{6Y+W{V{9%;zK2z9)5F@w}?^1}ocGRkf3CPkxN8Zf?r``KdlZ{(n4 z7iU;sZ`c`S71h>lwdz~5U#KwL&*-*0ETkjiFGJ^N(#G-xBO?!=3j6$Zz@)PkF}|;| zJ3Wl|fo?Lt5yNs{M1vvY5fw;C;x2#|_y5PzRRBf#eQi-BL|W-iX#}K8SbA9yq#NlL zmX4()q`Ma+rMpYIyOHijy5W2I{l7cI3^UAL_TKlr_ndpqbDn3l#*Yw4Bz%XNTd$vyx9W!o@4^4e&7fGrDy-XQMarZH#4s`2DIqZ*8z%7W# z`t?HoYVB|T>H?!98QzAb>2!X1ZCON%Zuu#U9QNWSipUh1d5pe%7hC(!8l%-^F}R$G zMC4NLT7=)eeIqFOI`rauKe*1Z%E+u2E5kZql$j_KR$*kax|n9IzBu7G_0UY0E-|Re zNO4qSxE8hG({l7-s&AL@JhcL^K;3eW@u>L{>U5a8l;P+W++xe;en|L5L-Kb)>?Yl^ z!$iC{6O#m#A1`Oa`F1yj?D#NM{~DCShU+RaibX!I5R?7HBboS6>fSFN6ET_Ph6^ zYcyAAK1{tPjh9&2l)0(OtJnJX3Kng}JCo)&?iCre>F_%0vPr)6rdH2tzL(+qEhfNT zntgV+1ar*JQ;6{X;U}=PdNr8X)2(5({zL%aa;*<=fwk`VMn9{@>ZC>e*m`b2Ks6nOY1oLn*Lxd6 zJ?aU23*YqTIGstA64S2p@bd*<{-TUkD*Feh&$TvRNjak9g^0JSP>BA{fxiJ9MFay|_jhQ9g7GxlF89=XMx(7^g$*AP; z;qYT0L!{mjg?J;PJRR&sg&!^52pYu2{F;R-ifqLQN$|wMgdNa_+1kLEK6oJq!bqaW zxP5nzjE(wAIL98zPSeSuuW11xHzLTvafI6L8{wKB=adnz%jrtY=GppTSD#)1iIAHM z@*sbT#h+n#PD0@dEHD=_fyP1bH9Iki7{AEU%^c76$`e9)H`&0rw`#J%1 z7{G}s^FSm9X7s*$p%%I&S2i`BmA@%@IQa?SNl46Rd3$k}qUK_Z2 z((t~DA=zRPM1!d}qUdA!!I&^HCcqp~+K?nJL$J=@kP4Z%G zEZjWFV+nN)5yo%4uxGdkyTW{QZ$jQ|GgFZ5(~N9Wc@4o!L&JtlWN0XZSUeG zR^`>5E&ix2iRn(~8#6v4@czNY_w;;BVX{~)xNd9buw-8n(G+eI-&;*_T3@4J#V(qLZ&E^`)5n-3>iF3lW@Vr{@l~oROp(y@NcjmJfhE)g zmaX&u24(>CZ`%^w&d-$6XF9|<4pgo|S%eXic8b#IJC`f{X>ah?pcW%Ry^3&nETs9Q zYvaS8r~Edq_P%@>k3&nnF3+n)KhL|%U#09JK1UdDhlVp|u+kWn;ApnR5Qeo zs-iQth;4!%=xE~D_h6*trEbGuNCs}qX$n#|(q1#A=_X&(6}mSySQ9$>_5ka{tl}|E z&`gY(c23+cP$B)JOkEx#4&6lpuCK7DC=?E_z?GTaMfsQ?-%%-Y#`nzPsApnqGFygm z)EGEQuzhH;!8WdSuXv@9@S`gJt!ID09H7dE4lzARcBX0_myi>6Ml33Ia*kJcOh+kU 
z|MXedrn?w#VYf+fK)w)byo_#9YW@BYh1-qoHGCO2>jJ@?Z~QqH2YL^E6Il-P%JAk9 zt_|Jy5@xx50)UAN<(Az1VGk9Rf6KnB#2=8Pk6-Wly?g?Mh35PWmDL&92~8*WRHRS1|KZaY7UZ{TV8QU4@$Jr^m5Pxp zynQ+{1EZEWb+1k5WhbKZKvsC5ir7|Dc+!zmx-2mrB?5Z+br$)vb>Tbsb95d;<@?5$q`rBxLy`+MAUJNbabX~zvqvRnIN5R0{sJdWd`~$MnsOq zth5Ep?&eR7xQ_&1U<%oW?331eG@a35Mehkn3S&T6Yf7w&E1Z zz&z{|59u*-zl;)uAAb$e@g96tKr5oLsCj;I0RVlyy*Gm$x8{Rg18$HG*_Vrd86vzA za_V#2h-%J5;UZssO%MNJ!m;KY#XLyus(r(1-i>UmT4ehV7Le+}ix`hrF&-Ok7etS; zJ1=@ayw2|j7KZz0D4Y@}%aLqFSt$`v$boLxsP;#mhUutOaQzGaLQ?NP(HPbrn9S4K zw{!KgIp7v6YAB|s6Dan{G(39xZSp6fE9jinArRhA*IOqhzHISI0Yk}1rs_^w=5{g- z)?}F8>@YIHz+g)fq6vaDEA7fc_xFzjg#wC<0kY~g6iUY+lJ0g)9jY5wcjm{oxKbGu z+4LVdT`9}gN@Jd{243ZCQSDhH>l1Gw(WS~|x$Zmr8V~q1mVe*h zk;eq97yZx0eJ7wgg(#b<)ghV~`4H+Qt$LY1^M|gDJBv0G(3{UhOy=hJ5?q8-1!hUI;Gp5NazUUQ>6TD zU_M2>@1ufp%wWUf$QWzoJ!InP!ODzyRF)GsH}B;>;Gunz8^5%PecMJ@8JbVKJxS~# zH~I~beF!6zXLsyxk2$W-pF4=tvXk93=&)Ox(Tvi3i%|l1j(!HR(3Vlqn1)6q?H61K z{SiM;i8A$GrPku`A;MC>)VW*xy=BY|n`BTZaa8?xT)q?Xm*zVI`X8lNy(Hlv9@!o> zIwgWDIE^=R)(-W}J2Y18P%y@fj@&J^NEo>M1D6eb7*e+z#_HD?bBnwO6z5XfkL1^l z=H?2G=NlT0ha>pH6n-;$U;m!&Sk{8K1G0Y2<02O(qV@X5pbSqnua^B<4}&M7z^Fq* zAsH(F_7e0`BZZ89T!r`E9>#tXCJ6S6foUl$ClXH!VJ}pdZT#EqO$a7uDeqM zzjHh)Vu@IGA0^&$i7|`4BE!6q<1f`}#7Ssb@y#7yj8>ijy_k2BvnQ@bgOE%-CY;HT zg?DSc!?Hj&`czxGEsn`Hc_fD*43icY8FTm;@$?O-S$2J4E+CC_VcyiVrvv5D@~-o~ z_E}(7x3AJyP%G8&2BHfZ2m5!T6Grr{4$@xVzGcnA^f3$Erm@l_QfpkP8$z9>+_8-( zXhP@?^lf?Q@^U{T!xe^A*)1g>LE27EH3m~Zh0mQG4eh#KOqXZ{zQGRQgmES&!6n^I z>3_yD;C^>9!NU}}WuMlm!Yo9Zk(`bo&Pf>V%{AE!nc!ng+ zKuG+byFTO9g8fYuxAqPkU{_J@<@&q$yYG@_KOEaM9Ho~aWgFReHmshVZgl&tj4XM! zcD};mYYY`8s|9-^+}x}hymL^3b7{C7GO{$wS&vEW$lB9vL}d^4CrZ831e1r)=*$Mh zAiWcYopE7=T99CByQ!kuy4lR#+by>f>UFLJj$T8WbWtKDQiMeFU`{9_)lmZ2np&B! 
zJR#hm9oim{n>|w>oGyN^RIiqUAuH;eV5;fEe0@x+hb4mSGHaU+ZPS&Gt*{NEAsMz{ zKg!M4UJ0ulf7Bz$mt55dGxv>pYES@j!AT+`;Tpmhl3ZU&ZU+DT z;GvuL;Tnc1(a)rS*|)b9g&FRc#c@%`z)+HsCwPj@*O_ITQF?z9xt3S39HVf!$KoH{ zMA9zkaMhV&`_6oS^o=Io$h>yDx#iF6J(h{qt#a`=l`!(I zFOb)fP89HZz|wqegf4w&J?<5)cioSPLwbn70fl+44v74We;jWEoi+EYiFs?yD|wke zb(`2x#or>6kY-wA>pnK+w%hQ1qkgH0ITV^#Jz`rP`kX11KoDm=Vy_+0QNrfKi~Gs) z4VmR6Tn3`@A_VvNZ*cd(1gAOL;}C!;AQ5@&3&XBACpP`{n_W=3>!{D%>fjjRVOE~T z1m-R!1^ww}YfJv~2bE+%V3sGhT)<>%@z;-n9KJG~>WT%vbNLLoCHR)An38w45JWTGDBK+N@!^d}OpCy~dGvYX-RxZ+<`3#?Gptud8`fYRdYFyE4xWY+q=FY77$ z#-HUzZg8NM7v>3df7!qAV-WJQv;UJr1n{rSrjehv#Yluf*K!UzRrV9K zIV8TloUAeuGAQ)qeLK+*?zb!S*6;2S$s&4|b4)P_lY6l12Lj_(@ahY1OIH~zC0M}} zBJY|0g4HutS6ESFq(ry+PTSNj7Z#@wd|SEno#tafG?Ioi8e*OnwL+h$?mS#Y{a|JA zbIy%otn_VKiJ-0BUrJP``-?N&cv!_&edgsAQmdp1)l%_k%PkIc{2x7C#CYCc5fcB5 zy4b4Zem|zFiI@biL9MBrC$#8mRm>PMkvSPEfNR|u#&MSZ!XEUYr`$f$id6RiaU|20 zY-o5c#!%8z%#Kb)#n>+)IYJ=^K^RR4Zs(c&?dkdNA(m}7t%IOPs+(^7uB@UGN6Pzgkt6W(4<#VN zF*%ho;GL)K;l`bHDb z-iFg^ex6arFzE}}4c7kte@5^ll zRnCrtnUz>W=YQwl>{!`KDYz4!XSoEA^MFiSDMb}D8!`5gXJEeyv(P*j7c<0}3ga|L zK5lGx4jNU#^0*C{;>;Hw3)6k4r8T0}#uwFW@Zz(rYEmCI^{@u&v~8B_oet-l0Z^3s zu=>sz_2E!`$mN#yMA$*E6v5YQW=vYDK3co$O)U0)8SzH*%FjtkV*K|))PH28#zTIr zx}NpPV5jqKXu%ju=d&tf}>YT+l{a{gNKW*Qt=7G_U_fzmziN8)QQs zq`rZ8A;$a)Hb%QmKcaViiiuqoM0uZ)9Qu*VA-?-Kww&o`aqSEAI0M2>8M7YL8df)2}~ZknsoIY6ySYAJ$Bq zGb11Bw|eb-%onOBc*137JX&-@uT*7WWTx|3D}wI zYs?YZOxAVM+&89I{q?DTHHFw`5<897Seuez{o96D5HbZNCB6vFpezz0Z9>d&?NK>oPmr(WRF^ZvqkIi36P;OwQV^aXGQ;(5B1!}HJZ8X(q7 z7QBO2XSg}S1OY_vbvKxCKb9#=qUQ}IyPOAQX7`IxJ=rnw1%Wa?XX(V>J|A33Go9P2yNFqCEd-j|Q zX{K1bzVI0Wl$cy>Xyx%zqy=_@)O34cE15rut>J)kJ^uYF+p+Opu;7(bGXGU+Phnhv zP74GQ)u7pRd{Htm5Jk=pJ?$fI_j<-=qhXxM&l|MJlXFHK&k4Q|^gu~0ptz|yL4fcZ zKG2a8#MBA+$A1`bwA>h3s20Ve^-x$61&{Nvg>wE;W=t2n==TFB!{$!MQ<=}=@I#7p zG9i5JDj0KHQAtdG?w=xO%E)4=rTV2Hi*j!Brki`IoT0(zgA*di$;eyIYV@q^^tPF- zVJKid5iR{ucq-{uAfTs=SoLBa-^w9&Jw(K7<{0D-nQvx24Bmo%;!m2V#R^8ehk|Wz zlEmYo_;5WfRrvu}d$0#}5>%`PB&$>bF0hVuN`}cIUA1 
zl9NAxtb^Hgs;V9&y2Hi~c8@yATknRx@KT|}?kfeT+hfOR?GvPzi}PlR9&9+gQafp* zLvh>B!r(4XU#N^wze2v^#nz%*T6P{yxMPC&Mc&2y0pY>evWMy4j7`7J6+VDhkUw?L!ni zbfQq&8(t_77XqcYdg;+6-P?mfs|&`>)C!DXf94Igs4W9Dc~3z?~#HHBzFR*kShw-d-cm9M1 zt1(B@=Cz-nGt=efQt@au=(5Cx>PdkHKkj)gnL4DYoF3%a=w(27Stp67{l z|Du+%n=@tuw4$`KDhjZpq5|?cY(Z-{yeEAFFdBI;Nd-c~m^|_0T&p8gxha4(Vw4J& zGE}?Uy!99ARUdR{T7-DSt){z8aHEyKF^3-y(oD4(IA@i*hQ$J_LpG&R(>!-DdQ}~^QOY{~@jQ&o=uhAdrR2ea&l+5E|uOBKStd}$o zyFJ-dqk5lnPJv(Rv{`qf&!Ge=l-p=nIedeK{Fr1o*}FIG!pm8@^8q=!^1Y%K;SX8_ zsuAJ)Hjz+i87e*Aq}{(FF(`?Ev85;v1fZt!($r1EL81?>$)^6gdarwzQjathvN`7{U)<_6!21!)LRZZq z5j;WuhljhJQs5q zag{FNS;+an5Q+X+u(3)QqTWySDc)@J1QrSelQ`N=bH?H#*KJ_}gV|^H!LeYNUhf5c zc7oO6rQ><#}W4TrkiH%{pJlJs%TuIm0{(hZv1%BuS)D3d!Y&(FDf-Q2>Jt9iLt_ z^dCkQwfT4nHa#g;S$WMz=3T)SJn_jE>v2cCE*(zR5?aRZspj#u9CJ~n6E-&R5~5Xd z-DUZa+1L`Ws*hj2dSwt318@?H5f=OLB<^E~Lv7{_gND<@|3GOkSN*6kWe5ydH;DXC z)F88nVL{|-;inlz|CkkBk)X`Hb7sn6LD&3twSn)#Q(i69*awPwaqmf;`NN&` z&E6uQg$~micJ+M*y^S(6LXZN{lI~QvkNgUX&1aD~?tHy%+m*vs_s7y$vU7D?_~MaB z!=+<{Iz)^({gN&_r*RDCV5GqZ?<}Wiea6saDgPFE+9Y53B?Slf3R30o*ilEv6`Z*)L)jiAkc zcEm40#s~43yRvvC(ZdP46)cpa;XMo|g2zHV(p20r&iEkv7#IfNGI?Se3cK{;v@l3| zvK~DSlle{uim&K^Tf|GtW3$Sj;l8C|J-t+zm`Ku73iCv^nLiO1Y~Gdu6ZuA8wiTr@>ZV{% zxNc3&Dmg%{U~esvkYkMZ;>?bR@`2nK{BZoDriMFqz%tU|6Leq!c}|IQK1R@C}*!DFgK4Jfpb~m%0k{z!R}jvWWcU)1aA@yJ=A)rG$Kw zN+I$|JHBp!HNLiy@XkY&fL{1;G5DquI)^)1Y0%aW6N@+P|GxYKUwB_=3)7w852F#8GTE8vYw=WAPHmT0(0BZgcaZOSgupKFn$4 zZOd)k{#&1WQY(RDF>Sk53ErF72}Q=5sasqo@u0WM{ewN(Bg0#UV8933yGe_l$w+ycKB)#*Hv`{1IgK}k#$ZfMIe zaM02;{8b=nBYb{XqwQGTuRK2FdVbSzScSgOSZEPNC^FQej| zP#Xcx=v|M!zD1YQKF^Gp$-wTOvP{@(x;(Ou-GGhzKY#TiF88cJsRiK}(2J{IruwgQ z>GD1*v84`H))5cLU&f2xA@~AH=aj6{kSMhyIr&aaO zDuc5WAL7JaRQei;RQMeRg(mNL?WBVF^A`i!2fG3YhG)fj1Zpb1ig_P-P+C`6_`kN; zJQDU((*cEF!H_{o|KC6MFLz3$>@KFrdxl!C8FF)X^nVf6n=8oH$_=x$?eIz@>3+F8Ju^Xb1AenMlljxy7%#VLJZs>ii;0{m6Uope-#z_y?tU?l^*Yl{U=F!i1=BrlfnClSfK!l|@=jlM%Mj zi*?kzJ#*ck@bo^t@sj-HpLpv>kmY+4@J!h3Hj z*>Cn1B(+PYavo2C%N;(i+^JqhF0j;V6c2ZQFDQR+YAqt0MszYWp;K@F`to27U&z_n 
znRYtIXw!9P)F7st5=vO%@X$~VLNau7Ly8c(beY__B5q|kdwkM)66aKBT5;BMLzRVf zH#b+JHK=#~o8@vk67gopN@4t(MY}_b8+C~$1pP-*Q2GA+@^bPL6-d&~oT?|~7++V` z9k(?8>%?ueTe`sq4qNy4x5%oJ8!WZsRq+K%^?Cc*aMP=mU-Bss)c+RG+S}<#!h&*k zcq?~C`qw8}WjnVLeazJ?(`1SdCLi#4#8!MiqbY2+=7+m1DoK@=IF;;_{>^ob!<5`L zZet)T0MsJW3Ms7eI?|~ck9h)ZQ{OOWOJqFP$uAmR(dR2e^1A~TOP0h_k;0;(Gf5aW~`8|YUWI`<`4{-U{VuVpkVXO-* zKj7K4uH%8h;UIq#Xa*#QC@Ph^$+9iqP#McPiMu)G)8@(wqu})lwR*M13s2AH;I|c9 zBm_G=uPVf1F*G!toM1KIJnx8rK(iAoR02pnt_0^{uG&7`>=UFVw~;qL8t;6FSNwPa za~x^zwr&Rt219e2g$sGho-$-AL#r*PvoX*zzvvzMUq~zCE@MSW>Qo%u?<#l&NjVGq z&omu{%~os@e&MYh%%U2 zZhzU>KYla`l25}*z04q0%dG-0$_KVr)z*I7hfNQumm5-OmO$d;0JjtKs^74^i};5Qxg2!ljvo&HY$2?WWIg12Ry*@=DiSSx*rsgQ#P+iK8vnMg z>E~wMCn~{3-guOa+xkv&H85q0-pEQ6=46qdz40l#CdDk5oNrDVRe$z<{VVm&I?D)R zcQxsKxOOj&;}U;JaQ6q>@KC6c0_>Kg#XhFrXSeTfW_0^5nCqhuZMA^W#QE(U$BKa~ z+_6P{Git6Zjg2D6jyjWbrPHs-fe0*W z?PjF)I*aFNP3`#euQCR?uO^tC=JAuz2b4fe@^1W=Yu5$$Wu3&dv~U;(fh@*h{@3Wu znQC_(DVsh94gOP?OQ<7HYMl5$XJ2~nNPd3)&bi}!rNe&w1bdwut<&)w{_ag{>5U*n z&za-M;~&s@XW!Y&CZ(EwwfuyVo0mH_vrL+P3mMoglu2gX1A$%Rrc+J@IqVSyyv}VU zmnfSq9c@8sYPCEHHZXWj&Vl5Ueism{tca)IK&q0CB?j0wvYUYi(LY<@&{)6pEojJk zx*1%ua9SR17oI3g6O6KF-Y4xFpqy?u*{bkSXYt2 znI+_)J6NQWpaxK&34x~~y^f_V5%=bNv2XtY%@OSSg5^W<*c$Qjb&WJkE>?98L89iK z`3+sMnTE%KE_3|lYo`BH4ra|t($r~!`i4iwja!2MDpCvT*_K#$y+m^B2RJP_gGkcZ zzdr3i4_z0ur*AnwDPDdo8~>9+xr*Q!T6X&O4{a_t8~U z6dNUN6u{IgyndIf+7mpjK!{21$pH=;XY>Ez_q@kwVg7DoShXL<+u6qYW;>)bx0>Zd z2HT?wg}!G4ZMRp_;p|RmfH*`q--t`MP;1LRywTLubi@%=))%BbF4}>??SYtAjg&(> zulBGB>xf$I2mmt^+n2E97~8;vZ+w(KvcCf&tCj z+|5yP2ZX#|rorK2^~FN`oxk()kOMKu{)}G7n*TFCn5#^W_s?o=6`|t64f!H5O*J(+ zep@ABLAg7c9bTe}s{<5Az7Md7Na-EFxt|`;OJi{GT8Tu{8Ws{-oIwV576u4Ns0T`b z?Clee7t*#QXQXz8=!B35jgS46v9Th%L_MrC3nxOq?pCO_8Li6r(;q(YbNhM7A>Iuh z0C`s6?cgBQ3NIxU?;XAg z`LL)BAfkg-LHSl-`RCvSX^K9;C zho#7~CZ@BhC!8>cOyaZT)X<6UFb}$>U{B6l^-eGOSn$b-i6c;k!^1Dt!zpFZF8l&M zyiF?@PRMuQh{gShqwPCNE5-6l)+UxtXd>EC*4d=yasrnBl?%}@0XEj)9Yx)tUTaf4 z*2?oHMU%C|$ifu2ELp{bmVdT)i;^#+*E>jMKIXielxL-=a^xe;UT*B;QDus0)<6sA 
z$by^DGuh8+dB2Ci?axV{?-=o10!?yC0RdNf@~ajSwHLH$E5xCk1Rh}E4IEN@v1@+{|fRD+AT z)Jn92YwY=0k#&wyApEBskD6{ZO_@A83eNWSF99!*7u9I=e!meS2)jUAPCEwg1*bs|{1N2tGOAv&2m+9JOUN2o&HZtxMc zdp0-3$w(smy3TAaq_x)gJwXGpg?drc%o~@0FxYpcZ{&Z@LTo7b5)g-ZAFB2THcuB( z`S96fu2>`z;4Izrk|%u%kEA9{F#)})?FWNgj9Uf6!x? zMPZiEpN#YF$`g<9ADXlC<9Eqm_r&N$t%-)I-bvZ}Q2(Z!rg6s+9cQ8n2f@BDFxL#~ zBpiOUuUc||IWP%?amiigfaV%tf>fQ^+wO#`H;(r70|5QWjPpz|>7`bV*{=)_KQ|K{ z3c?R|xPdrrM;_73ANb-8;-Gw7s6-elrIwPZN zET}40HRWGZvWoa6c9Ob0wx44mL~M#LwHFqhsv6$t zoD*id&Ncrxx~HuYHlYiYz=fek#i(O0#yas5^V-UoZB;&AtGRYsC*M zd7J#n8i)O9mYHWAnG9E~wt7O~ye91Q-zFYhH{WWAUzomlp`}VNUg@Xt!Ud_F;V}xm zs6LKPZ*S?H)bEKC36n`4KA0V)Ji9iUe4nAReURfWE4OTwtqC%V{!cxNV#_>^7O@ZI5{;CkgvKgH^UHtDQMTK44+dz1P-2){Kv;KFkr7M zzeN$ZRe^VXgNjZdc;{Zo;V_f-Kuh&ynzi}bn81#SLF@(t`odyctszfim25@wi%3n5 zT$rFUufe+w`D+z^FwQjUbaSsXBm{mb>eP_2yv$NBGjQj3Ve(9V=8ai794~}TGCuKO z@U`B+MetI*jMqd=v*!9QYwg=1YJGCgx6R%KETD)vPPPvg?Fiv$INrq_jOs0OZfr{1?>4RnF($0y#Ci~V z%ojkpDQOpJFKw2Xi?ksDbZV&yKN(6p0J^46LWk8EhkJWXHV?Pet^aYykA17Wxh#?d zzN~m+Yje7Ub?dM|U%?qz9E#zjp@64|=B{;$P)n@qHiLyKv)HQ?6B8v2D?5@{*rUn8VeoyMtBnmB>e`>NMI5f`Xb_tQ)!b~{k@V^hK+q$@(p>8r8$Z$r zDwZpg93f)Rpze?Yr%zOe#5t>}&0M5#Tfez`KH^C*Q^NqZ?p!P_M_quaTlARzY8r!9 z7FjI*-8qZ=%&u99mf`Ew%HF|B_-F-~HYy{UWVzB?SpZfkpeOk0OPAp*Y|lbBaC1o! 
zHXL;881G|`D)$DYucX^&=%C^()LNxyUbgTb2aXxuM9VVgS+}sOooa~-%g4Au*W4nJ z_3x4K&@bWPVcinE{NavYR^BJH8sjp~+DPXi6rYQ3{GQBL7bDm?JUre#+)cP|Xl$G= zBG`eMz=}_Pn_>3N>uXSc|Av0HrFTz)i@W}1pkkGZ3Bt-%u3vORxQE##_L1K|@aRHc zWj0qLFA-l!hynw5K)Jg5RO41}Kek+K$t%He(pVxd4|nr7%jbNHyix}$ZqwifmaL7r zTQ$cj|8&tn>^ZngTMgRS;smjI%HWql5e1>f=!K3jfAL>qc$R`*{2afbvp-|-a=MWl7n0&4czCe>~~D06F=iffbZ zF@O_azwHa-`yQYw@O^}Hc;PfeE**9vl%Q?@eKv)QVrAB@l9C_u*)f;{`PjyK&mrws zsXO5AHF=E7O5Zu#>VC@iK3sFXFk=G@^Xo|QjN2^&|0fqfr%+|QWx3Vr6J+mgiq3zr z10u7;LZ8XX+O>DsH*HghG-C}Y?Qg+Jd8C}LrbrG)UrvuCH zuLGm3N)#^@lce*zkDO{v^q_=-eaqe{Ppg#Lg&tA}u-9F|;g^@5K#B3k%){K%*%J9P zr%wrm@7b0p42py}h+Zy1kDcaXfjDdGt_TO1`dOG=1t}^OVu!W1j&6s#3f}Rp5*WBm zw4vU2l&pRoU0$60@#N;j_8j>FBOdQEVy0VAjx;2G!=r1{b}qc2r8{(*8}Q5eW9vY( z^zCZg)1!Pk=D_T$ms_FKzp>Le0pOGwkw?TZED!@BYoc5VUY(;xP+{4zEhb7f`DD2ISjKeo>Gos_AJ44mwo6_? zb9L<%n)9?F4WHgk1CM9t&Z92i5ttd?$#s=)>)od+AQu9jk&hZH4FN$4K^*D@n_O5D z&kGwjxzA@|#d~+bw!dxUqHf%B=LQ-cS>zX#BR%v@-$Qs^Ok z$JbhZCgXJMB`~dUp(@|)|91`4)qbu&G+qepuufBl51M&A^r8N@>A<#og^s*Xu5^6K z-hgOWMtC|(T#|Hc!^=fT?u+rS^HsDDWFR`#XXP!5dJ6$5>EgY}p=di?Yc zr>%dDcFeG~?UG8jobDn2x>NkyGZz{#)bBHtr}!wPr{jQL0|OKxt=dq5*3>$|CgRo? 
zBLdWn%5HUtbP=VgrYg>rCv_en@w9yAn8VLk5@tQYn+;_)*DDx#E8DX%I`l|R#+TYY z`*F_e7Z04X@3kCocx8HJq>j4v>RaE1Yv@=w2iN0(7&p=vj-r3HK%*|!b)Rg+F(HQ* za!cvSzx|E(|BFx&LP(_Od`{-7y1p6i{+=E6sn>d`G>@zO(RYlWG44W*@i5)rfF{RN z4V>Vcl>q1;_dv9l-!U+T`e;T2ukSvf9y99))D~Bw(i(NIoYSv+LV)v$T`kfTPXj6} zqV9-_D>o4ftXb4j;U#j~CM?D^mdDpGwqID5)oQ|bET*0%8jBu%l}TE@WTorDVHIt zj%&3uGcbkw)g^#`$LfE_ntqf4^qItUkx0tDSE4wL{T^bF`tc2>tuc*(LwoS|)H<0{{}B^EQ`w^GCs#)C)FkOZCHm4Ol~PA*uOt2U&sw%H#j6k*y7H?J~Jl1 zcv(d7+2cQtX=DxCBf>&J3-7XI9qq!MF7zGpMM$9Y;#vD<*BH8ysgW5UsWb1SW_+rw z-VfjdFA$zCvr?9PfnV2y9e-oP*@9UFgU=Vh1M@>umISzZgGAdr|x7M?>}8xi@R7fJR5V@ZO)wDy5%Ak+y}- z4t4+v?+61ctG5UUHS(~7#afYuc(kRPJGG{()0t)qdcdZ(a1Dkm{sg!C*M}$WQ|JU3 zrMA87h`a-264eeliy!l`8APq;>hKnnLAm`WSM@Rmr$!EjD$(m_BjO~Ng$faCOpTV` zhGY0ct`8j4R@!c8bN{RbXhgeo)Bk{3E z&wBvBoz@s$psj^k6CP0$#*El<7X}Civ_8zz<7exzzJn|`-P}gNHeg;v&vuU>hYoy| zk25g8fCFuI!^Ozdze9)JKaJsnJWr}4bP*BENFJY9A!93Ld=f)R-Qoq`o?aHF~cf=fwt;}uH((IRiUcFpl)i4 zg1~!Q*#0qSM1XwplWFunYdY2_{dcx}uqYXj(IwP5Sk-squ>xO@NNCwudbSH<|G0g1 z{{=*c8=f1+>^xhMf*CTd6CrYm}{1ppU-m40;bag6z@;@)V{IA+ zD0qGO;ebLIsw<*fQA7IQ_5g|BkYEi?5#b6K6bRa(>o6J*epM$twc#hzM@$zjuoHL4 zP@c7H{A)J)rb8Cn&g%4A7tkY%qvC^RbojsV1S=^~2MVgxqR?G_XZS5@S``@v#hYiB zaXM=e)SbY*@=JX=19tltHm9p_B&|h(o>IYUCr@xH`A_&vv29PeD?8V{!Y!Ii0T)8B ziOFO3e=S{gTvXrFMnM5VDFKmg>6C_*P(WZ&kdOvJq`TwNA>9&7cS?7tAS?|_cXxO0 zeb?{rJ)isM{pXySGxN-ICVnhj+UWYaSno&&v&wildaq>tbW-^T3f^=``9VJbM>O2x zyVI+LNH4Bd4?T0qAg*w(G%iYh0#0Q-*WbK?VTnK2#)@s*JnQa{0~u>lYK5BbYAb%* z0~iS(UFrcn$@R}j?cuDk_O|tCMjYpbo-UW^2TQxbxW-N^!QjN76&qIcEEZ(~eDPEFo3$eJddK`RU zO=GD3c#|w+9>t|K9a`<)dj{N_hWYG2TE;) z`SzRPMVY=axXAIzWE%fe4%c+L+)oF9kpY8wm zxXdqVG=ySO@hBXz2Hys_{j>=Gxddr%TREw>l&$@K{hM6xlGXD*bF z0^dE<{Qcf)Jw}#VVg1kFO)LMom$iRZ1D9iB22lD4pj47G6QVgJFge5nOuYqBnZ)Jm z`H7{RUh2_)=eZll426#mV-x)CM~zN$;+DvjST((C;z<)oloh79bjL&YH*O-5&AiqG zSEpw5JL?h{#Y|29@o@iEH>S)tsx-7`N}bO}S)b-!PC@;IAnH>1wD2nHi(uXThFeEK z?$Jdl(};Cii%S$T;^z`WLGdTdGm7UDTKL;iq%k4JMVDL|V3nYyj6k|}_0jFj`6Bcg zXY3n{aR6oXl3PVt%7Cl0+7aGw9wIl*i?AH{`Lz 
z#wBXE-*ec6r-$a=nlkIJfkBgPtkS?(tT*-D@FK$ve&xBL^pV~0iB$>xaz=!ww)F(8 z`Rz8DE=sfZ3GA-$GLRI=HE|$hQ~Yw8tW{olRO4Ynp7;@bT)0AU^PzRotv~g`^ZB9{ zmWZCiuA~jBLPxO^d8*Uws{Cz$Gb#=eP<7v*vs#t4*xE+GNWLtmoRWrtzYJ~%s4dt zKL@?;qo%bR@xF+e8;^S%H~Gvtxm#&nWq)lHYqZMxS?HOi%y(X4IL6yp4){sn>d8yE zJrlLj3l>eWP?6?(Al%`mcjUD56ES2__1AW5SI?6=i?)56e_0Wz$PO)Jih^e{3+{?L z+PMf^9(pY-0`qH&z_H+{v90f(YxnsjK_a0fj%f*{h6E``M46VEGBhsK>Ibu}LaP{I zNa=CN7G1WTe&pMifwY9Zu~IUz^G=towI;)x>Hh6k)gTZ{M=9IA%RpT3=ut$Q_6=07 zdf9rNR4-#Kno4uIpCEVC3lP80Kq8r%v#Zi9eta_eiwYOb@x zR_q?yEmD-8PLzNC(!0S$M;EmwQRP57QAQb#B_6Bfe#mbv&`u)rT&0n~^2W`iY4a7w z z881QbCt{|a@u(5n!Vz<;>oVCJF!Llqv^HmCgy(M6w7hHK{+zd3rskgDmyzRHr*($& z)^Jv!NH_|Yp<+Tq+elg}RL-#d<5Pdyn*yCeD_|!hI!>bG5wW&X(?Of5WY9%SOfJ$&8cMo z@jNjsnf>3|_ELCTU(I)fXy|Ca&~3Q|^|YX*@O^pDkYC1rowC#j8#>7v`^I<$JipbD zLAA7z=o6q50t|X;+rj?L6p5pJG8`yfzMLQ(@BI278BLWRE^Vt%T6S9x!{pSS$X*SB z+j#VZX9OD)?3N;*{+{h=X=xe#IFJDtn${LYzgWDu_pP*k2B#~l za!qz@T<1&T?nS&NYPwT~F1uyQ zK@}m~!BeUe2a}GuuIzhXCifm=R)U^Oq0H$zU#z44Cx}L$y@!*@7J8+C@KbNAa;yEI zc=Et6#M_@E2bEcea5vQ4S;%sKjt2bYlTnwlo#e==wpjk9pBw5|y*JRrC{tj>P!P-f zYxV1I0f7M~?qh}@%0YRdcKNSRnI3KY)IIF2eYH0|{@Tj6J9ytL;VW-Ol@;;4$;3SK zBY1>ah$&%w`%0`3K|T64AYfq3H&r7`EbjuAn!g$BAMZ(9UJvy&No1bfN2pxi+_aA+ zdmW9nPJw~WxRQ%=ZLfo!X)XjcsNWp$`9NTg_P}Cziy#Z<{?b#QNBbZ=kxKS2E2IH>X zqR4-lofMUSgrJ+yAoJC1%SP{WY5vt4CJ-`+jBlrO+tvWi^VVVOIpUWpX=L#;0_57Rc#Vc z7JziR?%Uw@^^O9$b`tj(Ch5~;U;9~^Xc z!F=Sd{fL;?%c>6NV&jxOiK8n|!ew^OsyAF{q-_DF>9y;(&MGk9cg}}AoZ39e>GBmu zRS7osn^3>gyK72zeH9@V@d8{G-g$Y!(dAfsr&YEB#ibEtzs#_c9>R42Q9jI0FJofT zt%;m?i3;j^iTE7ysuS0R#4q4b1X~xPC-wytOgi}yuL_;j??G0?EOMOC5PDVW$Q_o= zfdHTH?guy|-R8H-E~u!J9|;o$JxFgL8wFsF8dM`g_HHg(ONNS|k~zjB5#z>mNa8!m z=3l$e=ck{4+WltNDA1qt`#OUA3y!UsQESdIum9G?MY(6yx5a+C`gL5CN@#~5t$ zqD4-dQDWCypY!I0e{DnCREfkTmJw0}@2|%{Cx=Fs>hYSqicb<6*~q?Ln>+@_W|BKL z6{S=I^J2!6+M9VbJ@Y8;=WBFNeAK@C9pTb)VtQymsjOh9w&~gZ`Kt<1yN4V8-GPXV z_~=NsbhgbzUFdARo$5kgDcHQtPdtRcoEM0T&8u2de5a0c4(yt?n~0SwIGdGIljwuW zfp7h72?Hc5<8L8c6T{%HHGne?l6p4Yc>4Fyd;GwU(RHa@pPDF+=_z=pU?Z5}tmRk` 
zS8zJqzmPI;-~`88JtjzdJ&&3*)4MA=ZOA=sTk!7kD>N#D?Oke()ZX^?`;d3;O#C80 z^OJeceTh11_O$#MsrMhqxvaBKLsC4g7g|`F5PC2-p_hH~S}IVhRA6UJ+@LR==`gm0 z;G>0{@(g+6Oy4P|w{s2N&U{UGp+l8>NoGN_>GS^pRUg6qhKP@wpABbHN{0a7vclW5 z?ob)lZePX(`5{4JMQoX%meE+tO<~3$rKUKyn&QPqkr7?fj8m+^_%@0IhAE}Kfp)fn zaqkWtSu-YwMX6na^;MCLQ04yo7?1c;~xp@wl=09YV6zSYF$^ z<<1A4p0E9Jd1!y_6ZO(F6xeWWBM*B|&axQv^lp4w3esm6PIj^bZ`;QfI^CtzGZGKL zh#5z|eZAtOVQ`(l(L4*R?h$H2MorKNyBMY$Uh38aBryVUoKctVmd0K3KLh5DJHEv+ zjJox^pWO;GlrnO_W4UaUM=Gy=W_fYc;3jwAeG^%5y|=KKef|N~&aXGh^^4HMnp`;j zTl`Q?5h-+~854gRJvjH`z8(xt95@VPPu9gz*ZQt}>oT~Ro*H3rvluEqjY=6ty$Yy} zgu2NbtloT`NUt2AJck@ujgi9P(Lv4 z@-D_~cU}o51fdqXKjXXjM}N^62phFM)*IR0ab$IcRfZjoqKUbD)x;x;fX`$5h|2ec za_y`@CFVXy_P%prCO)7gfzL5cOz)Kc08$B=lx0nRZKu-QFje*PB^#N%4=Lk6cu8w~>sz`eVfWshp zBeew>Re9uLPqMif8)pzoRpm_2+rznjf6ZlM)@wdQ7-~U||8Y~|l!Ah5ifC9K;_Y7l zd~nCPCSXa*U^6G`or_2zv$BmA)Qf+quNIzCIbxzLQ*X8A`ioPwzHh?or#4+bvCP;% z^=$oWMm_p2vq9!7ay%IMSOY-*YK~K#T0~AX!Y;h&j(;npSCjsC*HJSN2;iT8#$g5S z`O^11t4r|uif-L?r%GgF){!B2ZGU*M*3*O?hi>QhXT>CY>Tbq#tT#L6k!h?95k|qp zL$alK-Ch`~#r|n{O?%?n+OOm{CnD9}6H3jfVp6lJnf_e8ag)Lp!1aiq(nY23PeYuM zhBEBJ8R4rOq}>K3f4-Zriz*V#2y6*!YO;@E%xkgg)=*YS)#|-pOjr&I%ivS{eRtZF zGW9G^E(@$z1TQ72t$v_P>MtoUiJr0msn0nH_mC9ZV~!nn0hRkpw^aL5#t#feVS2X7_^ zxYo@{Nzf9p+@)JE?rtG7Bm8#K7)7;-x2T(G7=+It74pnZm_Q~=0Ywt!P)CNa<5aCx z-*S#tnKVxp;?T7rPd$)w@yb2)%TSD0x`4%NIgvPPXJy4+>d%aAr;&_B z&KLrv;&9kZ&+)@ra_ztIsLG!fa0rPL`KZOq`wF;w>v&mf^Im`S&&r&G#YB&a&hAB3 zl4ZB2;eJZ(*1%;R5s7>)JbOf>D6a=OtnMl*+3r^;Mw`%(V zQrF`e(3zO=eEW@aDY~?NOJ}@6H3;FIQFIZ`{;7uWF+Wis;#}#y zIMHh=h>A9RS;lTOz>f37pz){m-~_hkWW3U^Q+#HtRF&4Gv9mqT|2Q7eb+;E)=|8-O zFd{fSXtamgWae&yqc!Tef{+=2RLL2TfY|hG62ymjVWa+GIs*I5(=kcYGV#q%Nj^tw~FbNLp>a*D*m`gTxGS(*HW^6 zF&DL%EDxP|<|_wJYIPNjBthldB^IWaJ<6}}B6?uiH&eH~w!|oHJALT|=L_jVoO4CHAqyZ1XPZ zqA&r&L@ff#a)I7dI|{#qPK_kFuBPV}$ZvJuc^zk?fhW(E;C-I>b_?W~1^3e<33(Dr z{1Ke3hEkyNbs$pzQepyWlT=K;XO11KO8Q-2wpQejf!ViIc!Ou7O>oefj|eaz|ye@ zH7}!~o^HfBeLyD?dPc7jBL|UkG=M7Fwy|%1a>TwEru?mwWHD>?v``POb?h3G_G#nL z{V;<`hB={f9VfsR(gy-naZ{qDTBHAdp 
zDJ_Cs&I4I# zAxbo7;NF)lP|Ce~7*sgtmIm6Gcn)3HcNZO}UWxdu;#L_|GiTniUxlQRQIac{PHwzh z?4`+$`F-GdRv94FA7&FOs6XR?-Hoj%uUxg0i)fN3jg=12&`TkiScx;9xVWL{8!t*e zF|SkfqM;kmvTr*%x3PYK)I`LsP!D;YGzq6$b!&wZhSXRGW!4%u>;pu`_p^%0#}`9M ztJF4Wo+t8 zbD;sdMM0OpF=c&qtDUS+jFim4U#mE$yLM~wQ4@@7frZ#LSZmCF}oVEk*~#BkjMQ671oip`%^^Y^lVd1 z93Q;)uVlJX**M(}-a`J(s6{0ycX{Hc*@;L8ZOy5%8xV2$rj*l=85eB4Y?G`vKN?(p zZ|G%s)d_sUIU(;vwOm>SRN^#gEPjRZ`{o61#y?^nb>Mj_97X1`k)#r<#K8CYOexmV zrv;uXYG>a!Jg)F0C;8Q)3|_kFV&{O>7yK0gK5Y$QYd3pOBK=7Y=lb(k=b;SLzXQ^4 z-F6)~&*LUHmyE~v`Fetq5-GSgP;5tftU%u?w;R9$Y^(?MotY8e0IaA$rXMo?m!?zP z_kK}JWRR?KH2WY6ELe1#8wUBj#gIBTfQ>ex!{k4LWaQ%yWZ_3|tOtEBN2}aPJ%Ir< zGGoe{i~2EWVSTVLhqGf%{bBQo3+Oc!^{-Ud;Ch8-gLEYJJY7KGU}A7-MnsfI$PRe7 zocx28CCyLF*PgDZ#g?_qg{i5Sd6iKJhFvIH|DE%o93gL}oW5@?l!pfl$1+!`XR9Dj znPM#I(&dNvrU%ASMUS1CqEqr#mHs3pGRu!38rfVO2mFJvmiK9zHfwnpw-{PQh;+*W zcbs<-`wDHQk8D!m0jB2#x%|4>?*OhWBv{+0XFfy81h4jq>_GGQNmt75mKk@+U*T z6ELujHlycrF>!Y#3!!L@3)~8oU^j0&+B4O4rTh%3I-q5EsP5Bx8m`(?@wic6Otaa+HH{!rxB&X|db zC><m)`4}SNrHBw$=UVi=RLB z?zF|m65JmKM`G{w<(KzM630JDO$~ztH=x)BSY`NH)Pi9%TE$$pk6nWq)z2-Svwc%$ z4|PI9KV23jq>c2x3$|RSji@=yQd3$ZC=Zq8GtuKVzPOzMM!(ndTeg0_!kIB&s*pGn z)O@1J*Km^rT(n&x@&6a}li?V_g@>Psz4kj>3tT*966+UUJxKj<#YAl_65AZD_b$!o zR578A-a9ciro3k;nY(F-Opb_}1@JsMV43V^A1{k*sk%@?Qc&`q7N@yB{nL2kJ{lmJ zrwhifJ_M9T*QJV5w8;H&gyS3~IN7INMV$T_zj9I2wjQ#J@X3Eb$k)s0!7|vC~$F zNi!a}?(JK=d!YKBTC$@G12&;_E8Dh!?_2=bEIj{gqt&P6A=MymB2M#EgyEZ|rC8Mh z1EDKR0?O$fLOlQXVBJUg=Z9?cYo7`CLY+=Dj?dJvI;y91DDtlZVmB^)d}_RT-Cfn? 
zR#_~Y#4ePFT@~5Ygo$$x&Nio{D@2yujd@O|lX(P9{g5&6@Ph}3;C~5y#E>}!h~m(S zEui&2WG2r=5o%9Zq^QD(aTsnfX<{Ey`#tg}?eOcV{{H4OH&!$u*WLpz%it-Y80OU} zKAVsC3kPilP?)OP<=|xNp%+zALCNlF8bZx!KNeb86D+vayuPJZ7!Vpn)l$Q zJz=71g!T_2%-wAO^cOO1C(deZZo1W$1nVWic0&-77C9sqJs5^(=Pf4AW~72-?+yn&4MP%N=J##`Ujz5u=@RxHMDY*;Xaw_=$;l9m!NZbFM%bQ* zDCmByH;XY+@cc0#p#L$0kVlY~z9gkatDYB*{U#y)$HkMAZ;Ju3C5RDuayO_3N0+;Z zpOE>!iD>X#V4`IV=S?TMO)xj9l_9<<8^?a=wb+D%sWo{5*NfBF+Tv?j0-FiC(`kfG z-_^Q#hj|}c0cTz&_Z92d^W^*BjYPbq!lI}7BO&uI??8P4;a5l--FMoxs-K<@gJn|2 ze@8)b`?v0zUBZurxPo|s8e${|mut_ZmyZy0Rxlc8(m&Qaz*#M1?W-|rr=32t_u_+I zxD}-Zh~_`<9~=)9RN_l4e{dV26ZBDj;TLyt7BzVbe3pFu)DME`6eP(?_r2pQ$&$>v zI>Tob=g+y74b+)){T+1Oi6oIH%RG}q9(-p4HKn*CNOA+p9wf&t{vaeLTcbF_BSoI5;0C8Z zmc$ngrtP@K96}-|sl-o*iSbc;ZH^RF{dp|g;4lUH&vrxEIX6sCAM+&O9ZM31lK6(| z(m7;54Cceg5?C`Ww5Jt-V9?h0XeAPtDoz2-Es^UtxnR_`q3roF-f$AaVP&#lCE}_5v#SCez>A0Cp@t)2=QG16w#b=H?`w{K`^7kV zdB7fYXx}C(CWl9kzYj^>#dKa>*3B=e*xbG$Hk!QsJ|{tq?Wgf*mEt*Oc#ocR)QWp? zj+?7x^vN8JngC(nt3b@RTK*#@z{tm(+1;DirEPnzD}!HHDWCkGxu=iy*l}}e8xuIG za30PwZoPlw*!y+-^~!F)odt#egxOh6S4miA;5Q;C?P`a=b%PDtv# zR~~9bjQEmY9KJUYbjF;9e9$HWG`$Xzn`Kd>OppgL@2MNMk>Jwt^+|RqP&Xnu_yBZW zw&4u0UBmXyiKBTY0ZkXXX8rHF26T*{OFMozV94!98t6POXKd=3$jt~u=) z{Zzj8EnDOf$7W45U&40va{@K@XVy=QUQ(W{5i*uUX<@x{vZ6Tgh-jk1UD+h~P*hJ6 z5|Y51j5ihV`sIaO^I0v>6P8|9O!K>lZC&^ab4u}8e?C6QdSCTxG%$V0N=QG9b+wfz@U7!$Xmyg<~QZhnjrAHxoyIo|3XYB{S1j!pFq zed1`a{P3?h1*x)aKe?#S#8=o$-W9Ym&}b6!M|~KiU?{A{PqG6;(QxR* zs4IUnmZ7HlNXHS&Cyw3!YGhXO%VroSSss-Ua@;z0^q3?-)^Ghz;gxM<;)!J2sd-#Z z@ad@Yy*;Hl#w0y3@&@K%13QB0p2uPBw}BuUN`)oac}iFA08>jWqwr0{c4Nt;&!OThO+#-s6o6)59 zI}a{9e_B5ptlu4ihetuuWGc}PSLnCxd8_>Z`QZbb{6Fro6P`K2SDL>V!|xpGpuof++b@vqgUu)_1O)`PW}^}tLfQ?nOj+WHpF31{Ijr0AE#&s8^y1ZQ1;vDR>2!!Aqd&7h+Ym-i{g5&Fq3QsaKWDj z=c4!}e;7ukmx1j+jos?l-TvjYDxn(x5EfN{=##!_wn2w92@r>GrZ2@X%)CAsrcvS5 z#j$;?5JZSBp`2>Wv>oN>Q&vr>D$8!JCXHKm>VZcV$y%ivQ+4>OvFpwq~=6{e2_s%}B=6;ze@ zdz#-yfAvH4Zp2JL+M@%HI0)zHI%@0KvT0!N)l0-m3M~1sIShs*r!(JHwKiJ74@MUK z(n-sB1W&%BNewt>?vZCwg^co=dp(}}1akewdrDdS{`FSYX|3fD)eS&e*9H2Ij#I7$ 
z+&8)FdKO)1-N51OO=X{`Aosl@Hqrz*c_Kw(XXWth+m^BUAlx`}T+r79zQ?n5tI!*gFD*f1PDk2pxY1{e#g zpET`nKjj<&`%ci8Q2mAn{qCD?&P1y)YxFe=Km?dIBZn?7uGx}~+`jew6IO<3Pe~`z z_wiS2{xLNQwJt4`16 zN1^y*)Pb81i}3sUgRqdn3wj*ecOPYo^ft!j3RTsg;&w8AE+?$eQ<+iN&~EaPSYeu- z*eiP^9#3_u2LA|1PFbw-=ryzXiIpn2YTsw}(a%)8e|a(Fh1MRy*>|I~RZNJ~qRCGv zH&g;ZeK_W);6{n57a8{(zDS7vVF-x1E;^o&_9b;&&{z|0)>aM$FLy`4<~x0_V23HC zd$z(%Q=t%!x9(`5v541`e|SI=KQ$`i?pXomwHH)ycsn|nGdn>;S1;H^Pg2U)=zN30 zfi7EcpZHuoy^j1<+o>?IrtTU+OZE^yU{@BsVZE_RuuURgXK8t^5vEzcCYMKI?qEZF z68_jsH?KvYPTlrv0*|gq+6W86m-G#b@w^YHVP*Mv29vhju*mM>7Y~c(HPx!`hO7!1SPeBINCq}N&q~tHj!M2 zJK>e4NMcN9QLOt##*0 z@9^f)P3#4xps%DQ69D0DPpxMAtD(x1mA~Pw5(Hf{dK|Z3uOa3FJKk8xG|Pc*gSRLK zvw0W%w&u~aAK$LU}LL zXB5XRmSMRE-4U0o%LvAGO!v9(sDR^#SMlf`J(ZL@FCIWMCdVHz!uXhip8PTT`_|F( zs4MLI&*(~HS#pqG`<{~&iLg%XTI z|E+OWByH}*O;e0yeMt53hso53@Qm~!d!6iJl8=$nCTT_iE`e0v4-M5fLhC9~kTl8UOA7K| z{gn|amdYV|&&g$4h>Hw<^Ph&=ZSu63RVA^ zF{~+Ak0=y9;0z5}ty#Lm@J|>08ek2Hk%zgMlasb@V$!Pz4JxxwkC$&?C1D1e$YQ9{ zdt4Qf4F3iygr1KBp-I_N{Nj>Kzi_;KBr$xn(~e^WA&I67NF5Md21S~;l1%R!$j!^l zV<{S6znjnIVs+xMPZ{@(Whv2KU3sLYwK|Wmsg)>14GV}&OJXVtZj$9_=0Q$~>6__L zH1mm4j8?2Zd|6&0^leb>2KcBK7DX?22Izi45Z zpEN1ztK^0vFbv)jys5hK>=J)`=XGrYA1WhjfWVdO1ESdGhry0zLYe#i^rMTS_7Ii4q+Kd4T24|d#up*dmWsmxIW z)1-*wJhis=&^lJ5$&SfM6s?4kQkT$CiJ*9RiO5jFYVxwXcc9mlvbQnjNvhV5K_Z|M zqaUrHOZ9s~D#EQ4KSIFmVF{6=|CI4!FT%dnnWK@{ruXt8RnQz-aDY^;;EB@1X4aU! 
zLOx7SpRXPCC8A$@d_F2Ezg{fKK~xj4&MS20r6rb9ku8W7!u>~>J{kY1|A;A`a;mbs z_q69llT#}D)-d128E^x6e|>9uFZvvq(1FW?!4{j5H(+Zw3Hj^qOB+us7G&NsR{Fot zaC){F-9-IA2P=qIk-wVYU)4C=^g@u)os-fuXqR^CdI7UeVu_(Z19|VUpwujA2-nd15Z%+Ja@>_9K#1eO{+9DAj<|?n5+HM=)$M z_|Rzq^FK2Xa?V{kcL!uRw$!mzfFzMmbdyB9Zv+;h3#?DyH zLA(RMqHjhlE*~c=a5?vVl-(cB7}xkKHg@6$SsI9|S3Ug9kNq`5@$a+7lGu-*93`h} zpL|-%RcCJ2lh#lR&Qlev+bQ%;LxWamvT#|MBp&&p8lPG$jb7EqbZjOPnc70jaz zg6UtN0wn-BZs_SJOzxgBOI&s44^@_aYQN}yzM_RM#B=Hql@Vwjrc=a<>>>o}unL1@ zkMb>E+&GbUFx7rAc};%iwRM+%^ZL_{nbTM8d776IPd%#aMtRYi!B)3XEGpy6<%ooo zj#}XvV%x(0*@evLP_6cVrcT`}r^&md1J@qfE(m=17ikH(p9Ua-%sWu?395O)6AH?q zOWf`lsg*9rJ1>zoZ#KWw-xPj7dD{L;JVG%eqW=NDm#PM*WoI2X==f~(;x;gag5@hu zkwMq<#}D1avm?Gm6a161Cc0r)$j3FjVW{C8!TcZmo|b^O2B*IxtY3}cJQfb+3rRML^v-ev*U%e(M;*#2>hNy7Mx)aWqq)MZJ=Z%r63L-up{M`Toq1C89!i_nT51Ur!?6NHUk+9%;I3e%)9UN^_=E&g4q~ z&lmhY*2wAj;?GG%rJNZN4Ma!Z??>`A1+bVpbJZ73>)j%7xS%S&eM0l~FlpI##-%xB z=G=5bdyG^HvNP&UsTpU}-=e1d5#8kK+@TB+TJ|fJ|HPf$%-7BN_h7TxS8Nez>e$!*RmfAvV3pQLZ-3;s9VyyI01r)whHr?2sx3CyU ze2QSgof^g(5}?kA=leAM848iEmP{WJGt7(vt6cOgirnh@Q?#ySbw&K>mV3x;?i_59 z6i1UI`ef|47KW4<-S0M?PuB_^7d-d3V8ex?sWWuQW9;orreZ$P+cVxCea8;b^_6 z`cgg8ff~Cigr6l)H=hy>LFFG$|9ftL-bmZY`gshMFw9)?0XYiUX^5tDVxYOBJRvT` z)^LsLuS4AESXb;}vUXTGozV*=Lt{D^^8B>C!A~h!s*~rjRUzf|tZgT8zqUitZTj^$ z(M5pp+z(rV@?W&H$a?v2wMnnd}<&P)7N<^vix=)+Q_n*;#BUq&a!|0 zIp=LZfYW7zIHAOBAWfY}wii=J{?Qoe7gwYn(I*nzBo?l>0seqvJ{dcueFm0vX>+b~J)B0dbno<`)uZ-gKJJ_BMQZXpz8yN?e>g=q`q*R6mu319_=`>_Z)-h6h%8asN;tg3%0ArFY9f1A z5cN~gg{c3%rmh+W^M%b5b7Hc!VU{szULoZXGXVDK_3_#Lx-m09Q}Wo~Sm{4D0#X-; zBa4F)h-$v*ZnIyv>QeZ{yg-gD8J5}4YybP+)d9oVX1Gkp9oi6|YpDy!J*+q&67f@+%!fJw85R4oLP>UltHPSHD!mE>W>Cct=8<{0=KP87doP!>J zJ8^C$ay?3Y&%ZCKy++4^x`?Ia*?TFwWCSwhMM_CT(k*|%z4pGPH9^Uz71YvRFc-*V z!>yk{G4PnPZln>Ym3{7eh`h3ITz3YIHA0Ww01-bjU&^9sLKq{tKOUAl;Ll$Nm|xmH zoDgZCXVlw8&1c?U($wS&B5E~gWO6$dAyd$~?0B=&pQL^zrV@}&?-TxNZKpHZ))F!V zi&}t?#=_-rJLQASsL@W#jD327wj6aE* zDNH6wuk{W`gX4}O&M*Oc#{*vo$-TOi`TPawq#V6XAOp6KX;eApX1B@x 
zR45bBn0@XSpZK75=w>2MQhjHR{k|9BX~=~}JNxn|$wi@5aNe{0Wy`n)w2`h#U`DG)P? zG!Ndna^PN^{Ql03!Jo2NNH*io51=WT^b)c9XEjA^QU{YE#8AiAD=TbgakI6bbDvV+ zx^d5vMFbSC_yq~%zQi+X>ri~vKN?cqF@OpEc@U!8~Yx^>;deb(fBL@mDT} z!(v{E7enw0TP}H}4ROL=I3f)InStjS-hF$GN+V(AhAH=()n1?e6x9al2`Ngpo5!|K~) zi0h)4$pz%Dv@Y>#`rqu|Gb6A!3#J9wNk}bUbNs!))7(X9%H(c62C3Nk?IR&(Xx%Ar z2oA7|EP4HHBK}0I4GJLPAx|-1KNr57V=VArbZ~&Icc>p#DSd?OzMckMWD97gnC%ON z!k7oZz3^?0Zksz|F<3drYs5*ZMdb_aVBtyhk7(meG4}^7Ux6zY(R~>5!=4wIizUR} zjHF-223YktJ6vvOo)pSnF5dBt`8*EfUD{~}UY8j}A0yyONNTtOdZ~wH#}TD{9i>bo zvU2B}hFyzqB70w5t7pW|R2QS*y9;*@F1ru_4K{1ycxv9XzmBt|&0R)6C2`29bxp9) zyW4rP?+Ak$CzQgutPa*^jQMs6KOEhEqlyTB6yrp*VPplzL=^ids*wxxgrKkc!@M3# z41khnhR}rJY1$Q}7P9}cWt#25ax2^=$(F!Dhdd-gj zzU=h%=2-BC`q|b2fT<>oSWEZ>Git`s z!2q9jm}U0RtHRa9MJ0v5!jgcL9xr}n#KN*>eV-KAD>#rV)8T>?&uE@{) zRNjZVAVV+WJ4ISUi?h7#FZb2%a!xiv^3=7YVokq<*BpoT;+^4=eq;fbdI{MmanU7E`1Tw$ihPmGSbt(R$HHoomYKl`mh+56maIR(3){nB7lZM)WM z)iD4N-{_m^XrM>e?zFC8 zSRI^c1>h9xKAttA`xei63wTnH{%F?nES(L^MX5^cv-~2&!7pjBf66{b>lj9Mwa4I2 z?BWQV>p)`-aI`(MY=wqej;f1LsVUP^_iazb>|`2AZY6?lytIRjJDj1iwTXAhf5LUP z{R-W)%8`_ZD->QxpMu&8huc#m7$laK!l69v4T&>Z}{$mWT3(|InwONWyOJmM`jTT?C6 zb~zmxPh{6Diw1Z7b*sh8$|y449PIKyisiyvf?FWN$-CUcOIr z#2sxuSE_9c^#j|5mogah8Zi9uJvWi`)c^31s$+lFvi(#~oDGhWg^tIy-2ik`;kTpg-puyVF%=rK%8rj*c3L?W*?M_# zec#M3BIh1ApGaS$W7X8x%`RP2UI6hHRndcq=J#DD>nt%oda`laTxm4*kgWH3Pz{qe zX1BNWpvH2Yyc5>fzUhCt;B-k22Tc7n3mzNqfzI%cG0K{zU@B!IbiI=b^%hW|oJmbF zR!kINZ~8LxFT$`^f4h}2wH^D~b?6k7>mle+Y!3bsJE@ZY3{&R%#?%jIHYAre*mcej z#L0^e0t5i!)}u`33tvz2KtVK?ZXM9{O!21D{#ZAs#X{3uMA!Q_QRo0jK5)iTJvQEc zT=(Te(|kVm+hjgAqRKOI>P0U*dSTp-MG{_WzY2HA(Juuz*(20DwbW|$fK@90`Z{vzX$^;q z1?@s4Ta}Nt;&!wxv*WP^wNuGscocNJ1?;Qz95wSiwwGxYeGh!sNrquf|J&cf5 z%++YYV2jL6kA#BY5Ib83uS@7NOu$H#sjs%W6*8!vOMkk zD=NVgLIUUu8=?z7+UNK>aH#zy9j^6?T^ZTJ$f-H&=g(H`wSm?R;*Q_{3%MyoYNd+O z1ee)92@3vRD!C>MK9Tj160h<3qTQb&4E~Q@+qd_9jT1H0$`gLpsh{61B_Px7?+fqY zi(GII-#6o4bE3g1UK5D*slDKSE)2IN4$p`u0$0+`$jY?P)}j6l(IyD;c1tv}OhL36 zRAleDGeWOjeq9}8WmHySTe$Y8XzTl(hS;v+JJL{z;eaeb;_925G>(_`a&@*{0mmb( 
z#`C>7l?^D-+2`A_(pavC7hp+3W^)2e8r(B>>B-aF`p{bLG;eXeVfBunFm-I%UPZ@s zxBv^hutt^rE%u86{DQBvI#zeZo(JuuMLc7jxepm!E}TJQ&R=CI`)_-h3Fzq~dPFpL-iVZ1H}ZT}x_VS&>5l_86Yj7E)I~+9Ay{>Ds&OucPn8&{LUBBf@L%)i z{xPjghy@mTX?;XsLd*9%vCg0Ku@;xPsFZ%*Efkp2S|g74)4M~mmkc+LI3>ysPpSx9 zI;%z_Zc$8$^PmiiNG2+_YrrFkTN_gTJbA?~XMbBG4d0YCBE=6Qd%ejsz33aj*Xh2* z^}V67(IqOny7g`7RG7%NE*F?;PWpE1Y%d{T!&mS@#PAIm)xPiX-8We*D$1^0=7g>k z_Vn0k&)|J$#;@1@uFOqx_i6>l1v2PetiXUZ8|TlR7UTD;f$JImW`?TczGc`V>Fw3} zLd*k=YIAWgfh28q>YxJmP4Gz1L2V<$)A}mjG*C&?zmU4f zmnS$^(6Jj;KkeZ$Bn#K{kB;E*V}fpHcYoe(xM`dZ0J+F^WhjWOo44W@!z#mIyn zK-j_M{ryAt@`vXx*E z?|KH;vc(%o(m|M|ocevEAAr@Hr`{e9j!FRSh+SQSJG}qVUYX4t!@sE64HD$}xwkuj zie4kCLA@{MYAeuQo`avw4?Q7VF7iv9?$g9lJ>u)#rT60OYWlf=DIAMHIe>6eCp{r* z6ERh(c?aG0Ht6h?`ydFkpcek4%tm6hl9OMq4eVZ(m)6Q{dt;=xz9d@i{GtxgFn-NB zbw!rl-fAUOzk% zAuzy)oHM92@kgQ{`^zPB-q6uR5|aft(bcAVhOWonDgxctf&yxnEt20qSeZTJ3DTSz zNX8_-v2(|3TlUwA3~hVn@G>;3E;Xr$Ue^CADFE;95YBB4^1yu7vAA-34)_E}p#{f` z(h(?=?Wkx4O;ld9B4=kX`d&etJPg3)#`tVgtr!#!nEw=xFgQC;7?P8z4G(>L}%_C4&-`gVksq-f2 zi+sNP{SDg5#8D{~NXlB0o{2v$aEJRRx~eHs73&PW;H{l#;Eki>LhOc6e?jWeA#912 z9A{h`Yu3{ht4n8>QjND)D1S6?>Q`)eei}3Z7(~DaLVkyluwHZ7V@_YM8%%v}UQ=bA zD8=O!M&2sq-fB`adFI#CWQebBRXW^pUjKZMl{pL(OT!1UEDRP=|ACipSCxvF`^Sl~ zNO7uU0)?{%#~Zx3+m&5y6$2SMiMQ&$LVMF(QQXLYuFXW1Fjv|acr|j$kIrJkpOK`e z{~Ewlv|T@&l1D&m+Tu1?HQ zc{@EubDYdj6~Z8YWJPLD*U-;8?>hULjqNVLx9L2NX{O}8gI1vI zh!V+#aD>Sh9vI`mKfiD7C}xJp9;=I~x4w`%&H-?w%blh{-_U~AeY!Lj>%85>LPZ)*o^FYKLq|g09nai$Rt|S8hoe7a9MR z4TWe`M8!Z9kD|NeD;2LsrNBI8%h)h9Eg~Q zzmNZ*nET*NbNG7S8U8CnejE@w966qMW(gaU(^@kFN8Y43RZ#GmrhndqPO+>jpY1kl zb~@YTPL-KJmv{a=Gr-VM3W>+|4o_gSAK^T!>>>8okIR}$zI4UA+^b5wD&|MyM0C4E z!@0$GDyb}T4?%@i6r|bB6*1I?(a&}vCA#2G-}gFc>3q00wy^?TEGSTRgp`C@yZ2n^ zxUuB(g}j(aR_)dBou2}}E0D>5S3PT~`Mj5jrsNI-j$?Z+7N4zLTxBFC+mQ}i%P+DW zMN!9jQZbBSNFWhYnT=thA!AaTjbSJuw^P@QVOW6W=wRL)UgWp8^Hs!+Dp#LBTwqdu zG|{KsV5f+Lv_w#^&7KLv^K2cZ!}~$`KG2n? 
zA$$Elu(J{99E{@g@p~lgx&Ig!7a30qi9et4J$dLD?U14Ud%92Zrgg4YRKY`;7$6`E zprsC!zgiKuMw`XF?R_Z#4;!RRtu&7eE_;p2_O1>_?QZ>)`sjw2%2j>BB&(kl8;53H3CU3by8YxMp}n6yC)2u1>5#o%h1>ku&iC`P4?RB# zyf>unsj{A{@sT$`7mG$}_=Tz(2-vGWtkVtl0Ug=cToB&{}gfYoVraBZZDS z^h7Ws^t8S?z1OAhS6OEBuR%hgeMjfDHtG*ZtKpfDj?&H{{M5W5X@tf(5sneZz&gOW zjsMUT9MkPuFBUcUcI&;ec{;K5uRy@#hdcF1$ZS7!biA+$jsIcW@Y&??0=y+;+s{^+ z38x-pC2Rujv#BnhE6Mm%3V%bkwRV7~NiRgXdk^Q8?)Kh!(|awUpa_GvL+*PPAb;kc zr0dPRxsh+i9es=9Gl)Bfis}Ui?QG8|!uC^58p@rxtHQfvjv)xsvT&GK`r(nmt?ckRD_^%9*P2qZZ5LU_#nPj#z9o}t2BBi z(<`L^%Z+tDWHmaR4Rkd2xsiBpOVUpzKe5C}e#_;gK8xj^iYKit;Cd{Q0pxDuPl<-AKF45(y$YCGcQ^nC7zK;iNgJ>+rJL80H@hk@Z zCi(>>B!#Cnc+~#&Q>6;O5N@%kcJu2P8`zg=k(Qah9Uz{*Sy~zGivLeWgk6gD#{AZw z4f8cXD*2^5^bBS>5Fg&=us`h3{7%qQUkTrN>QHE0HiZN$zQ~{x zb^df8pM4{;Fc}rII#Gl#p&+Y>P_@Wnq?;coKGb*7scj!xCbp+C;1=V#MG&oL)mfC| zwNH?z57<^%RP(%ve_gL6gTVNyKr4r-Oykm$GHz=g%2xArGCArCt) zhJQ)$?y=#(FTIF`G9`}U{%!JOW)Z9Z!>b@s{qEIBoN7_kLU7$NGi+yq@)vWxLg*{? zw)-HPdRN88WziP-?GgLr+GVxEXh4w^t_*<4->UY4D*vCzjjdMac9ufZfA zFFn7YvWY+;85gL|>}Sd`%%_D>BMIzZFL$UNH5pXh;pw}_CFQiYDqTYzhS#zDexS(sWm$9e|mdU5A+FxR^XNkTt!X9wEZ z^&ba&%9pI;XS3^f3P%R;IxTe+?z*9!4mzrqffuS0~5gN+SzzQ%f0} zwo0oL!UCck6B$HHZ!eKjUN236E@;=6=ba<`3#$aO@*dk3ZLAnI=1`ARI+Nn)b{p{r zY*o)+V&-jRxHCMCKv z3;!49_Jqu&SRGq@)QPXO!)9N5SCUS+__pvd*IS2+am_#!gCvhTRaz6qS5C+FhT-Kk zx}f~1@#7fJoxv&Ev(vxe)wRE#=VbrtWU8hW3_Gxfkpa9b6?nV>U*gDS`hOUP5%&<5fA0dHmwd>AqjMpCCxK6qbca+zq zhZ?8D!)<Z{9j`_oYh+SPPdf8k0b| zACeuUw%S!2B{ZE_N~D`GUg&`4jmvjas?YaPinIvTHR`l08WZe{vN1W^Led^F$)O{v z{36m~7P^m@&qLpnZ#?Z<7;_|X{zOTeNRZ*N4%rS1-$MBGF`NoOM`5r0jo*~=GhksB zRKBy^`cX|Z)P$MKEq_V_wG>&Y~nd%^`Zqf)Sh0!n?o-BeA3N zS307O*E$KO<0NF$zBPeeP?;-F&)mM$!3kkXFB`-!g{D;hM`oEJ>(Tkz&vGC*g_Y@wdRFT-?OpgCi2w z_Qi#!?ukOBf*P!TIf_6;n1*C)5XBqcLWB&UjX zo&GKN@CrNRT9|fJ7wwf{S();+%h8WuPAd4e2u3-zqG>Hx^w6wmfe{y^muev|!Uuku z^12rYDt}icB>NxH~i_QQlRq0{yPziE%SiqkF zGkxeG1xmidXNd*>RvNy9gWe96jT>I40a7rYwW!+~9SHC}?|^s@a4blT99LOfw?bKu z+JFA=m9#z^zh@ABi)O!XI{60gjOB^maF6Qg$c_RcDXLPaj*qA7p<93EqTkX!l#@5E 
zA@p81yA8J}9e8}I0r~smbjgVs%7{+Kr?5I8OdON{j8fQmx~K}nczIpRFkXnZcXard z{yhva*@0$gk`eo*FY|J$4oPKd{?h>f+6 zTZF2)ZnBH?fx2y6D8UjQj?OBJ;*K|q7r8Q^e{a5KEA~(vlYHnvybx_FLx9DiB03o#*r7weFbzl*^R=5w3Sf=9hZ*ZE`K7ZuNjuHCMPO6MhmV*o9;YFm zZP&~5CfGfNyR=xjntl_z7tbTC4l}uRbWo|4hR%KFPJ0zI?@i+mbT4EtMb=-|7D>CJ z1HXerIDHn6oa~s`AX^>>nq`2j@dh^iv`2lW+*6F>1D~fynUDPnExhPeH1kDVPJ3ZP zi3*F?eH@r1u*2UkuRJ|9lONU5`;LdA{nAa`VJjfxZGtzpHa09C)O#=b9KBQ)dQj8g z7zEaD$No2ec-`sZUo>2*blopPb&~kMd%#4TIT{Ix!-7bfzy?r7@;M(!61`i4r4e+u z!%!57%7mM^6{#>L44R2)+JpoHoss27S0>^q(^aXvm4}p9vI^VRwuIV8p=kLAR5?TL z!){JxRi9X9`dqVMLi5SKB9OlDIg1}<$P9IJ%lF4j1~=dnpZhb05kZO#v6AhxhMLS#7N>0uV`r$6f>VmFN- zZ{2vn>t<=KnSZk}nc-S|hUi~QR|U6uC$p&rg{e2YjjFq!7n*US4YA>`eNrp4ZXH+2 z!D*%+s97C-T`(&G9xc3<&c^-@9eNuy`NF*$6}0y|!7`iuv~dLk>e-^R2T#%E?%nhM z5elG(A!ztnH-<`%l3#4L-ycb?@y|m?_iZvYSxI*$^|F;s*LM!TF`s+7FS#{O} zQq!MdedVSBSqrUjtBBqPF42y+dy>vJ_^W+jnh{+qn?PPbUk@z}XQCA!FH=M@WcAT{BQce2((%+h|K?R3`U%P}DJehc$hR((q(qoh@hol=O>91<{Lp+L!QZu`*zbP zC(3UEz}=MTMMK;~&rE}2*Sjm(-$sSd!z#jq{}gzq<-H=1SjGxITkvyhi*cQP6NTuliw0=O$ibB>N4#yHdQ3;P0hX4_}(1 ztRNUBf|g4{4jo;=%Y3eS*`M6EZTrZ$8@Wq>t4T%tbOleJS~DWV-SipQquO5BDQmvd zJD=-``A}K8#?`pP^ev5{W+&6{FDvTN0uf%N(WXRj{3W{=!e>9DNXylvpVt`mmC)-h zEjesm;6_ntUWVP2Y=;wr?H!1rLBoLabE!}2&@AeP9|c%XSna_(lDW^ijxB93Pm&SA z%2DfAd*mF|Ge%kxeD&G@e?rz&8Rzpc>9KOw)n$UnO^AWIFS=zmHRLy64<jsbV`DL1p< zHW!3uT1)@irZ6%B(b@)ujCR*mWnp)0@kNazs9eQ>0lw2Qeb|h937PC8UyMT=9yZ)dPo)aTzaxL@ZLq%UGiM@R=dUnPYRvfdT@-GoS!Ek zO$JrR_o9#v4>V7gZIe1?SZB#8G)98-E)L(jApyP}aH0{<|BOHEnmieZWZby*&~c@z zPlb@@64N)7*&}tsJ+5k-hTm+BF}S|4iaY-kUyUju_A~Q zRO0|1`#P^JQZ~G?j!EqAFAUdTuka?wofMDh>{4P!2B+jOUe9!3WyEzY8#;+An#zQw ztFco%x7F^69iNF8-xk&F@6%d5g1U^fZ29KSCn$WroNs;3S-{MKm=ef>qsjwLLLPd7 zX~)rW&15J>q7o$prxgXayK77I_o-Wy?GsSZ>A8_3V;NV^#r>a^ z;)GNX?v{Vu-p1~Y#ZXQ0RX!goW&&B>#qvg-2s54K!N=B&v5x>Icdt$!)#E|X=@b+a@o>6IYGEZ)&XMc$0r z&))=}iZB5rq^gm8nw^R2{T*mn_3@?VPe9N|rWg|7UC{nB&f}deudoxxwfow6?)OVL zVCv)gwtAB?9qFP1s+AiqqTKT&&?Cr$FE@eW2cl}N#t)`U2?0LuJ~>;M60(0@FS#EM 
zguQQTiS48sP@a@7FojC}`?kI&1KLI>3HB86Tp4`%gvvOO*CPX(%xk(sud zp~dKT^GLi|K;BX%9B>)M671P}ips{&cv~+BTG#;R-Q8D@uml0uXAR!~8W3&mY~ioqeo62B@F@5){^W0)}QYepv${T4E1-Kz}laENp#d}FQcDk+zsXmctIF+ zk2@E}3_g{Io%ZZqjs!t0vgseLdUEDkQuDZX<6I|84vgn#*cCEr(jIws)nb zd*|{j*G1w$=rquF%AVp~_bsLA{AmomVL;ZP! zTZpc(l!ZZf_rb#7iO0)$;7Y38J;{Z9GA5)Wzz*{Dg*4nqcyrTqV6(Wx4hwHv-h87cE_9p!Cs+KLZ}+)RpCd0h zo+xl#{x0=uxGgmmaN6&5sL^m|?4w)-4*%*Gcmu@uFj_ci{-IggjSgj;?R zbTgyD24UeB|`Io8ni>} zkR)k+wA_1_->BbcNprlMgiNf#wc+9T8m}DUh?e$*om4VU<c+9&2u85>)L2-BD@Wx>yt4|Vbf2H-h&ZuyBHTY*m z|6K=di9+HAu_#asaVR@kQ_mChWEYOUH4$lxEPL;!%bUo%3~OGQUm|}(F}nG=z|d=! z5{T8$8pT&8^#_n1ZIUPPKx&tvgD*Jd>0Ri5Q%xx%|8li=^=njKFIM#_*21|*^s;?+ z7?oi(E|IxNn!U-+7UdmneAVH)rfsmg75rpnrz2_LPPyE;luQG;&7_6~9#PG+4hE?T z^%G%FPoeA#UDDsNR9!9@LO5hA(7CO}`sgIUrqrL9K#98BZ zw#SkXR@OTaZLx2<#R}9Vr4U~wtvqXMz3gYL2lqLdOt!@(1=s$$j_%O+vTR!lxRvWQ zUl}-Zv?VvQ^~QVQI0(k9n3L`)p*d>dK?jBBw8M^PXouu<_>*aT7gvw`=j1mUZ>avq zCxJ*Z)|00=rA`Igpl`jn4PFwteJikyt-MUC9ZzTQpbE!T^rt)b5CSZNMrz{nZ4_c? zt5WZW!sbFM%*>GES5VUF^!Y;Dp8UJA4Z98TLrD!NQ#WAm-8o#AfPcVp7ymx@=dnNi z&Gxd;A<^%IY6-&asxn=iFq3~4GQQ_@75E;HIK}iP7PXSv5w9KZbaVW)ZD#tAuE?PB zoA*8aC(LY4i;It!+as@) zEUFKA7}Tr&)B8d)b^N&A)?!@u&-sdjq4S9Vo^&@Rk3Kj*nB#knEWNulEtx9_Byy z+0>e-WJlE!;ddPX=d_6FNiwfh_tCREO%J7pr&eI*89R+iGCV z_O0*kEfX|Yx4iyGZ&2@JQKVqqtaqUEP|qUBR_c=XZR3u;J-K1U13tKq!(FA7Aq!VC z2pnb5gV)KA<}h02igh;zahhvys>8p{xRqNet6dzWW8cizV%Wt8^R3N_e;*ve$b9*L z>&t0z#bKg06qUL*a&w|*AI@|Jfszz6=vIVQHNUR+TpYdgV=!AiX^Jpxq}n4k$M^2| zgXebPONF7ELbE9L1fbb$NJYnYs0T%gig_4;mqUIJk}-Ln>y#0>!oR#vC-*OW-wBKV z<=YcyFxT6H^uAJqh<4@A*8)xc<|i$Qq{e~rI&0709ZoscOxiQFshjY{qnOF7U^6GP z0e5^k6J5c;tDoz+<$M*gazk(hE0YeXSOp!FBkXINw4mI01Kb>*ET1u}U-axR4`k1E z?~4Mq%c3q56$U}rfxaC1R9Oujr$o9He%~Qi`Z~P987m&28aG+nA^sJfvrdHeoyt#o z(0F8U{;-!YhdRlUdW8>fYO<|Nk(IrvrbZ%v-A(s)e{2&$9)C==rDkv7{_Lh6DNopt zqN$SgqkD-4C?($6FH@&MZ`Nbo`DS?kksG!}*B^-`@>uK?(YW@AOoXR1(E$-;xlbGBIT}|aPKGMYXVSO%bcl`no8$pI z>X&|nPipc(HFf!Rw6;WS&zb?dp1LEd@?cld>j_OeK*{^<<;mo=R??ru>cK`iYFyY8 zUC8zPtgI#Qw*TSsTK-&1Oo>d)68m57b-P%a$`6$Ko7fh!i 
zKJpnJpT0ye7c$+aenBV3lXZGsHWvgTKkaWuhJFxo9N-f299TwE{~QiH*{^J-(BXrK z;qf>B?Xj-cSQ`1L@%}If@1!{hL1tMLydwiLDh<0i%&Aoy9CL&>*O!KQThttr{eU&x}*z^PD?9KO&0eX;y-0bUNpraB9V z(EY6*Q1LFf4_jUI6bj}FOzmAIGe8d)zB^A_Q9dc-VUN#h1Nn_Y-?~d;u5SMIo$ty> zVr#8)JqLdm*MkcarWf1>%p!4U{j(XSghNC;Iul;dnNw4oOh!N4oOgI|i1ku#UX8Gx z(Zt68@X7t~hXrXzPl=3l?Zu23xzIzo}Wp;{B;XrMren8w+>K1!DZQStpUVzEl@-6Q# zJRK`G)a_QM@3JPoo(SgZDspQ|cuJfxp%vnw_`*OSdI+_FlXdQ9vj|#hYH_F`IfCOkSZf>rrLoQGprQp|pi$l5ERGE=={M(7-r1N-&n z+kNDG4>v#OCzoRy)^?N%e_-af?)3hCz$LVkrz`2D!nwDvElu;6co{D}kqOd++-FS# zcd2V|zCn=O40d3%-eP?FuVZ<3W&d6dQi18Sgzvw|pi<-Gu8_vtewj9@{7PoVO5TPz zAGjrd;(C5S1+wOtQ*cS~H`@-O*H+>T0QD5IW2n?2Q=dPY%MP1;tUxo$`ei+@8Arg0 zk|=51+YI@$4L7TuDDQLM*wVZ#N2Lzo#>1I|=n69HDssmSvg7gGQ{C7=U~21u%X`bE zBM_$kii}=>NTF;a*Tlk0Iy~Jyz5fR5{N+27`K&CLR4h`?V7W;X`$Q&!`l&$NK+k*s zRW|kRYm$#2wu+6^1Y{RX-Uf+&8m!;$Eb+cB44+KXoF7$xQPWI9mT#lCci%~Y3}s6} z!{L`Fa(|^NlE?bjYrk8j2%vs*Nt)N`G0GU6%MSm;dDDc@zEFf7ZIM1$9Fc%?;hla9 zN4*@;DKQ}7*mM_O=A7{7aTok*Kx|lZr}wc-uCs8<(RYO#V4jLJSWCG0L}^E(;p4aT z4Gd*YHT2zD^|d&-fNUDj zi`)4bh9C?4xK-JB?or+QXa61eNvQ8lbgPLstLsxFyk1FLempLAe9V}Cz&?svp#}Np z*iXl==Qtcelv`o8GPOkS=y6L%U5jb()LNc2=-q2^q73Zj_Jm3dz`>Jxf46*pNa)uv z17BF1W?2zEA6L1-GHXPD=HSf6}W}`d4gS21 zu(a{%CH5eiw^}yZkpP$lgFVSx#^SWLogAiX^Jim~ZRgGPep(LYZN_U<-X(OeVwSe{!AAwootR%1#A8)sbwx%ERapc%s3O-z344{|In;Aop&)`zABjP z6r6egg;XNuqv`1*))~V$wH(v<5}WnF5P9rYyv3lAp4&peb3)|GN6x{Ke5LOQG6PBB=Pb0e z5+ZU_N{V6*vQSc{DmYbS&_N_y$swW3s#0{^sAVD!G~cA&1EMPU z+GtdO2Tg(L25H(YZatURFxy0KT9)^%4@v5?c#vc@3*EW0V`|)Q&D`lrK`GJJ9`v%`7{6eC}}bcQE>_#tnG^r zzoj4|7bjPZ{)yr95n?eHkCMvQO8XyWOM}Ek1#{$>PG;iw>gTV;Bh_V+6s3ca5^iR0 zb0dPb^3(xmul+CfnK{4&JcpXi(_P{15_4LwPdcK)pQ zRb|!mYMHlP04wr9oUHuO;pvL>^s*jTtI_0Dt3!6D?W*iN5P2N1P>A+_MQ)IO_;TLy zBq&NOSBu|4&y3iy$@<#NY#8H5>(Z~piiY`xDs4X*f)~OTMRZSfrR!27Zs|y6uvk7B}C)zLeh6%XAl!V zQazdln6+spQN=B}yK4%;S#%`p%J0pF$rs7JG}e`#QxV9eOaL{qk$E|vcQ|rg(AcUS zYCS$PL7&?ef0B+KKK?uv>H4X9-RuGB3Li(`*4)&`NAqa#R5E&J6#;{EdN7tQ>`DGD z6$};oD$Y2@gN2o5z&9+LxWjSti%eUhiQcgd5|O5QT|y0|oZt3I^K7k!E=oo6e>7cn 
zTvXB41wldtq@=s0L2zggq)Soh?(Sx|bi>d?cZifA-QAti-Q5k}@ZS6Wp5M$p=gv8M z@3Zz=>(F=WptgmG3`nFuC0i%}r3e%YI57zl2Dg>8%c??^2jcn|>tuzl#x3g1`x2U- zr+>4ZvW?3_?L#&^i+czs;#HcJ&?RLx8Z&}5Y?=&c`aade=OMv^*cwvV z{Q^;`)Y%Ea3Y8370ANrx_+jyv4mf)Mf2U*92661k#vQ1oJ-(N2&rNS z*KrRydhcLUk|D-?wzcrVv9hyZPaS`Ee#+eVc`uwe-%b_=(+62B$grg(Sae{j7#DpG zE5NEDT4fk7^7zO{#01UF!EagdPffqi!SeRq_ni9vQFD&$ zFxMw$6(FVW41&f?aL!l-YToA2CN6#(`V6mXBf|N%1PRqNIyeCUpYk#WiPN*{y(}}% z+N!1b<-0)IL+V1G*a*okujKU=cw^}~6Tx6Z+SSuH0AXRgQG%K%tBZ|vB~8f`4t9{^ zVI=>t&sPE$x0VGD$3l>cjgifCZSJEqdogKtK{JRwTZmOt6EbUL~FP>Xfn2&U%=W|6G-ZF8a&kr^WW z+T~z{4etusY6JLF*{?0JY5Q@sn`SVirp2|%7+wnTM8@urxa{Y7>z@qw8I3hwtYk*5 z{`)}1UG2fKQ%w3jiNnQNfE<#Qmr1Rz^I5`ZDg*w0<9|V-X+dJ5 zQh2*N75HsTNzY88L`gp&G^J*pX`l7$)zxNIYkH^AqQh&~ofbct_e)7d*gAHx3$j50 zntvkFJ+WatD~5@KlKPD6z;Db5b-TRgFC^!NmXKMRRf<(JkN+= zJpbF#L|5;Myoj+{UXG-61N@G;rk&Ow89kJa102dEQ08>+UT^=)`cL$H)pe=SDlO-2 z!u?WgsM3z(*jJm_sS8->?#tQ{w0)_-uZClvJT~z5<}o!TuUcKI=TF~Rtbwi(6nBUL zXkk=6l}`zXH_GvI53>!h&;29&ZC@n}>&u@`LZc5JzZ73+tgYDdJ?U3rUJgB+ zW?ODM+jUv<9X91#BvUG**84FJvE%j@H_M=9nC4z4@=iq_56Ti%CP+-AEoH}+0t_32)~D=YsgLw96OLU;jU&e z3qy~)j8tauD)&p{RNjpUidTe&ri0g?eRK2nwWFz>&Xvjx>@re|4ZVP}S#DmPZv*>| zTKZT43Qak>%MQblR&{rX)I4vQo#nZoiaM|@sTiPQwq#~aRsdbB znub@?_tb$4kpdnzA`XBsZY3d8a;sb8bJO%v^BCnQXy$Y&yuFlz#YA$`ERi`1uff8d z-8xAcY*mBncg2iBYIcv~UikwE5%Ncd1x|w2 zvDJ6e)Xmq$Nmn+Sjg=wy7^P~R_RT8As;OvZFNoO?%QUO6 zYw@=1^uQ)7xO zFQ-nHSbW&ZhCazP(+rbS!IColMxg6rB6&X_iOWM--^>PZ!VcG@9p`?X(~r{*rUi5B z914MGk3&IamGX!Gq=qBlO7z^6u8SDfn9`@3rbT0v=o&R5cMTMGeQAz8Z8;|yqdLpi zo%URVy{A299(I_xdN^~ub7`PI!%uZ_ zv?mS2Qh@)a!TtWvl67^Fbd*e!v#0$u>zzq6Tu@=y`@$-tO(rK4BURw!VEJJ&l}}kg zif=17_*i;fKe<}P-yZiHA>?#SZ#mO2!rOm-?6S|!S~XRMroex}q1*}gym=Yozd#Ng zNYq}*s}yYv9btJ`minKp=Bio|UNfwGbEYxt1E}zzTj!6Y(Kqda>}(AOYdCwo$Yb>6 zPMNghm8ofaM?A)}ZnJs#C5hr<-^!~6$WebRC|5Y;;`ZuUK}Nxt$B-WGkxTH((qs)} z^~9|1+PE+wZl0i!3Dp4x&^50hmVd7@Hpo>LC}}!lGBmQO=YXA#)pB>MIz2;L&vK*y zc@|5O7`Y9uZ;|i21J_eqnfr^UbmWa8Uf@W_-y18xPFsFT5UjiFq>T2elF_Y0#z&aW zD2L+5L#WW=9?Z9&+;v`>;1e}C3L58 
zb9#Or%&pO0O?fL-qhZ}3IA0UDj;3>Ce;-EHXVJM5@+zTdidyDo}7;PKQJ! ztu?MEbg+HxZvrK~nuQ_x_jU`!tN~Omwo4=F@3+3;1HE$2CCN*oX(S{j56ddsf_4>r z{+f#wr=m?yl>!pGxl>qicE8^_l^u!oZxF!IVd9 z5%npLX3<+kJAyW~SE!5&+cC@xynX%yiKv*uO7% zTQoks-f*J2yw}1Kom%VYF@h4NjoSS1aMU1C6CDJVwzp>*D?h>-*!De_WXpkZAn37j zf?MXe73vw!08d4pfwbqMJ1I)AAr1t;r_O27eCKkjv`fd=5>I{6W3yHQWM0v3N7;Qz zA6-#Yty5~RJpl<~GBi#QMzM#+RWM z;^DY*uNR##5A>XMbp&l>R?-@OPJv2`&u0y+#yU!x#u!TE|Mnl_WU>3ATq9LaMHuP6xU>N|#6jKtr0IZ1kN~;)!5alNgA6y;gdWch#>8IR6pqNE zYCfF?MVMBU|G?qN6pX5Z-?m3~89>BN<^Lq#hJ)7AX1i8yFi|LKzlObcDUdu>;h7|fp5-AfL2QQNyo$!>_iDqs*}&=b94 z=;3~Lz9(`{0|zZ8RuITfF<B%qz$6#|AO*`sa%>aMg2~8 zxw2~u;jUyRsWMDDK9?ehM?x)|vKP8oO>bfpc7Kfc3KgD+`LKeCSO6tg(qH&Y- z;irqSf^TaIGnkdI(x5m75Y*n_jE4SfP$9S)c4;9`KpPC2(O4Gz9QNZC-=sQPks2(G z%0&4`2HBS$y}qSwH#gKFkt@}GDvMT$>?y?hO~mm&k~FxI6QJzr+pqdanW0a1ycPt&SonJ={EAi(w+0h8WDNM2C$BGu#vi(;AO+BKH1YKGJ= zDp+64#SZMepWq5sfy_8huB$BHvuUhhLJJ||R&koFm@JldW#3sb2NZXWG=BAw{=i@F zY_m``amD=en5p2oW}`|s$L7;K|C<`aThlf-P}1)t<>@d6a1uAU=Lixg;VM@Me{Z_$ zF5%7xjr3y$BZO8R2-=}uTAqJ$SrB-aCUua^%N=M)Zrv+kPv@75i!H3eEsl519!W## zJ|v+yyO8wu$ON|At?hzuZm&Hv7rB((5qnPJ5AyyUV!9bE6kzw2x-?!UJJ0eU??o8h z?Oc9pV{6(5tdhv2(z8-6mM_fXD?jUprS53Dw)Hb#kBUxIAO7uXPWm-(1DsHwrsDWx z?>we4lkmEE!cN$MRsM}VwZ+e8150_@^d?_gF4Al8$p&wmZWl41ES)j`q>sLdE9M1Y zT@Cay^SQY^AMxkzPe*>jEvsc3gi>dbe-|`6s%$y|qi52u>J5@o-RC^db2E)FjaUbt zWP~k^)EZ9Q`PbYGW9}N?Tj7?YDgnu8tGeJA*ULPw#ajHBSOb-4VWcsYcP5%-3>Dll zZr_{NQZJ5US&CXh)jP4)p9wHB`IoWm?}LBpNUP9lSoBae5r4s*Y4<0&OgA41p@Jm{ z)W8E^B^0^3>|BYg+tg~q9{ID?cgY#88evkk4`!IZ5UQ6=6s%(=rT=V9_VUc-U|oL} zkJ(oNHg%XSqp)1|l&WqZq_KWA3B-$WGP;Fa{bV-L;ZFQ~W7ra~s?I zGrh8rd2tgNWwp|?d^IrJlx5p4S@!HWkucu;tK@INvMc_>1Dh8GcA|R20g7}BGusi0 zB_@Mkaj4`Xb@=`7B5gz%llN67mJJ-hCrvw2Rp*WZRj_L06w-uA^=Oz1`o_mi{c&e9 z9Lsi*1aBD-b>gEY_q2Qa1&a6?u8~W2cEE~4tI1_?EI;K-N-Z`|zKQT%zd_C+LnEEy zL!>QM42+;%^`*NKxa>Jak%`;#QEIRb{-?(BC;c%rZP(|wUfH2Ea$au5Po(%-J$ zje2OoSCgB>k&}ssb0d!YP^}S>6f{Y9<6H3ZFu@D{36V^6qGug@>df$}uCmnp(2IqE zjQz8An;Khlfe+6duML8_FIkz`6(!>Y z{D&R4v9i~L$>*JG-^I7Nb`XP 
zELC!wX6MQ;bS@8mwFl#2?$>t$Ut8~1v?uF&To9|^8`%~_tSb!uv2Hn367-$gZFZL8&k6mBNXlwH#U< z*7>t9!IV_zoYP@)(1+bkyr#7vz+3-#Abj8)4;%Xfjg`)2zym=A+yzkQ|FiT!g5mBD=yi0`#AhX|b#*mE;*|}0o0cBxwA?nG z9H{+kc6&qjTFTL@iBiyRmv433YpaJ?pR?w!aXr$besk-MTF5Y!;JjLNR?w_IQCpq z+g;PjgGsxrw&!fjU#>nso~O3Bne)br~mDyp?}p>EbKlgP*V2ZHtDWTqabBDmWxCKlcqrL@l^>Ga0n(hhuA%j=>Sgw zHEo5F&TO-~SmD!c##UzRsrA7$+w16kyyUMeHZll$st80WeuR)pyTgYIKkJ~HbroM_ zTQe6ENp`9@sYyJ3-*@}YOR1=GU{y!dNj8$>>)Secwi8?Ry@K8NjnShSCsb5I#?T5x z@_pZ9vT0&G*nx`o1z+qK6Q0EC*F(ag&I7p_ob1_O2F8bn)NQX!O2rzPc!K6FIwofW z!b*OL*p6c>1wa2@yb@nYwkS1kjs%*tnj;bCk&{J_VJs01S1Cyf!cd_0n)awl+OGE& zqGy!($aKwWVDr_F5&^oAjYtLbQ>-Jdn@uSH0Uh)zboARzZ^_d~tb8Q6^Zx1WX?C^m z)!8f1xoy+TO{Qb})K4N#_%;yz6>m?zXnI-aRAjuVHf8=?NBrP3xBVF7_T0Y-MgtXE zfa*vfcH8I)&Rxs({Se-)3$)7Jf$Cd{HQrgROs&taWucg_Tf2^K56MA5`o(%6<24g? z+!wIP<(vvcX4a5n(vhk}<>I2M{&(+=RhjyeF3q;Mn2>Gr`w(vb#<7O%A;)OVebruk z-qVm~Su$V0R`DPt?7yzRa0=D;UFcr#{Mp(g_cYV2_S(0q*`Cqp5#4SpsmVV<_N}pL zpHczH34TUy9)y^)PmVPKUEuQoWA@Cv9U|%7&yyD%y*@kydwinMSJ~oZF*iyXXkU>) z^bOAf7U#Gmr0(+>4gvqBJk7iF_K-T<#r9iw7mpzW9d?*g@dr43sI7upI=7)r$raP2 zj}*HpELF=ZPWtQw34y+d_VMdP(Y;%T+Oo%LHx=8@pL3Zir04|z1Y|xvWTm6Noi6UL z+T6`dVVQVLBLt2Y=y6~L?4pe%ipx2j!XlaMKM(gPO0>x{i7F8rMx0^1$kcHbhQ?om zsys5Qn$+v=_r4_f%=!6dr(0Z0DP-dkz?86~6QYKsXd+YVP@Yw~9?`=|FbGjv^_wgW zGRfqrz5Qem?cO*i(RYO#daRwqo&1!i{6&vgZ`u3|;I_-Q>soXAs61p~LU@c-r-eT3 zdM|M0W6x(;5eWU5he!(~uMukSv^_eY*$(pTXugLD)P()?n&3!R7Hm84SI%{6xgGrd z4AU1}>~r9+^=awlYnSPcLfm%q@O!`<(f=MMbrw>m2C8gG4r^fyhE(-m*r@`q=&j5* zl!7%tL~HgcoX=21M(Q4SX_|bOZbH+|SJB9^X;@W6c0Z`cK_CFDa0-1xFQ%JeRBX=; z`Ldp1djWNsrT+?*XspdPt4`)_~5{B1>PN57~s>vOFS-Mvu`{S@T^UOi?tApX+2?=VCk~ z>)!Ly@gVx@4n-$JUPVMzcKr5nEiILK@s^=Iues*x(xJp>;dp7*jG7M$PYpuf#~w0x zm?wXbdM~cMsCiCGriVmdWM^-Rzc+{HZhD0 z!U#_RP~wJ1O`yI9<_!p2U%?EG#W<^kuN)w4qQ=$NCZOh%;yy?>9p{bMKMWv?C?`l1t zVVVqAt96aPu=>l$09ShRjDlB1B@NlkZBmN!i!qiW&i02pR^(LMjrE`PKV>^E5ot+_ zgn#q1t>LXzaRv+g9>{QORvHT;=kgV&MkfVklGZGY;yEqWfoyY;4P+&!&zpIkgxb^~ z(Vevkx2mE3D^*H$qxhN8q8!{##22AE1z(KsX~Jnm(AbxvL0tJOrBS|BU3&A5a9Q4_ 
zk#^e<_lq?$GCB2%7j5%C(y8B|$RjS!V$ql+vCR9Y5dQ+%$XU-Q;vVLYe5kuk(g#2t zZ2>1JPqf{AUubNfk8eyg`@FS}heb5s}7S@khmsE;}MLbns0f zuJulsFauKbdF^C?>f{Mb>XI@cUw37>H2Gv@Z`*GmDP}2TvG3UMoR5|IBBWxsRQh9= zeyz{2-IXDQgIZ*(S*1AxnOo(e%#2GUR)4mGdQhs)HKa5 zx%de=GkV>;;rC>b8jMj_;Y?3x_Xc@$si8gN zA9!04HcpUR@WA+NP^v4L(8w8ErJY+nptw4TymhRDX=kHwK-?FKC!$Mz24dT(qM=8t(i`_;rg3s$-DJjtN`v92fx;V*inMUw|e7e#1Ass^>nK@Azr1C9UdkmR&yT@N}Yt% zA3O@dae(~$6Z3golcKBvhyBUSM-9_GW9zYwtiTI_xd#^ae@Y^p4)=7(^E^(Q|5JkP zwRQoS8FEGMbelSr-%Cr=PlWS6&-7CBKGPE(3SF}9&YFmDn};;lZ)ittzGgSvc6mF(_RTEJ8QIN40b(jr#2a_UHL)AeIE^3c}l4FUWD*+@E?>f4ML;lk+ zewV?Qg11#yjJ?XUM=-jO>CV2CThIH}5@rLFwb;weOjN6qk8ayBht9GWMm5*Qbo8Be zS+tlzi%Y4dmm6V>&9K#&>iISikKP3oSN|RVzTGfwch+rhLLgT3n?1dR!hzn~vCxk) z<4D-eLhn`iAtKne?sHYs5hyhN5DdXgpY3);>)~)K#K)WwIo?D&UD>h!I+O)aN^(wd zq$Y^O7>@5npq1!_Kr2cRV}aET1A@jBhvU6les0ZUeX%>wKH(*iw~A(h`+vFrs2eoG8f*gA`9;bbptS9^ zJA4FVFXp|hsJyQ8S#g*8;T+k(@+=^!Gz7vQbmWH(ig z)mC9W?vvd7ril51ZGIHhD3<7cj6#?1As+@@q-48Rh3xjgFU(j@~qI^OkeWB45in3d2E?11hcos(-J5_i)0NzY05Zocqo0YiUtxx{Ow-1*P ze6?nX@#)U00X8%}tLO;yn{JL)GwBK$%niCQww}1dZ^T!HrVI)B2;aM@G^6_mJfA?D zX#d&OFRnMQGae^Hgi+Rcc)OJiyJ(13YuayxcfX3qx;xnSi%HiXy&lk`*_LPrMR*IckE=2W(u>>Pz2lZ!>WIaf|6A}kO2LgYb$Ty# zdH^Qn?dW@brQ&DWMdr>ntY7{uNQauqhb5t zMZ~Em*vcrL$kTeh`VsxDDDC}DOquFTNaJ4%gTdzvPNobxGPcZbSeo(PAYFH`@~)YEo@EG zJdBNe(J;foY^-C4h^c~gne~-i8ZvZF*}?E;Im}S%yNNE0wBn>W3x!HyK9^|D-coY% zNY^(n!z)=ig9wT{@kw|(BZ%`-`6q_@t|yQ#{jJn?`lcK%StVK_dsi}@Ii%aG`xa(8 z51W&0_bK}W@h#EjxdTUYEYS0DDJ(~xtv3r+Si!j2;pVyO5Jn5QleN#1X^5+M zKHz?>5w)PlVC# zFdDg@^$|DhuaS|%6uIoFj)vr#!IPH2Sqc9M0W@tr<`PtDy8utzi7C3+lOwv{8X-hw zwC-I>J=nu_L0M>XGN)@@eos1i=i;XaLvxi^l7)o@FebQCV3})-oplkGXK`zFS*JZa z0tt=z@=t#>pP>O%tdO!k2c z-Usz4?(gU_)aTqiVy81zI2Qtn(61+2_vqVt!iuR+_c$VkF?hJ7=K>H&g$WOA`*62< zLKXhPoQ>8C2e=7-`X3f#pmqdqh_}OdAS}Ae{w(I~JCVq`kMV7~{}O9m@d@5Cy*jFb z0H&R`hVUyV0<9b$k&3VaUVS+IF!5%*$6_N#@$(us|1Th7~HXQ0-;+ z?=mPnvopuqY!uFQn{c`C;@hUmJSOjGlWo>MI+)-KsG3&lAVzS0JCT=-O8@V)*qR6m z%W45q8h63rgPtmC_{zs(++{v@k|Tjyl;tK1qWokbv+c^mV567))&WXP+Iy-V$z=y< 
z1Ty&Eocz<|I@&JM;qKu6r8(2vCRF-Mg07-Y-bPdR^zO`u!wKOF(o9HXS85-u=_*;9 z(p=USZYrmKFWVzjnZkaGnUE+zCO>SZ523rSES}(B?lQR_M^Aa4>+{5MbZnb0g4NQo zn0G6m@2#EbSzX1+CBiN)b_+aGFi3zrDjA%L&3zKV z+HjB_qfw>ISqNM!n76?en*L5wc3dr8C^}Qix$%dythim@U0jJCj1WkV6% zCKliCw`RPj@VaSR_FtQ|88-s` zRsrTQ6XrKRJrrMdVl*Wmu3b($PXD#~PS@HCW&YFrH(o!W4TgF;j!B|9ia%r`Jb?R4 z8!Exbmu4ZoZ@<&?V)<6cCio#YJ4scu(L<47(;y5i*9{!3LhbJYJO&F3xb0)l;#TYhS zdW2%~gChSk1O>7`n0u~Y$`=}MnP=@HBGKQs|0JOl3K&Nw37r7%w@D$q>ps-dTy-o7 z>p=NgGJ@3GoV{l6^bix~ar3YFWuuA1O9S#WpQ1NByANp|D%Kx&T+rrPD*-+q0G5-H*04@llRBxf7>$=aAv-J zlq}v9!Y+ejuP=h7m;B)o2l@;hF#_PmX@u3jaNv=+5g*YAx^>D$y`;&Gb;?V=s!z1pWj8bnWXMF5bUy@0NH?wsY?_ulcQ(Ltx)TZ3UW- zQ66GjyozZ-84U)p|2y9lBV}XN3jm7;>Td=0QeO5^Gt1t?mJ@uAZ*rz)bzdC93L53VXxNZdO2% zhJPD^+2Q31Tcm%kK%8t>*nx(dM~O9;S0pd4eScf}h@gXks^A00{25<&lC!^Wubh+ZyoM*QqF~+JG7P!G7KL z%xC@iToD!5b%;+d@u-ZcI^xo-sZ|mPc~@Y*KX~{Wb!DR_KK%d$O5T zCW$J`Kxy&4%DJN?E^v(B-9yxshzjH91BPfgt`5J zN-c9DDCPTb-#fTv0~~#lJ5qn6!_6N(LLj;Z=ZIXy>5=YDD$74uWk31MX+Y%sbEJy% z#ISi3zp{0^Q+!y&ISC2kQ?{c40hi-xFBu=U>L1~uWf^~)puKg7%9w?9UDFdU#XvDF z8E@@B=!belMu4c8ckH=GH`Ng~TP1kxR2wUlVg5fhmM868`7dzk%de#6EEYJRZ8!w8 zubvPMt(}>^#!{o^e2vw2?$bUd9+=^q2h6*vB70gD zLr+(vtb00|jMJx3x$%)+9QI~;0oQ9QMG2VlS0T_6^S@SObjY=m+qeogxC2s3zH3w( zm`>|&(v_&+tJO|$M(dT*D`aJ&T+1|5B}b>?L|SsbeFI-szCk75=#S%UJRo3uTQOKF zzsYr|KR%q`KnL#BcC^e^P{i8`0G2r|U{CmieohuNWzLr8{S2>Qx zUSnbk!X#g?y3Q`q;w}lcbdWe>KZ~{C^0k+`j0-7m74g0Cin&bTq}~2NecH!%Lsi6y zK*w{Aviv}nj{MK1mVdDkb#NZG2MEwI9;Qhiwt@S|P6wJ#%crS_|JyQWe;l}4ugu=B zh;S$Omil4Sv9LB#LBgV#CU9Nl(+4rTC(E&CU>OdHJIZ(P?e>=rdnFqZ&k*C2^z>e@ zT~36Yw;J{nyy~HaXh#VKbP`u$B&TY+qpR7FkbPtreRjygE92Q@eAD{?KRxfCu| zCl@1Up+aS(3TQ18wxeq%CEqq*!Rdd7to@)ZW($+l0~-qfOSRlfEPfk^3e(&Pdsu*? 
zbLnz#MXiKlSUE3LRotbM16%e!`V%|+al@eJc?3B|(df5H4Tnh3OcsuP_ylT)t}c96 zRrt-#w!;{T(EQ0{g^piV&e#_>4TD$r9?Ut;QQFc8CAm4b^Kjke$&|AnF$6Th~q%YF6!|YDQxdWjl%{|6N)3D?JZKC->xj zR12~#UT`=Dt2bz#N^dhCLTrYN^k&j)(EppG<%EW_Rrr=o!p?=sF6#P08`=B9E?{xm zb??a^PL85yn(I68Fg5E(eZS)96fpW{_1D1Z^r5C>Hf2|W95AdDxS^*;N6iyPn_7<; zY*^@LfZlc+@o4<=yq_4bm@nLzD6|5ClH}D@NUo8a{V~|TqC~nXm+<12na=89U<5N~ zm+}}$rQHOipR1?J2tyXP(jY;3`%;e9oMrn+ew00IUYjD$Pq5g&TF1}h)20i&y!yVL z?_9_rFc*VAhIsba7{~Xu3yPF}SGs{btEyBwCN2U!HIJ_F&zbodYft9?YYIbKvd%@(qF7N;Q>Mco9tK!pa4Y<__C3;B6_8zby zTd{BLc{97C-7Ng`{rJjWM~!%>3djR5?w-x|gI~EW56d3_+*A{V8yv#c*xKP$q#Dif z3){%_BoXg=@sA==Vw^~AM7S)x$L@|Xw&eZT0P z)bb`i^guwFXd#7f)#(4NdIKZ;4!P34(6`OpP#85Z553{y>eUs9r6I2SK`$RI5bTKe zc9M**uMZXLh2C>S|!vU=z3YdtV>z1so&n{V(9O~PZ9~6i29Jplis5@U)PP^|atwXI=$rDy5X5jGB^hg~trFm?wJtmg(ZvPLDDzzh-tc|Ew2EK{pLQ~SZo$;;y@ zB+L*ir0CHn`@9yh_9>+JE4hA-my|FtV6|*t8X90nRVicrVZ$_HrxI%jNu(|Ex@+~+ z{NR(xR1&~#VyPTCYi%DBvu2A%*K-K!$QACitPg`3y3!zM?4d@pn=<+<{B&@4Z0q=R zMQ%m6jp2Np=wpRG%ga^O%ufJI=I$Z|pB>K0U^O*L&=$Y_KtPSbRS7+I!kQoYY?gzc zf|Ru%ALsYme;wEx^ES9;n=zEd=)Q3)g9IOgt$_|r#k=Mwt#=uw2Y0Qa-pDB}RuhYG zDA$K=g8T*bEU|j*@LGFM;PaFD7F!z$TkCnDDb^nu{Ej3T(Bb)1*?;t zZN>)|H4tb*s)Rk!OH3Tqv$Mn6+TNbmq4^{UAENM(lAa!zltc*AZWoke4pT@E{#Bq>%V7?} zP^^F6i8>c2lVj_)mU{mlcyxXQExsoV0r=&b@Sdf_$Q5?14%OW`y?D%)bNjNQ20&ta zBl^#se3O(L8up|1PLUUO41IAMc?UV`_xvVC0ZLdk(=!f9{oaTj7(*@qjiqxv@!O%y z*lhUVrA88+6aDbJ02%c{rtn_BldbI_G0HN z$oW9kxIBcSNS-j`DGH@am_`EF-rCZg$d_m5=hw*PO={(MBteH4NWlpwiVG+D)mw|p z+VQcrziuCu*x=;lDmAg;U0$^$^vYRuwkv5{K^#>4{JW(YdLv4~W8`3^9lEn(y!u@6 zN(`d`zJ|&{F}qfZzZ(}mg9MfLK=pS;xdELF|y z;KoWHD?kdzP)CQJ#h{@;6kmAbRFZDU>z4YRO@IgPdw+qV{si_Wd;>mX5MtSaoqW&P z<4RmZQCnfXg^TKzCX@AKSKr;>*y5Y`H1}|L7$b9<#4UsS>j?mGt?9pikjpzQRaW*0~hf zGEJYx#OQ&ohrIbcrg$yEWTAxPN3vbwXYXuGk!O$;;riS!V_&STyIp_o#5u!i8~rY* zXv{x73Tot!b=&v-KAdCQRDTnOI&hlA#gS=O5Z};y&jCu30sEG}y${PiSwfrXx5+D2 zmd0LZs|T7X(705+mY8-m3hcZX_B*?XsYRe{;=$ZFh3S8zlYKf@hvj0J6L`+8Gxj@B zWvzU7IaxY5_~ZTQA;<8)Ib$muHQgFD*|gbWbZJAhrID%F(lvgPx7)oxvA5fADaO6C 
zNw+D{&H_gLprRIp(w`U^*^+Sh%qOXX1uSNoP;#O~|7csb{4_r7#j}Fl0l}h(pRAIu z4VDcNDim~<8+f=LVv>G5CM<9oCR6szIgGUE2L2UMr7TW0#1&Y|^UkjRH*Idx}X zif7f8)cjPFb>@19{Y-NS|FZ49D5t{6qtdym-}duT-ItdP28+wEYeg{0t)Y3;QU5vt z+HKjY*3O%__eSHKE|qV;R5;vU9Uj_WIfbB-JP(xXkZdEuC{JIbM%rXeagfKnE2K4| z@T9pUip!bTZLHoMC+g1{3J)Sh4w3pBNMf`-jESYWEC(3;sbwc$#{yTIxg+KC$ah>M zSXp^ZXw-CUZPor=dUNY97n~ca9myY9!!?Nb0Ullrsy&2A3?Bk@A3~)6Ka#G3Ey}Lz zQi6!Ih;(;JNk~aI3P^{-P}1Gpq=0nyfP{c__s|_fBV99e4&5Km`~8P=?X&jUD}aRC zdnz%b_G+ z0y4P&foq@GgT(hjpG>|_ep4i)c9vAg5R^XUTrEj3b##O<@ZgHo<3@kT-&zXjCv!Uc zO!nbbx|Ih#d;0496c}9t8(A2)<-w}*!OEh0WA|Nt}!%bWFX6;aCF)6c(h{K z>m}q6ark4YVl{n5=;K)W*crh^zuKaA-)5!9W^Pd)WfymZMi6~x=F4r@!=cK!W1w97 zFlB?iiqj;!4|MZ|2lV)6WU6{QbhV=VX$0;eknC~cd2yhJ|7RdEQ0?-p{pp`2#;gBW zr1v(UoDc?1HMzCt9&5d1NP36oJAw?$nWoCh@vU&d;%6)~aNVwr^Gy6rIGo4y37f}B z*tYs<(m`P^FmwsC(kzd}!BpC+Spir<{i@ z?;qkKr)4b*o1N2tl6OQ7M+&a-6;uO}T~)hIAFmVrmSlT2Hg3rc z4}JzeL9JH9zCE)gsxHGa=^rUQM^|>oAVDTSL)IlWBv(xy@}l!So891@WG8T0_tM9W z%O5$r`T~F;6`&Lh=Vrc_99=cdxy}CnSdptgq#`iB=_4Zn=Tc3k6lfHgYW`@R z9)xhxazO1HY!#Z9PH7^4iG6#@ys91Nxr0l>?reTf&)6vx`4LVI6GS zc9E4xb96uZg--s7d7MP+r>};7<0C%>z5fAQmUBVC_w*Wxl}w4FDXa$Oycfimrkodd zEjp*hJl*ax^b=mFo4ucjXZ^H!67eK03vlGAAHL>gaUsn0IE zJ1Lz~7bZQ^r5?wC05w9`a?il+_$9k6BZGCqI6o+UKHi};vs&!ztlZ7XliFz$s7B1j zo)>KU+Y3vWy@aA9upLXyqNB-%5VTmES_%h2cfNbM$Q84Cx&kj^4-(tkp`5z_f z`?>X?=DE$wg*F!c(HQF8xXF1=bQka0My>XtwkCFhZoFG$*gx5b$oGl6x`5Yt==pGz ziBvrNu;QcmaU;CrTM?P1eV9H4m>HfJek99v7Vue*p^8qK_;J?WNk|=lw$0z}00CoA z1vlZ}{6I^HMfEJ~U65fZzcSjRxL;^{T7Ef`c)8x|>sD=~yWES5;|rC8)~tt28r45z z=p7cNx2d6Kr?NZ61Y1Zze33fr;Jkzl1JBA^CMjnG1Y`CJY4O`Zv94 zsaF$IKJH(1zN2KI-T_MiDlOz`lYN-rNYqF^$27C>;V^pHreL4V;24fJIhSOIE<(3E zz-gTC9727%<25vhdGjgav%n$AE^aLOOXS13^rJRYRR?Aj$9^BD%r_kJrd)3=g)#n4 zk!ct6Dw1JxX|3LX<94->=a!jBY|}*BLleKaXW9dJ3+VV&>;Nm9gCbUw(LC5WHr=$; z`%em1zICfbtAw&W5+|FraMJ#$okCSG1h^-1|xl(pl9%)VQo{ zB|f(h|L9W$lL87G(&T9Z=L7B=e z6*mv~A~a-*G`Gh6eO<6m&DL;tqIaI|+uH@vMAy&WK*hwJaoZ|R-|X_0A=!n`bWT#n ze*VmFWsF#Xo-gn-QDKRLJ`<0mFtIdf`|}Mz&}HXyEs(rWXFZpmfcgB@Q9GWjbzY`i 
zu8ELHm=K1m8Z<`-vr)!$=asU47{3<(h41x`%mQUXs{dl&BENr0*a*27VRg^>f&a+ z{VHxgpmjnfnkrK)S>@$|aS{x+K}x7lJHP4duWH&KQZTANjA0^}hfl9SUgp&;theu~ z{X;x5jW?}!wA`=2hiRmHjGw-I z1ELPXR-J>KDNMz^qpYJ5KZ;s0svix#ndBSQ|F~r}_t8dcY=pEQt%UKZesR;JM4DgQ zxp}%~Z(MuvF~jcx?|#M=w{xO+z*(i}UvJIe`{Ci_}^bW~Rz^m1hXpGBk zJwhr~XHCxFevxm+v!{1;bhOmgT+-aGKc=wFc*1Mw85F8AjynifR1Kn&J@vy5p3_7IU+6i5qwk!?IAFxBA7Ya~GYTxgu*rrs?x+GW?4Ah7 zeE=WhmnGsPc?xoKWZHD^t%oPAd46~)NCt*6(0|PKxn6M-`uJ?@7#!o!r_ts4Njelp zN9%?3T-iEC^Jk-`>nN2gbaac|27TvWd+`u?291|%c?y(PW?rT(>Hc*Kj#HYq_S>cd zQh#~s>|SNG*a$+;ql2(9y4{zmi44;XP-|cuv!4&|VsC&&sAb!|bd-=_5FXU}u0c{S z0Gi@bcsI1Cp5bcJB+{-SR>DiU%FU!0$9#??QipX*AILLfKb^FDfb4`2p62(Le5tg| z((d7|;nDnLWVu#j7n(oLnG7}@Joau@Gi$-J#hh$&3ZcBFS_M&S`JjK%KNXv-2?g4N za+vh&>feqjsxZ;gSMXDH^|Vk_mtp=Jg;5Fe3=p?CVc()-5J-bNUV6N{DKBC`y@A9X zQ~UcgL8gM4t?ESHJa`!Xrpw*V@g}C2}WMiNi14W zYjryzs`_y|%6Z*76#a*%9fAMmiN06qi_^c!UeD06Nig^sRzyJgfNg5qPhL%|Ant57i{meuGk42&evxS`4 zAKjLi9=-G#M)fH9keoczJ1d_3)_2hUA9kCuOm5YQv{iA!RFFJ%u8n48lH`K zU}z1wx?o)y!&7ac7HmcFaiP@~RCDA&krTOzhM848+=xhava;Ru#7O z7jgPi#(V3DBCSTDnUNd|WcGM_L3%?)yNvr%!zQ5l+0HZZU5T#aj-@{O5BH$zfCp=sbQ?6=@lnYe>)syaCG$vun+!tpB!MOB@qGgO)4U~a&Xo&omY#s5S^ZlW_M75Kw zo&08(G_f^xU3AXmlvO2SL_|Nm5^Nqnofa_S5*fgcdr{n~JBbV{26Q(h5b*_I!c#*X zBb?UP)8&TRTc1RJetWS1mva5&DsxIxA0&;0V=%eg>w}J@?wsP@TV0AOf3YyA;w|5N za;XsG*(_i*Qfg=^F6PY!vkf|@koy)n3B&@?#d;cM$e99Lab>!0)SkIDx+6 zNKS9P<`d+)ULAoZK&CR+N?Q&5gKcH>wXHBM^yUmbr&j8+Rr2pZrxw|vm6 zS+Vcr2${G2Y>q+76~T_4Nj8XkB&P=u1pE-Gfy1J)RlrjFrpZVs8GbobwIA1sq5sQ> zySNof4XWwXYo$=_sn}(F(z<>ubcR-FxQNplUdC%g{~loiluBka4k>FKRP*~T`&vG% z^z~MhZZR88>0|9%=kb(@h$}#!@=+8<<#1epLe@@}fZ%P@&~#zXS8K&W|KmMS=n3%I z>XPZ7wGP4C7>~zKuh@|Hrpj4u&)KvH!AO~ko4|8~IvxA3lO5_p>8? 
zG1UHOasVYOO!>P~Rlf4}^)Zqx!zg>jE0S`7N9H$LnUTR89cBlsw+qDUYk9a`N81KC zyfw8;Uq;mDBl$Rmah_c6m{5A4Bvp`>SKF%5K0Da0K^_MvmdEgDF8xNdg*JQtAY}XqtA| zU-<*GXr_Y3HeyIs+Z@-WV}^gEnwdzx?;@tB!2`d9YNn%HGZqZaSB?m>)QG>dB*QLv zvJF6U^}d|OLC_wH@AvV^+NUTMpjfU%{BYj&=FB}$j>HXeMbU`dTS^j1O8$q^5MBhU(*Y1W?7|LCXNoXxA)*_mB-&#MveCFVy1_@( zHmd7ZB=*8nrk(DFD&Arw-WH1h>Q4v!=)WjM5nkQ1Q)l9zIv5t!$dA~Gv4vv~eD>ru zK`hOSF?|Q2hl2z5Ob5+ABH%|R zCgoEG!*+14C-rzdVHR%Z>-LChWLCmTdb$uU&arOMr;``gd)rU7)X(8mMB;CSqwW*X zEJTf-_|uLp1^FKZt#FS3JjhK$c3}TeyQq#feThu`0qmfMdp@L>G>N0f(Tc=7l5k+u zjwgZrtcWhMEp+p(c|P%HNr%|dB|tdP7Eggq?~lkgPi&oK4U0JVXbEr8xTnNbB17?u zXk9WCHnxh4o)n?WIR?_k>UF1mF{G8Z^G;;zqdupPMCjsyeo~$T~!0(SMwnoGE=43k3Z}_2QN*1*HkE{~{I>_-K5j?h4>6(Y^WP1(pYG z`n+1q`QNOtxmP&5+L~(M3rg=cq~pkBsYK7<-it*a#Gt%;yW~4;vHoxAH)oop7ak~8 zby6*ItSEMzePUk_^GD~* zg#D52j3zp2q|aM((ugXakA;8WO&&4miJ3 zc>5rSMn^}AjA;Hn*k+paYh6m=&D&-N6=!o(JQXv-?<2;wGTAQ}eCnarS>2B5=r8cu zL7{+1qOTLBLO=<5(C43lFlst`QiiHc)@o5pLv>wboSe%%tR2oOY!ha*^dc6CR=^OU|RcaP9LWi2D)jI~(DzNF4($UTWVs0K0gz>f6* zZ<|3Jbr*#iv~0gOYS9;fKA|CT5Jr{iQ&t|0FW#qpw5fR3p*j2rO8#4cKIjR*GM?45 zUwK-4bWoCyhZc@q3P`c$4g6bnLa25G-Mf}386=i z7>!qKfML(fz{n}3urYhWEGRmjU)4zI5Uuul3Dv_$$cvxSZv($^JGnhr6zVBvBYw84 z;1+h6sOj0>slcpIG$<5#-j!uMHP$;8`FF%)$;~E_^YY7vF*sQ`7L<~rILdK>02kL8 zoi_pbncot7DzrlDR#J@BT=~DfO|0c*s&Gf-s-W=|d2ZHaw!&TiPT1Drg%vB3@N7ex z*PW;W3T@>A<}Q# =#wbo)^Yo|d^EgT}7mR@P_690J`7 z4JG;(*E;JfKdB_92T9~*r7Hd*TZ@!9X7L~0B%#K1#0q-VDbOz$siL z@1;uete+S7)b-|G4GC;1-y9_CQ2z3)zoktWx?=S|T^@gDc=rr7Zo`<}&xZSil*Vy_x}&5E3@m%4G1q|x#UV-P{W zdfS4(ZXnKDY~eU5mNjjGBe8VHZmYOz$;>B11Bty%@9OVdDf~L zZc{FtE)EynN7Ozk5hzL0NRBEPMi)$HFZ--+KS!t*W=P<%Jb2$o>B7a=rTB*kft6Nw zPd?QjUd?<{1e!}de%gSr9@a3&6M8iz8mO$Xc{VDN7cF5itPJeasJ2F__K-~^x0cEs z3ZcN8+04rNw}CcPirsgeO~RHb72-1w&nklMVsv^Te75BXoipj6^X@h&HG0*8)As-T zqv@EB3E2C9q4RZxs=fS^(8AF7p5Xm|OP{Vop}quL-H3C1PfaNN)Tu2niuRK>p{kDe z1h-*HQb5(w39M_*V2T)C2Z zOuX?2=R>@!`Q94=AJ+#6W4d^`d=>V6vo2xufh_ULEMMsBgn3v$3v)VZ3z57y2v8jk zN>EP0SEr;F$L)Owl8#h@efGxQpbSo=iOCLQPdWaYdgzTA*GfO!{+;@DXH)L01uwVa 
zey3eYdXTduPepXHuuE}ks`hjm5Ld*ROq{Fj8Tnj}qJ12_+qcN%zeL8+5UCtYzU*H6 zSJ~U(!MfYDJ1DdM-=wse#HcB;U25iy&~N?3$(UhRdb4I6YAn#C_L*Dt zy(Oi2(W_5W1zzo*$(&FlpshSfK}5|!%NYDa6w|QUn(+L-Li-~~4144VW$cWD7z;nae1gR|!pnnTfo zZr2KpSI>XOOensTE6s)9(}9D2twc|lA*UWNND+epL6!NyE4hs^Ge?ZEUfz`kh6;Ew^PrM2 zlJ4Sj=s9}Q3<<#Yslmku$&2ljn_`Hpv3BR=2*#-hM)rVI^sFv!f;QuMnKWQ|8r!ayK>~QLP?wvd^BPk zJZKNCyQWlkUmp(i;MO@s+LJ)1zoEZ)dt(kyy1@6#dOiG=ZOT8sAz5v;?c%1UWQ7-R zE=SJoWeLvf;!H^5{MF<36%pV3Mx#!(Vey${d%A%xS2c$vZuYp0*7vw1EwSH(z-+Zw zA_~8d5#Y(lcyVKF(eXeRwI)(kDQuM8xV;j(Rv~s*aCtmfC|gm<`|kCoX`RBDKAC#L zW^@Z>@JvTI>|?dhPPWFkd;8=}GSv2e_r6c_i%66*)??2uwT~X>D~sz#T)vEaMVayg z(G*`Wg<*AHK)Z^R3w0iAbqsrX$raYlX!L@|H^5}n$-%J?iOoVab{O8qY_ydmJheOV zoQ#v3faynCZ^Cf&RP~y)jSwrwGPG3VWwq~KRW(je?>F~#F;6e|FS71<6tG~YQ^aSrCBx0{zagn`2IF{e zJn&IVOjbIch*6qf1`?Q$wvJ66PpPSCWD$MT;ygl1oC{fYe6$Nsj_4f`P9dUraCq>6 zP#_N$W{}3TseS*vm7x8;Wo6d2k^l}j-!%YB`V?ci2OZJD4h8#8$Kjoe>M_Z>YH!~B zQc>IgV}iQV^nnxPlRc*(;-*;Ed@oQj45-^_t&vWk<}N`-3g!1%+f5`idn~p!`yjVx z6X7+OrZqr+j`4CTzR6w1}WoiH9nH0Ts;yh%4M8<4?byX_RWgAHGAiD z2$MNe;eMA;gxA5;kJ}sd_K{Z^`>bbwMWG3g_Z5V(Nx#>7HrKxRb4(%C@K%050m|$} zUp4sLj9p#}5g&y_q_-<{t<%Y^$33Xk_n%=!ybr$~xDcpox5)Ms_IbMU-Byq9%6_~V zrQ5VwY<}^lP@t0+?DsbFs?@pTo1ktgkmFHEfAke0+m? 
zDes_;bjMN3W4%7Y8XbCStw(xzOXf7Fq%x-dC_8xAlANFl=lgvT_QCxf}_I~tuWq;&lFR(DRu#p?+9)c<5Y z*yMN;F}iy7?0M~vW@M7iTyu$hhwL9X@B^T^A(BP%H+}0HUmCfOPlf)R-#;I;x9J*97>>w3)w+xfl8L+I#YI&fDCz3Aq3i zHt3A>jiw2eexpU?99op+bd&~*W*R;_43iqz>#151U(B9jz%p`@c*9}Z=5nwZPmVh; z`h0&JRypj%elrZa&Tw(hJODYNqwZl$`sIkha090~kJyEZUR`Of443-QPn}t?ugZSw zP{)?Md!e+pqbfp=zn+Bk6OL$i}t2gxpBr+upAU!V*cvkQF^Mvl+evV7j*&zGw&3T>a`L=4yC?Y-+sA7Kfh zYvF}BU2zg#k8q&nMR_wKq%35+IJzCQNN(V_EAwrDo1cH?vHWd#;}wZT7xZzOY%Kue zs=I0Q^oJmYWUL?40#<9p6Y1wZGWUCX`fYS%J?9U&1MSv0Pcu!6$VWc8Dd0=Aca(%G zc0X>Srskva+!-y#Io4&B#_S)LpU2u0a`@O`!}KMZ1v2mj{Nx(kZ;REHfbyIowNUD* zFHp)gEw73epJ9BnAg~7ZzN(vzFup%F4gqR=3Qo!J6PkNF=~2T+BRGA~OBrW>!PtKy z*=9` z-jLXpx`yU!G(%({>i#}kI)^SZ59^ztLWeEN>z@k9dbVOulMxGTLik(xgYR+v6c#S7 z&YbJ{fsSsxb>69v_jWW3>MUS*OW&*&KMQJj@MO7zCoUlmuXD;#G zo!uR~!HFSP4B?i1rUMptb|xGxAP%;EdsCz>7IAK3RT&XySbK8*{pXac@Qu19D=Nz_ zQYxG}375yvKPgSa@lDjF40)}Iq0lf|ShT;#s)ErZ%h{WLxwc7gG-tfg(yPQhT7u$< zp@fA7i&-GRki4{zCRQUa(YTdLOlV{MBC+(b`Tolmk|4>R>4WV4my@c;jZD6e$KWKI zB;#V!U&O|W6FhY-H?~_6m{sCS{%V>XZ_10S+rw z$v?!_0pj#znz>F_+^%}eKypm}CJ1Ashb)%WkW2tg^@ldtJ4EF3U@W!B2SSWnB|`#I zBiO{%r2CL(S%;(?S2x>kTPx$)sQs5?SFQ`UU$2>}HC#saG+S_!s!u#f);AIQ3AY;K z!?}a!zc>{kGWU9vF3PvF8&WdF@~4}o1A96jPsf^G14_;=V@*k$FgZ;NQ_|rO9x7KnCe6*6e#61@8$z)aprLqOHtrXQG)+4+s9m+HY`<)9VD zwD;@AI*mWv-qiW#N;iTv*reR|6aX_Y$uZl{zDof}NGT~OiuhmGlYgDDe7(1lxmSzb z^OM1RNcH+=#W(ySEs6~fMepVT%@D|NvhItyYZ-Ro6=occpK4nG`)3^(vIeJstm|uM z7)1JUj(exBR)1bVJ>I^_uHzw5ps>m>J0!133XH-@I)us3SxIUZr+?emX2K^@`e=o8 zaU8?TT^;N{8(CT%%~~e%pl>$3YQ1MZ9H}Fyx;5{!7z$e~*kfwc7qu13^949GJd2jq zrRnHy__+!_#QP-;s6<2`QT^+0t>nO81&g114Ah8nqxSHe2enY_o|K$I-{23NnN6V% zn{xQ@*W;8m(A>%8Q4d+@lUlaG3=5>15=_3%Y4=*qU2RD3#iafkjt7D|5XZk}xoL)Y zK2On@@AI?iyW_;TOzxg6Wdm^;_ZTnWQAi#N7*HykGUu+wp)SWMN%Aa0xnE3!Hn3=$ zRAzZ2$oj-r&wDiP^xg?cq($6#Gl=J|geucSC;FFNwJIIsa-4XPV)i5^+v20TOrw>Y zQ8`P7Z05@}7;2v~Wv!f_ks$TrAiXGq?~5$n3P|(W4$E#TZAC^g7H~9nEb^#+@Prjv zNxa<^R3Ajb?K-|1ZiY)6Y1ecCd95H#Z4#ww1Jzelk)8^|6~{N6T$dx8t>}BPZ7<4N 
zeReX=>~;^kls>9Is|O8~cmroe0*4$HndG_>%{eD6l#3Li?OR09{sgt-7?=)8zI91l zW-t$(yD!Yy$n&!(py;Zgw4UvCD{+4Ra}M(I?vg4RN7%{ibCV7CVZcH#?({S2v@iSz zMr_1HshIS~*vEhy(!Ca)`dHH3ZZa*hh}ssLH!B@ER?d%&BK_0(x6jY{Sd)gh6Ecmj z_tXdi6HZ2tANEoeRFxbiAAgUYF!iIDJxzEbC-+^8$7U)4ue0xA;yxe`5Z~>7e;Mmk zmTVP(Oqu*~Xn4Z5(I!GAIbZR}(V+PFwp4toLXnXPxlt=oY*EQ?QaiM~->u?6XP$kCD$!);#VkbKip^}80z+0_MId8_x7lq{n z>K`=RyVR|L{M z^_EeXwbM(_Y^qc@%Df4us;sh6Yp#}vSsiA=$|1{H=r=CEYmj2turj)LP?U5(;RGQc zSGGP%j*ZUb`^kabhs!+p>SIaF<;^J-D?x6FQNIy@#V%0ce?Oc{)_;Z}_9Fa!1f20N zvl=-SORmSGNKevTq|0nwVsbuKY?)>L8O?6_vJc|KXDyF3AN3PNO?=x>YIf8Wu_aoe z?4cKEP$1&EUmNh#R^H6jC8OeUFm`WvJ6rn_QooZx6}mmjO4Jv{Ri~Xb!eZ<&Kz$OS|{x%)dbI}+z;aIqcnMJn|R zr9Q=FzAH*nm9!JVghlrW;-CAP1L#KKhMWw@=y2u~{zwGnv}XM5x#T$~r67z}jmY<+ zrWl9;GgK!I$kEE}J6RtNMyWCNShz6Ho$-nqhA+$r)~sO*3fDF?$kA_~>QwvfjLYw3 z5rX9+=I8aEj&=l3pF`@lQFOdEpCp_(X?!*awT-QI{IApx@4~tVuwAP=uoDi{biM(0 zk@T)9jTV!G3_>(`X^+W697_kHG0GzZP3eP^#U~^ti6K|J`H9N4ipgL*ua@I>Y*Dtk z+@*=ETmS2=dWQje%fp90m@m*sX*AjYNz$W=FQf@{L_R)L^h)Vy>elj-9lBrOQs8O7 zw=Cr#Yr9B8h1^6*4G*8eGPVN?90SQigLCMO;#`E-fYE;3*GW4MNQT8$OA9HSBMQZyGouVS(*PxG1 z3{I>=H6?{Zr<(}UPV2(I;2rS5I_uwuvk}QA;@`zJrN&#bO7luP z{9z7hPfv#u-M>moe(LY^gvZQ2dpFTtGR4juTKyTZ^_^088A8QR$p@s|-3#X=Q|qyF zMD!z6mFnZkX6WiLx*8x?XTdMAlQVcMF+KR0`02N+ICX*3%d^woibr}+;L#ZzVA|Qr z^|e6<{eMpk-@x}`CgX4O3BI*0x*C{b&){5zy3I48l~|B;K$#-yQ=DZ2Pbxn zX8kooTH5t47Fs} zhC_+J#Jg=1H5I=33$dS9><6F@T5uDz`lLJwo9#w z&eHs^n0{`bZv0H7=~(K-r#gVWa)q5asOlE%>HdIyh@56Q+vp8!8wU6$LHUSbd2qnF zM9dxBHzyWgHE)u-Tc|icML~#{f6%Le_dJx<#4n~Ki>0ayi!CR6KN(X8qlW5XfWoF* zl*@#!j>R+aq%%}?vuQBHb!f2$$o#HLn8|fz?974*vxeh2H8j5QL>JF|sQZx>(Nh$7RL{(-#6I+inSJbG8r~-Gs4`5$WWj&`n=K z%zUP}3G!CXBF+MyE%>M5skzs)W%{0e!;+qO2Lxxr6KTnxjkCc%1oq@OSH)n}HclS-L~_(QR~BbJ*$oXfQGKy@IkDJx?;y>M)A45l9#p}!B;dzIIDFTC(l zI%2&pPs)7dSZ4$AUIfBMjOY3Kcoj>)cK0||&7OtI00ZJf5pB=_RzLl``o!BUega_{ zfJ5!>-rg}*x5}D7Y4iD;%#4sFX#{?#C+aNKl&ZBv)m<|(A%|%#hWC; z=s~3$w)`gVhG+xd{MQ{;XI0r7S0Ww@Y8wo-u7`~!_(HRyT=!$YiZVx~J z{>!mhlq{!qgU%~{k?n)2p7+wMSwP>wC%t-3(J8SAh^T;t=a^l%!h*5`F!D>3WGGV 
ztpt~c6vbv@oGkvYqWTC(6Ry^y^RZdfa z#Z{~*3(y31?~(+F-IuT1a)3#tK;JVChlk99>vFrn03mQSdO||LY?Imc-u9nm=4eeh zg6*{-EurdVm(mv{1YJ#bN9xXp6IdtEn!^vMTG4mzBTO0l3ClRNkNSOAX+$n+A#<0N z9DSr#+p(;6qgH0cnUQy|j4+2J)zv*N2dyL_>RtXDmel;5#TwhaK|t(btCL1xrYzN~ zx7yAmkeD~^cfuJt6+`>qJYCBW3f#tqz3mm$GGRjab<=UdIIz2;e}X0yH?Y56#zWIF zpI3^-5{c&&RvzraT|->FueJ!Q{S-*LS?miB2-kl<-MQ*~V|P=y;`e z?DzYVAIm(()@rJBs({QZIk>mLR-BJCiXNNPFH?^5Yq3mEZ`*hu;TJw4VhmXW9D}qm zmsYUQr|Xxh0*6(WH#N%O$?qRTG7S}>ohb%f^kE&a9Kv3?sIl3FWE=-t9zE8NeuB8V zehX6rh9`)VnV{M?;Gm%h9F}Tt8}+nkZc*)*W+UA1KVKELX#fE>6jMAmTGZf_SC(K2 z>5c1a50I!8_V2d@8X9%J`eFWtcx+?;%?J8zS_}@2xtTrVKLfI%<1lEQP=Ml1L5jny z`G+R@;yfu{s&G=P)ab+b96zBC26|xo`Nojr99p3ey<-P>^YEDOW3kJ3rceamQL#4= zP{!pUs?-9VZOek?5sPn7Q?@4fLu*EP#vhKLR)kss+ku6GcBcZ<@o{WSzLsjs z=A%J73Z33^yM~@T;jeikC))nM{q`829nP|%!673!B|*MrqSa93Jlb@(l*u^pWS6h$3gMi#8CDnhOFhUP9yp@1&Q zHV+ro=cQq%pKQqwRkg;=Bk(^jyp|xeo2)s*)EQ@nCulC0q?g zk~aWPXN`Xyy$r^x_nxqVhiDmv+E4!pI4sj;_bc;{zP2+4SRRj^AMM#?M=0-f$s-7s zP%7|QdU1o(vdZTku645H_pK!4FM8+;RrG1+jzD)p@eJR<46A%o?ekJWe?*fb442EQ zj60t}u98cG_xDL;Rk_zqY`RW}=Ek#f3`c<7lC1%LS0nYyCM~)OHBb?I!`*de*xO4e zLmkR{fGq7Yj*FjFEA8`JVt2|Qd|>oO&|!d<7jphW59NQj!^!gNwLHjpu#CfYH0YA9 zsNoeE=S&BYb;xN>lcfcTG zp-;41S(nz~gFOV=mbk?qxazQF15v=j&WlE@hH8EPL@zK@SYH>D-45VDsSaPCQpfS> z#nODR6N-8r+?bBf7G6VWtaO79vl!r02o=c^5pI60bBo_&)qU`3wvwv{e&BK8B(M_0 z2OPT31P>^pK#lO^IV~ZYzHZ@D{xv8u;Ls(1SWP2h%Q?|GMcmWL8tee!Lv&g98^%v4 zKc#djS^a#>E}weQGeVm+1w0iWZ#v^XB*~#_D2-&y&$)PyDImxV`zO?^M{uH8 zQc&6^|Lz8*5vo1}( zDWzOVXW(=lGe$gCTA3s|u8UGlRuLMj1XjkcU%j+jN8&vK@RWh%{)G1&TufZD*4owx zOd`U1(gKUY_gM+iKU=ho%cN#8*;fa5`%T7e_I^hq{j^V7MdZE! 
zDt5!MF;MK(6a9M~v8h`7_5Q2;VASRhhD2O@)VkLJ38|HKgDKdq0@M4NLJ7Tk>8j)m z`4q!&P_~MG3y2f7l-7FIqHtQZ{V7%v#$V)w7of7}*_Y41wQ+MF3)cSDR zs=2ONN#67Y3vX)}M_K9{&r;K`!{RSBIVJ`+rYDtNeJVw5&`WTtzX2U~X}SIypB{4` zJi~}04!MfSA>^{sQ^ec&`Jy`6f+mI__)29c!C z>wv*2a?Bq2fd9 zD0@~{pRQ7Y)T|C{wFfdki?idLLfcb(g^6{@a3qXgeH=_XmhxWdM0eLQ5Oy*y&uQhB z%E5INvgr0WYiX}gJ(ADoet+#|;B?xwq3+>FDv19ZH6pRY=B>*(-r-lpSoyI5QhuXq zRjrzS$#1e+ze_L>52pXT)`G9bPhKJ<7727*+(Q|F-7`$uH{m+uBqR!9eK+*NWkf6i z?1B-!v|EG|c;7fTVKO3*xEQz`CPgUav|Twl)uD&dp|6S$z6*t*i2E0IfNO|j`m?9( zzU{tM-(3ibG9W=pi4#-q>HA%x!=3{wFOU9ke&w3xjk%m9^QMH5S(9 zUwXbgcIBike-5h|k)$8BZR>|8CrTY8qM5>hJ=B;4+Ji4#P-Vnt%-`w_JZ1aI11eo} z$N=7rLjrKwexKP!)>tOy)OByJKv?$f3t<`R3LNUdVCA=X8 z4XJuN_+&8Z`T8_lTw~}w^dzx8S>~6BIzF-61aE0JUar*~= zhP&k@hfru?=xLA6A#5uGJv&4o>+<8f1M(iMS;N59mGi~8j~w6IFFML}B8%!JT6A9O zn8b#udRs;Y8^(xciDb9>xiX3c9LE8>T}RJ}4>iKTlagQVhu)S`Cii^#Dtg=c3e%N! zex(<+xLe;0+%%JqkX2hb!6JQ#fe1C%t}CayU4n^Ba(OF%rn=2u@P{d|Dl)OnWyU*ZBlhd4Niq!yll?S9lDd3fLLxq5MhBx z<{R+JtHHsei))|F)iBHAc-ud+F*h>h*juKe{DERppv%NJUf6^4^Riq0ygRH~Uot5_ z6@sNJXg(@=&GwY!O1!ml`E8z-jMrfa9!MJUzq>&&_oi$t+^rY7n;e`#@_l+ROZfug zR+6@tiM719guwyjS)~kvKw|>eTeY*na>=c^>_juA6y5LWvWjzV9wt8$-T?iZ*@UvXrsqU2A`a) zxW=`p1?ha#-3*rg2RkI6=??ykSIkC9az!_#ulK)Co@x&L9h=hAEKAys3*KFe1VvqD z#FkQ8!z^g3QD!F0#01_6IpLXhWBg`I_}6}=Acaop|7?2<0)ujsUj#gD4;G=5zD^pU5%lMtLn=F@%133Yng;Lg;s@U zPThr3GQ7=%g9IuJ4*W|1qSi&?W9aJQ14H~H75(|6_T&SCIBmqicq)nZ)bEo`fs$Rt zDeI1sghS<#UNDbifZBGaqX|jp-C|SX!<-KfnbB4?z+!N`c32CIiyk|l7<=#g`mf@F z3MW0mz1X?!YN&|@76$!l_+gk*H6|&cz8#cgR4@5f%(L-}ydi^(&;Gl&WHF1q()u@H zx~`A=KEti`7Y~z~V9a!NLy+J8o^k5)!7o_!KH>QCoVV?Bb-yTuaFQ#D?85?26mq^Z z_NQ=SAb{OQ+4QuUE>Xss=F0UR+a3q%?XCjpu$#)JE7>C3+*uS^((V54PU_~vrU4&4 ztFCTn0YBybowCKP*jog>4DqWIbLthanB^917pUapc1LCC5hG z@3B01a$@eU>y3vKlPb@#ZB0;zlSGgD$exC#(jB8%k8mEI6L)QU){s$^Uj>C)>>h&Q z0I|+0-oiLj1Ku8;gAubQq zkU5n4;#EB`#PCun2#BsVj_*>PJNP=Ufjo~KZ6x?QxsML}`>cxB+4uHOhH^hF_vD=Q z({n$<-$|YU*EbQ;;ND=*+%tt7@vUq?cN>}BcDiiA_4v{Og`egQiILF@X 
zG3j-}wH;W}Enc5Pn`N#gc?c|=;VUQYLWAdh)s)plGB)gEpvuTq>KulB3PbiO5<3ayE~T<)szdo{xj4+nl03?E+wn~5-`ls?nRN*v(WgFld$(Z~3i7!@cjXEPv*Yvqtn87D8=(HDK~&LZdU-RZ zScPDK1Ep5c%XlA`%RxXVDiz?sxs)R8lc@KbTMmbMs-=~dT0En^mBf)3z;(ICmD187 zS}Vv1lKGOLj<#(v+P7ft4}g~n%v9#Y!8xrhwkdqFLd8pzj~l~;Bj>@}z<@bf|BGQM zqnQ__#%$sfYll1x0Nt6>WK@*5AGZR6ZyVv$ALb$N8!GdO#~%>z@`fES_J*&%vYB}$qu71gMB_cF++6ujIj(d_CT?G#R9z%MPEx^$UiWL|G zS)+a>&%cJFBX1ujPadXT2%5xjb>nh{Z2c)MN%J&UYhAm&0??qP&-)~^6dH_gQqrI| zsZ+P>l?ZC~yF1tKiJ8CXvUr0<2=6+hk!(;cb2^sVn&_EGn$9Bc<=s;Uc%nqKRMzFR z9GnSTQlSRs7$dwWArnzZ$!n7$%_0&N%szxO1P zd`y{pVNXLg1J5KPuIAsiGOWDaW3m)qA7`zV77J6;CY2OfNK#jBx~&V}Icu zvIu!jF6VY;MDA2TFr?2^EzRykQbtQla#*;-7q!**1(@k_srZ(=OIeoblk%DY&(h+W zn6v?$n6nIk=i$-7$LJfI$gZN6?E^fA5^)yH;Dmn$$?6t`JmrHRxdM68r}SwX3p9$t zGM8P5_dun5L4^05fQL8SbrB+=u2)o^ZO|FR_03UI)&Xk7(J~M1<_;ZLL{UCPR@0Vok5jU`U4_*Kw>NcG*4%dl<-65P4`BitIeQ z??~TF%~=gZ&02_i>YfWnf0JE9pw<6S4(oqyTV?g>qgK>+=eq5t$Fe}(lzF-6iBaB& zC>iDCmnY)&1dpE#$g#+H>hj<)%-=!~4Lt!rw)(S1sYrG8{aVTa!g#bk3NRp2l2 z`#h3LqmXou|Ak&IqIMn~H2?+7(w84|g_wu8EDNaQzO8C}E;ZY>nsb}zs8A2d6`C>c zMRIE}{^&s$^w~s{MiN{8w-KktUET+rcf7FK1o$86)}Xji3iYVO)>OJ*CoFD6e=aUv zX?QN@_Q%Oe94KXZ-BA`qm!16a;|EsMzUx1{52sbr{qSJ@21|0jI7NtXYMTN}=m*pe zPFj_0&wrAjb~gN*ISxXYiMoY(ik z4{dKIv-&NX#OUSu3{E>k+c_F2*2k=IYcIczr9Q(Rx~e}Ph~4@2Wr*HX_{6kR?yeAl zocgF1Cri>NF|;UdnsQn%JK}3COkv@XTLU@9L~74}FY$lE}FP##i$fRQ7i#Spf+8h%ytO`V4Nay)6l=q7{s);&2tPO=SnGQ#@lCW`hg z$t}t!ChhOP<$w$+jhNv%lbWa`O^@8UcnDPCE3A8wH_BGcDltP#*sw4ZV;d8MU;=J@ zs;Ild;N`^F_^tQ^_k+{y%$WPFBXej;NWBZe6#W1qD56?%#==eH_45jsK&UJg)hDQ2y!ycUtESIH-QF_Dc%n@(#j$4!q|%TAsX7PMx>00IuJSGI~9#nsdF z;w`TV8&|jj|9t>Y4D5Aykr2%L3IN1M<;dCyYF7 zkP5SjF}3*i+3qY!0+--s1F|bK8R9|-aPcymXL-R1|XVaQlvWc0AP|Wi5y2 zvK{qg4b1~A=fvs`&3@85DBNL?=oa`iRWDUy_{qgLED7gJ8jn%xb6{rC_3(q}@y8fZ zVa|sxa9s|&UUR zjrpsUZ0y#uljqD7Rfo518W#$D{RNT+%e=F*Zal=-v(sKekRy0#Xw#qeGi#vOj;uS$ zv@%vS_Dr%){SK?^9BJa$idhq|U%X}WDIG=w8KBaQ5{3yFIRp#l^@Me4!pYdPPElmc?ws!m%?6=;Qe8Km$7$_J}RD zGT{W;;CuV1W`mNq-A6Id>L36KJ{ciT9)9zB#py#E91G0LcA4C32FjL3;E*o-`x04n 
z)liu%WWslfY&e+!ReaR~9*^k|ePg99D$-TbWstwM)Mrc<(U@}=VolGj-lz4P&s&VR zXCK2v_4$3l&c|u?7g%pxY`KMjEW7wMnNpeW`p2O8#aOT}nGEXQp$!d0B8k>lj+*mN zSxF_Max%03?1TB;O(SdJqbTPV8llEF=h(cbY9>o?UeJ)GP5+FdgD>;180_7&`}>nF zI@mH=Bf{{aVg{C)`&#MF&!e-F4}`e>Iu8l`3D>|Ix-C07Km;y|kd5uI!KoKMxv22? zv;)F=zxBpzVwGUP)%~ia(Cjav*Pr3U8b!0czkkSFCRp}LtL9af%8S33{8nKc)I{1( zy}gKf9weVrnVp~(F?>n{2R>;~5vQC@QyCW{h8Zs!Cq6&k zQH%~$CQ9q>-=en?$x9P1se9!pvpi-)75|!T%UPcg`m1ZsZ-R-cT}Mv0?H@HG>MrZ` z5b3n4lUZ#FTU(KRy=KP}=KrQUAC(9VpT3tW zX$Is04Y?f*|Ki8pau8!1il%HhoWHz+g$9eSIsKKj8x%r9wQ>MvSl#xbAwyU zI!nt+kFtov;5&64LxduWeKEKMGoHjV#!=t^@I@nzd+xGKoI$#t9dqv7GBJ$*=;$t= zA#_oC8I~Z<-dMBbbtY6*QP?WvO$_|o?(CS3f;#t8-L6o=u;~>Q1&9v&ufbg(R4s*8h z*7^5EeZ!HAl+?&NQ0ZG!aZ=OgL=K}8@m%vw3WZEOX`Wy(=?gNsOectgjPpQtl8v1S zy9uwhmi5ci*M1tr!Qz|%d|1>|iKSOSsAOD|LP(zKJC0<&|FUIW(U+`(*r+0n{rxZQ zJ4HXQw!|8eX7cAI5$Kn@>TD|I-C^o(?Y5N^zZwtfQCgbVVR2w${?R;2zF!6!mN*jS z>glyz8Ukj{t+)Wq{3j3ShwCyG94cMBzUEb%Q6DhZ7Wz+8)rL`$N_^6dLhPCGD|GScKuCh zqDBr)@C#!0YHiou!0U1DU!sG9H;;mgTtRac$i=MHy{K6c$B?h~Ix2B!EI#ymN)Gwf zTD7yd9F9LF%frsm?k`xxD!pD{F_pxybzaW*)i1P2dl`B8(FHQAXmaMg$20%70uVEP z*eMBH;da&%QhHKXeCUM!FUvzf{6wCK4{;?Nb|KDNx#~1ASJnIJ>X{?8_F10^a-;>$ z=#Yy)o;p?3b%ebQYnh-3UX|UI&)1z>BD!eSy;oUD$z-F`Kg^A=?-saKw^y|8RY%Iz z;bJLt!3$L-BOcOK3wboCVEP>QPQhOHy+p?u|GGBFhSIdrM0U;u&%V$j$pBqVZ4PYJ zJq2WZGGSq+QFUl-}PadeV5?dmTu?>!<)7D-ykyAP}#+RQz>{FiA6p~joc zQd@@eZoZTAPvNqP0BJK9GJH5DqL_qfagw97n~@u|*}v781tkqz$|7PcJ!yJ~TgIpp53zkDY z)*w|{cAHe^LF!`mAGd7vT@Nv|{#zs%2Q~jLY99)Y-`m{+#J!PL=TP;j{cPeexXl5U{q_?#{Omv?JkpgkcnAKRr$=6mA z66COkn9%{`1d$P-tPF8?3;x>a=yFa9ygw5dNZxYAB{K6_l4>|VX)~JHc5pxE_suh4 zhnncZ78P0M`3t?gJV*Ot6BTkJ5%I$bELR;x?~fSlH7%RrDgXLbBnYbAzX)^l7y2)U z#zKwr?8&>fHEKtsR(?q@j*H(iQ#j3bqBy6890n4Z*~F3%=6~o-6kG&PDO+|-dU*iX zek^BGN5{-)awWlf`?7FUl(e(E>9{IpFM^Vp6bw~ftAFI6_pWY^=6;kb23bo>%3on0 zQ~vP{$4pJ~rDy8XGD!D;B?BqYmcG(Dh^VOYW1!Qam*~Z{dy?tDFwQq&%(|>ZV7zA7 z3j$*h7N@*#Il^#4&15?o&+niP~5h1(T0cR044V!=fK|+pc ze*U88<&TzoQ!6vvq`A520<{u#3W2dRm3?7dwk_luxp~k)Be{j*Yxe>%ET6ntSI~cem}2LMC2rqq?3aII(W?xZge#|yg&}1QQ)sh 
z+^d9eK>g0w!VtAxq%0ad2|J-`iI-FD(;^crklGA0oB(PUh2T}jB8QY&pWm@Pn zCAXm9r*M^^q|nBWjsH+*4krZW&8h)!eB%y){iuEnO8(={h2+$~#s*%e!I-dJKau|o z2oe&9gV7n6#4?c_6LHctFmx|qwTra>Aym0=t01ezFi56a9M**k_+XyxSOFwlBkki=v17cH?b zJgnMt%Ar1!x41$-c+OH;Tp19lC!-FJi{9+c?Q4EXkYN?%3tbNswp(@Mhk<$i3dQof zqaq})QdA3@p1L6_L4c1V^y1c^&djO42S)I6(Bc{iUnkUB)fVr$xh!LxcN+um(yR&BaWS`}1AhYr~0aX`c z+tRG=LH%9-kKH0A?U-6({p~@$9AdlR&OovRp>|tf!D}nv{iT=kNd^vr2^LJkHkEjlRlm@(jO+BED3VJzI})fUAcr5MwL z(X|=&TT2wA%}A)e$AJn@lZ$sK+zj@vYJy1*b8{S^Y#}SU!QLJhk7y`ZSuhlM-$yAw zJnC5{#=O8Q-btR&o&M&~mn>JT&Q3`iN+b}}8S5}DoE6D@YKWrbGrul(@jM=V4;2Ha zPyqimb=B>SQat&~_^q9Ln>PZbrKo7PbKuiFQo18DdKM3ghp~|r0&bRg23W8xaG5H% z+3*@b-LZ#uVH`NxP2eWJmJ6ChJn>V?c^GW)+OG1Do|5zs4%ndO&i+SnoRP3MYPz?@ zkV)AU4TsIgbe5bh`MQTToY3Sl3wh3y##^q+sbcX0L7E(L5vb&z)MjYV zoJDZ*70&#t%#+*t+Lhk3PO*eRm^1J7%+h6Uqq z4+d`T4f$QbXL%dQ0|!3jIHArjYGJz|&9mlrOV6D~*qp(V;?=-nRKh_97JdRtP(6Oz zgSFu6UxU@>ZlY$>gSqC-TeZt_*T&6gE`}dRc&m4r;cP;}K~o!-9Cq4u7%bj1H%{wM zQ~llD2#Q+tX=xzI7IjD(J9m@0bU->|a~InDOtloP;|qdyZ6e9*Zep4@rPZk{)bS0t zheC5AH16c^%y6899p8~rI^#&X4i*i?{N-&%B1XQwiAJNDEqVDQ%Sf8@De=~&1%@X; zQQBUxn8tj86M@J=$jx>9!n)gEvg04dOB5*NZG0+kU{Cy0!be_Ge;sM zc(u#}-gI4xbRWzAO5oSnp(`qrT*KCTxUsg=*Xr-;Dj;m>*u=Ac9Azmc7$oB9Ka4xi z8i?H(-^M;@*lNmAQ_`9Izy2GV5n8p6#uH?+R!~zdpmaCj`nHv@^OBjybG~zNdDw>* zU}9&i=Q_a!Wue+i7gw$br=YQcS%=<}-c($KVODs4nr=wniNKc{(*}G5ysiz2#P)Cn zH@{x<;9fg_dU-e2>U8RhbGmM{+}c(!{1HRr*xfnfoD4)sovwMpoh(#zFIi>< zA~xC${LFr{1NlyJIIVL`h0cEVd&u1AaXfEDW#AuuoZIlHdVh}lfHG*dXUJadK+XAh z_PlDD0-)M*mZ9`Zq+yBO-5T>X-QHi@;$?TG&FKdicH>8JzL!+9{LdT1pVs}buHHex z`m!)c-UUhkvmPWXu~;PN_dQZ2W%_I7@zt7J^JhJIlA^u=N4-o*0x*!zq^msgqo6!K zrGNssL+-Ro=PgK?BsgsemW>=w?4_g8C&)@zO3bF4oT3`?qtsvjtuc!l6cE%|Qr#Zl z+Dj{1PO$0vEKZB?8e@78qWJWS_!KY?vhz}8_H$`#MQNGKt@GQ@K2Ro{^v%9WQxyzbRKd8Psy^ zKmfe-ce@9cxLXt;2@4OextK-=yA1wHcFX6oHU!?6=!xd3cu-aABV_v-SL&8y)f>Ns zJAr1oUSyEwAue$rUU7C%PZD>Ywz}8tR>k8#eNQ6~3k^abqyF_b7t_xP>AFs1Q^`_Rgv@PN!U0u8 z!u9a~^%^uVB&-YfTw%MbA}H!s@Aj~WorZd!#c6aJSD?$yb5a7xJd&Iv1^SO3XH}!q 
zTzXY}%qG0I$~1TGF9;)69X`$>Kv$>2AU_Xf{td7(6NR|-A3|=yU_dOjb zsWD|%)7sO~AUPgK(B~5RW<9V72F*mxjY&!0bDbHjNsG*(bhV(JT}bU}tyxtt((P|{ zwO~8WEat4&V~Vlf2TvRM*zQ?Z8}Wf&D+c-XEK-E1<{9u2Ot*S}Xtyn0aDXD=)@h@Q zw|0iW!dn+YvdR(Mve_Lm2n%$wM|!?#3S`&hM*J@m{z&=3NiC4U$`m2$(#Uzs{T=0u zg3lkiVAmi5mDonoMgGeFTE9!ybjp~Y(7&U@>+QraPT-;)Ip=A-*y;VKc=NL*C-XY%aB!O^L9Gk)RKV5EjKk=rebs6#!FfB$UMEEP1zz_qu&7;6V)0EmJwU5B zrNUCw6YKKt^i;L=TE<~~uIip#bfsa8>W7V+r^oiTrQN_IX4~bhX`E?aiskQ{LfdcF zwlcMw8zZUpud707!`azNnx*w!Rz)!WlEn&tiV$N6VJq9tTfaK`pXhpm+jY`S-d;an zkF<-)SQChqkqp-H)UfP1e?_F4n7jLkm-cC zhP|SV$CaVW`>yq%cP?|sRivK&NY?$LOfo9}g5N4Ak#!V2QnL{6uP&-nGQ-cZtu3T+ z>Jw#{;E{IF$w5j3M(Gj6I+FT3v<~r>z3nnFz9XM&kEWnB-<#!lLp59-t{m`k9?2Li&5Hk6( ztcX^nTcC#zyyprlg3RAbh3R4<#M4}1*U~;`3KF^O{~qS!4iu^Fq{C)6FYA~z2{}59 z7tC|@{n)TZ+nKOW8 zq!L+2bU>9y#6dy@+2On_%|^sOa21V7f}boWe$9<9^b#TN z1!p|9x!$PjNaXvfc7>1D*%@Hox7%QzL!v_~;&M{`wAIw8_vRaRSm1Qvl*dsu0WI15 z-e{e$Nh#Rch0mlf;_eP%=|H=!ECkG_y~uEO`VVmiJUM+%;#gYF!(959LQZ=82`mI^ z2n8M@{r42p47GJb1%acM^vhR$-&cQ!nAZb&Ck)7urC!?7-lv)F1qtov2)T_q1<_Xa zWAQfrb9~xWvA%Ek3?5#BjoO6!L(0Qq|vP^xJYd|#~Y`$t?iozM;iND?u6dk8THg0 zrd1}B$;2S?;%obX(MEoo3Tww6orV+_ebVx+Sbj!r<+y~!5kCzR`jif#&U$Srld^KJ z_EKHlFNptYHP9!@c@P4yqLs1XbB~wD5yu+ExYaCRsgGM@!E{h>MpUeWOQQ2jV!F_+ zS_uPvKT@Q*1T$5?hM32oO~9x4jjzipVse8`(sDFUZFbEcXSaBjQhZh>gl}s=EdN5e zA~|;V;%^{}QkddgqUqb(yUxzt@0($P0XW4>QswZUU*3@zl<~}gO$N)49AmQxmGw!? 
zB0L?TU-6YI*$twt$8Pxe!}o!_(f7M?%bX2{rEyO+ny;nKye2;5FT8spJP7KHgplRkaPTPYC>yQ>{$GH7f{t-_ zp+m85Z<0k10Oak=X}PB3H`R}VgC2V%X6r6ke@C}5RK<7Mt^fMQh?CE7meM&X@IrX% z{V1kL$QLn0;*Ir^?S%nVr>4DPFgV7{|HTs2F1##Ir*bY@qAeZXVt9ROx#gbP@hMZY zvfD`JODp18ceI@0mv33=QVk}RIuZtx>fhKr?FZ9rECBppk?4e8r{C0(b(WX@g&OeB zbo_O=rOhqiOujxCV;VahC@VLhS!GH^(!4g7p`Q5qpE>e6Kf=7tISS`{Q)X_XVvNur zN9c)HBt(9vxDLe>e}$~2B<~{ByDSwGyzrcbMMoE_zo?01grF7T5qPu}HM6+A2~BNx ze`Q9C2be{uG0MH2uWwyWIH@7o@%CUb)IQSZiGWom zG+a((?tHW>$CI7Z4C|$iEZBv%6eHy!NN}aQo|A~RSpQ6(oRr3#OP5uz^zsuYioOMA zp;8%kAv5i;ubnIjA&*zU=Ll2(?l2S_^_ zvcw{tN%+dZm(7>=jQ%T|URsJQiIh`}jy1J&L&8i-O3O%(RQjjg7Qm!giQj$M6HJ@= zm}GyJc6=CdKu}*?bdAv;Qx(*NQZRR(U%I!oNu{kUB?@l+BKs^Sxc;iaj9)0Zql6%1 zFVyJ$-FA_q4h%!Q99Ws=?r!}Uz6kJWY# zA=Os->B+T;F8Qt~=lXFdTjEIX2wx_?BL-p_4vg7^&m(eVS(cH1l2-{`rv8y7g~d}( z{d6%`c;-Fhy_y&2lWUdO7iqDKati&;Q4k@Xw4j6D$zA@LY(8&}ivQy63Lt}ibgm;K zI5HyrQs#@b{;-T9p-ue8c{K5*X(!ZjRPrDdsfM1(8udT3gX8`7yNkcx*G*%{^FiO2 znXdeT2zlFD8PtBOukUyDjf4WMcvgW_0ZWM}|IV0TQ`vQ~CPl3{@l8>;^<{T{&DJbW zDx8c87FLTuye`_slcxR(Sn<64bpAZ?)_m8Qwdk>~69!uSald6OcsmWX=)5z<%FP|~ zbCM4etYmaE{S$$>`Gwm~_=d3l!A(XJG^d7{mYy|GKqf#_T+F6hF}+@Ncz)K`I;T9e zyJg(xox-YaH53h|2uri*(@H-@4=W9zU`E{}8aQNUw{U)sVt$$fIEYk}hpRp-KB);x z5H_X1*!Q1Uwzfk(G(6;|nKhZR zp4P$?JjIob$txh3IIhMGLW=R~mG;&QLh1C}N6LAx)DsKiLDlnonyT%34Qb3L@?_a! 
ziI`RG+Hy$-S?K?uh;N~?k7$WhSP;P@k=_+>_ve#XxZh5`y#JCC7r9yQHFh)RZB(D7t~r%{W+!i(84$q0Xc}1@vr5y@Ad%@5#>lb>@&$)y zuZOphtOUzsF9yfhKIc*Gr?x*1Xm^<)KMSl(|67S^>=9Fs`Yiagn9pl|gBMo*7kx(V z#Z^i9Dt~T=T zfU`kuL&%LXpYR;T^4v{DYW)u2Z_B>(ux2;~oyY+$nMsY-MNJ{LrK}c>(oqFYoj&ke zB^ugG00KA%%dVU3nn1Y)+Cml|k6ni(WP*D^Q`Cov1-KXE>ok(C6C6`F37- zP2IyNi%u)>&v>VqyT=LZoMN)dl>9u~U3nHSV?@1MR>Z5XBZ9&LpTm+4`WkT3&P}Kc z@>fOATrexvjU!RWX;0WK$?4!-nyw( zJKkaXupt^1F5~%fy#9gegH&%=rp^d;GH4lx$3N`SFrmwtTf&9#9STi-#}< zlV{cVatFRP@!<@>s}vo(HN6N;4Z~yHs4o8eaMPYHk6#;ViFs9_q;7HL%D$mizFUzq zuMdv@+R>!FuK7oOuoG`$Hyyk`=A|TEQjUUY(G{8D^Fu5TvXU(D7{tzoqu0v02oAdO z12i>~F1*b$#V(BSSU&P&!Y)q-Svu=kYK|!doj0u9(7A!^HB-0(6mWOW!`~V;ljy<2 zw93Y-%jZ|jKM7e(O+az!*;q^2pXw4jY3RAx+^!Lx zjT#S%<`f5hB4&yBQlHy#EoH=lKLlV#PfUc~8ZUH{t3f@+fA9p@DMAeW_`Z5zFm8DR zFP`7i>rmh>gHtMZlo!Z9c^x}>T&;!$V%Bxruar=|o=nxf0Kx&R@u2_99l^ufGqC`6 z?FvmxwW$YH+{fBK+7#-9dzUGK58Dl}j-%)LugqBR`}amx7dbGcERC9i01XtH1|8E& z{?!bpd{r-RPT9qxW#aU-wtFZJC`zr+ZGwOb4o3gBWC5+HsI--v1;!UfHF!@zb=A&q za@zLUWA^5XQ{zT{epnJRVbzWg#B)9h+AR)TO8n*&%L2>aX#XiW6?4)to?Y=d>A~$%nebgvtWq5xS$0pt{2Ha;*Ko*Yd|s%O$z6@D zDY3R&_ri$UV;`h0{x(f@TW)~-Z#^(wm7w+cSw-~o8AEIJdp)wsKJ4Stk~JZoa8P=> zvs)EzcYUN*$+(e^524K9AgfOLV5dai;o*_^OX;sax3?xveZa_XVwEw49$zc8+yp7h zs8#jO;Hbs+@1%nHyhlL_z1Rwc-@rb^h&_FeS+8Z8dh~(%yPFc^6c z#ctJ5mF$H|Fb`*2j8#tydEMWEuWQrdH48#_bRy|}#PV))x6U`uN%_?R3)bW7X5`CY zeqZE$`T;n30|SvWbW8rf322~+%tK>UKPT1o*nrdCX8+c`07?H@SHP#X2g2yeyA#bV zsZ_hZLYBpO)^i!*E2-$%c^txy2Yp9zN99E>d%B>Hd6t;H)c!}3z+F3?_PYwlwYw)4 zKRR>!qq@WKX0or;KJdx~Od_@yGhKgfaz`Z`=HFItfdbE*mC^=+--uFRo~J3V@7U2{ zR=SvONrX`NAnX0D18Dx+;HDXTXm0SIXq4QpV%TCwVeCB@f>9zOT2CBSYoBrO!YeRx z$|lGjEdd*T`FH&-?Hm3JYMjwTsRpn8+(?>E3&feekm46AjI`GRFQ5B_RL$5}`|`ER zNWxDoK-WLDMH?@wQ>3zgXm@B3d9JOVTCacE>RK89ETS z{n(Xxr^G%V5_xNY*IkJ{Aku251}_G~!uu(=g}R+b{4oRu%F6o47+m3%ai}p3yqa)8 z*0bs~axN}v-)osMb0QnJISeb6i2fAF^WK#sUf!I1U{CQ~q~j7E%A_+gGI0D+QkG0Q zHyPoa^Z2cG_q(``Xdp=Z*Q+J{jF=?Q%KtNW#+Z=TX%Q_ef3fbv$*p_(@{u=+I;OEy zTE-HVJI7NvYJhx>Zbf3%{19}S93P^!)FF6O|4g5dD!=z&9Vl6WUi@Ef%vi()1inWe 
zmXkSd&KXT7ZGo=gI09g6q3}4L#BkZ?F)lk-c zzY3@7y%+jzxQc;TI=*cK4dj-`4~q^0hj_?ldzKV|R)Jl40rpB~a#rEOy|-AXSb@&V zUnJ6!Q^SAkXhcI$b=v&5#sHBh*~y2li_sLyazIK85T;|*<@|f!S zv)E#E##FkQC8mAgqh&q9r3a>Xr=0P1AASYxd0sc05bdh&0 z4Ea#fTB>-)7ePPEeg@)%E9lc7b%r!TlD=GU9qISTih`1f`Z$e#N`yc7_b+r~rEWO4 zgq;U!W#8ae-87Vjr3zbj>#g&WBaEE%OYwIwnsfO`rTII^utXba>>m_l3UkCEh|T}a zpdK!9qv9Hp7b;;Gpz~M|79Sy+{*mD8KuGc}rC5z{ezcBUzXtEpFik@mMGQwr;%)@OR5#>}BM2WnDi^F(0=L&-$@e0GSk}?{RRGKj6+R?&mB5 zZnB*lm)BaVqA&yBC|oM;-9Tx3XnM%@i{3$kqBYx+s9sdho%xUQ({G#X-IPdMo&s-G%r8LC&y(Z*tq zzIU(G{=Q)tc~bQRJ-JwolBtDc^iPxop$?($7o9rm&*OeLe*T|-G)FM%e#j3rAK^vg z1-Y8}5vpOTrDaqYnfTzVC2`QEM)8{O@q^lJs1Wwp0UsU*@sib8FWB+b4qmp z{xWEL^G_v+@drj``np_OKL<=6%dpU@>uZ+$2&l)K%xQ3NW_lz(Z}FOHyzfI+ZP z$tu*u)n;#!5eqYc-2|Rk1y=d%rB9=s2MOO=6BM_Eq`uSdrsii#b6kDKYk?Yl)UW^1 zVC1bh3wRC-F%A6O7!a#0`-yR^MykLeC;J4!zH2M(xJTI+=>)e&)<8ysTr;}H4M$_U zEGKTGAZT~mG=&B-bb+5-3<878%rcov`N#^4umwSx}VmW?lF2r^E8;=gFpg=8nfkzPeOu&RuyK{IRq1 zI&!gxI#u;hAS@0Tj@WBz@(keu%po8YNa|${i02NzT+75n?Akrp;~h}+5`GlPVR1Xj zCGaCmfCFl9xj|xGN`w+!rA2>AK@(VX_^S>#q%}Lz5i4EZS8}-)B(t>!oL=}4-5_b~ zLhWj5m@c~WqyxFxC1Kk*165yFf>h2w69jga@6+VEafymlA*sirzUQJqz{lMb8lisEQT(V9Bh9Zp3h(_afOOY?ly!q1k_dk$Kh4uUMuFOLh{BH{;ntjd%-+ z_G2b|#i)V$PDFCBbwmEW~Y*-hTL z@}<{DPWbe76-)K}oCDUA2}ePjs~P2Cy%EtLyz{*QFGO)lopoLtIt~DXc=}a=^>%QmaeBt@%U6Ez$~A?b%I@*~qbXKjB01{@1n=cK|4kz{MFzogmZ57iJXp1UMI zv%DFe?qA2#6#xxcyF-C)c5J8M?w+o^1UZp@S9`SsA+eJ8HiXVLCf^bH+ho33#GT-u zWn$1Q#$DYS)$!K}@f6tXmYR^Nb@n9ZkNGo>MGqUg@42pm)jOSdgjgwMIZbSYl5Xur zOeb(=Jx3J0`hVfd7Qp|2*=vlzFfks~;LnVT3+cuM2|(gN)w9B=dKc`F6MX3YSCt!6 zgCvA)C-S@SF|(MDDS&fu>Erf~gR-T@oqDrAcU=MbNL&tx1i(U9qt;Iil8V&wF>tYRjf501Q zcC2z`k(1=0f!BL_Nr$N8^HJ8Lv)?mUsPbye%6Z3~paY-3sbqaiM2gzR?p%-`|iX6v6rrEdE;l!(`g`!=RRmZi-dF3WBVwWan~}| zm$kVSFn(9owm~zCE;A_th05N=0>FSTI5I=C@Qb~a-%Ex+mn$ieJ8JS|%FIqM6EqFm z(sb5dW+7~hTVFM1$H55evvip0s_*x(`#ArI^IUbd&lIJirQ zOR)<{Os&B@6-Wb#=Ypqe(WVj;OWaHz#L2$@$tL4dQ)svE^Mq4DZ(R=n81}m)`eazS zxk4kMIB@YZMGcirH&5G*qiLGFAoZ+1*7}H*=i5Ms6ge+uWu)hVQ*f zX{_wDb|dX^lj+dzbxwoJ2DI)e`S!^ 
z7z4_hhAE6^r>6+8sGp$$Mv`f26+|hQ^E_a5AFLj3Xvw57EUZ@26K&RG(eYTUHbNNV z`BWjmiV@MR&f6oxp)y8M;@A2JquuEws>#9|EEC!8>NdehGRtQwfb9KLlWEz+><^u% zUhHkY(_ARa@3~1|h#aw7xM&nlB*^psSh~uvsJgB#2m*qnAdQrOgrw9^f;58y(nvSb zT}OK8l1@pH?(Q6VXoi&T?)--5{r0cW9>2$bij$wn-BBO?Be$L^*srDjpotTHr}hDv@v6-X{37N z^jW{q`$cY6TNR>vFOn{}?)_m2{Yl`8$G+ZH#=PEBdY_Zi{KLnby2C%LWEE9$XY>>- z?b7RD_oLWunRj@_4EXr%GMNHZ{)Z{A_^$&pa-Aw~?9}g1;TV&bhk`!_U4ZwV%K6=v zigV*i1@VEt<8|p-63{YV>Yy_%&-Sdcug5V6~KSL&Wn44V$UTrN6k-osKCaE*+ArMZ}38Sb=Ay<+0e=gt26E| z$y;y|Mr}kq)+-X^sv+Fu&w#i}{2)tHW96$t5BIW$*6FoM$gC`C*Mh1%C`NE}nj%+# zU&t?AdFm6PrGid(^K|-fN!mDB75D?}lGU_Si1z&e`nR}>`85w~IKHiME(F}0_3I8S z;x(TA6ce#+X);=ZV>hB?Jc8Wj5pF~nSe&wn&*FIZe5;sxj(Dc~)okRl- zE^gDe5Tid8GG;KaZB8>S^ex@!i>rV_dJ&oocC=Ewl|wKf4o>~5E@z7$UEht{OV=9% zrQ7rhOr^7wpiy;mf8C4k`Sl_EyM$xITi@8RG6|CId@mKtV{BsktBpGCmVoqw60F~I zbK^b>$ETaG%H?hiPV7mA+Vg`?D@t-TbpL{9yOuj~Af|Wc>8tN1wimSV!-r+tP@$vu(UG zTiag2yL6J%8NSdTcT}{9zT9vfnrMhXImQT*1SkT4HZ;G)o5h&8-xG9+jW298ua8YA zA~Y>d+F4UU{(;_aPCj!_EYKHHQOH#>`UF6r`v+hN4E!+r@a9Wc#0TEzZr{Y}C#)Z~ zoU)DX5CtZfFt`6nf1DX}&F6vj>5}h0-&adzKg$oQmbou5NtR|&%Q=m$@@Z&D@l=Ni zdEtW$1{A^eoEpB>??uA;;seh1eDC0M1D8)VlzJcM09{7Q;grGZ=2oG1{HP}b6h>wV znE(_;)VZu{&8;2d!>iWMbYss`2Y)hn4`cPDnhvGTw(c8O*9KE9>VZ_8I1 zeg+<}W=l#CY&*lVD>-mOv1Sp!v&>7zLQ|yltnl=1YUaC^i_4b6IsB<%I^NgpH<_l0 zoh*&EU6b@$DAeI(cA@SvuM>7UkVd})&j$=eU$z1of+aU6C?@27!zAL4-a9=7X4f~& z!yoj&DLTX_xCpDZSBs>FU~|MAA&ADZj8Bky(GW>7*>o*)-L6Q_6-+3XMoqm01%;|% zG0A>A&xI&m2ihYXH^IZgh*X3A2D_HJV<^U2BFgK^4@jg=fJju{#*cBa#*PSz0jZ+- zVOlyxw}%hyK;&g6hE2V^Mca?WO;CW2)~|4LU{TjSRr33hdS`@qfVU-)9Q8_sCsC4* zpC`D~YrnAWu*IXyW=Z&r(1c@j!RPlZ$r)*ugS$JCva+%r!~4$-!0V9D2(}OzYkG0L zbm#sTR@%)kh1TsSFio)dWz9=;s*%@gDlkmF#Xh)Xg4l^!jcXgGW#lE|6JySeZ;TL# zCX+NpdK6Q-8S2mD{wBqz+l*Au!<+N*u$@zA9cW-xsNGKOlz#X|J=hvQ%2_t zWyNuMw%NB#qeSGL`g&bo!~a)_+xikD(P4MtT-@)fTr0#?QZs%;Kek^I9qhNkC4v8vy<#1U=6LY6Rs}?``63$2dg**OT9dO zziMq91)@;_xa_tk+{NYL>+@Bc@0Q47As^*(WVdIkAjDNu>92d$0{xqej~!5>9fZ=I zC33iR^K=A%2t68|gBDyDCLw!!)aoi3D#I#7C^Sw>R{fvgkLMD;j@54Y#;02TKd2cq 
zj22|hAN887MARK>h<=^L@TTkLtOB3@>rYQY2?++!c^( zm*ut0HBWv)hJ2;tPc8?!#upBh%{%GnGO3Axvuvtm!KLeW7yC;uo$HE33iVBK`IZEAKYhl!EgaYtE(H3n^x~KLKrjOtD6d}r=>EMQdErBsjQO!CDqrsE#1gly%*(| z>6h93l$sflvHiVzjiFF0w56p*4*>)(1C z&^9f1RgTI_-m%35xY0#2FVPQOWe$d%1FsgNVsV1n#y+VgkKth-qfqORL6+RKO{1W_ zvAAb4-r^3youC+nm(PuYrFFH9?yr|-C2}l$ZJp+VV_O3ySP~ZG8zsPrlGn!8S%mK~ zcTNeOzvq{kZnY`XW~lsEU`fgQ)^ACm_7#D0+q6w=@UUMtDeodLnGUx+$ow;s4qzM* zY#uIdNSYlRZt3h)+5xyZp~-fC!5DZYv6BYrRKNNyM<-$v*)39c!h<2UU0GJ;dP=7I zzQIP%E%VG1Wh!9ProGlMW_+PBIv&I;%Mhb)6zl!^@psTYrZ$8elx}>x+&RRWM|}^0 z49TTnuRu@pn?JLX#svs%{ZXY<1cn9O`Vu1KNH7_ zEiSQWzO(l-p)HJseU-UVC2bK2781)sy1o3n?>!d{Kb}Zs?v`1TA_L%(vD=W30lo!~ z)Gpr0{Tg~^XKRc4o?umdddaT>mOC*UaiN4qk&E8|mm}uN9)?12G0??~;TD&jZ1*RlADT3+1{mE2zfEM|i{UZUe1`EH2oQz4pACk)y!ccxM>a+t zSi|*1h?gg&*&Y*I0J0(m-)S-{g|@d(o`Rq7j#Vf`{%c{!90;Y;g60bG<~qS*_cW4w z_vJbAK#Qd=`D7C?7$~p~tsZ{)AVlP4mqN#ui==MxEU7bkOLF_FU*hHJVk}CO@oO)- z41;!h-)QK+21^-WEkBxxD4=IQ2iB%OLY->Csjy!~_3xmXZsR$?%err#kPfn}y}#~2 zGdj8nwa}vU+K=C!x8+vxSkWVNK+>?qZ?A9QCI9Hy1S0?l^!=k@7*#L7-=BmMR0I_r zE#hgkrr8go(c0Ob*z9hTIdnEK_%;&7lM7Q^a~@~Y6J0ayWgp{PwGO9MuOEBhxObA< z;D?_qu2KUsn@*N$D`Ri8M-fw+?RS#VMdz$#th1flgAL)2_QKFkm}@BVr~RKr8yDxE zTmJ&!UQ9q6n)w!kJKM%F5QeB(@`Ze^&c=_-)s`Ff->O0}CC`5wm4Yf4LA1BM!-M$+ zY#(C;jzBg`-dl6QhT2$Jy}rfS6ZA)jmV25s8UnKQ1%>JR@yVaR28$JWJ-?^VyW-x>HXbIpKBF;Bfc37%7kqdt zA_59C0Qbkf!z|Uw&2=eYy+PNJiSP%-RB4c$LOz9VcOExj&e@Lr?AWwP@A3H>zUm`4 z^8TN~2x|(}ffnCtDDfRG=~D1CCQ^l2B^t0bf&H)kqdmhXOF75P5ojQK^ID>`qBZ^O zc$d;WqU!g~nA<)q=(I>z&?P3;hE;TXj((=t|FUIws5e}6kk(gyg3`i~1wwE6?F`eDTWjpy-29UW zxTiaL)sgSIQW=_G|4L0Ieiyl{9Pup8DYkD=B^gf3Wz+TtWJeY-Qb+)xKK_<=Ti>SE zSOA^0ms_jkQS#DvJDvWL5~c}qJ$rQ)Cqkukr*iIbQlQfuz#g3-o@jcI?s^(d^MgWD zY+ywrMUL->0-kQOF{y}$Es<2HvMS-xjcN72j&V)k?>Eh^&9^$X4@npp4!xyRp9+5# zA(^)-z!gt0b+5&;vV}x}z>*K1*HW-CU4cZimP>VQbXz&C{tx?0Gg#m*rN^Jy;#I62 z0~ymQGpwdo5~91mla8w{u36)sGNtv5dVSp#8_xA|?`e9ol**0wGU*pNpan1W>{h%4 zVXkiO?|!tmlE}+PtoDl3bI|bqSY>%l7-;%;MuFPu9E035!qmnD*-UmuavYWral>Npo$Yb1bZZNddZuU3?g>vi(Iua`@ue$CW>3bfZ 
zW^^7t_KHXr%;Q-aK>B6lSdj|%#s1EUNi<}B9d%L~r|0Fj@N{TqG27ncRAs{8Evb_SB+IUB7;E_k#wi$>+~o80`X}gp zAPwQG{`AI10nwv`7lYuG7)Y0N7@gc7#r>LmNu!JpIDi@MRIGPk6XD8fQw}-|B%~rM z;oW=-??KLrW94U6yMe!)MJ7kK0nJF!16#Tnt%%D!ep@ZtY+gY~nZnDS+QBr}|N# z%9JVF6o?2ivh#IqkC!D-Wxr!wH}FSO|qx2a##SYah9@9Q2?bbEDN7DxudL0GeDbF^n|Yp)N;1bjC$ znqE)@a3Z?r%Q|oqnuVGfYY5BTGjZU!eq55+0L;&*oZkQfw%3A9N47i&uTDj57hr^i zy;?nDq`=ee-aF%V7QTCC-FUC&`anO2$82;X;k+%-N`L_kR&cMS%cW=BRZToU2?2kN z!~k2~TdbbAtg9oKxNgoZJAaEKAq|jWI^UICWAAe={l-b`Eu1<}^f{;IeHL}2F%`oD z96Lx%a92RaeGRcO;&HX`bK0W|Kj3T%H$G>`)7luN^_lwK{Ur7Euy>`cLp$P@6CQGcvge95jD5~GRK8of_j^oF zrS#^eW2pT_yHXY1;yHW#NkeMv zybcZbA-w)n|86Hiq;q1zh!xwy=wB5zwW+CTcEs^jAKif<0yKFuc7$L);mlCl7xYM!RZp6#y5zLjR6u&A@`hSeiztcs%1ko z#WlKL$glO^5_I(_*sbKqlKkdoygxoND$7a-wVI;vZA-hCvx97e`I%O@f&fps^=}@I z@na_eam)AyveLS4WB#!bBMUMkalOz<>9BsBjEF4|=6Q5*p-}0o9R<&+2W}jfaHVFAc5WCt}$9Mgb5m8i5ofjh`)t_iFj`d~V`vu{MXY-m+J)pAcF&ZS33J zb{XjmYzbmbFPZ5kanHIIi5S+`(CaL@nqUD|-m@-`N=;nY{&T$0)XeV?KVhF*%3TovzL5W>?qLKq$pwKm-PnAO>CY?ng zzYY4c&drxQ1uTqbac^R3dSvr>A>?^SK-JwYj`me#Am3y`1CxtvKU*WIc=(T~Ym63hF`4!XdS9TCHZRrD`5)qs7aa@^GcxJ(rqDW9g3fM@Y~=XgIaQJ zi!F8J^RHU?4|i+#;~Dovi$_x;TwPz|B`S&h9bNmSgde(%0C`NyYLnxnD}P69noY)V z5H9R}u1!0@%1qx8t#$6T8K?`9FpaD%T}KnS(|~nw(|9Mf3>^fx(TJjP&YsX4e29#C z*ZkN?<%ugtcvzR!vUfY4m*(~TElzBcf!VPRW8;vql8GM^dJ%qi4|oj zQN}pU-neS(YKaDmhX?~p?D`eg@q7t!eUdgdlG+zQpk|^7FCEpg9OE0q$5p|fdD7tN zp_Wbko~Zas5nC$jx_@)7SJ7x3{QP)2#sb>t)#5>YF_k9W1+nzYTpQQJ`Im-{dA|Gk z8NRXbm({w-3QZ98u$3W6>&lI!6Ro7Qo-S9^ZUa1mz%BBW^q%DgCq|e5T7Z-*zlDV> zE(5*1i$B#`ABGs?1ZXV5$hrat=~dPV8B$JdVCBt+bJuF6>Vat}lagBDRr8{g8~=%T z5Lnnvd?pgnq^8%d-y$P8Si5bwaV(UA_A>|vNtuvt>7#I)j8xH5&T~V2m!MI6<9Y;I zEUA&GFU$WBN(Pa+*$A+E;VQcxoB!P{3XtK_`kowX+;UJqO}+Ft#my zaY8!fXTK(W-B;itqj+1So#qh85uNUKvnhJoc6YGTg}Sk(Fp-Prv%PDIi3N|X?7Xw( ze|1 zo=wY;WnY{|FzfmSJkv;1YL1RjRU4a|(wYGG_8g}jPGVfIH&L`_iTRb&Q3bKZ|5u88 zg8`qD%ScNq{+^qvJVhNH6>vvC2D17MT|c64TSWyCziD zB;}?Hb7PKoA?V%F!CzxbX&MCKFzYdmh3OzE9Gq+;>9LH5vtB;OHFUPdy>-N_C`%Pp 
z9vaRu_HP^E(%Q%j!hl;+4s>LTN!;qhackk%kdS_XyQt_h%{80K>yORTo+-m0-jv^m z#kJj=SgS{)v*?#mdn#3~rMz10azXn9j}7GG8yj9YKZzDf7gJp;B5q%!MKo?EaG6cS zJeZMxua_u3!Ekc?{`VjU45BU>rHttYI>6 zc_9*iNT^=lRDAVv*^goZ6et^G-*W=`3?wTX=d96(yzif&$XvyA)rk-vk zPER=QyfolB_ zL9pg)Ph4;a5y%m$P)(#4eJ-4vV);Y9_=p%z=J-K==M74Qb&W^x#^l_mAHlfGQk;@; zAdn%RoJLL$elh4F^2z@A4zM*^ZJ~+O*##vzerJmYVzx2sAN zfrRe;Og1g)VL$eej0ztQW)WbnG~>!KEl`21jpAPg2g?*4P>HJ53&rs8jYbAtQia41 zDUzK4XX%ZH9od2|E3+fsY~Sz3)RJ{2ZVd{DxskSGQs-BcD*7E2Wj2@B*YUG1y!7K)J%OS*(TzpPPX&<+M@`>{XtW29u_9k2F(_2xLp zl%-2VsbHcY4nI$S{x5mOI=3oU%yKXN4*4(;a5`NXAd5U4SLS6>bSHv{+KDo^OQ~D5 z-c7uPJB+Aj-@F)?WxHvrgVSfEn5*IMb#nmQ|tM_>LrT!_#qH# z{BV|K$e*0&I?jjuucJAubGOp!mhXxYy5o&&@gRinZa#lemlo6J%FXDj+3NVBf0ZM^ z>mI?v+#AbBw04Q;p)q!RdA`|t_Z)QA7mJZ3MMp?sEn$D+SmyToxdo(&&Qd8vv0{Pm z*T-BbhM`z`W{nVhF~NkYKZB$`V>SPZj0^oCzf?X(cqR^qhLyY__WG0lCv4$_S_j(k zZR~nXu7(~3e#}RozS+Dg6k4du%g1&&OL{>ksiSc^ht}Bt3qdTq0Xo`x54_ z^{|5IEk{6wLvRm5g4oro^U}jM1wn5}{v{|_M*Pc|aALJ`wz(^CRmPG^*5w1V9 ztXCo5jKAN^PpDx#R;kmlI5ib$U;k=h-`HxdjZmSK~@c& zx8|OHN5i*2=JbJM{7WVSh~al~aDH@+(%QdgB1s#uZrpTVD9st#Qh3$!UZrks+;BX; zj^q-3hkatRy-I{$SB(XOI)gb*^X6#xAnA=Jx8sUeh3 z&R4^(OkBd};Wj?;l#dx)>VnJc>h^3jO?$=(`*n7`kuIZ)snOy?MqKbK4kx{caS)wB zOa}hJ&9SKnRD(+-kq3}ieSZ1)g7Q7!#W zvHeLk@gvUE-*HU|#FR*b`KU+{w^Syfv|Mg=+=D|Iwza+U>36YJ-=xgliJD7RZ7tad-!6bPc#zbt)@q+d9o6dh z15q2mWNSia$gn4s?XJ-T4Au=3^0bgYzDG4#Fdz5^Uma(a`X2u}FFSknK-3liZ|PFs z8lO~VU;3-FA?TFk))dVmV7`4bOxuQn_xf%`fzqyf!Rz^ZvL=ie$bgK-CQ>+>wy=;F z3S|_K@3?TX)bq2c}pR1;8rKm&~|h!L(XH!BVv~wBzr$YI?La$+s0_Z2HraaN#sX zp?We!;d&g$n48fnV%4+KwzUaEzJ;h%D%aax*!Q$>i1H-Nr{qES46CSZVvSq!FDm)R z<5&AYNdE+i><{`>?;a*sqs`S|V|pE_Km0@F}vRqjP@f zl12p^qSBCIt!~aQSfA#;lQ%+(C@^ORf~$y$VQ#PXVb@sVjt3;g_wVPUq8#Rb*e*ak zp|n`V4tpiJRRQf^`W1Z@adMpABpkjiv^fFfDrp5{R<7q>c;0her2j#9hM;2| zr{sc7NdbiKc(MtD@=AkO>NtyHX|Bx_AXfu^mCL7gm6iSr-%5meo13Dad zUfq`28PtI!ff@7=f5XK3Klotg3~nch$CgR2^HeEe@8gSqyiK2j`lp9P#NE1poZEEM z=vfGEwe14s!Z~_L%gt)t^&64yiPsb?Dzv=>YA3dU`$`ehF{H6cBw^*Dl;NDSQzc z0YO1Q*unzE0@DsyT>4?)6FQ 
zio^5+Ev5GsZ=hK*jSWUCjxaFYTv)gpWI0`ejfRcjol>t#Iy4zj5DVVoo;Kb%ety&J&$1ne&q3vhe}QxLg^_71Loi}us|wuNtx?}?Jf>__!Q!dxC4zB;Al9entB zy*rTm0Ej{BYF(Vg-h>iUrt}Qo zqp=%1DhF6pp)?!OjCubK!lsmPs_|W1Oot1L!NwjnDVZy)umGaV1(!GJt_E+01s|W3 zZRw}Swth&{x@z*2(zo}Qj%gF8dz7Fm^KqvCKE4e#sV*`bRlh|CFmEvw{8G6y?^r`E zfTJ)`1PJ6qN0;t%;~q5!(7>LyD`b7rIRurO;|C5birO1{w52hgZWB>`yxY^}aZ@AB zp{njD8C#xoA_24tEy%MVqz7?lIzD{~gK^~-1y___^DNvtYDx{lTaG#(yH91@ocd;d zv70|DxvL-(tD3ONOHYg5Y3nkiSAIQD9zPimPj}MAOUtbL+ zB7SNHMa;Aj5WIyM)b9(KWbxxxllTl;85L~bT(3<1wp;7d_uOn;>ezRg_YH=ebnmk&|l&piz0E6so?;EG^}Y7Mn_a{`}rVR24EtEjIuR z4?7px==yaAMH2g#LlCJ)(D51d-M$Dwk*&;il|9{xMbVA_{@p|ALH%@{LSisObV=2; zDIwYg6}NeOx${u+guWQ}PGMRZrMbll8O_)qx%f1ZdnnHF)^N~K@^mwvRt<>uC4w=m z3T1&+B-sBQ^YCQh|33PKmzh40csPyiAUNcC`!F_NsyT}>>SXkikMC>aG;)PBeiFiZ z9G0XAqK}(3q0v)4b3=cB_zb%byN^7JS)6mHDAfpqgF)D558vM93&hbq7^`&NIxtks?YxE~6LtLP5D+3Op`o=hArhQ?z+{hb9&1wICkLo)F{X=^(3#e8NU5?Ri zBn>Z7DIbGG971u8hRpfuagGFrXgRz_hHhd{N2C3&`qf$?S~j<+@80!bl@}2Bj#E~x z_Wz16Xx#picyZwjSS>V=Fsouo3`{~RDzdXXqoQ-w!9QBTFV$iT%ANNuQh`N{nWpR4 zuR~F>UMa2+WVBK))t^&lD|K*v|fN%Mk<&J)Er<>`7m!m3AEn-O>m>Uh-u0ZtsG7(r|>s#HAvLuh;u_ zP9JWm+RrAqZ*Ce_m6y!${vg8}1T5Au*BLf# zm**_0W1?*c8^mdv(wQg>u2b?19*OO^x?8YIiZpdNABZis?~qBQBd@Ku*Ak{U8H+s$ zd$&+WrExZ-x>}=T+<{7Oe@agXphq6!m)*@W9ff@>lJYj$ZdTE2>G=b4T8C+jwUZ^Z z*1m1%7qvlS=M4Ty;ICxo4K?DF&n#~clIMOpflO4~NA-h+EE(eb9PiA{RSt7cnrq{4 z(yhFt%B(r|~U?4*vr$#ZADtSHMN7-NppKwxwe#Z*w4)y@sq z`yaM&v!$rK$RF?FyS8I+LRikt=x=e?tA9GM-=E1o7KO)m#^#y@V^d^xLZlP6MzOMS zc5*ED6*}{@8^3)?N&~tmT3b**b7^|6k}VN!+XRoj7tHL7&-mP)L&zE$xe(et)}q(_ z-;@Kgp=yFq6pq805ogQ|N0d;! 
z9&5yKh%UZOl99U2X_p0u%e}q5dNwhcD2N=X*-U$2s9htq&Z}hr>|F(9w3?dE2K22*JpWnJ+Z0^n3 z{8_)3X8Rr)xq=u{s^I999Q|Hr>U|;$DEKI+S-00D$|hJZ<9dOHl5SU06|Dt{kF>7d z8~#-Psv+BovZFv83S6&qI&+!BgiL71BIMH0>$*PW3Q8>ZwL`#{+tr~^d29@ z0AS~uf}N<-OPPI}n0v|0NcjLjmk4p_q?5f$FP^6iOms=4beLr1uKl{5U{_w1m( zWFl=sbtxR5|mV2@Mt4ZFP&@$OVH&&{QzzVd)^qWr!UvT9|2 zkpvpqEQk*6H3@FHtVhgZ-Rg@}rwjf-iG^QdOJ2^WR*thS8%bKFQV0h^R9H31MymBX zoxGh!O5Ppc8F4pd5-)6m?Hbr}J?bF}&dew@AeYD~>5+*sXXMAYr3Qx+#r9S}BBc8b z>ql5yWd4t0>csT|7n)HljSzB*d60OS^ACMo>e?C{K6ZStwg_bziYb2)(GwGo`o)*K zc8W*o12>VP3ZKA>a2d~+buJ7;6JO+!;JnUuJ;LPw7-~L8JUl#j1$H&E*_aFXd{C%8 z)bOpW@xOaOJLp6cK6*1#iP z?>)M5Bk^iydAr+SFnAeZ1-R`KiVVD_3-YaR91yJxDBM9#h_A?F2LP_1!Rk(JDxPVw zL8oN9mO<^a)dMyf8!!*uFuz~5Ur9ggf-4GGSLEmO+h`JNb~2Q!Rcg0?apoDUW>UaU z)h<T?Bsb70wO)y9?{Sa-*{dZDT7w*cWZ%_<6Cc&5Mh`Rz59KC2{P8#9uw4chgE~ z4Dt}cH;iK3R3ktc2iRioxvMwq72Re`8%chKvsb-#iy-yp3jKCV?$%gk_f;38@HMox z)5>@ZH%S>mIxpF+JvmE)7+jd0yMjs(1ST$3HKFZ*X);ZrOT|cesK5nN#n@fmn+77qUU*~7*HzEt@IQ%lQT zH`W|Sxx&Lzvz9bTGdLqUs`i&!!$JKnSll^+p*}izL%;_vr!u0{@8=4hQA&1>H%Yr7!FTc!$Q8 zgAr)?n@Ph;7Q38np<6Pz2Kz>y!dm@nr7V^i)=e?+S#1cvrp)y(|C7blqs-hB8LMO5 z>{X|~!)1vG;%nb7`C~T@`*edWw)3hGwB3VFw}~%iV{_QzL&z)~Un`%ytThI{e2#{_ ze6!n%M^Pjmbgj-g7897tmvaQHuC4})_{643{EW$H9GqLok2$P_cAl^wjCnvueADOV z=3-3?dGS&~tm)WGyuS#Fv`u`%iL&of9?hV_tRt<5>5cNlBGg*3UuRrSpu3L@PZ^hu z2)!W^+^FZLmo5qQB@*F#^6}+!^m|FiO+-`#L1E|F+>Dp6TB5&yj{|6E<8YT46e~>h z{PUdesv3*JKI|JDH->~-ZYfSd$X;NXFA($%gU)*VZEgO0HZro&T9}T~QS>h*Vk&)T z=i5dvY$_@yO40ScSV!Cy)EEbhe>vxt0Bm;F%}ZrkLDLT2%n_V(J5}526AV4>3gMz* zC*g;(vi{JPv)2TaMm6onJi<|ma=m_9g0Qvk;LO)SLf%R)}pYK<;P@EMYOAT%Sh4E7d)d1XUSTf@ex{K+msDY=$u3CX80uAejz<_J%hk zP;=NgWgqy>*)1a4Hg^Nc7XRabJkp< zY`aio3w;|o;brGexkFMK>H;zi3wusr4DF*&Urz73G*RAx1Np_T5*G2RGgs75)cCy| z`q_p3f!LXBAzt%VB{}QBX5Rnud8EIU2+7l0e;8?+QKCB(@nOd2hL&^tC_2 zZ6+7hp8HNi+m~-UCsx=9Qtx%KK>smUJhomrw=M)tq(xQ9TxV} zQ&WxBRHI|Qy~D9=hbm0>|8iWpt|ZDGALlB_z5EzCcH)-s#|dL zCAm!zXvP=N;j6@~dJx$H7N*Xdq51Frs$0FyviTNSFdfI!5i8v~%=v{)U&aSrzMomc 
zs${2RGvR{yReTDf+XD${$N+xnVX`>GfHT}rSW2H4!$0!t+!Ta87Q@yLD~HvgOZAqs zy=+=JskQoqI`?XuJ2Q?FLLMhTD2M(W(aR^)EOZ~E>*6wV^iqBMRdY5x@Vf&L|mlG}=sA^m49h>-FiUzi3_h25x_A zRE6apEz>wv`{oWSA{vUe&K$GItLYiP?VmY;yEa(9)*Al;X+8dph(99OVyBrS_%P#4 z-QeL)`y-Y=D-jEJ}>=K?R?ZMBIm&8(NYO@a}+mHfjV z_fo0H)An)?E?rzE&q-~$jWCwYX-f$q$4h^nhL63%;RcT|h-a5qGmN*V(a{|q570~xoln#_w!ov9oRASJac451N z7&6`Qe;l-Ugo*5BzU=0!ysJN(k#Bz&0% z;S6?j@YUTUd{}(w*e&(E)jE(wB%NS-J%F|d;x+jYIy*U#TA(&*E__*nfqLX+EY7Ho zi{i{L`F41FN(4ibbl1&FcKM?eB}qrIxN^Z;;IhcO$>;~#leR~!c%nYvQy)J*Hd|=F zYE1o>%&?ly!^Y*---iLcas)Jh4+dLoIh-A7IcgBO1r~3`5&JSoC}Fz@kV6ocr=GfI zi!O^mG5p!B6;6NUT54eceI%OEMevy0z`Raty?PuC139g{jR2nyFvFAj@}@DR;YfGn z;ECoEvD(Hu9**IOrUizunAFm`JJz|clhcVX_`9b=a!>ULK1geG_#&V3=I|B_W@RW z%{3al&VRs?LJHT0&OW;dXQ@Ob1>FZCGH(mprvr=CxpUe=Pud-^)Jge37$ANSAYsUb z`3?MfI80_3S(Ch4dHg)wXBi7!{RRPex?OVbrfS+9ao%^&B70I-Sp(W5T>c7g5OT8N ziN+;&y@ko*#_Ptj6*!m#lE2J@#EP2;{GATT~zst>^^5L&gd!Ay=Zy+cjaI%22#L{eCfoQwa+jQn`BjmEp)qgA%^9YitztGdGF8e$YT;ciyH7V$plfXkK&Z>oje3fuY%sK zijN|tyNYs-nrp;x;UdV&2Hs?6XQw>&KHl^R{?cC0^A zCfL9#=ot3fNuENI^-zWk&mDT;@L}~56(Njz2Qv`($$2>9l+9jHWdbi64G`@}3>f^| zeG6C2mkAvlg>w6B9BRJO0e7);I-C;;Y!%+~yGuW|+6#1kKyQ@^;>xUqo;=jIdeN(W zbnL<@HLmh36og~~>@ISmvL3$7lI)sNxfKtxx%aW`VL4TYA=^r}z*&X;{!H+03eD#L zjxTbE>+8CZQu5N{skR8JaG2hGI3Rd*<#-aEmz*Osp~IVKg^0KJWXyF=oSO~}AcZBh z(bJ%IDE-=OsY59^>3VZ9Y4sBScR_OGWpB!@^1zhH@hD@UY2knJi;&<{$i<3(#%mSI zm@zN4gGAsHb)|n=Y>w?1iX4G2wym-4HCVt?qX8dXIW(S@U)Szht0rYm_bJ`jwj4;c z@2WVHfw;MU%KNoH1Sgc0BIN2P4plZ_cJ3Ukg?i*8SPQl_l#7pv$j!BXj=q+Y0^UiV zZBmZf?<2ehOJTEW0k=JiT`3nEcc;2Ih-d-$Z^MrNAt=mQ?^S2_A`2SC)c4)eE&0+` z^ZnTsc9z`ZQ@?q=dQ!3mDkjVez-f78D5Z6?jfy7awQ7b_|EHQDaEU35OY+~p;xIZZFds(*fm-Kme=h}DeW3-)y_>LB$eL%j z<&wSeOc0{J)P~Bc^|*&-7N`5K zcUWT;j@W4gk+Hw+06uf52WRvbe$HyC4{wF+?8c>j0AR4=SjG8mkm>&z>~KeK-tGB@}G}hnvRhzcii(3uxOH8E#Le{%3#bkSFc#wk2Fk7PGIA+mC z9Hr!!=5m>1O!fZL=E!RAl#ehX=iHxydx0Vu8x-j3X>Ooce7W zP!rsIdm%M#-!2F|hFpGsXi(diKv*_i690PT zO^Wc)pdtYO)4m=deJcD9!n{oK=~mz(S$oe1H%O=V`Sw>&W0xO;Zc3(j8D0l3QzyP5 
zp84Oh^g$oi30zM74sMlsX_&$Vs3HI8R;A(Bz1)ye>{2=(SDfR+>v6!=LoLbrapXo{gjR9+xKk(jU>h+V`@U*6$ z^(Sg8WP42eF}V{w8|rXsp1-nhbDy^5wwc|y zA&=iO)J2aUwgRv+a0WvL(+}8SvQhK87b|kvn~hi)jWaUCk9X-JA$>+2u&U^FY%S+g z{jUwX$GzVT5Xsi3cud_r6LrN=lxUwoPGa-9`m@m(r&wWD4XR8ZP6bj&(Ab5h&6v_( z6UjZ?4r%>w-YU^3pD*}7H@TmMbky0e{9j8~;TP5Sv_(m!1O=4Nl@jR&QFAaKzDw5Kj(hbtmol8g~wR9{cv3uY3`+M)_{sVJn&NDO5%$d0ze=lo&o^S$?G=Dlt zjj_FL&ZO-Q8{O-J&eZ>RJeUJ|&&zAK)u%u~(qWq|Ip+OmD>&hIoR+Q&>lT*Nmw{2M zs;gSnf$G)G(1zB{$e$Nl^iloo`NYMh(8-4;*RcoqHpH@_ao>535u;kt*>N*nShD|I z29(Z_XyR`yj>Mxq>N9l!5MS6EptXIF_g;$vubklZGXAnP9fzKOanh8Vx{mtfx@{*x z?(H93BKQrujPrO@)qh*#qfOMPy2PV;wfp~}s-EIF!7d3Mc7e5|uTf|FQNHIGl)L}< zkM#q|xud9)eas=mBp{;2l!#=8~z3z44a5BNa9th0#!|2*z0cF4tj2n-W}6L&O* zzaa65d`NMNnAJ5xCmj6H;WhQ@+B5lB0h~4yai1?=B&2+#rM3QOI|+Z_RV^&23$aZjm%^)6d7j>lUYCr}M||YgT4VNn~J8 zuNqgwG}H`K0^G>_a+Pp~q7slflWVpFH~A%ERZB+gy!>TRw^lTUH(!3&SBD}>x&HqO zt^N(f9|Q)GXys~tsvX)}6ZphJ!-+8b8-~f?rxNeQ`{1r*$dXF+rpCin2jK4mJtfYiQq$oZydf1NS$+ z+^!tY>jIYH29rl27;BNGXR4R7$Cn~DXmfC9xvhzD8X9SGZFnY`J2YfcEy+X-Vj2az zGFuQNCoKpm@m%2@rEBG^>RLRhMlrAP2bc!M^IbRTTf|-37o(e7h+ux8g0S3G zmdMd9Y9$gmdIBU%y-e)+4xTLiZob4c-_-IT)B`R`>UdfK`{O7KIk==Qi%Dg2e@FYqm1&27RyQBEHPjPcFS;I60>Fi-X5RX?S9;!Bf+LS$DER@Xq?0QQA`|)@tjW8L znS9*qe##QNyH1aKv3!Z34?n9<2oo}Odn2L7aF`n6zC15z zr!sddC%X}|sNiwf*;E(uFk7Ko=_tK-WNS7!{1u0Au&Q*0$|X&HPnP7nexNSx7ER(Q zn|HApBWS&@wH_su`6y#>W$U!u_b_rTqs?3EQKCh_LF$TXr!bSqK;|UG?B|=CdTgoL zhz3?z``)2JvW8(ctk6S1OzabIJ6!j>x!uGnA-rCiN$=~ojNb=vVp_3Ujj(@HbTK(0 z;V5tSI^FA+qwfM<$b#_?fI3waIc(CpF3YS$X?E$7tAAXIj|}D$8LfgHO;mv_Wv6 z!lV)Uyg0&^hvfWqlER#Rd(_{q&^JdhjMs?U?UcL+jH=(aWDkIlABFLFK|hSWplaJ# zODd|gc(N=OD%g@^q{$d9=px|S``#gg|Bw5 z$~>$gk91PTy*~9T;~zFzx_NL*{QV;E#yO!?zaez{?bnUBNEWX}mEXMubJ%Pd*rx-P z0=%P-K02B8C2rkI&i8E8q4##)BTz$OMRM!YC|*bGbnt|Pjn(q9LGp8QT?<;3HW9sc zKxkTJvY7KJM||N_n=Ab6);T(sr~7;+qGA}i(d2KyVQLB{iq(*0G*RBPkAGv_PF-l& z8Yk{XZ|&ae_s2ahtO-P!3MOB&_2kpGU{z+Jq_4DA1J<<EwSh7b?mxQzNW_^VT2u` zf!^3L5urmwF{~Z2FC#CyYaw;y;j1o*D&$ufr8iQH1WsNrfUf5*%pwj_rH+&6@JC@> 
z_yZrtjVS_J7`{K)L85*Vt3_RAW5eeXie#P1Qn=v~$6_84j>Y#%Ru*9O?5luCf*<-Q z%AYSz-3NqAgC#!#w(dgL5Ic_CdIwT@p>*{Us4dearrz8O7}Mok8mD}pyzl?~_+sV5 z2Ut=BRdFVs!T0lU4=0iqKf$?@_-*^68DouZO5sRmG@dm5fet@)*&ndyQBHWvF5+@DjJSX&f%9%Cl;Ku9fVbNB3v z;W$?cqlhThUX4y!{!PjD;UR&yM6~Yno8K}}H+`eqMTD$pHOKkG4!w%UKX=>kl+s*LHrD!y<%?{*h*NKakdFBS2* z?Wy%px=Q6Yq#2Ceo9~VW2Xf0?=|;8U=iK~uTu@7X$3LB~#PMU&FZ(9z03U{ve8bc{f4i7?gP(9&b!w3z@kbUQnlc5NCFH*^gO)= zfF+0QYcQBcH`9sK+k%9gse12uJuJ!rVdkLxF znu3LbZ8dLyh&Uk|_2Ja^^6<`yYmZHwPPn9`F~Nl5Sg1VSjDS{wl=I!B1Y>CBMy}+Y zo$@r+8-=*BkyK$h&-oI55(RTnCFo!g;GmV>gl9ho-!O&=GJPQ0QS$B5^z~lu8T)o2 z)0%og<*ugV^!Uu}^6URPu0(TIjPPElACGKmlK-}1^D7ERi^M2yid5$D?Od^iu?O`> z>l(x!NECKgDtYon>70w}LYnsWUFkZKClkBUu`x5bt~@sd_+#)Up(4iKT!8UW+IXSx z?crqih^k@QGfsVlM9%THTBO_e$#15$*0Cv?#!Ti$nz-Mj(?kN3Dy*Uz*;s!1zw!>< zI6Gin0*&=Eq~g1Wp!Cs~Q?3h!CdReSVB&nLo_hZ-d=l0ECe^WE33^U;-JZs#%Y6n= zJW-ehC!Vq(o)7cyw|z-C2C6M4H&0BWB1=UG5~9WFs2pNt#iI110;I!2>+)Fjk(&qX zVifd3wG|%|7|H))srFG<01ghRcZ*LGd!g5?dS+t|%zfmMNN<7jP9?v~J50W-(Nk^U z1vp-P~6&{>@pNwIAsb1~uf48KLBTtmJ>`?AK_N zlio&L^Cm|UYdAwP&3q;|UL=%UEkP7wSyh?*o>BLG6@Q9+<8(Eg3Vx3hDtzPux_Pnd5oWX~raA98?Lw|b-`%B{OvC4n zK5at3ZGyK6)@K0t4oBE<1gBVRqOQC*pSMVZ+TuVhgYagiaMgw6O@9jOBFN7|0-JJ~ z{F3H(&^O)sLW`Lk687PfqJHkr7;W|G1Vgre!M0p?5d1CW;@Kz(%W)(}R01J+5&pj4*~GRQ-ucFk z>QGS>u3w{&KEF`Ye#CZtBn{7R%UzGTCK3;5Xk&1=55_#3XSohu&*MKz>>qBw9$x`g z2c^9{VfSwqFhi2y%jzs9UIVTd|J`-W4g^Lf(FY!6+0l~D*mE8A)noR*%94N&YurSC zPuWZlm(pqUh2O&%)SUHNR34?8-h4IC7pPuUiM0t{oibp!msSmJJjD)j%ZRy{jZKpf zAy+i9o*8Sh)m6XYQq@UKo+@b~1in*}WE8BceekC8)ZE`Kf-gIY9S=AU&Dw^TwZe>z zlMid6J>Tf+KCjM;Z#ek7tMow&+6XrDEjpjL7APkHCc5Mc*^k)Iz=R!8YCD&R{2Gwd z+Xi8I>r>!R&-{9DTpDds3;BaP%{=cTSN+VZ<7XSxb00th-`pZ3b=j7HoDh>y$u>x$ z;oh)BPECE3S!8Iw{}aS#D}u#!BB+E`@-@C-wS{B~e6=k9Ii?<4J#6oWI_N9^HW+*+ zngToPsHY7ejn1p66O(*;3vFRZ--u(i^5>atG|WTRk5*|EmR+8?9sQ4f`y~;mF2;Y) zJE%p8jhBO#i@)d?P*Gvbi%I#r#j(?ZA^MbXQ}5*G{Ma|*a4&936=pN-8!PhN&t`2k z_V*(f_k0CW4Q1$7jW?@ks~lA7RpNISD}9B|)AJd9&Bbi1rg<|CyWkJSBT0Qt@pE$! 
zvYjKB@ZuzKQZ+vp2T+$V&5%2;_Js7dMKrN=344#S-@Hu%SjRzrclH78^MEvJ%+{v( z2rVmi(QI(mgz_h7-8qV7rh6YHeFk3bgEm;Pr+cPg(^bCFLFe6Hm==@p#6SR45Km!q z3}>jDf&IIg!=%Bo_!ErJA;HhwB1YBy&bl7PSjr^$d9278)Z1S^b(iU$1p>zCBaj5% zEQ=9$fPA|$TqSD^=MbTE#azyN_ zB{&CkR0TGSk$_R{;fHM(syk30y)<9Pk*E5f?Uqa~Vh@sdN{wqS=>CTay2I}RG-@`6 zso8JX@cfYLglpgTaNolQ*cK)>I{A0r0vXLeiqkcowOLt=Hl-r#oZfYX51gWD&(u<0jN%L?e=WYP zgneXsZS_N*+}Y5hIKuClOngN^60ym~Ut#SFXcky#(PT+tu{W0(ZU~YhcQ1Oz0Z_~4 zJGugPou+eDA5lC38R_|>v>=`SQHZ~q0X zRe87jg;I0_J{E}Q)dF3oM408%cOKe$Ub(RJb8$`B!XZq17?M_(084QwGJ>iWViiYR z`_+GJ85I+78-LknidOuwFT+Cdtbt_3+i9Jx2ON@H(%t1VM~B8@{iW`qW=vT z!hpE$Fz0~2X6Nsf^hW~xyftyUQvJHaLr-e$(5uNdY^TpyyIm}h_o(@9tmm@qVRG;9 z@6Tx)x;F0XpZb;J__u(SPBNme@n1LI!^|X`z2cxe069%y;xhU zs~~zgehs*0~``Vq@R1Q?(TJ8etf9J%oyP4>xo(uhXi*wc{~jo@!hcBduXPe zH^>A}pzsFAYIW6Ht&j)1j63<070cE5sCLVn_9gfmre2u4^L)Or$~MT}J`V0J_sxR` zn(iUS+%mA;S0{TvbP(wqEYX4p?@#t?2~H!OWRWv#(V5DCY5;Ja4Z6Qd^wWBJ#Ah)H z+Tp8-G4!&E?R4AEhs1C(D3Rz=4)ScS)O}?M-g6=fvg3YJ_IQI?-1EKEOIb)vfGh{u zjzT!`a0J0<;%3_C=N)uSuaov*4$Wm))13O8bUd+fgSW;Y$_{G1u%f4D zO!9hj7h4U`S@2V2U`pJ*b>P+CF*wpR5<44=U81Qi%_L^2#xonKbKLs;uXcSX?bFBe zKf1?=H|R>HHeJ&?Re~VM8b;e8{6cq5ftzUiqmnY&m|A=~;;r8bc91$TqqQ7X5qzq# zS)4I_kIa39+@fjY3ZaUvm4`KC`soEJo7Xb%>JAd3rz6FXC^(kLPXOJNX}V#n{pvQJ zA*P!D=SP?Ol(I9P9jv~^TYMP4FcGG1VEmJ*)tzaBSG2!>hq*{-9Xj>7%DrolPwtV; zUjn?%n3C$9S|4yMZ*vE)sObZXy3#`ljFvWU+JBde#*y1JxVHpG|c;i`(z91 z`}~hc5c`gjPP&_c@Xjk(`0*eV-f7?$J3McaY2GL%sAV#eP(&h|rw<*~Z9|U`bqWD7{E#Sp(KMXsk)GroxAZm@QHwoGcY#$TeXowjg==8!D#JZ&p0e%=1j1 zR;$61Ill9b=k1OyDWcYX9$Q@J|Ag6&J2QY=xlaIzaj&GAW-kJ5Vq^3Bxkj??|r zpN$TLuNJMW_Bi^u)xSBd?KSlr*3;!n#caGziguND@*m9)Kwi_Nl{Wd~&K*z2uJ`<7 zJG3UPylDV7p3SkAMdxIxvR9yb&k!{8s&Jkv^e3beG=%nvGV2&jNuGgc;!)JsKyOrE zCKNn!fSMf|U3O==IVE)(OOF0c{sWh#s}B6h$EE1__yQ;} zwaq#O=GWurT_Rl1@7TdP;xJPiAVjk~82Z5;%!E2S@o^v|&w%#gY>?g^glpRVI2bffrNGSc$kOy8TDSyPHV_Eg#S&gf<>lG}63Wcrd=4LJw|-<$6|=N=}5y zjZkKwN*`3b{zonUg7rQe=5Yja0L>eiGP$+qSRqn2FS#5$I~ z!Vn*6Tp^GBSw7?y=~;iHb^l|!UezvwI0CCZE%EA@Y0>ZHWRuE}?;L>o$oa&R!*IC| 
z$EA#k4G$ooW?kMq-jVf}IuPnW5j`|H>*1AL?YkUB^&IO@0qwQ4K{d$9-r_%9ouZ6S zeJ%Y8%~{D{XS~GA+DS^d1G?#o>ar;xP2&(`*|@(+9_! z+%AWvbrd^++q+3WE+EQ8<>z8NB`t-ej3LqObQl^Jie-nZ3nIssFPG{6Kj}A@;S$B0 z4=R+8&S*LIT@DQswO;fmzrV84rK$jfnrJ|d zquvWIEUs{g68w0akUQ)ropi^Y9^sMX-hAIAPbsF44i0^Hr$n{gkNlQymJvXVPg9fB zsHm~FwWBnm>`+svM?O_0Ot4)wB&})U2YuSR*xCU&CGsTG^G(9FyFs0&Qaxfw&^{cr zesSweXDCQ-9@aK|mgUDIa=i^MY=RU9rzG;g^yDixlxbbXKmlld)n)tF{4CsQDrNDF z#MP8jd)gt|g%rE$^zRj>{EVzXpQ)phDvHUmW%h-Z)nxjd{s=MSh?*)ETCaGKc=6N1 z6vpk(U&%JUk!c`HQr{qdaxC<+f0BcTD$jz6>V2)G_!pRx=DJszrv~2mvRrEdt{-yl z{!2Xz7dfxnN!A96iTZvQ)r8T8NdqA<3ooBHJf1f%XHr#uj5xf`%m5Tz3%E=zRmyJg zc3tl`Mx}_^H4&yStbtb1`T3^1$gNai&o@;g%J?G5+f}9ZxJ5-UrJmNf?$U5 z&{-F2=l6dR9pt|OAZZyWCpI^T31|8dN69mkqx+0Bu_?z9&eW6e_eH8hTaFZ~nd2*Y z9YNbVN6T39Kn`}^gU1^aZ3L{kg}HG;$h2~U&*yH~E>rVE9>k3gxaXLR&mx4EiQhk; z?_?O&F_E(&p&zSy#>_vwG_Fv{6nEy}w4sN|a^kY!0YzkM@!Z8}8WB#fC} z?f+~pq3vqwzVlS?S9H5QOyhQ?>1+Lm!XMAFK*NvyU-LoXjn8M5pIr%^O$f~iU6@1C zy6pDCI{y?lE{MtOKmW8e1#a5`{UiPO7)h}jZhX^js7&pH)r(g%tq$O zIo`>9fekqTcIYcEYl1&K#RiCX2}9n>J_;J)`V}!)rdXY~U#)2FBK@*4IY{ms_hz=H z)y|jQN_+gr-X@}HcV6|M%Qpluj&W3sNG3ZG8I{J{J&ARGAj$i*i7<6b)aKz*ARmLp z!`*o*gAQV+=PGh$Cf1i`BLj5c0x8IY>_P_A`0!Q2feGv6R+wLb#D?uPAP3f8wYxtV zywymRJ_D{n?lUlIkss2|%PPdzFS<(q{B|>+idd2;n8Abib>JyU1`1Un>9dv1$$*j7 zgVw3w-FlQWq7uYwh)UNp8XnssGUK3M3t=JSzDwRLyCsW5Y>ofQp?^$c^Ob!7{wv3r zIxc`yFS*jwn|A2DB+5YEUHf}}gN{=vf82BLDf}dx1zG!eC4auL`w|QdeQCb(nO0ZD zJk`F!gNQh`lQ%mJ_TTdDRlN{R>vQ`s)jV!l7wH^T`CTF=I=O&oAG2~Pz3a+GbmsJ% zOKL*!2M+n7_uFKC{S@?3qQg&c)~I}(UKM!gfOQ7ue4BxjDuMLyu5XyiW)@E&qPs%9 zhnjkW=2)E)eQhR!UB1l{PTTot^|o#dg*5ypFaE0FqCVlurk|%*G@TH`JRmenvfta) ztfFf=20!QQF28)-=r2WvU2(rx^cgW#OzS7Qp7XZ~r^mk;HXCJ%?O77u+$Z@A)Z*q2 z<>bmY%$~D$z&A~fJAxky6yegINfgDbzcZ@uXWGp(T5f=je-GS_A1Su$o=sk#5y35k zZ!3J!rIdLv!M0m(h4|`_?gD?!apPx>0whYO)sjcdq>}KT>4q5IJqK`=hO9qYj9rkuV*Zb;fa^hxK65&oZka?Iuj85+Md32&tKf7ca1 zsAQ+&bfu40c2fGb%D*b*lm*5#$<$p#>i)?F5tCnJG%z9Se&1l!meHC6Zq3eAd`7eQ zs9Qj6V}MmzhrW*V>+ZbB28Q~Or0O1H&eC+^CVj|h$wyyFU?;E-#CYu3#ZXEbYRiOb 
z=?+HanAq8KTqeb6T2_byAuhC`T=~r858p&Dt)7KU^XR{NxM6UIg**r47;7q1c_bEgv+NsQ_G3k zD(AcAfIn0|as2eb(67Pi-mU4-XTfA04VHEz6Fr;+a2zB_1Jyx<;emO6I$h@Tn*xbz zX_V=Zt9>jQ#c}jq&oyWemzyC^U5ql?G%ycF*yp_?HNs65Q zN7?G|k)4wL<`WL?kx7r#*=xExHBNU|oo=(Pf0ZtjG?2U2*jV89v6bl-);YY`E#tFZ zt*z_DKCgGp#C1PfmlJ9aYD!N~<<>*tcTps_1HC^l=HHA6Utg;IZJ#soG{LJKC?Grm z)Gh#-O(~;=ZP4%!C?gQpP9G*~Fqpii<2!LhD}(b7ot~lrQ1Vw-;YC~`{G@vZPZUqW z`_Oarozsz@vedm`hI%rEZ!~SlqD-KH(qs+yHMR_CXpqzLAEGe6JO;!u-UR?a z`;vv1?=7o@owJl($&;K^1w$!1Vq}?rT+DIqF-pJAhZvKK*6F~Qwl3e3T?uNCaze}5 z`I2-=dtb4fnb6S0x8xu4Tf% z#^KW6T^9Rj&p>^lcX%dlv{`37OEUn6p*8yLOK`;=91GAbScze$rKrH#V?Zn?4F{*` zUDWtGoq;VxC?FZv2$c4q`rEC}HA-L_OHHgKY4`wFoWShn*D4#vVkd%T;URmy{sqr) z#OT!v6T{9tS8xN|95|@PBs-XuYV9_%W%4YVC{_s*2<2qmpT0aHQi}_n2F9VEBw+9b zt7$OvZ1>=9lcV!5ZAbe8OPA?f7&2|nLW|nS1Kl1EKG+OO&3Q=9ZFG}`Ei_&uKRsY zXApYu`rTp)`vxpO!{CJks18T2&tC&V&UsL8@rU-jw8uqVD7l_7%Md8;N_g~OG(mv%c*O^ zpG9cXuk=sF-hTfu{u*I7mw}1ApWA-{@$yT=`w4E`&wnasPJlZgk#U#fuC)JAz-T4o zhIz+anxCrhl0Bxx)X0;z)U`ZU>51a%%vR9M+&3CO&1gOp3&4zcp~sV1w4#yp*%+)c z?<-Feb6+teNs~ggj-hrdZXv?tOc*z$^NV!VkDB`PN!@uO+~p$u(Kpl){qrHN!wFTc zr}*+sh{xxP7x@HsI3jkkWpOVRbVYVUNk83cD;XE2%Qdk9>WRL5+?tE8$|5c8zUVbr z{IaPy^%LsH|cA*lht$ZIlBhZsPp{m!#8hFe2`F zT9DYZm#cw=;On{=TpyoGFG&>q@?IFFVId$W32D27SK`52#i~fMYI}dR=0E2d{F|;N zmd;2RuvBoT+!5U)q*y^nNcyFo5+QEVEMJ$(>tKr}y~szrvm#}AG<1*RVfujr#p>6b zZrBA8Cq18sLCarn3iNg(USpMCr^<{I04}6|UZk5F5qJt}Ed(B%sbg{kl z5|8VzWfB!?O}`85XWrZipf*l8PaW50y{&tx0&p64rJ&nmNq!K!`d9Kw-HhBr0d30< zL6SkwVouRMA-Ge2Wh~s!I30a7n+tkgwODpRj@HDtz|New@r-9ZnyT|fF44+Jv^ z(Nka1s*xaX(j2qkj3magoE1$i#6vB?p?7@fR(duBs0|PX$uSl`#*51YL zg5Mn^mvW~f55D*RDzGQy0sKMAdVGum%*6cN6@fFB<0>~U<_Im;^L=UF-YPYjjRG-C zn36zD+~`$c*FEN4EVUA%DSxlWg}3A7lr_})1tC8w)txO{Q)r3|&kE5dG*6MAZ{p8$ zvmf9-;g{86v>&C)u=1_yva(+N67cqXSo^)kY_ZyurLtV<+yg;8pj(umnaZDfzCPJH zJ)_+GW1O!))91QYgNqR)Fl9f5BE1!zONr|E96SzsVoKNjmu?+9R@cCC5?Z>@5M80; zY@*gjzI`Vo4UcC1b9f}G{4oX$Wk|)N-sfgJ6#G+9Bp)TVi>P;iJB8;X0_)EU>+~%Pb<*qNhe#ehCkia_*#j&hst9kgyHYl6I 
z25EnKt9-sBF;>c3yzyAIl(+E>c$6l+TzoK$yAgz~o@Pq|pa5aiii|tm^wxdUO1aY< z``-RfkRBQ{sP4x?;#^=|?m2;lE9!Y~rPu5uMlQ_X&Gz0)YVLRXly9`XG|7`~39OqS z(>O*dOY^2x>fq>I?NqFOA%>J5{2T#1zFdaK6Mxw0PROroa8rJ~jtPk0jNTOVr-*1! zlqS?qc?)Eh+C9`z#m}vVDBficPkHP-VWBn1dnCRQTCuGaHnsciOP z|15NE)Z7bh!Kv$Ppg3k=s!}LTKZ%J@D$w-D9m{X=Q$ru`^8+H8;bp|mbY$+9fv#Ib z!CC59qmnU&5l+3l0!3P(CNo1Gwi`RxMoxa2WNeAMklur*3D?d({y@J4RVY zC+aDNibRv_e+KtfQg=M9@|UJ_|4OESSg1_2X@By0n^dsP`yXDJukDJ6&b1=Cw zO%?O1mF#~?w7+AbN>XNcP6Uk7Z1%FCCGo=o*OeHShv#SD>D#cHPz3rx6iv2WzG7vGT-F z6Ih12mgYsM#Yb|#ekXvRUzSW0#${<)UB+~Q!{{3RPDY@tSGdznjKD&NzS6;1P8W}H zf3SfGxFgxU$aHLyRiM4{qGM`~!|;5*=7G%^q}E|l=KJAe3(0E(RHK^d)ZjKKa$FWmGwFo0Pu#~% zh1Se(n-asJqTKqDd@bG@ad^3^Kl(p8xmDcdhkTyY45>?dE#56-$N8%}T$0Zwm8$A1}1j zYZ=U?<1oBU=Im>#cbmQ|R~Jb5608`w=P?R~M8j)hwaQobuh<}{b}rXVz1f6v?qiTy zsW;-$l;5$}!$m({TAtFTMhzn24x_a!2FXAY@d}cay%v)HV1RTFc(2!ZCB5V2plhOf z)I+uJhr50$hDRt~MZzf_mW8;PrSOIqC;P)9SgX?L7jd*Ex-}1T%1xgb&0}SrVb>1W z;3dN%o)FIc@+q~zskR{-ewzH17N*OZ`(rz}((dt(UrJ?a9<86si;1Cyj`q`UP$GB% zA7tpVN2Z?A_n3t;;HW z$HTM15sF^p-(kkDr8B?rYoX6u<;G8ycU$RA)$?4OB!Ip&dmUjaGDH#ayXc}hs#3Kq zi#hC)V7Yg1Ud6Vu8@I3-Fjp>1in6%R6gt>H)F6slRh@ltwkH&*`XPJpgV6TP_?6_j z-H$2w?r?pRjKEXs3ybwU%R3&R?`hsciT=YbYEHo*JJ(vyRgL6^RvAX}x`1p9)RkS! 
zyQ6f1o&XzZ0Zv)}GA)<0Edmty@(Qea^h+Y(k%(Bv(UWh?AG^Qo{JnAWL+M}(e-y=j zZ|)cAf8PtK?eSm0Cs6Qf%Wx;?9FIXq-e*}XQLXFb$}R0=mk1l_BTGHO!2D0kRk)p> zALjPcXQg-kWNr#$jP@q@@@wlO^^(R9Nd9 z^HYpYnktGO*0VLz^YhKb+{s@#uHcL=m#6=;oz#dSJzPQ1U&6HMJExH3i+-ij$TJ2h zZ1FI;{?~WTw!Nl~|9G8cA*fAe#WwvF$>jeF9)y@BJ&A(UFqz{&34YV!FP2Xym#l(V YS50-W!7YgK9n7aBrzTte>Rs^v0fa-$2LJ#7 From 554a37a5514c9c0afbbea0fdea99244873a38ee4 Mon Sep 17 00:00:00 2001 From: Jonny Dixon <45681293+acrylJonny@users.noreply.github.com> Date: Wed, 1 Jan 2025 09:19:27 +0000 Subject: [PATCH 007/249] fix(ingest/aws_common): update tests execution on EC2 instance (#12252) --- .../tests/unit/test_aws_common.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/tests/unit/test_aws_common.py b/metadata-ingestion/tests/unit/test_aws_common.py index 9291fb91134b1c..dd1f06cf9bfd55 100644 --- a/metadata-ingestion/tests/unit/test_aws_common.py +++ b/metadata-ingestion/tests/unit/test_aws_common.py @@ -17,6 +17,14 @@ ) +@pytest.fixture +def mock_disable_ec2_metadata(): + """Disable EC2 metadata detection""" + with patch("requests.put") as mock_put: + mock_put.return_value.status_code = 404 + yield mock_put + + @pytest.fixture def mock_aws_config(): return AwsConnectionConfig( @@ -27,17 +35,19 @@ def mock_aws_config(): class TestAwsCommon: - def test_environment_detection_no_environment(self): + def test_environment_detection_no_environment(self, mock_disable_ec2_metadata): """Test environment detection when no AWS environment is present""" with patch.dict(os.environ, {}, clear=True): assert detect_aws_environment() == AwsEnvironment.UNKNOWN - def test_environment_detection_lambda(self): + def test_environment_detection_lambda(self, mock_disable_ec2_metadata): """Test Lambda environment detection""" with patch.dict(os.environ, {"AWS_LAMBDA_FUNCTION_NAME": "test-function"}): assert detect_aws_environment() == AwsEnvironment.LAMBDA - def test_environment_detection_lambda_cloudformation(self): 
+ def test_environment_detection_lambda_cloudformation( + self, mock_disable_ec2_metadata + ): """Test CloudFormation Lambda environment detection""" with patch.dict( os.environ, @@ -48,7 +58,7 @@ def test_environment_detection_lambda_cloudformation(self): ): assert detect_aws_environment() == AwsEnvironment.CLOUD_FORMATION - def test_environment_detection_eks(self): + def test_environment_detection_eks(self, mock_disable_ec2_metadata): """Test EKS environment detection""" with patch.dict( os.environ, @@ -59,19 +69,19 @@ def test_environment_detection_eks(self): ): assert detect_aws_environment() == AwsEnvironment.EKS - def test_environment_detection_app_runner(self): + def test_environment_detection_app_runner(self, mock_disable_ec2_metadata): """Test App Runner environment detection""" with patch.dict(os.environ, {"AWS_APP_RUNNER_SERVICE_ID": "service-id"}): assert detect_aws_environment() == AwsEnvironment.APP_RUNNER - def test_environment_detection_ecs(self): + def test_environment_detection_ecs(self, mock_disable_ec2_metadata): """Test ECS environment detection""" with patch.dict( os.environ, {"ECS_CONTAINER_METADATA_URI_V4": "http://169.254.170.2/v4"} ): assert detect_aws_environment() == AwsEnvironment.ECS - def test_environment_detection_beanstalk(self): + def test_environment_detection_beanstalk(self, mock_disable_ec2_metadata): """Test Elastic Beanstalk environment detection""" with patch.dict(os.environ, {"ELASTIC_BEANSTALK_ENVIRONMENT_NAME": "my-env"}): assert detect_aws_environment() == AwsEnvironment.BEANSTALK @@ -103,6 +113,7 @@ def test_ec2_metadata_token_failure(self, mock_put): @patch("requests.put") def test_is_running_on_ec2(self, mock_put, mock_get): """Test EC2 instance detection with IMDSv2""" + # Explicitly mock EC2 metadata responses mock_put.return_value.status_code = 200 mock_put.return_value.text = "token123" mock_get.return_value.status_code = 200 @@ -322,7 +333,9 @@ def test_aws_connection_config_validation_error(self): ), ], ) - def 
test_environment_detection_parametrized(self, env_vars, expected_environment): + def test_environment_detection_parametrized( + self, mock_disable_ec2_metadata, env_vars, expected_environment + ): """Parametrized test for environment detection with different configurations""" with patch.dict(os.environ, env_vars, clear=True): assert detect_aws_environment() == expected_environment From ea249caee1bffadd237ee7031c4502dac0f87af6 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 1 Jan 2025 16:52:17 +0530 Subject: [PATCH 008/249] fix(ingest): json serializable fix (#12246) --- .../datahub_ingestion_run_summary_provider.py | 45 +++++- .../test_datahub_ingestion_reporter.py | 135 ++++++++++++++++++ 2 files changed, 179 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py index 28def68ccf3f55..c143a8b49f4b7c 100644 --- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py @@ -146,12 +146,55 @@ def __init__(self, sink: Sink, report_recipe: bool, ctx: PipelineContext) -> Non aspect_value=source_info_aspect, ) + @staticmethod + def _convert_sets_to_lists(obj: Any) -> Any: + """ + Recursively converts all sets to lists in a Python object. + Works with nested dictionaries, lists, and sets. 
+ + Args: + obj: Any Python object that might contain sets + + Returns: + The object with all sets converted to lists + """ + if isinstance(obj, dict): + return { + key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value) + for key, value in obj.items() + } + elif isinstance(obj, list): + return [ + DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element) + for element in obj + ] + elif isinstance(obj, set): + return [ + DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element) + for element in obj + ] + elif isinstance(obj, tuple): + return tuple( + DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element) + for element in obj + ) + else: + return obj + def _get_recipe_to_report(self, ctx: PipelineContext) -> str: assert ctx.pipeline_config if not self.report_recipe or not ctx.pipeline_config.get_raw_dict(): return "" else: - return json.dumps(redact_raw_config(ctx.pipeline_config.get_raw_dict())) + redacted_recipe = redact_raw_config(ctx.pipeline_config.get_raw_dict()) + # This is required otherwise json dumps will fail + # with a TypeError: Object of type set is not JSON serializable + converted_recipe = ( + DatahubIngestionRunSummaryProvider._convert_sets_to_lists( + redacted_recipe + ) + ) + return json.dumps(converted_recipe) def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None: self.sink.write_record_async( diff --git a/metadata-ingestion/tests/unit/reporting/test_datahub_ingestion_reporter.py b/metadata-ingestion/tests/unit/reporting/test_datahub_ingestion_reporter.py index 749ea03a7f20a8..2ab6208e2dcc68 100644 --- a/metadata-ingestion/tests/unit/reporting/test_datahub_ingestion_reporter.py +++ b/metadata-ingestion/tests/unit/reporting/test_datahub_ingestion_reporter.py @@ -1,3 +1,5 @@ +from typing import Any, Dict, List, Set, Tuple, Union + import pytest from datahub.ingestion.reporting.datahub_ingestion_run_summary_provider import ( @@ -50,3 +52,136 @@ def test_default_config(): typed_config = 
DatahubIngestionRunSummaryProviderConfig.parse_obj({}) assert typed_config.sink is None assert typed_config.report_recipe is True + + +def test_simple_set() -> None: + """Test conversion of a simple set""" + input_data: Set[int] = {1, 2, 3} + expected: List[int] = [1, 2, 3] + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + assert sorted(result) == sorted(expected) + assert isinstance(result, list) + + +def test_nested_dict_with_sets() -> None: + """Test conversion of nested dictionary containing sets""" + input_data: Dict[str, Union[Set[int], Dict[str, Set[str]]]] = { + "set1": {1, 2, 3}, + "dict1": {"set2": {"a", "b"}}, + } + expected = { + "set1": [1, 2, 3], + "dict1": {"set2": ["a", "b"]}, + } + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + + def sort_nested_lists(d): + return { + k: ( + sorted(v) + if isinstance(v, list) + else (sort_nested_lists(v) if isinstance(v, dict) else v) + ) + for k, v in d.items() + } + + assert sort_nested_lists(result) == sort_nested_lists(expected) + + +def test_nested_lists_with_sets() -> None: + """Test conversion of nested lists containing sets""" + input_data = [{1, 2}, [{3, 4}, {5, 6}]] + expected = [[1, 2], [[3, 4], [5, 6]]] + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + assert [ + sorted(x) + if isinstance(x, list) and len(x) > 0 and not isinstance(x[0], list) + else x + for x in result + ] == [ + sorted(x) + if isinstance(x, list) and len(x) > 0 and not isinstance(x[0], list) + else x + for x in expected + ] + + +def test_tuple_with_sets() -> None: + """Test conversion of tuples containing sets""" + input_data = (1, {2, 3}, 4) + expected = (1, [2, 3], 4) + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + assert (result[0], sorted(result[1]), result[2]) == ( + expected[0], + sorted(expected[1]), + expected[2], + ) + assert isinstance(result, tuple) + + +def test_mixed_nested_structure() -> 
None: + """Test conversion of a complex nested structure""" + input_data = { + "simple_set": {1, 2, 3}, + "nested_dict": { + "another_set": {"a", "b", "c"}, + "mixed_list": [1, {2, 3}, {"x", "y"}], + }, + "tuple_with_set": (1, {4, 5}, 6), + "list_of_sets": [{1, 2}, {3, 4}], + } + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + + # Verify structure types + assert isinstance(result["simple_set"], list) + assert isinstance(result["nested_dict"]["another_set"], list) + assert isinstance(result["nested_dict"]["mixed_list"][1], list) + assert isinstance(result["nested_dict"]["mixed_list"][2], list) + assert isinstance(result["tuple_with_set"], tuple) + assert isinstance(result["tuple_with_set"][1], list) + assert isinstance(result["list_of_sets"][0], list) + + +def test_non_set_data() -> None: + """Test that non-set data remains unchanged""" + input_data = { + "string": "hello", + "int": 42, + "float": 3.14, + "bool": True, + "none": None, + "list": [1, 2, 3], + "dict": {"a": 1, "b": 2}, + } + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + assert result == input_data + + +def test_empty_structures() -> None: + """Test handling of empty structures""" + input_data: Dict[ + str, Union[Set[Any], Dict[Any, Any], List[Any], Tuple[Any, ...]] + ] = {"empty_set": set(), "empty_dict": {}, "empty_list": [], "empty_tuple": ()} + expected: Dict[ + str, Union[List[Any], Dict[Any, Any], List[Any], Tuple[Any, ...]] + ] = {"empty_set": [], "empty_dict": {}, "empty_list": [], "empty_tuple": ()} + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + assert result == expected + + +def test_json_serializable() -> None: + """Test that the converted structure is JSON serializable""" + import json + + input_data = { + "set": {1, 2, 3}, + "nested": {"set": {"a", "b"}}, + "mixed": [1, {2, 3}, {"x"}], + } + result = DatahubIngestionRunSummaryProvider._convert_sets_to_lists(input_data) + try: + 
json.dumps(result) + serializable = True + except TypeError: + serializable = False + assert serializable From 67ef55efc7a65485b944722fb4895ea54e644f1d Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Thu, 2 Jan 2025 15:11:35 +0530 Subject: [PATCH 009/249] fix(ingest/gc): soft deletion loop fix (#12255) --- .../source/gc/dataprocess_cleanup.py | 8 +- .../source/gc/soft_deleted_entity_cleanup.py | 230 ++++++++++++------ 2 files changed, 163 insertions(+), 75 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py index 3f7a1fc453bcdb..64c1a0ad0bfbad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py @@ -167,7 +167,7 @@ class DataJobEntity: class DataProcessCleanupReport(SourceReport): num_aspects_removed: int = 0 num_aspect_removed_by_type: TopKDict[str, int] = field(default_factory=TopKDict) - sample_removed_aspects_by_type: TopKDict[str, LossyList[str]] = field( + sample_soft_deleted_aspects_by_type: TopKDict[str, LossyList[str]] = field( default_factory=TopKDict ) num_data_flows_found: int = 0 @@ -286,9 +286,9 @@ def delete_entity(self, urn: str, type: str) -> None: self.report.num_aspect_removed_by_type[type] = ( self.report.num_aspect_removed_by_type.get(type, 0) + 1 ) - if type not in self.report.sample_removed_aspects_by_type: - self.report.sample_removed_aspects_by_type[type] = LossyList() - self.report.sample_removed_aspects_by_type[type].append(urn) + if type not in self.report.sample_soft_deleted_aspects_by_type: + self.report.sample_soft_deleted_aspects_by_type[type] = LossyList() + self.report.sample_soft_deleted_aspects_by_type[type].append(urn) if self.dry_run: logger.info( diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py 
b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py index 93f004ab675edc..4c0355834f9b4f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py @@ -1,9 +1,10 @@ import logging import time -from concurrent.futures import ThreadPoolExecutor, as_completed +from concurrent.futures import FIRST_COMPLETED, Future, ThreadPoolExecutor, wait from dataclasses import dataclass, field from datetime import datetime, timezone -from typing import List, Optional +from threading import Lock +from typing import Dict, Iterable, List, Optional from pydantic import Field @@ -18,12 +19,28 @@ logger = logging.getLogger(__name__) +QUERY_QUERY_ENTITY = """ +query listQueries($input: ScrollAcrossEntitiesInput!) { + scrollAcrossEntities(input: $input) { + nextScrollId + count + searchResults { + entity { + ... on QueryEntity { + urn + } + } + } + } +} +""" + class SoftDeletedEntitiesCleanupConfig(ConfigModel): enabled: bool = Field( default=True, description="Whether to do soft deletion cleanup." ) - retention_days: Optional[int] = Field( + retention_days: int = Field( 10, description="Number of days to retain metadata in DataHub", ) @@ -62,23 +79,30 @@ class SoftDeletedEntitiesCleanupConfig(ConfigModel): default=None, description="Query to filter entities", ) + limit_entities_delete: Optional[int] = Field( 25000, description="Max number of entities to delete." ) - runtime_limit_seconds: Optional[int] = Field( - None, + futures_max_at_time: int = Field( + 1000, description="Max number of futures to have at a time." 
+ ) + + runtime_limit_seconds: int = Field( + 7200, # 2 hours by default description="Runtime limit in seconds", ) @dataclass class SoftDeletedEntitiesReport(SourceReport): - num_soft_deleted_entity_removed: int = 0 - num_soft_deleted_entity_removed_by_type: TopKDict[str, int] = field( - default_factory=TopKDict - ) - sample_soft_deleted_removed_aspects_by_type: TopKDict[str, LossyList[str]] = field( + num_queries_found: int = 0 + num_soft_deleted_entity_processed: int = 0 + num_soft_deleted_retained_due_to_age: int = 0 + num_soft_deleted_entity_removal_started: int = 0 + num_hard_deleted: int = 0 + num_hard_deleted_by_type: TopKDict[str, int] = field(default_factory=TopKDict) + sample_hard_deleted_aspects_by_type: TopKDict[str, LossyList[str]] = field( default_factory=TopKDict ) @@ -103,48 +127,53 @@ def __init__( self.config = config self.report = report self.dry_run = dry_run + self.start_time = 0.0 + self._report_lock: Lock = Lock() + self.last_print_time = 0.0 + + def _increment_retained_count(self) -> None: + """Thread-safe method to update report fields""" + with self._report_lock: + self.report.num_soft_deleted_retained_due_to_age += 1 + + def _increment_removal_started_count(self) -> None: + """Thread-safe method to update report fields""" + with self._report_lock: + self.report.num_soft_deleted_entity_removal_started += 1 + + def _update_report(self, urn: str, entity_type: str) -> None: + """Thread-safe method to update report fields""" + with self._report_lock: + self.report.num_hard_deleted += 1 + + current_count = self.report.num_hard_deleted_by_type.get(entity_type, 0) + self.report.num_hard_deleted_by_type[entity_type] = current_count + 1 + if entity_type not in self.report.sample_hard_deleted_aspects_by_type: + self.report.sample_hard_deleted_aspects_by_type[ + entity_type + ] = LossyList() + self.report.sample_hard_deleted_aspects_by_type[entity_type].append(urn) def delete_entity(self, urn: str) -> None: assert self.ctx.graph entity_urn = 
Urn.from_string(urn) - self.report.num_soft_deleted_entity_removed += 1 - self.report.num_soft_deleted_entity_removed_by_type[entity_urn.entity_type] = ( - self.report.num_soft_deleted_entity_removed_by_type.get( - entity_urn.entity_type, 0 - ) - + 1 - ) - if ( - entity_urn.entity_type - not in self.report.sample_soft_deleted_removed_aspects_by_type - ): - self.report.sample_soft_deleted_removed_aspects_by_type[ - entity_urn.entity_type - ] = LossyList() - self.report.sample_soft_deleted_removed_aspects_by_type[ - entity_urn.entity_type - ].append(urn) - if self.dry_run: logger.info( f"Dry run is on otherwise it would have deleted {urn} with hard deletion" ) return - + self._increment_removal_started_count() self.ctx.graph.delete_entity(urn=urn, hard=True) self.ctx.graph.delete_references_to_urn( urn=urn, dry_run=False, ) + self._update_report(urn, entity_urn.entity_type) def delete_soft_deleted_entity(self, urn: str) -> None: assert self.ctx.graph - if self.config.retention_days is None: - logger.info("Retention days is not set, skipping soft delete cleanup") - return - retention_time = ( int(datetime.now(timezone.utc).timestamp()) - self.config.retention_days * 24 * 60 * 60 @@ -157,15 +186,85 @@ def delete_soft_deleted_entity(self, urn: str) -> None: ]["created"]["time"] < (retention_time * 1000): logger.debug(f"Hard deleting {urn}") self.delete_entity(urn) + else: + self._increment_retained_count() + + def _print_report(self) -> None: + time_taken = round(time.time() - self.last_print_time, 1) + # Print report every 2 minutes + if time_taken > 120: + self.last_print_time = time.time() + logger.info(f"\n{self.report.as_string()}") + + def _process_futures(self, futures: Dict[Future, str]) -> Dict[Future, str]: + done, not_done = wait(futures, return_when=FIRST_COMPLETED) + futures = {future: urn for future, urn in futures.items() if future in not_done} + + for future in done: + self._print_report() + if future.exception(): + logger.error( + f"Failed to delete 
entity {futures[future]}: {future.exception()}" + ) + self.report.failure( + f"Failed to delete entity {futures[future]}", + exc=future.exception(), + ) + self.report.num_soft_deleted_entity_processed += 1 + if ( + self.report.num_soft_deleted_entity_processed % self.config.batch_size + == 0 + ): + if self.config.delay: + logger.debug( + f"Sleeping for {self.config.delay} seconds before further processing batch" + ) + time.sleep(self.config.delay) + return futures - def cleanup_soft_deleted_entities(self) -> None: - if not self.config.enabled: - return + def _get_soft_deleted_queries(self) -> Iterable[str]: assert self.ctx.graph - start_time = time.time() - - deleted_count_retention = 0 - urns = self.ctx.graph.get_urns_by_filter( + scroll_id: Optional[str] = None + while True: + try: + result = self.ctx.graph.execute_graphql( + QUERY_QUERY_ENTITY, + { + "input": { + "types": ["QUERY"], + "query": "*", + "scrollId": scroll_id if scroll_id else None, + "count": self.config.batch_size, + "orFilters": [ + { + "and": [ + { + "field": "removed", + "values": ["true"], + "condition": "EQUAL", + } + ] + } + ], + } + }, + ) + except Exception as e: + self.report.failure( + f"While trying to get queries with {scroll_id}", exc=e + ) + break + scroll_across_entities = result.get("scrollAcrossEntities") + if not scroll_across_entities: + break + scroll_id = scroll_across_entities.get("nextScrollId") + self.report.num_queries_found += scroll_across_entities.get("count") + for query in scroll_across_entities.get("searchResults"): + yield query["entity"]["urn"] + + def _get_urns(self) -> Iterable[str]: + assert self.ctx.graph + yield from self.ctx.graph.get_urns_by_filter( entity_types=self.config.entity_types, platform=self.config.platform, env=self.config.env, @@ -173,52 +272,41 @@ def cleanup_soft_deleted_entities(self) -> None: status=RemovedStatusFilter.ONLY_SOFT_DELETED, batch_size=self.config.batch_size, ) + yield from self._get_soft_deleted_queries() + + def 
cleanup_soft_deleted_entities(self) -> None: + if not self.config.enabled: + return + self.start_time = time.time() - futures = {} + futures: Dict[Future, str] = dict() with ThreadPoolExecutor(max_workers=self.config.max_workers) as executor: - num_urns_submitted = 0 - for urn in urns: - num_urns_submitted += 1 + for urn in self._get_urns(): + self._print_report() + while len(futures) >= self.config.futures_max_at_time: + futures = self._process_futures(futures) if ( self.config.limit_entities_delete - and num_urns_submitted > self.config.limit_entities_delete + and self.report.num_hard_deleted > self.config.limit_entities_delete ): logger.info( - f"Limit of {self.config.limit_entities_delete} entities reached. Stopping" + f"Limit of {self.config.limit_entities_delete} entities reached. Stopped adding more." ) break if ( self.config.runtime_limit_seconds - and time.time() - start_time > self.config.runtime_limit_seconds + and time.time() - self.start_time + > self.config.runtime_limit_seconds ): logger.info( - f"Runtime limit of {self.config.runtime_limit_seconds} seconds reached. Stopping" + f"Runtime limit of {self.config.runtime_limit_seconds} seconds reached. Not submitting more futures." 
) break future = executor.submit(self.delete_soft_deleted_entity, urn) futures[future] = urn - if not futures: - return - for future in as_completed(futures): - if future.exception(): - logger.error( - f"Failed to delete entity {futures[future]}: {future.exception()}" - ) - self.report.failure( - f"Failed to delete entity {futures[future]}", - exc=future.exception(), - ) - deleted_count_retention += 1 - - if deleted_count_retention % self.config.batch_size == 0: - logger.info( - f"Processed {deleted_count_retention} soft deleted entity and deleted {self.report.num_soft_deleted_entity_removed} entities so far" - ) - - if self.config.delay: - logger.debug( - f"Sleeping for {self.config.delay} seconds before getting next batch" - ) - time.sleep(self.config.delay) + logger.info(f"Waiting for {len(futures)} futures to complete") + while len(futures) > 0: + self._print_report() + futures = self._process_futures(futures) From a814cb389f364ab4a4782d3f4987fe047d07962a Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Thu, 2 Jan 2025 17:59:22 +0530 Subject: [PATCH 010/249] fix(ingest/bigquery): All View generation when queries_v2 is turned off (#12181) Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- docs/how/updating-datahub.md | 1 + .../ingestion/source/bigquery_v2/bigquery.py | 28 +- .../source/bigquery_v2/bigquery_config.py | 12 +- .../source/bigquery_v2/bigquery_schema_gen.py | 28 +- .../ingestion/source/bigquery_v2/lineage.py | 31 +- .../bigquery_mcp_lineage_golden_1.json | 977 +++++++++++++++ .../bigquery_mcp_lineage_golden_2.json | 1064 +++++++++++++++++ .../integration/bigquery_v2/test_bigquery.py | 145 +++ 8 files changed, 2220 insertions(+), 66 deletions(-) create mode 100644 metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_1.json create mode 100644 metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_2.json diff 
--git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index d6620fde0bf794..19261da23bcf96 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -18,6 +18,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next - #12191 - Configs `include_view_lineage` and `include_view_column_lineage` are removed from snowflake ingestion source. View and External Table DDL lineage will always be ingested when definitions are available. +- #12181 - Configs `include_view_lineage`, `include_view_column_lineage` and `lineage_parse_view_ddl` are removed from bigquery ingestion source. View and Snapshot lineage will always be ingested when definitions are available. - #11560 - The PowerBI ingestion source configuration option include_workspace_name_in_dataset_urn determines whether the workspace name is included in the PowerBI dataset's URN.
PowerBI allows to have identical name of semantic model and their tables across the workspace, It will overwrite the semantic model in-case of multi-workspace ingestion.
Entity urn with `include_workspace_name_in_dataset_urn: false` diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 16a5268a2dea76..38eab3606b7e95 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -206,9 +206,7 @@ def test_connection(config_dict: dict) -> TestConnectionReport: def _init_schema_resolver(self) -> SchemaResolver: schema_resolution_required = ( - self.config.use_queries_v2 - or self.config.lineage_parse_view_ddl - or self.config.lineage_use_sql_parser + self.config.use_queries_v2 or self.config.lineage_use_sql_parser ) schema_ingestion_enabled = ( self.config.include_schema_metadata @@ -255,18 +253,16 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: for project in projects: yield from self.bq_schema_extractor.get_project_workunits(project) - if self.config.use_queries_v2: - # Always ingest View and Snapshot lineage with schema ingestion - self.report.set_ingestion_stage("*", "View and Snapshot Lineage") - - yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots( - [p.id for p in projects], - self.bq_schema_extractor.view_refs_by_project, - self.bq_schema_extractor.view_definitions, - self.bq_schema_extractor.snapshot_refs_by_project, - self.bq_schema_extractor.snapshots_by_ref, - ) + self.report.set_ingestion_stage("*", "View and Snapshot Lineage") + yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots( + [p.id for p in projects], + self.bq_schema_extractor.view_refs_by_project, + self.bq_schema_extractor.view_definitions, + self.bq_schema_extractor.snapshot_refs_by_project, + self.bq_schema_extractor.snapshots_by_ref, + ) + if self.config.use_queries_v2: # if both usage and lineage are disabled then skip queries extractor piece if ( not 
self.config.include_usage_statistics @@ -306,10 +302,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if self.config.include_table_lineage: yield from self.lineage_extractor.get_lineage_workunits( [p.id for p in projects], - self.bq_schema_extractor.view_refs_by_project, - self.bq_schema_extractor.view_definitions, - self.bq_schema_extractor.snapshot_refs_by_project, - self.bq_schema_extractor.snapshots_by_ref, self.bq_schema_extractor.table_refs, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 4af41921c9fa3c..ef323260b014e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -463,10 +463,6 @@ def have_table_data_read_permission(self) -> bool: default=True, description="Use sql parser to resolve view/table lineage.", ) - lineage_parse_view_ddl: bool = Field( - default=True, - description="Sql parse view ddl to get lineage.", - ) lineage_sql_parser_use_raw_names: bool = Field( default=False, @@ -572,11 +568,9 @@ def have_table_data_read_permission(self) -> bool: "See [this](https://cloud.google.com/bigquery/docs/information-schema-jobs#scope_and_syntax) for details.", ) - # include_view_lineage and include_view_column_lineage are inherited from SQLCommonConfig - # but not used in bigquery so we hide them from docs. 
- include_view_lineage: bool = Field(default=True, hidden_from_docs=True) - - include_view_column_lineage: bool = Field(default=True, hidden_from_docs=True) + _include_view_lineage = pydantic_removed_field("include_view_lineage") + _include_view_column_lineage = pydantic_removed_field("include_view_column_lineage") + _lineage_parse_view_ddl = pydantic_removed_field("lineage_parse_view_ddl") @root_validator(pre=True) def set_include_schema_metadata(cls, values: Dict) -> Dict: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 4a3b47f6b543a6..bc2688e6b481ab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -653,14 +653,11 @@ def _process_view( self.report.report_dropped(table_identifier.raw_table_name()) return - if self.store_table_refs: - table_ref = str( - BigQueryTableRef(table_identifier).get_sanitized_table_ref() - ) - self.table_refs.add(table_ref) - if self.config.lineage_parse_view_ddl and view.view_definition: - self.view_refs_by_project[project_id].add(table_ref) - self.view_definitions[table_ref] = view.view_definition + table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref()) + self.table_refs.add(table_ref) + if view.view_definition: + self.view_refs_by_project[project_id].add(table_ref) + self.view_definitions[table_ref] = view.view_definition view.column_count = len(columns) if not view.column_count: @@ -701,14 +698,11 @@ def _process_snapshot( f"Snapshot doesn't have any column or unable to get columns for snapshot: {table_identifier}" ) - if self.store_table_refs: - table_ref = str( - BigQueryTableRef(table_identifier).get_sanitized_table_ref() - ) - self.table_refs.add(table_ref) - if snapshot.base_table_identifier: - 
self.snapshot_refs_by_project[project_id].add(table_ref) - self.snapshots_by_ref[table_ref] = snapshot + table_ref = str(BigQueryTableRef(table_identifier).get_sanitized_table_ref()) + self.table_refs.add(table_ref) + if snapshot.base_table_identifier: + self.snapshot_refs_by_project[project_id].add(table_ref) + self.snapshots_by_ref[table_ref] = snapshot yield from self.gen_snapshot_dataset_workunits( table=snapshot, @@ -1148,7 +1142,7 @@ def gen_schema_metadata( foreignKeys=foreign_keys if foreign_keys else None, ) - if self.config.lineage_parse_view_ddl or self.config.lineage_use_sql_parser: + if self.config.lineage_use_sql_parser: self.sql_parser_schema_resolver.add_schema_metadata( dataset_urn, schema_metadata ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 321b1b6207fabf..ba3357aa8ca20c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -291,16 +291,15 @@ def get_lineage_workunits_for_views_and_snapshots( snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot], ) -> Iterable[MetadataWorkUnit]: for project in projects: - if self.config.lineage_parse_view_ddl: - for view in view_refs_by_project[project]: - self.datasets_skip_audit_log_lineage.add(view) - self.aggregator.add_view_definition( - view_urn=self.identifiers.gen_dataset_urn_from_raw_ref( - BigQueryTableRef.from_string_name(view) - ), - view_definition=view_definitions[view], - default_db=project, - ) + for view in view_refs_by_project[project]: + self.datasets_skip_audit_log_lineage.add(view) + self.aggregator.add_view_definition( + view_urn=self.identifiers.gen_dataset_urn_from_raw_ref( + BigQueryTableRef.from_string_name(view) + ), + view_definition=view_definitions[view], + default_db=project, + ) for snapshot_ref in snapshot_refs_by_project[project]: snapshot = 
snapshots_by_ref[snapshot_ref] @@ -322,23 +321,11 @@ def get_lineage_workunits_for_views_and_snapshots( def get_lineage_workunits( self, projects: List[str], - view_refs_by_project: Dict[str, Set[str]], - view_definitions: FileBackedDict[str], - snapshot_refs_by_project: Dict[str, Set[str]], - snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot], table_refs: Set[str], ) -> Iterable[MetadataWorkUnit]: if not self._should_ingest_lineage(): return - yield from self.get_lineage_workunits_for_views_and_snapshots( - projects, - view_refs_by_project, - view_definitions, - snapshot_refs_by_project, - snapshots_by_ref, - ) - if self.config.use_exported_bigquery_audit_metadata: projects = ["*"] # project_id not used when using exported metadata diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_1.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_1.json new file mode 100644 index 00000000000000..8f411ca5137711 --- /dev/null +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_1.json @@ -0,0 +1,977 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "bigquery", + "env": "PROD", + "project_id": "project-id-1" + }, + "name": "project-id-1", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "bigquery", + "env": "PROD", + "project_id": "project-id-1", + "dataset_id": "bigquery-dataset-1", + "location": "US" + }, + "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m4!1m3!3m2!1sproject-id-1!2sbigquery-dataset-1", + "name": 
"bigquery-dataset-1", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + 
"aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.table-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Test Policy Tag" + } + ] + }, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": 
"https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m5!1m4!4m3!1sproject-id-1!2sbigquery-dataset-1!3stable-1", + "name": "table-1", + "qualifiedName": "project-id-1.bigquery-dataset-1.table-1", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + 
"json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + }, + { + "id": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "urn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.view-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Test Policy Tag" + } + ] + }, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { 
+ "tags": [] + }, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m5!1m4!4m3!1sproject-id-1!2sbigquery-dataset-1!3sview-1", + "name": "view-1", + "qualifiedName": "project-id-1.bigquery-dataset-1.view-1", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view `bigquery-dataset-1.view-1` as select email from `bigquery-dataset-1.table-1`", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + }, + { + "id": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "urn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.snapshot-table-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Test Policy Tag" + } + ] + }, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m5!1m4!4m3!1sproject-id-1!2sbigquery-dataset-1!3ssnapshot-table-1", + "name": "snapshot-table-1", + "qualifiedName": "project-id-1.bigquery-dataset-1.snapshot-table-1", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 
1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Bigquery Table Snapshot" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD),age)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD),email)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + }, + { + "id": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "urn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ 
+ { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD),email)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW `bigquery-dataset-1.view-1` AS\nSELECT\n email\nFROM `bigquery-dataset-1.table-1`", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": 
"urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),email)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD),email)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Test Policy Tag", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Test Policy Tag" + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-0mn4n3", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_2.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_2.json new file mode 100644 index 00000000000000..26abc09569ccfa --- /dev/null +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_lineage_golden_2.json @@ -0,0 +1,1064 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "bigquery", + "env": "PROD", + "project_id": "project-id-1" + }, + "name": "project-id-1", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + 
] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "bigquery", + "env": "PROD", + "project_id": "project-id-1", + "dataset_id": "bigquery-dataset-1", + "location": "US" + }, + "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m4!1m3!3m2!1sproject-id-1!2sbigquery-dataset-1", + "name": "bigquery-dataset-1", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.table-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + 
"time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Test Policy Tag" + } + ] + }, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m5!1m4!4m3!1sproject-id-1!2sbigquery-dataset-1!3stable-1", + "name": "table-1", + "qualifiedName": "project-id-1.bigquery-dataset-1.table-1", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" 
+ } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + }, + { + "id": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "urn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.view-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Test Policy Tag" + } + ] + }, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m5!1m4!4m3!1sproject-id-1!2sbigquery-dataset-1!3sview-1", + "name": "view-1", + "qualifiedName": 
"project-id-1.bigquery-dataset-1.view-1", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view `bigquery-dataset-1.view-1` as select email from `bigquery-dataset-1.table-1`", + "viewLanguage": "SQL" + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + }, + { + "id": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "urn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "project-id-1.bigquery-dataset-1.snapshot-table-1", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "age", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": 
"INT", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Test Policy Tag" + } + ] + }, + "isPartOfKey": false, + "isPartitioningKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "description": "comment", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "isPartitioningKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m5!1m4!4m3!1sproject-id-1!2sbigquery-dataset-1!3ssnapshot-table-1", + "name": "snapshot-table-1", + "qualifiedName": "project-id-1.bigquery-dataset-1.snapshot-table-1", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": 
"dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:bigquery,project-id-1)" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Bigquery Table Snapshot" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD),age)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD),email)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:068bd9323110994a40019fcf6cfc60d3", + "urn": "urn:li:container:068bd9323110994a40019fcf6cfc60d3" + }, + { + "id": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "urn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD),email)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW `bigquery-dataset-1.view-1` AS\nSELECT\n email\nFROM `bigquery-dataset-1.table-1`", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD),email)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD),email)" + } + ] + } 
+ }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.snapshot-table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1643760000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 0, + "totalSqlQueries": 0, + "topSqlQueries": [], + "userCounts": [], + "fieldCounts": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1643760000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 0, + "totalSqlQueries": 0, + "topSqlQueries": [], + "userCounts": [], + "fieldCounts": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + 
} +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.table-1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetUsageStatistics", + "aspect": { + "json": { + "timestampMillis": 1643760000000, + "eventGranularity": { + "unit": "DAY", + "multiple": 1 + }, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "uniqueUserCount": 0, + "totalSqlQueries": 0, + "topSqlQueries": [], + "userCounts": [], + "fieldCounts": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Abigquery%2Cproject-id-1.bigquery-dataset-1.view-1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Test Policy Tag", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Test Policy Tag" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-k4o1z9", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index 1f146886361617..2dd320041a1132 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -4,6 +4,7 @@ from typing import Any, Dict, Optional from unittest.mock import MagicMock, patch +import pytest from freezegun import freeze_time from 
google.cloud.bigquery.table import TableListItem @@ -577,3 +578,147 @@ def test_bigquery_queries_v2_lineage_usage_ingest( output_path=mcp_output_path, golden_path=mcp_golden_path, ) + + +@freeze_time(FROZEN_TIME) +@patch.object(BigQuerySchemaApi, "get_snapshots_for_dataset") +@patch.object(BigQuerySchemaApi, "get_views_for_dataset") +@patch.object(BigQuerySchemaApi, "get_tables_for_dataset") +@patch.object(BigQuerySchemaGenerator, "get_core_table_details") +@patch.object(BigQuerySchemaApi, "get_datasets_for_project_id") +@patch.object(BigQuerySchemaApi, "get_columns_for_dataset") +@patch.object(BigQueryDataReader, "get_sample_data_for_table") +@patch("google.cloud.bigquery.Client") +@patch("google.cloud.datacatalog_v1.PolicyTagManagerClient") +@patch("google.cloud.resourcemanager_v3.ProjectsClient") +@pytest.mark.parametrize( + "use_queries_v2, include_table_lineage, include_usage_statistics, golden_file", + [ + (True, False, False, "bigquery_mcp_lineage_golden_1.json"), + (True, True, False, "bigquery_mcp_lineage_golden_1.json"), + (False, False, True, "bigquery_mcp_lineage_golden_2.json"), + (False, True, True, "bigquery_mcp_lineage_golden_2.json"), + ], +) +def test_bigquery_lineage_v2_ingest_view_snapshots( + client, + policy_tag_manager_client, + projects_client, + get_sample_data_for_table, + get_columns_for_dataset, + get_datasets_for_project_id, + get_core_table_details, + get_tables_for_dataset, + get_views_for_dataset, + get_snapshots_for_dataset, + pytestconfig, + tmp_path, + use_queries_v2, + include_table_lineage, + include_usage_statistics, + golden_file, +): + test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2" + mcp_golden_path = f"{test_resources_dir}/{golden_file}" + mcp_output_path = "{}/{}_output.json".format(tmp_path, golden_file) + + dataset_name = "bigquery-dataset-1" + get_datasets_for_project_id.return_value = [ + BigqueryDataset(name=dataset_name, location="US") + ] + + table_list_item = TableListItem( + 
{"tableReference": {"projectId": "", "datasetId": "", "tableId": ""}} + ) + table_name = "table-1" + snapshot_table_name = "snapshot-table-1" + view_name = "view-1" + get_core_table_details.return_value = {table_name: table_list_item} + columns = [ + BigqueryColumn( + name="age", + ordinal_position=1, + is_nullable=False, + field_path="col_1", + data_type="INT", + comment="comment", + is_partition_column=False, + cluster_column_position=None, + policy_tags=["Test Policy Tag"], + ), + BigqueryColumn( + name="email", + ordinal_position=1, + is_nullable=False, + field_path="col_2", + data_type="STRING", + comment="comment", + is_partition_column=False, + cluster_column_position=None, + ), + ] + + get_columns_for_dataset.return_value = { + table_name: columns, + snapshot_table_name: columns, + view_name: columns, + } + get_sample_data_for_table.return_value = { + "age": [random.randint(1, 80) for i in range(20)], + "email": [random_email() for i in range(20)], + } + + bigquery_table = BigqueryTable( + name=table_name, + comment=None, + created=None, + last_altered=None, + size_in_bytes=None, + rows_count=None, + ) + get_tables_for_dataset.return_value = iter([bigquery_table]) + + bigquery_view = BigqueryView( + name=view_name, + comment=None, + created=None, + view_definition=f"create view `{dataset_name}.view-1` as select email from `{dataset_name}.table-1`", + last_altered=None, + size_in_bytes=None, + rows_count=None, + materialized=False, + ) + + get_views_for_dataset.return_value = iter([bigquery_view]) + snapshot_table = BigqueryTableSnapshot( + name=snapshot_table_name, + comment=None, + created=None, + last_altered=None, + size_in_bytes=None, + rows_count=None, + base_table_identifier=BigqueryTableIdentifier( + project_id="project-id-1", + dataset="bigquery-dataset-1", + table="table-1", + ), + ) + get_snapshots_for_dataset.return_value = iter([snapshot_table]) + + pipeline_config_dict: Dict[str, Any] = recipe( + mcp_output_path=mcp_output_path, + 
source_config_override={ + "use_queries_v2": use_queries_v2, + "include_table_lineage": include_table_lineage, + "include_usage_statistics": include_usage_statistics, + "classification": {"enabled": False}, + }, + ) + + run_and_get_pipeline(pipeline_config_dict) + + mce_helpers.check_golden_file( + pytestconfig, + output_path=mcp_output_path, + golden_path=mcp_golden_path, + ) From 7f64ffd2f7541900bbcd2b7b5b3f6dde237a8667 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Thu, 2 Jan 2025 18:44:45 +0530 Subject: [PATCH 011/249] test(ingest/athena): add connector integration tests (#12256) --- .../integration/athena/athena_mce_golden.json | 1362 +++++++++++++++++ .../integration/athena/test_athena_source.py | 163 ++ 2 files changed, 1525 insertions(+) create mode 100644 metadata-ingestion/tests/integration/athena/athena_mce_golden.json create mode 100644 metadata-ingestion/tests/integration/athena/test_athena_source.py diff --git a/metadata-ingestion/tests/integration/athena/athena_mce_golden.json b/metadata-ingestion/tests/integration/athena/athena_mce_golden.json new file mode 100644 index 00000000000000..1b3fdb0bdb2538 --- /dev/null +++ b/metadata-ingestion/tests/integration/athena/athena_mce_golden.json @@ -0,0 +1,1362 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "athena", + "env": "PROD", + "database": "test_schema" + }, + "name": "test_schema", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "key": "value", + "table_type": "EXTERNAL_TABLE", + "is_view": 
"True", + "view_definition": "CREATE VIEW \"test_schema\".test_view_1 AS\nSELECT *\nFROM\n \"test_schema\".\"test_table\"" + }, + "name": "test_table", + "description": "Test table description", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test_schema.test_table", + "platform": "urn:li:dataPlatform:athena", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=string].employee_id", + "nullable": false, + "description": "Unique identifier for the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": false}" + }, + { + "fieldPath": "[version=2.0].[type=long].annual_salary", + "nullable": true, + "description": "Annual salary of the employee in USD", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "BIGINT", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"BIGINT\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].employee_name", + "nullable": false, + "description": "Full name of the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": false}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history", + "nullable": true, + "description": 
"Job history map: year to details (company, role)", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "record" + } + } + }, + "nativeDataType": "MapType(String(), STRUCT(year=INTEGER(), company=String(), role=String()))", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"MapType(String(), STRUCT(year=INTEGER(), company=String(), role=String()))\", \"key_type\": {\"type\": \"string\", \"native_data_type\": \"VARCHAR\", \"_nullable\": true}, \"key_native_data_type\": \"VARCHAR\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=int].year", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"INTEGER\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=string].company", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=string].role", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=long].department_budgets", + "nullable": true, + "description": "Map of department names to their respective budgets", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + 
"valueType": "long" + } + } + }, + "nativeDataType": "MapType(String(), BIGINT())", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"MapType(String(), BIGINT())\", \"key_type\": {\"type\": \"string\", \"native_data_type\": \"VARCHAR\", \"_nullable\": true}, \"key_native_data_type\": \"VARCHAR\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=string].skills", + "nullable": true, + "description": "List of skills possessed by the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "string" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "urn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + 
"aspect": { + "json": { + "container": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "key": "value", + "table_type": "EXTERNAL_TABLE", + "is_view": "True", + "view_definition": "CREATE VIEW \"test_schema\".test_view_2 AS\nSELECT employee_id, employee_name, skills\nFROM\n \"test_schema\".\"test_view_1\"" + }, + "name": "test_view_1", + "description": "Test table description", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test_schema.test_view_1", + "platform": "urn:li:dataPlatform:athena", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=string].employee_id", + "nullable": false, + "description": "Unique identifier for the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": false}" + }, + { + "fieldPath": "[version=2.0].[type=long].annual_salary", + "nullable": true, + "description": "Annual salary of the employee in USD", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": 
"BIGINT", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"BIGINT\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].employee_name", + "nullable": false, + "description": "Full name of the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": false}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history", + "nullable": true, + "description": "Job history map: year to details (company, role)", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "record" + } + } + }, + "nativeDataType": "MapType(String(), STRUCT(year=INTEGER(), company=String(), role=String()))", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"MapType(String(), STRUCT(year=INTEGER(), company=String(), role=String()))\", \"key_type\": {\"type\": \"string\", \"native_data_type\": \"VARCHAR\", \"_nullable\": true}, \"key_native_data_type\": \"VARCHAR\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=int].year", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"INTEGER\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=string].company", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "jsonProps": 
"{\"native_data_type\": \"VARCHAR\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=string].role", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=long].department_budgets", + "nullable": true, + "description": "Map of department names to their respective budgets", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "long" + } + } + }, + "nativeDataType": "MapType(String(), BIGINT())", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"MapType(String(), BIGINT())\", \"key_type\": {\"type\": \"string\", \"native_data_type\": \"VARCHAR\", \"_nullable\": true}, \"key_native_data_type\": \"VARCHAR\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=string].skills", + "nullable": true, + "description": "List of skills possessed by the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "string" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", 
+ "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW \"test_schema\".test_view_1 AS\nSELECT *\nFROM\n \"test_schema\".\"test_table\"", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "urn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "key": "value", + "table_type": "EXTERNAL_TABLE", + "is_view": "True", + "view_definition": "CREATE VIEW \"test_schema\".test_view_2 AS\nSELECT employee_id, employee_name, skills\nFROM\n 
\"test_schema\".\"test_view_1\"" + }, + "name": "test_view_2", + "description": "Test table description", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test_schema.test_view_2", + "platform": "urn:li:dataPlatform:athena", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=string].employee_id", + "nullable": false, + "description": "Unique identifier for the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": false}" + }, + { + "fieldPath": "[version=2.0].[type=long].annual_salary", + "nullable": true, + "description": "Annual salary of the employee in USD", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "BIGINT", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"BIGINT\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].employee_name", + "nullable": false, + "description": "Full name of the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": false}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history", + "nullable": true, + "description": "Job history map: year to details (company, role)", + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "record" + } + } + }, + "nativeDataType": "MapType(String(), STRUCT(year=INTEGER(), company=String(), role=String()))", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"MapType(String(), STRUCT(year=INTEGER(), company=String(), role=String()))\", \"key_type\": {\"type\": \"string\", \"native_data_type\": \"VARCHAR\", \"_nullable\": true}, \"key_native_data_type\": \"VARCHAR\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=int].year", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"INTEGER\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=string].company", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=struct].job_history.[type=string].role", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=long].department_budgets", + "nullable": true, + "description": "Map of department names to their respective budgets", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "long" + } + } + }, + "nativeDataType": "MapType(String(), 
BIGINT())", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"MapType(String(), BIGINT())\", \"key_type\": {\"type\": \"string\", \"native_data_type\": \"VARCHAR\", \"_nullable\": true}, \"key_native_data_type\": \"VARCHAR\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=string].skills", + "nullable": true, + "description": "List of skills possessed by the employee", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "string" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "isPartitioningKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW \"test_schema\".test_view_2 AS\nSELECT employee_id, employee_name, skills\nFROM\n \"test_schema\".\"test_view_1\"", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { 
+ "path": [ + { + "id": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67", + "urn": "urn:li:container:28d9272f625e7a366dfdc276b6ce4a67" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1671098400000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:s3,test-bucket/test_table,PROD)", + "type": "COPY" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,test-bucket/test_table,PROD),employee_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),employee_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,test-bucket/test_table,PROD),annual_salary)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),annual_salary)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,test-bucket/test_table,PROD),employee_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),employee_name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,test-bucket/test_table,PROD),job_history)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),job_history)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,test-bucket/test_table,PROD),department_budgets)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),department_budgets)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:s3,test-bucket/test_table,PROD),skills)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),skills)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1671098400000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),employee_id)" + ], + "downstreamType": "FIELD", 
+ "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_id)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),annual_salary)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),annual_salary)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),employee_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_name)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),job_history)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),job_history)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),department_budgets)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),department_budgets)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),skills)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),skills)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW \"test_schema\".test_view_1 AS\nSELECT\n *\nFROM \"test_schema\".\"test_table\"", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1671098400000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),annual_salary)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),department_budgets)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),employee_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),employee_name)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),job_history)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_table,PROD),skills)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),annual_salary)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_name)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),job_history)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),department_budgets)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),skills)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + 
"aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1671098400000, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD),employee_id)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD),employee_name)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),skills)" + ], + 
"downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD),skills)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "CREATE VIEW \"test_schema\".test_view_2 AS\nSELECT\n employee_id,\n employee_name,\n skills\nFROM \"test_schema\".\"test_view_1\"", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1671098400000, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),employee_name)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_1,PROD),skills)" + }, + { + "entity": 
"urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD),employee_id)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD),employee_name)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:athena,test_schema.test_view_2,PROD),skills)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:athena" + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Aathena%2Ctest_schema.test_view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1671098400000, + "runId": "athena-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/athena/test_athena_source.py b/metadata-ingestion/tests/integration/athena/test_athena_source.py new file mode 
100644 index 00000000000000..56e7cbe6b3e2dd --- /dev/null +++ b/metadata-ingestion/tests/integration/athena/test_athena_source.py @@ -0,0 +1,163 @@ +from unittest.mock import MagicMock, patch + +from freezegun import freeze_time +from sqlalchemy import ARRAY, BIGINT, INTEGER, String +from sqlalchemy_bigquery import STRUCT + +from datahub.ingestion.run.pipeline import Pipeline +from datahub.ingestion.source.aws.s3_util import make_s3_urn +from datahub.ingestion.source.sql.athena import AthenaSource +from datahub.utilities.sqlalchemy_type_converter import MapType +from tests.test_helpers import ( # Ensure mce_helpers is available for validation. + mce_helpers, +) + +FROZEN_TIME = "2022-12-15 10:00:00" + + +@freeze_time(FROZEN_TIME) +def test_athena_source_ingestion(pytestconfig, tmp_path): + """Test Athena source ingestion and generate MCP JSON file for validation.""" + output_file_name = "athena_mce_output.json" + golden_file_name = "athena_mce_golden.json" + test_resources_dir = pytestconfig.rootpath / "tests/integration/athena" + + # Mock dependencies + with patch.object( + AthenaSource, "get_inspectors" + ) as mock_get_inspectors, patch.object( + AthenaSource, "get_table_properties" + ) as mock_get_table_properties: + # Mock engine and inspectors + mock_inspector = MagicMock() + mock_get_inspectors.return_value = [mock_inspector] + mock_engine_instance = MagicMock() + mock_engine_instance.url.database = "" + mock_inspector.engine = mock_engine_instance + + # Mock schema and table names + mock_inspector.get_schema_names.return_value = ["test_schema"] + mock_inspector.get_table_names.return_value = ["test_table"] + mock_inspector.get_view_names.return_value = ["test_view_1", "test_view_2"] + + # Mock view definitions + def mock_get_view_definition(view_name, schema): + if view_name == "test_view_1": + return ( + 'CREATE VIEW "test_schema".test_view_1 AS\n' + "SELECT *\n" + "FROM\n" + ' "test_schema"."test_table"' + ) + elif view_name == "test_view_2": + return ( + 
'CREATE VIEW "test_schema".test_view_2 AS\n' + "SELECT employee_id, employee_name, skills\n" + "FROM\n" + ' "test_schema"."test_view_1"' + ) + return "" + + mock_inspector.get_view_definition.side_effect = mock_get_view_definition + + mock_inspector.get_columns.return_value = [ + { + "name": "employee_id", + "type": String(), + "nullable": False, + "default": None, + "autoincrement": False, + "comment": "Unique identifier for the employee", + "dialect_options": {"awsathena_partition": None}, + }, + { + "name": "annual_salary", + "type": BIGINT(), + "nullable": True, + "default": None, + "autoincrement": False, + "comment": "Annual salary of the employee in USD", + "dialect_options": {"awsathena_partition": None}, + }, + { + "name": "employee_name", + "type": String(), + "nullable": False, + "default": None, + "autoincrement": False, + "comment": "Full name of the employee", + "dialect_options": {"awsathena_partition": None}, + }, + { + "name": "job_history", + "type": MapType( + String(), STRUCT(year=INTEGER(), company=String(), role=String()) + ), + "nullable": True, + "default": None, + "autoincrement": False, + "comment": "Job history map: year to details (company, role)", + "dialect_options": {"awsathena_partition": None}, + }, + { + "name": "department_budgets", + "type": MapType(String(), BIGINT()), + "nullable": True, + "default": None, + "autoincrement": False, + "comment": "Map of department names to their respective budgets", + "dialect_options": {"awsathena_partition": None}, + }, + { + "name": "skills", + "type": ARRAY(String()), + "nullable": True, + "default": None, + "autoincrement": False, + "comment": "List of skills possessed by the employee", + "dialect_options": {"awsathena_partition": None}, + }, + ] + # Mock table properties + mock_get_table_properties.return_value = ( + "Test table description", + {"key": "value", "table_type": "EXTERNAL_TABLE"}, + make_s3_urn("s3://test-bucket/test_table", "PROD"), + ) + + # Define the pipeline configuration 
+ config_dict = { + "run_id": "athena-test", + "source": { + "type": "athena", + "config": { + "aws_region": "us-east-1", + "work_group": "primary", + "query_result_location": "s3://athena-query-results/", + "catalog_name": "awsdatacatalog", + "include_views": True, + "include_tables": True, + "profiling": { + "enabled": False, + }, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/{output_file_name}", + }, + }, + } + + # Create and run the pipeline + pipeline = Pipeline.create(config_dict) + pipeline.run() + pipeline.raise_from_status() + + # Validate the output with the golden file + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, + output_path=f"{tmp_path}/{output_file_name}", + golden_path=f"{test_resources_dir}/{golden_file_name}", + ) From ccf5fc708f918de84019d280bd8dc795c19f09e1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 2 Jan 2025 11:53:31 -0500 Subject: [PATCH 012/249] chore(ingest): refactor common pytest args (#12240) --- metadata-ingestion/build.gradle | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index fc1409fbed74e4..ac8658bd869272 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -127,6 +127,9 @@ task lintFix(type: Exec, dependsOn: installDev) { "mypy --show-traceback --show-error-codes src/ tests/ examples/" } +def pytest_default_env = "PYTHONDEVMODE=1" +def pytest_default_args = "--durations=30 -vv --continue-on-collection-errors" + task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJsonSchema']) { // We can't enforce the coverage requirements if we run a subset of the tests. 
inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) @@ -135,7 +138,7 @@ task testQuick(type: Exec, dependsOn: [installDev, ':metadata-models:generateJso def cvg_arg = get_coverage_args("quick") commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "pytest ${cvg_arg} tests/unit --random-order --durations=20 -m 'not integration' -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + "${pytest_default_env} pytest ${cvg_arg} tests/unit ${pytest_default_args} --random-order -m 'not integration' --junit-xml=junit.quick.xml" } task installDevTest(type: Exec, dependsOn: [install]) { @@ -155,7 +158,7 @@ task testSingle(dependsOn: [installDevTest]) { if (testFile != 'unknown') { exec { commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && pytest ${testFile}" + "source ${venv_name}/bin/activate && ${pytest_default_env} pytest ${testFile} ${pytest_default_args}" } } else { throw new GradleException("No file provided. Use -PtestFile=") @@ -167,25 +170,25 @@ task testIntegrationBatch0(type: Exec, dependsOn: [installDevTest]) { def cvg_arg = get_coverage_args("intBatch0") commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "pytest ${cvg_arg} --durations=50 -m 'integration_batch_0' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch0.xml" + "${pytest_default_env} pytest ${cvg_arg} ${pytest_default_args} -m 'integration_batch_0' --junit-xml=junit.integrationbatch0.xml" } task testIntegrationBatch1(type: Exec, dependsOn: [installDevTest]) { def cvg_arg = get_coverage_args("intBatch1") commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "pytest ${cvg_arg} --durations=50 -m 'integration_batch_1' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch1.xml" + "${pytest_default_env} pytest ${cvg_arg} ${pytest_default_args} -m 'integration_batch_1' --junit-xml=junit.integrationbatch1.xml" } task testIntegrationBatch2(type: Exec, dependsOn: 
[installDevTest]) { def cvg_arg = get_coverage_args("intBatch2") commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "pytest ${cvg_arg} --durations=20 -m 'integration_batch_2' -vv --continue-on-collection-errors --junit-xml=junit.integrationbatch2.xml" + "${pytest_default_env} pytest ${cvg_arg} ${pytest_default_args} -m 'integration_batch_2' --junit-xml=junit.integrationbatch2.xml" } task testFull(type: Exec, dependsOn: [installDevTest]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "pytest --durations=50 -vv --continue-on-collection-errors --junit-xml=junit.full.xml" + "${pytest_default_env} pytest ${pytest_default_args} --junit-xml=junit.full.xml" } task specGen(type: Exec, dependsOn: [codegen, installDevTest]) { From bdc34b7b35aa5b707a9b4d57d2842c8c3727b712 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Thu, 2 Jan 2025 17:28:10 +0000 Subject: [PATCH 013/249] fix(sample data): Update timestamps in bootstrap_mce.json to more recent (#12257) --- metadata-ingestion/examples/mce_files/bootstrap_mce.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/examples/mce_files/bootstrap_mce.json b/metadata-ingestion/examples/mce_files/bootstrap_mce.json index bc218e5e8c2d53..d4e3d3aa5d8c42 100644 --- a/metadata-ingestion/examples/mce_files/bootstrap_mce.json +++ b/metadata-ingestion/examples/mce_files/bootstrap_mce.json @@ -3394,7 +3394,7 @@ "changeType":"UPSERT", "aspectName":"datasetProfile", "aspect":{ - "value":"{\"timestampMillis\": 1723488954865, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", + 
"value":"{\"timestampMillis\": 1735823280000, \"rowCount\": 4500, \"columnCount\": 2, \"sizeInBytes\": 842000200000, \"fieldProfiles\": [{\"fieldPath\": \"field_foo\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"true\", \"false\"]}, {\"fieldPath\": \"field_bar\", \"uniqueCount\": 2, \"uniqueProportion\": 0.00044, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"false\"]}]}", "contentType":"application/json" }, "systemMetadata":null @@ -3418,7 +3418,7 @@ "changeType":"UPSERT", "aspectName":"operation", "aspect":{ - "value":"{\"timestampMillis\": 1679515693000, \"operationType\": \"INSERT\", \"lastUpdatedTimestamp\": 1629097200001 }", + "value":"{\"timestampMillis\": 1711138093000, \"operationType\": \"INSERT\", \"lastUpdatedTimestamp\": 1629097200001 }", "contentType":"application/json" }, "systemMetadata":null @@ -3584,7 +3584,7 @@ "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { - "value": "{\"timestampMillis\": 1675155843000, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"{\\\"category\\\": \\\"catA\\\"}\"}, \"runId\": \"2021-12-28T12:00:00Z\", \"assertionUrn\": \"urn:li:assertion:358c683782c93c2fc2bd4bdd4fdb0153\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)\", \"batchSpec\": {\"customProperties\": {\"data_asset_name\": \"data__foo1__asset\", \"datasource_name\": \"my_hive_datasource\"}, \"nativeBatchId\": \"c8f12129f2e57412eee5fb8656154d05\", \"limit\": 10}, \"status\": \"COMPLETE\", \"result\": {\"type\": \"SUCCESS\", \"nativeResults\": {}}}", + "value": "{\"timestampMillis\": 1730554659000, \"partitionSpec\": {\"type\": \"PARTITION\", \"partition\": \"{\\\"category\\\": \\\"catA\\\"}\"}, \"runId\": \"2021-12-28T12:00:00Z\", \"assertionUrn\": \"urn:li:assertion:358c683782c93c2fc2bd4bdd4fdb0153\", \"asserteeUrn\": \"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)\", \"batchSpec\": 
{\"customProperties\": {\"data_asset_name\": \"data__foo1__asset\", \"datasource_name\": \"my_hive_datasource\"}, \"nativeBatchId\": \"c8f12129f2e57412eee5fb8656154d05\", \"limit\": 10}, \"status\": \"COMPLETE\", \"result\": {\"type\": \"SUCCESS\", \"nativeResults\": {}}}", "contentType": "application/json" }, "systemMetadata": null From f396d8d87a6b6567874340d530bbda966fda684e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 2 Jan 2025 15:36:07 -0500 Subject: [PATCH 014/249] refactor(sdk/patch): improve patch implementation internals (#12253) --- .github/workflows/airflow-plugin.yml | 4 +- .github/workflows/metadata-ingestion.yml | 9 +- .github/workflows/prefect-plugin.yml | 4 +- .../src/datahub/emitter/mce_builder.py | 6 +- .../src/datahub/emitter/mcp_patch_builder.py | 48 +++- .../specific/aspect_helpers/__init__.py | 0 .../aspect_helpers/custom_properties.py | 79 ++++++ .../specific/aspect_helpers/ownership.py | 67 ++++++ .../aspect_helpers/structured_properties.py | 72 ++++++ .../datahub/specific/aspect_helpers/tags.py | 42 ++++ .../datahub/specific/aspect_helpers/terms.py | 43 ++++ .../src/datahub/specific/chart.py | 212 +++------------- .../src/datahub/specific/custom_properties.py | 37 --- .../src/datahub/specific/dashboard.py | 227 +++--------------- .../src/datahub/specific/datajob.py | 223 +++-------------- .../src/datahub/specific/dataproduct.py | 110 ++------- .../src/datahub/specific/dataset.py | 181 ++++---------- .../src/datahub/specific/form.py | 44 +--- .../src/datahub/specific/ownership.py | 48 ---- .../datahub/specific/structured_properties.py | 53 ---- .../datahub/specific/structured_property.py | 18 +- 21 files changed, 535 insertions(+), 992 deletions(-) create mode 100644 metadata-ingestion/src/datahub/specific/aspect_helpers/__init__.py create mode 100644 metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py create mode 100644 metadata-ingestion/src/datahub/specific/aspect_helpers/ownership.py create mode 100644 
metadata-ingestion/src/datahub/specific/aspect_helpers/structured_properties.py create mode 100644 metadata-ingestion/src/datahub/specific/aspect_helpers/tags.py create mode 100644 metadata-ingestion/src/datahub/specific/aspect_helpers/terms.py delete mode 100644 metadata-ingestion/src/datahub/specific/custom_properties.py delete mode 100644 metadata-ingestion/src/datahub/specific/ownership.py delete mode 100644 metadata-ingestion/src/datahub/specific/structured_properties.py diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 26fcceb8aeab70..b824a21be63f8f 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -84,8 +84,8 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} directory: ./build/coverage-reports/ fail_ci_if_error: false - flags: airflow,airflow-${{ matrix.extra_pip_extras }} - name: pytest-airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_requirements }} + flags: airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_extras }} + name: pytest-airflow verbose: true event-file: diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 106cba1473982e..f4d87b361b5edc 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -41,9 +41,6 @@ jobs: "testIntegrationBatch1", "testIntegrationBatch2", ] - include: - - python-version: "3.8" - - python-version: "3.11" fail-fast: false steps: - name: Free up disk space @@ -92,14 +89,14 @@ jobs: **/junit.*.xml !**/binary/** - name: Upload coverage to Codecov - if: ${{ always() && matrix.python-version == '3.10' }} + if: ${{ always() }} uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} directory: ./build/coverage-reports/ fail_ci_if_error: false - flags: pytest-${{ matrix.command }} - name: pytest-${{ matrix.python-version }}-${{ matrix.command }} + flags: ingestion-${{ matrix.python-version }}-${{ matrix.command }} + 
name: pytest-ingestion verbose: true event-file: diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index d77142a1f00ded..879df032409f28 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -67,8 +67,8 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} directory: ./build/coverage-reports/ fail_ci_if_error: false - flags: prefect,prefect-${{ matrix.python-version }} - name: pytest-prefect-${{ matrix.python-version }} + flags: prefect-${{ matrix.python-version }} + name: pytest-prefect verbose: true event-file: diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 110624aa61cb89..f095fffbaea6b4 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -24,6 +24,7 @@ import typing_inspect from avrogen.dict_wrapper import DictWrapper +from typing_extensions import assert_never from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.schema_classes import ( @@ -269,9 +270,8 @@ def make_owner_urn(owner: str, owner_type: OwnerType) -> str: return make_user_urn(owner) elif owner_type == OwnerType.GROUP: return make_group_urn(owner) - # This should pretty much never happen. - # TODO: With Python 3.11, we can use typing.assert_never() here. 
- return f"urn:li:{owner_type.value}:{owner}" + else: + assert_never(owner_type) def make_ownership_type_urn(type: str) -> str: diff --git a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py index 1ed8ce1d5a6158..17026a4114c128 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py @@ -2,7 +2,19 @@ import time from collections import defaultdict from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Sequence, Union +from typing import ( + Any, + Dict, + List, + Literal, + Optional, + Protocol, + Tuple, + Union, + runtime_checkable, +) + +from typing_extensions import LiteralString from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE from datahub.emitter.serialization_helper import pre_json_transform @@ -19,25 +31,36 @@ from datahub.utilities.urns.urn import guess_entity_type +@runtime_checkable +class SupportsToObj(Protocol): + def to_obj(self) -> Any: + ... + + def _recursive_to_obj(obj: Any) -> Any: if isinstance(obj, list): return [_recursive_to_obj(v) for v in obj] - elif hasattr(obj, "to_obj"): + elif isinstance(obj, SupportsToObj): return obj.to_obj() else: return obj +PatchPath = Tuple[Union[LiteralString, Urn], ...] 
+PatchOp = Literal["add", "remove", "replace"] + + @dataclass -class _Patch: - op: str # one of ['add', 'remove', 'replace']; we don't support move, copy or test - path: str +class _Patch(SupportsToObj): + op: PatchOp + path: PatchPath value: Any def to_obj(self) -> Dict: + quoted_path = "/" + "/".join(MetadataPatchProposal.quote(p) for p in self.path) return { "op": self.op, - "path": self.path, + "path": quoted_path, "value": _recursive_to_obj(self.value), } @@ -63,15 +86,16 @@ def __init__( # Json Patch quoting based on https://jsonpatch.com/#json-pointer @classmethod - def quote(cls, value: str) -> str: - return value.replace("~", "~0").replace("/", "~1") + def quote(cls, value: Union[str, Urn]) -> str: + return str(value).replace("~", "~0").replace("/", "~1") def _add_patch( - self, aspect_name: str, op: str, path: Union[str, Sequence[str]], value: Any + self, + aspect_name: str, + op: PatchOp, + path: PatchPath, + value: Any, ) -> None: - if not isinstance(path, str): - path = "/" + "/".join(self.quote(p) for p in path) - # TODO: Validate that aspectName is a valid aspect for this entityType self.patches[aspect_name].append(_Patch(op, path, value)) diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/__init__.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py new file mode 100644 index 00000000000000..1fd1585a913581 --- /dev/null +++ b/metadata-ingestion/src/datahub/specific/aspect_helpers/custom_properties.py @@ -0,0 +1,79 @@ +from abc import abstractmethod +from typing import Dict, Optional, Tuple + +from typing_extensions import Self + +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath + + +class HasCustomPropertiesPatch(MetadataPatchProposal): + @classmethod + 
@abstractmethod + def _custom_properties_location(self) -> Tuple[str, PatchPath]: + ... + + def add_custom_property(self, key: str, value: str) -> Self: + """Add a custom property to the entity. + + Args: + key: The key of the custom property. + value: The value of the custom property. + + Returns: + The patch builder instance. + """ + aspect_name, path = self._custom_properties_location() + self._add_patch( + aspect_name, + "add", + path=(*path, key), + value=value, + ) + return self + + def add_custom_properties( + self, custom_properties: Optional[Dict[str, str]] = None + ) -> Self: + if custom_properties is not None: + for key, value in custom_properties.items(): + self.add_custom_property(key, value) + return self + + def remove_custom_property(self, key: str) -> Self: + """Remove a custom property from the entity. + + Args: + key: The key of the custom property to remove. + + Returns: + The patch builder instance. + """ + aspect_name, path = self._custom_properties_location() + self._add_patch( + aspect_name, + "remove", + path=(*path, key), + value={}, + ) + return self + + def set_custom_properties(self, custom_properties: Dict[str, str]) -> Self: + """Sets the custom properties of the entity. + + This method replaces all existing custom properties with the given dictionary. + + Args: + custom_properties: A dictionary containing the custom properties to be set. + + Returns: + The patch builder instance. 
+ """ + + aspect_name, path = self._custom_properties_location() + self._add_patch( + aspect_name, + "add", + path=path, + value=custom_properties, + ) + return self diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/ownership.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/ownership.py new file mode 100644 index 00000000000000..1e2c789c7def35 --- /dev/null +++ b/metadata-ingestion/src/datahub/specific/aspect_helpers/ownership.py @@ -0,0 +1,67 @@ +from typing import List, Optional + +from typing_extensions import Self + +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.metadata.schema_classes import ( + OwnerClass, + OwnershipClass, + OwnershipTypeClass, +) + + +class HasOwnershipPatch(MetadataPatchProposal): + def add_owner(self, owner: OwnerClass) -> Self: + """Add an owner to the entity. + + Args: + owner: The Owner object to add. + + Returns: + The patch builder instance. + """ + self._add_patch( + OwnershipClass.ASPECT_NAME, + "add", + path=("owners", owner.owner, str(owner.type)), + value=owner, + ) + return self + + def remove_owner( + self, owner: str, owner_type: Optional[OwnershipTypeClass] = None + ) -> Self: + """Remove an owner from the entity. + + If owner_type is not provided, the owner will be removed regardless of ownership type. + + Args: + owner: The owner to remove. + owner_type: The ownership type of the owner (optional). + + Returns: + The patch builder instance. + """ + self._add_patch( + OwnershipClass.ASPECT_NAME, + "remove", + path=("owners", owner) + ((str(owner_type),) if owner_type else ()), + value=owner, + ) + return self + + def set_owners(self, owners: List[OwnerClass]) -> Self: + """Set the owners of the entity. + + This will effectively replace all existing owners with the new list - it doesn't really patch things. + + Args: + owners: The list of owners to set. + + Returns: + The patch builder instance. 
+ """ + self._add_patch( + OwnershipClass.ASPECT_NAME, "add", path=("owners",), value=owners + ) + return self diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/structured_properties.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/structured_properties.py new file mode 100644 index 00000000000000..48050bbad8e50d --- /dev/null +++ b/metadata-ingestion/src/datahub/specific/aspect_helpers/structured_properties.py @@ -0,0 +1,72 @@ +from typing import List, Union + +from typing_extensions import Self + +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.metadata.schema_classes import ( + StructuredPropertiesClass, + StructuredPropertyValueAssignmentClass, +) +from datahub.utilities.urns.structured_properties_urn import ( + make_structured_property_urn, +) + + +class HasStructuredPropertiesPatch(MetadataPatchProposal): + def set_structured_property( + self, key: str, value: Union[str, float, List[Union[str, float]]] + ) -> Self: + """Add or update a structured property. + + Args: + key: the name of the property (either bare or urn form) + value: the value of the property (for multi-valued properties, this can be a list) + + Returns: + The patch builder instance. + """ + self.remove_structured_property(key) + self.add_structured_property(key, value) + return self + + def remove_structured_property(self, key: str) -> Self: + """Remove a structured property. + + Args: + key: the name of the property (either bare or urn form) + + Returns: + The patch builder instance. + """ + + self._add_patch( + StructuredPropertiesClass.ASPECT_NAME, + "remove", + path=("properties", make_structured_property_urn(key)), + value={}, + ) + return self + + def add_structured_property( + self, key: str, value: Union[str, float, List[Union[str, float]]] + ) -> Self: + """Add a structured property. 
+ + Args: + key: the name of the property (either bare or urn form) + value: the value of the property (for multi-valued properties, this value will be appended to the list) + + Returns: + The patch builder instance. + """ + + self._add_patch( + StructuredPropertiesClass.ASPECT_NAME, + "add", + path=("properties", make_structured_property_urn(key)), + value=StructuredPropertyValueAssignmentClass( + propertyUrn=make_structured_property_urn(key), + values=value if isinstance(value, list) else [value], + ), + ) + return self diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/tags.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/tags.py new file mode 100644 index 00000000000000..afbc9115ca6e2b --- /dev/null +++ b/metadata-ingestion/src/datahub/specific/aspect_helpers/tags.py @@ -0,0 +1,42 @@ +from typing import Union + +from typing_extensions import Self + +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.metadata.schema_classes import ( + GlobalTagsClass as GlobalTags, + TagAssociationClass as Tag, +) +from datahub.metadata.urns import TagUrn, Urn + + +class HasTagsPatch(MetadataPatchProposal): + def add_tag(self, tag: Tag) -> Self: + """Adds a tag to the entity. + + Args: + tag: The Tag object representing the tag to be added. + + Returns: + The patch builder instance. + """ + + # TODO: Make this support raw strings, in addition to Tag objects. + self._add_patch( + GlobalTags.ASPECT_NAME, "add", path=("tags", tag.tag), value=tag + ) + return self + + def remove_tag(self, tag: Union[str, Urn]) -> Self: + """Removes a tag from the entity. + + Args: + tag: The tag to remove, specified as a string or Urn object. + + Returns: + The patch builder instance. 
+ """ + if isinstance(tag, str) and not tag.startswith("urn:li:tag:"): + tag = TagUrn.create_from_id(tag) + self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=("tags", tag), value={}) + return self diff --git a/metadata-ingestion/src/datahub/specific/aspect_helpers/terms.py b/metadata-ingestion/src/datahub/specific/aspect_helpers/terms.py new file mode 100644 index 00000000000000..ae199124372b40 --- /dev/null +++ b/metadata-ingestion/src/datahub/specific/aspect_helpers/terms.py @@ -0,0 +1,43 @@ +from typing import Union + +from typing_extensions import Self + +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.metadata.schema_classes import ( + GlossaryTermAssociationClass as Term, + GlossaryTermsClass, +) +from datahub.metadata.urns import GlossaryTermUrn, Urn + + +class HasTermsPatch(MetadataPatchProposal): + def add_term(self, term: Term) -> Self: + """Adds a glossary term to the entity. + + Args: + term: The Term object representing the glossary term to be added. + + Returns: + The patch builder instance. + """ + # TODO: Make this support raw strings, in addition to Term objects. + self._add_patch( + GlossaryTermsClass.ASPECT_NAME, "add", path=("terms", term.urn), value=term + ) + return self + + def remove_term(self, term: Union[str, Urn]) -> Self: + """Removes a glossary term from the entity. + + Args: + term: The term to remove, specified as a string or Urn object. + + Returns: + The patch builder instance. 
+ """ + if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): + term = GlossaryTermUrn(term) + self._add_patch( + GlossaryTermsClass.ASPECT_NAME, "remove", path=("terms", term), value={} + ) + return self diff --git a/metadata-ingestion/src/datahub/specific/chart.py b/metadata-ingestion/src/datahub/specific/chart.py index 104a7c21a07e2f..f44a2ffc0d68ab 100644 --- a/metadata-ingestion/src/datahub/specific/chart.py +++ b/metadata-ingestion/src/datahub/specific/chart.py @@ -1,28 +1,29 @@ -from typing import Dict, List, Optional, Union +from typing import List, Optional, Tuple, Union -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath from datahub.metadata.schema_classes import ( AccessLevelClass, ChangeAuditStampsClass, ChartInfoClass as ChartInfo, ChartTypeClass, EdgeClass as Edge, - GlobalTagsClass as GlobalTags, - GlossaryTermAssociationClass as Term, - GlossaryTermsClass as GlossaryTerms, KafkaAuditHeaderClass, - OwnerClass as Owner, - OwnershipTypeClass, SystemMetadataClass, - TagAssociationClass as Tag, ) -from datahub.specific.custom_properties import CustomPropertiesPatchHelper -from datahub.specific.ownership import OwnershipPatchHelper -from datahub.utilities.urns.tag_urn import TagUrn +from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch +from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch +from datahub.specific.aspect_helpers.tags import HasTagsPatch +from datahub.specific.aspect_helpers.terms import HasTermsPatch from datahub.utilities.urns.urn import Urn -class ChartPatchBuilder(MetadataPatchProposal): +class ChartPatchBuilder( + HasOwnershipPatch, + HasCustomPropertiesPatch, + HasTagsPatch, + HasTermsPatch, + MetadataPatchProposal, +): def __init__( self, urn: str, @@ -40,55 +41,10 @@ def __init__( super().__init__( urn, system_metadata=system_metadata, audit_header=audit_header ) - 
self.custom_properties_patch_helper = CustomPropertiesPatchHelper( - self, ChartInfo.ASPECT_NAME - ) - self.ownership_patch_helper = OwnershipPatchHelper(self) - - def add_owner(self, owner: Owner) -> "ChartPatchBuilder": - """ - Adds an owner to the ChartPatchBuilder. - - Args: - owner: The Owner object to add. - - Returns: - The ChartPatchBuilder instance. - """ - self.ownership_patch_helper.add_owner(owner) - return self - def remove_owner( - self, owner: str, owner_type: Optional[OwnershipTypeClass] = None - ) -> "ChartPatchBuilder": - """ - Removes an owner from the ChartPatchBuilder. - - Args: - owner: The owner to remove. - owner_type: The ownership type of the owner (optional). - - Returns: - The ChartPatchBuilder instance. - - Notes: - `owner_type` is optional. - """ - self.ownership_patch_helper.remove_owner(owner, owner_type) - return self - - def set_owners(self, owners: List[Owner]) -> "ChartPatchBuilder": - """ - Sets the owners of the ChartPatchBuilder. - - Args: - owners: A list of Owner objects. - - Returns: - The ChartPatchBuilder instance. 
- """ - self.ownership_patch_helper.set_owners(owners) - return self + @classmethod + def _custom_properties_location(cls) -> Tuple[str, PatchPath]: + return ChartInfo.ASPECT_NAME, ("customProperties",) def add_input_edge(self, input: Union[Edge, Urn, str]) -> "ChartPatchBuilder": """ @@ -120,7 +76,7 @@ def add_input_edge(self, input: Union[Edge, Urn, str]) -> "ChartPatchBuilder": self._add_patch( ChartInfo.ASPECT_NAME, "add", - path=f"/inputEdges/{self.quote(input_urn)}", + path=("inputEdges", input_urn), value=input_urn, ) return self @@ -138,7 +94,7 @@ def remove_input_edge(self, input: Union[str, Urn]) -> "ChartPatchBuilder": self._add_patch( ChartInfo.ASPECT_NAME, "remove", - path=f"/inputEdges/{self.quote(str(input))}", + path=("inputEdges", str(input)), value={}, ) return self @@ -159,129 +115,17 @@ def set_input_edges(self, inputs: List[Edge]) -> "ChartPatchBuilder": self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/inputEdges", + path=("inputEdges",), value=inputs, ) return self - def add_tag(self, tag: Tag) -> "ChartPatchBuilder": - """ - Adds a tag to the ChartPatchBuilder. - - Args: - tag: The Tag object representing the tag to be added. - - Returns: - The ChartPatchBuilder instance. - """ - self._add_patch( - GlobalTags.ASPECT_NAME, "add", path=f"/tags/{tag.tag}", value=tag - ) - return self - - def remove_tag(self, tag: Union[str, Urn]) -> "ChartPatchBuilder": - """ - Removes a tag from the ChartPatchBuilder. - - Args: - tag: The tag to remove, specified as a string or Urn object. - - Returns: - The ChartPatchBuilder instance. - """ - if isinstance(tag, str) and not tag.startswith("urn:li:tag:"): - tag = TagUrn.create_from_id(tag) - self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=f"/tags/{tag}", value={}) - return self - - def add_term(self, term: Term) -> "ChartPatchBuilder": - """ - Adds a glossary term to the ChartPatchBuilder. - - Args: - term: The Term object representing the glossary term to be added. 
- - Returns: - The ChartPatchBuilder instance. - """ - self._add_patch( - GlossaryTerms.ASPECT_NAME, "add", path=f"/terms/{term.urn}", value=term - ) - return self - - def remove_term(self, term: Union[str, Urn]) -> "ChartPatchBuilder": - """ - Removes a glossary term from the ChartPatchBuilder. - - Args: - term: The term to remove, specified as a string or Urn object. - - Returns: - The ChartPatchBuilder instance. - """ - if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): - term = "urn:li:glossaryTerm:" + term - self._add_patch( - GlossaryTerms.ASPECT_NAME, "remove", path=f"/terms/{term}", value={} - ) - return self - - def set_custom_properties( - self, custom_properties: Dict[str, str] - ) -> "ChartPatchBuilder": - """ - Sets the custom properties for the ChartPatchBuilder. - - Args: - custom_properties: A dictionary containing the custom properties to be set. - - Returns: - The ChartPatchBuilder instance. - - Notes: - This method replaces all existing custom properties with the given dictionary. - """ - self._add_patch( - ChartInfo.ASPECT_NAME, - "add", - path="/customProperties", - value=custom_properties, - ) - return self - - def add_custom_property(self, key: str, value: str) -> "ChartPatchBuilder": - """ - Adds a custom property to the ChartPatchBuilder. - - Args: - key: The key of the custom property. - value: The value of the custom property. - - Returns: - The ChartPatchBuilder instance. - """ - self.custom_properties_patch_helper.add_property(key, value) - return self - - def remove_custom_property(self, key: str) -> "ChartPatchBuilder": - """ - Removes a custom property from the ChartPatchBuilder. - - Args: - key: The key of the custom property to remove. - - Returns: - The ChartPatchBuilder instance. 
- """ - self.custom_properties_patch_helper.remove_property(key) - return self - def set_title(self, title: str) -> "ChartPatchBuilder": assert title, "ChartInfo title should not be None" self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/title", + path=("title",), value=title, ) @@ -292,7 +136,7 @@ def set_description(self, description: str) -> "ChartPatchBuilder": self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/description", + path=("description",), value=description, ) @@ -303,7 +147,7 @@ def set_last_refreshed(self, last_refreshed: Optional[int]) -> "ChartPatchBuilde self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/lastRefreshed", + path=("lastRefreshed",), value=last_refreshed, ) @@ -316,7 +160,7 @@ def set_last_modified( self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/lastModified", + path=("lastModified",), value=last_modified, ) @@ -327,7 +171,7 @@ def set_external_url(self, external_url: Optional[str]) -> "ChartPatchBuilder": self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/externalUrl", + path=("externalUrl",), value=external_url, ) return self @@ -337,7 +181,7 @@ def set_chart_url(self, dashboard_url: Optional[str]) -> "ChartPatchBuilder": self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/chartUrl", + path=("chartUrl",), value=dashboard_url, ) @@ -350,7 +194,7 @@ def set_type( self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/type", + path=("type",), value=type, ) @@ -363,7 +207,7 @@ def set_access( self._add_patch( ChartInfo.ASPECT_NAME, "add", - path="/access", + path=("access",), value=access, ) @@ -375,7 +219,7 @@ def add_inputs(self, input_urns: Optional[List[str]]) -> "ChartPatchBuilder": self._add_patch( aspect_name=ChartInfo.ASPECT_NAME, op="add", - path=f"/inputs/{urn}", + path=("inputs", urn), value=urn, ) diff --git a/metadata-ingestion/src/datahub/specific/custom_properties.py b/metadata-ingestion/src/datahub/specific/custom_properties.py deleted file mode 100644 index 
d399a448cc0c23..00000000000000 --- a/metadata-ingestion/src/datahub/specific/custom_properties.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Generic, TypeVar - -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal - -_Parent = TypeVar("_Parent", bound=MetadataPatchProposal) - - -class CustomPropertiesPatchHelper(Generic[_Parent]): - def __init__( - self, - parent: _Parent, - aspect_name: str, - ) -> None: - self.aspect_name = aspect_name - self._parent = parent - self.aspect_field = "customProperties" - - def parent(self) -> _Parent: - return self._parent - - def add_property(self, key: str, value: str) -> "CustomPropertiesPatchHelper": - self._parent._add_patch( - self.aspect_name, - "add", - path=f"/{self.aspect_field}/{key}", - value=value, - ) - return self - - def remove_property(self, key: str) -> "CustomPropertiesPatchHelper": - self._parent._add_patch( - self.aspect_name, - "remove", - path=f"/{self.aspect_field}/{key}", - value={}, - ) - return self diff --git a/metadata-ingestion/src/datahub/specific/dashboard.py b/metadata-ingestion/src/datahub/specific/dashboard.py index da5abbfd1dc129..515fcf0c6da955 100644 --- a/metadata-ingestion/src/datahub/specific/dashboard.py +++ b/metadata-ingestion/src/datahub/specific/dashboard.py @@ -1,27 +1,28 @@ -from typing import Dict, List, Optional, Union +from typing import List, Optional, Tuple, Union -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath from datahub.metadata.schema_classes import ( AccessLevelClass, ChangeAuditStampsClass, DashboardInfoClass as DashboardInfo, EdgeClass as Edge, - GlobalTagsClass as GlobalTags, - GlossaryTermAssociationClass as Term, - GlossaryTermsClass as GlossaryTerms, KafkaAuditHeaderClass, - OwnerClass as Owner, - OwnershipTypeClass, SystemMetadataClass, - TagAssociationClass as Tag, ) -from datahub.specific.custom_properties import CustomPropertiesPatchHelper 
-from datahub.specific.ownership import OwnershipPatchHelper -from datahub.utilities.urns.tag_urn import TagUrn +from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch +from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch +from datahub.specific.aspect_helpers.tags import HasTagsPatch +from datahub.specific.aspect_helpers.terms import HasTermsPatch from datahub.utilities.urns.urn import Urn -class DashboardPatchBuilder(MetadataPatchProposal): +class DashboardPatchBuilder( + HasOwnershipPatch, + HasCustomPropertiesPatch, + HasTagsPatch, + HasTermsPatch, + MetadataPatchProposal, +): def __init__( self, urn: str, @@ -39,55 +40,10 @@ def __init__( super().__init__( urn, system_metadata=system_metadata, audit_header=audit_header ) - self.custom_properties_patch_helper = CustomPropertiesPatchHelper( - self, DashboardInfo.ASPECT_NAME - ) - self.ownership_patch_helper = OwnershipPatchHelper(self) - - def add_owner(self, owner: Owner) -> "DashboardPatchBuilder": - """ - Adds an owner to the DashboardPatchBuilder. - - Args: - owner: The Owner object to add. - - Returns: - The DashboardPatchBuilder instance. - """ - self.ownership_patch_helper.add_owner(owner) - return self - - def remove_owner( - self, owner: str, owner_type: Optional[OwnershipTypeClass] = None - ) -> "DashboardPatchBuilder": - """ - Removes an owner from the DashboardPatchBuilder. - - Args: - owner: The owner to remove. - owner_type: The ownership type of the owner (optional). - - Returns: - The DashboardPatchBuilder instance. - - Notes: - `owner_type` is optional. - """ - self.ownership_patch_helper.remove_owner(owner, owner_type) - return self - - def set_owners(self, owners: List[Owner]) -> "DashboardPatchBuilder": - """ - Sets the owners of the DashboardPatchBuilder. - Args: - owners: A list of Owner objects. - - Returns: - The DashboardPatchBuilder instance. 
- """ - self.ownership_patch_helper.set_owners(owners) - return self + @classmethod + def _custom_properties_location(cls) -> Tuple[str, PatchPath]: + return DashboardInfo.ASPECT_NAME, ("customProperties",) def add_dataset_edge( self, dataset: Union[Edge, Urn, str] @@ -126,7 +82,7 @@ def add_dataset_edge( self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path=f"/datasetEdges/{self.quote(dataset_urn)}", + path=("datasetEdges", dataset_urn), value=dataset_edge, ) return self @@ -144,7 +100,7 @@ def remove_dataset_edge(self, dataset: Union[str, Urn]) -> "DashboardPatchBuilde self._add_patch( DashboardInfo.ASPECT_NAME, "remove", - path=f"/datasetEdges/{dataset}", + path=("datasetEdges", dataset), value={}, ) return self @@ -169,7 +125,7 @@ def set_dataset_edges(self, datasets: List[Edge]) -> "DashboardPatchBuilder": self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/datasetEdges", + path=("datasetEdges",), value=datasets, ) return self @@ -209,7 +165,7 @@ def add_chart_edge(self, chart: Union[Edge, Urn, str]) -> "DashboardPatchBuilder self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path=f"/chartEdges/{self.quote(chart_urn)}", + path=("chartEdges", chart_urn), value=chart_edge, ) return self @@ -227,7 +183,7 @@ def remove_chart_edge(self, chart: Union[str, Urn]) -> "DashboardPatchBuilder": self._add_patch( DashboardInfo.ASPECT_NAME, "remove", - path=f"/chartEdges/{chart}", + path=("chartEdges", chart), value={}, ) return self @@ -252,129 +208,17 @@ def set_chart_edges(self, charts: List[Edge]) -> "DashboardPatchBuilder": self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/chartEdges", + path=("chartEdges",), value=charts, ) return self - def add_tag(self, tag: Tag) -> "DashboardPatchBuilder": - """ - Adds a tag to the DashboardPatchBuilder. - - Args: - tag: The Tag object representing the tag to be added. - - Returns: - The DashboardPatchBuilder instance. 
- """ - self._add_patch( - GlobalTags.ASPECT_NAME, "add", path=f"/tags/{tag.tag}", value=tag - ) - return self - - def remove_tag(self, tag: Union[str, Urn]) -> "DashboardPatchBuilder": - """ - Removes a tag from the DashboardPatchBuilder. - - Args: - tag: The tag to remove, specified as a string or Urn object. - - Returns: - The DashboardPatchBuilder instance. - """ - if isinstance(tag, str) and not tag.startswith("urn:li:tag:"): - tag = TagUrn.create_from_id(tag) - self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=f"/tags/{tag}", value={}) - return self - - def add_term(self, term: Term) -> "DashboardPatchBuilder": - """ - Adds a glossary term to the DashboardPatchBuilder. - - Args: - term: The Term object representing the glossary term to be added. - - Returns: - The DashboardPatchBuilder instance. - """ - self._add_patch( - GlossaryTerms.ASPECT_NAME, "add", path=f"/terms/{term.urn}", value=term - ) - return self - - def remove_term(self, term: Union[str, Urn]) -> "DashboardPatchBuilder": - """ - Removes a glossary term from the DashboardPatchBuilder. - - Args: - term: The term to remove, specified as a string or Urn object. - - Returns: - The DashboardPatchBuilder instance. - """ - if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): - term = "urn:li:glossaryTerm:" + term - self._add_patch( - GlossaryTerms.ASPECT_NAME, "remove", path=f"/terms/{term}", value={} - ) - return self - - def set_custom_properties( - self, custom_properties: Dict[str, str] - ) -> "DashboardPatchBuilder": - """ - Sets the custom properties for the DashboardPatchBuilder. - - Args: - custom_properties: A dictionary containing the custom properties to be set. - - Returns: - The DashboardPatchBuilder instance. - - Notes: - This method replaces all existing custom properties with the given dictionary. 
- """ - self._add_patch( - DashboardInfo.ASPECT_NAME, - "add", - path="/customProperties", - value=custom_properties, - ) - return self - - def add_custom_property(self, key: str, value: str) -> "DashboardPatchBuilder": - """ - Adds a custom property to the DashboardPatchBuilder. - - Args: - key: The key of the custom property. - value: The value of the custom property. - - Returns: - The DashboardPatchBuilder instance. - """ - self.custom_properties_patch_helper.add_property(key, value) - return self - - def remove_custom_property(self, key: str) -> "DashboardPatchBuilder": - """ - Removes a custom property from the DashboardPatchBuilder. - - Args: - key: The key of the custom property to remove. - - Returns: - The DashboardPatchBuilder instance. - """ - self.custom_properties_patch_helper.remove_property(key) - return self - def set_title(self, title: str) -> "DashboardPatchBuilder": assert title, "DashboardInfo title should not be None" self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/title", + path=("title",), value=title, ) @@ -385,27 +229,18 @@ def set_description(self, description: str) -> "DashboardPatchBuilder": self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/description", + path=("description",), value=description, ) return self - def add_custom_properties( - self, custom_properties: Optional[Dict[str, str]] = None - ) -> "DashboardPatchBuilder": - if custom_properties: - for key, value in custom_properties.items(): - self.custom_properties_patch_helper.add_property(key, value) - - return self - def set_external_url(self, external_url: Optional[str]) -> "DashboardPatchBuilder": if external_url: self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/externalUrl", + path=("externalUrl",), value=external_url, ) return self @@ -416,7 +251,7 @@ def add_charts(self, chart_urns: Optional[List[str]]) -> "DashboardPatchBuilder" self._add_patch( aspect_name=DashboardInfo.ASPECT_NAME, op="add", - path=f"/charts/{urn}", + path=("charts", 
urn), value=urn, ) @@ -430,7 +265,7 @@ def add_datasets( self._add_patch( aspect_name=DashboardInfo.ASPECT_NAME, op="add", - path=f"/datasets/{urn}", + path=("datasets", urn), value=urn, ) @@ -443,7 +278,7 @@ def set_dashboard_url( self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/dashboardUrl", + path=("dashboardUrl",), value=dashboard_url, ) @@ -456,7 +291,7 @@ def set_access( self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/access", + path=("access",), value=access, ) @@ -469,7 +304,7 @@ def set_last_refreshed( self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/lastRefreshed", + path=("lastRefreshed",), value=last_refreshed, ) @@ -482,7 +317,7 @@ def set_last_modified( self._add_patch( DashboardInfo.ASPECT_NAME, "add", - path="/lastModified", + path=("lastModified",), value=last_modified, ) diff --git a/metadata-ingestion/src/datahub/specific/datajob.py b/metadata-ingestion/src/datahub/specific/datajob.py index 6ff4741b09c26a..fd826c6dd59ca3 100644 --- a/metadata-ingestion/src/datahub/specific/datajob.py +++ b/metadata-ingestion/src/datahub/specific/datajob.py @@ -1,25 +1,27 @@ -from typing import Dict, List, Optional, Union +from typing import List, Optional, Tuple, Union -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath from datahub.metadata.schema_classes import ( DataJobInfoClass as DataJobInfo, DataJobInputOutputClass as DataJobInputOutput, EdgeClass as Edge, - GlobalTagsClass as GlobalTags, - GlossaryTermAssociationClass as Term, - GlossaryTermsClass as GlossaryTerms, KafkaAuditHeaderClass, - OwnerClass as Owner, - OwnershipTypeClass, SystemMetadataClass, - TagAssociationClass as Tag, ) -from datahub.metadata.urns import SchemaFieldUrn, TagUrn, Urn -from datahub.specific.custom_properties import CustomPropertiesPatchHelper -from datahub.specific.ownership import OwnershipPatchHelper - - -class 
DataJobPatchBuilder(MetadataPatchProposal): +from datahub.metadata.urns import SchemaFieldUrn, Urn +from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch +from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch +from datahub.specific.aspect_helpers.tags import HasTagsPatch +from datahub.specific.aspect_helpers.terms import HasTermsPatch + + +class DataJobPatchBuilder( + HasOwnershipPatch, + HasCustomPropertiesPatch, + HasTagsPatch, + HasTermsPatch, + MetadataPatchProposal, +): def __init__( self, urn: str, @@ -37,55 +39,10 @@ def __init__( super().__init__( urn, system_metadata=system_metadata, audit_header=audit_header ) - self.custom_properties_patch_helper = CustomPropertiesPatchHelper( - self, DataJobInfo.ASPECT_NAME - ) - self.ownership_patch_helper = OwnershipPatchHelper(self) - - def add_owner(self, owner: Owner) -> "DataJobPatchBuilder": - """ - Adds an owner to the DataJobPatchBuilder. - - Args: - owner: The Owner object to add. - - Returns: - The DataJobPatchBuilder instance. - """ - self.ownership_patch_helper.add_owner(owner) - return self - - def remove_owner( - self, owner: str, owner_type: Optional[OwnershipTypeClass] = None - ) -> "DataJobPatchBuilder": - """ - Removes an owner from the DataJobPatchBuilder. - - Args: - owner: The owner to remove. - owner_type: The ownership type of the owner (optional). - - Returns: - The DataJobPatchBuilder instance. - - Notes: - `owner_type` is optional. - """ - self.ownership_patch_helper.remove_owner(owner, owner_type) - return self - - def set_owners(self, owners: List[Owner]) -> "DataJobPatchBuilder": - """ - Sets the owners of the DataJobPatchBuilder. - - Args: - owners: A list of Owner objects. - Returns: - The DataJobPatchBuilder instance. 
- """ - self.ownership_patch_helper.set_owners(owners) - return self + @classmethod + def _custom_properties_location(cls) -> Tuple[str, PatchPath]: + return DataJobInfo.ASPECT_NAME, ("customProperties",) def add_input_datajob(self, input: Union[Edge, Urn, str]) -> "DataJobPatchBuilder": """ @@ -120,7 +77,7 @@ def add_input_datajob(self, input: Union[Edge, Urn, str]) -> "DataJobPatchBuilde self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path=f"/inputDatajobEdges/{self.quote(input_urn)}", + path=("inputDatajobEdges", input_urn), value=input_edge, ) return self @@ -138,7 +95,7 @@ def remove_input_datajob(self, input: Union[str, Urn]) -> "DataJobPatchBuilder": self._add_patch( DataJobInputOutput.ASPECT_NAME, "remove", - path=f"/inputDatajobEdges/{input}", + path=("inputDatajobEdges", input), value={}, ) return self @@ -163,7 +120,7 @@ def set_input_datajobs(self, inputs: List[Edge]) -> "DataJobPatchBuilder": self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path="/inputDatajobEdges", + path=("inputDatajobEdges",), value=inputs, ) return self @@ -201,7 +158,7 @@ def add_input_dataset(self, input: Union[Edge, Urn, str]) -> "DataJobPatchBuilde self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path=f"/inputDatasetEdges/{self.quote(input_urn)}", + path=("inputDatasetEdges", input_urn), value=input_edge, ) return self @@ -219,7 +176,7 @@ def remove_input_dataset(self, input: Union[str, Urn]) -> "DataJobPatchBuilder": self._add_patch( DataJobInputOutput.ASPECT_NAME, "remove", - path=f"/inputDatasetEdges/{self.quote(str(input))}", + path=("inputDatasetEdges", input), value={}, ) return self @@ -244,7 +201,7 @@ def set_input_datasets(self, inputs: List[Edge]) -> "DataJobPatchBuilder": self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path="/inputDatasetEdges", + path=("inputDatasetEdges",), value=inputs, ) return self @@ -284,7 +241,7 @@ def add_output_dataset( self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - 
path=f"/outputDatasetEdges/{self.quote(output_urn)}", + path=("outputDatasetEdges", output_urn), value=output_edge, ) return self @@ -302,7 +259,7 @@ def remove_output_dataset(self, output: Union[str, Urn]) -> "DataJobPatchBuilder self._add_patch( DataJobInputOutput.ASPECT_NAME, "remove", - path=f"/outputDatasetEdges/{self.quote(str(output))}", + path=("outputDatasetEdges", output), value={}, ) return self @@ -327,7 +284,7 @@ def set_output_datasets(self, outputs: List[Edge]) -> "DataJobPatchBuilder": self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path="/outputDatasetEdges", + path=("outputDatasetEdges",), value=outputs, ) return self @@ -351,7 +308,7 @@ def add_input_dataset_field(self, input: Union[Urn, str]) -> "DataJobPatchBuilde self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path=f"/inputDatasetFields/{self.quote(input_urn)}", + path=("inputDatasetFields", input_urn), value={}, ) return self @@ -372,7 +329,7 @@ def remove_input_dataset_field( self._add_patch( DataJobInputOutput.ASPECT_NAME, "remove", - path=f"/inputDatasetFields/{self.quote(input_urn)}", + path=("inputDatasetFields", input_urn), value={}, ) return self @@ -397,7 +354,7 @@ def set_input_dataset_fields(self, inputs: List[Edge]) -> "DataJobPatchBuilder": self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path="/inputDatasetFields", + path=("inputDatasetFields",), value=inputs, ) return self @@ -423,7 +380,7 @@ def add_output_dataset_field( self._add_patch( DataJobInputOutput.ASPECT_NAME, "add", - path=f"/outputDatasetFields/{self.quote(output_urn)}", + path=("outputDatasetFields", output_urn), value={}, ) return self @@ -444,7 +401,7 @@ def remove_output_dataset_field( self._add_patch( DataJobInputOutput.ASPECT_NAME, "remove", - path=f"/outputDatasetFields/{self.quote(output_urn)}", + path=("outputDatasetFields", output_urn), value={}, ) return self @@ -469,119 +426,7 @@ def set_output_dataset_fields(self, outputs: List[Edge]) -> "DataJobPatchBuilder self._add_patch( 
DataJobInputOutput.ASPECT_NAME, "add", - path="/outputDatasetFields", + path=("outputDatasetFields",), value=outputs, ) return self - - def add_tag(self, tag: Tag) -> "DataJobPatchBuilder": - """ - Adds a tag to the DataJobPatchBuilder. - - Args: - tag: The Tag object representing the tag to be added. - - Returns: - The DataJobPatchBuilder instance. - """ - self._add_patch( - GlobalTags.ASPECT_NAME, "add", path=f"/tags/{tag.tag}", value=tag - ) - return self - - def remove_tag(self, tag: Union[str, Urn]) -> "DataJobPatchBuilder": - """ - Removes a tag from the DataJobPatchBuilder. - - Args: - tag: The tag to remove, specified as a string or Urn object. - - Returns: - The DataJobPatchBuilder instance. - """ - if isinstance(tag, str) and not tag.startswith("urn:li:tag:"): - tag = TagUrn.create_from_id(tag) - self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=f"/tags/{tag}", value={}) - return self - - def add_term(self, term: Term) -> "DataJobPatchBuilder": - """ - Adds a glossary term to the DataJobPatchBuilder. - - Args: - term: The Term object representing the glossary term to be added. - - Returns: - The DataJobPatchBuilder instance. - """ - self._add_patch( - GlossaryTerms.ASPECT_NAME, "add", path=f"/terms/{term.urn}", value=term - ) - return self - - def remove_term(self, term: Union[str, Urn]) -> "DataJobPatchBuilder": - """ - Removes a glossary term from the DataJobPatchBuilder. - - Args: - term: The term to remove, specified as a string or Urn object. - - Returns: - The DataJobPatchBuilder instance. - """ - if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): - term = "urn:li:glossaryTerm:" + term - self._add_patch( - GlossaryTerms.ASPECT_NAME, "remove", path=f"/terms/{term}", value={} - ) - return self - - def set_custom_properties( - self, custom_properties: Dict[str, str] - ) -> "DataJobPatchBuilder": - """ - Sets the custom properties for the DataJobPatchBuilder. 
- - Args: - custom_properties: A dictionary containing the custom properties to be set. - - Returns: - The DataJobPatchBuilder instance. - - Notes: - This method replaces all existing custom properties with the given dictionary. - """ - self._add_patch( - DataJobInfo.ASPECT_NAME, - "add", - path="/customProperties", - value=custom_properties, - ) - return self - - def add_custom_property(self, key: str, value: str) -> "DataJobPatchBuilder": - """ - Adds a custom property to the DataJobPatchBuilder. - - Args: - key: The key of the custom property. - value: The value of the custom property. - - Returns: - The DataJobPatchBuilder instance. - """ - self.custom_properties_patch_helper.add_property(key, value) - return self - - def remove_custom_property(self, key: str) -> "DataJobPatchBuilder": - """ - Removes a custom property from the DataJobPatchBuilder. - - Args: - key: The key of the custom property to remove. - - Returns: - The DataJobPatchBuilder instance. - """ - self.custom_properties_patch_helper.remove_property(key) - return self diff --git a/metadata-ingestion/src/datahub/specific/dataproduct.py b/metadata-ingestion/src/datahub/specific/dataproduct.py index f9830a4b23df05..d38d2d4156315d 100644 --- a/metadata-ingestion/src/datahub/specific/dataproduct.py +++ b/metadata-ingestion/src/datahub/specific/dataproduct.py @@ -1,25 +1,25 @@ -from typing import Dict, List, Optional, Union +from typing import List, Optional, Tuple -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath from datahub.metadata.schema_classes import ( DataProductAssociationClass as DataProductAssociation, DataProductPropertiesClass as DataProductProperties, - GlobalTagsClass as GlobalTags, - GlossaryTermAssociationClass as Term, - GlossaryTermsClass as GlossaryTerms, KafkaAuditHeaderClass, - OwnerClass as Owner, - OwnershipTypeClass, SystemMetadataClass, - TagAssociationClass as Tag, ) -from 
datahub.specific.custom_properties import CustomPropertiesPatchHelper -from datahub.specific.ownership import OwnershipPatchHelper -from datahub.utilities.urns.tag_urn import TagUrn -from datahub.utilities.urns.urn import Urn - - -class DataProductPatchBuilder(MetadataPatchProposal): +from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch +from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch +from datahub.specific.aspect_helpers.tags import HasTagsPatch +from datahub.specific.aspect_helpers.terms import HasTermsPatch + + +class DataProductPatchBuilder( + HasOwnershipPatch, + HasCustomPropertiesPatch, + HasTagsPatch, + HasTermsPatch, + MetadataPatchProposal, +): def __init__( self, urn: str, @@ -31,59 +31,16 @@ def __init__( system_metadata=system_metadata, audit_header=audit_header, ) - self.custom_properties_patch_helper = CustomPropertiesPatchHelper( - self, DataProductProperties.ASPECT_NAME - ) - self.ownership_patch_helper = OwnershipPatchHelper(self) - - def add_owner(self, owner: Owner) -> "DataProductPatchBuilder": - self.ownership_patch_helper.add_owner(owner) - return self - - def remove_owner( - self, owner: str, owner_type: Optional[OwnershipTypeClass] = None - ) -> "DataProductPatchBuilder": - """ - param: owner_type is optional - """ - self.ownership_patch_helper.remove_owner(owner, owner_type) - return self - - def set_owners(self, owners: List[Owner]) -> "DataProductPatchBuilder": - self.ownership_patch_helper.set_owners(owners) - return self - - def add_tag(self, tag: Tag) -> "DataProductPatchBuilder": - self._add_patch( - GlobalTags.ASPECT_NAME, "add", path=f"/tags/{tag.tag}", value=tag - ) - return self - - def remove_tag(self, tag: Union[str, Urn]) -> "DataProductPatchBuilder": - if isinstance(tag, str) and not tag.startswith("urn:li:tag:"): - tag = TagUrn.create_from_id(tag) - self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=f"/tags/{tag}", value={}) - return self - - def add_term(self, 
term: Term) -> "DataProductPatchBuilder": - self._add_patch( - GlossaryTerms.ASPECT_NAME, "add", path=f"/terms/{term.urn}", value=term - ) - return self - def remove_term(self, term: Union[str, Urn]) -> "DataProductPatchBuilder": - if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): - term = "urn:li:glossaryTerm:" + term - self._add_patch( - GlossaryTerms.ASPECT_NAME, "remove", path=f"/terms/{term}", value={} - ) - return self + @classmethod + def _custom_properties_location(cls) -> Tuple[str, PatchPath]: + return DataProductProperties.ASPECT_NAME, ("customProperties",) def set_name(self, name: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, "add", - path="/name", + path=("name",), value=name, ) return self @@ -92,37 +49,18 @@ def set_description(self, description: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, "add", - path="/description", + path=("description",), value=description, ) return self - def set_custom_properties( - self, custom_properties: Dict[str, str] - ) -> "DataProductPatchBuilder": - self._add_patch( - DataProductProperties.ASPECT_NAME, - "add", - path="/customProperties", - value=custom_properties, - ) - return self - - def add_custom_property(self, key: str, value: str) -> "DataProductPatchBuilder": - self.custom_properties_patch_helper.add_property(key, value) - return self - - def remove_custom_property(self, key: str) -> "DataProductPatchBuilder": - self.custom_properties_patch_helper.remove_property(key) - return self - def set_assets( self, assets: List[DataProductAssociation] ) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, "add", - path="/assets", + path=("assets",), value=assets, ) return self @@ -131,7 +69,7 @@ def add_asset(self, asset_urn: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, "add", - path=f"/assets/{self.quote(asset_urn)}", + path=("assets", 
asset_urn), value=DataProductAssociation(destinationUrn=asset_urn), ) return self @@ -140,7 +78,7 @@ def remove_asset(self, asset_urn: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, "remove", - path=f"/assets/{self.quote(asset_urn)}", + path=("assets", asset_urn), value={}, ) return self @@ -149,7 +87,7 @@ def set_external_url(self, external_url: str) -> "DataProductPatchBuilder": self._add_patch( DataProductProperties.ASPECT_NAME, "add", - path="/externalUrl", + path=("externalUrl",), value=external_url, ) return self diff --git a/metadata-ingestion/src/datahub/specific/dataset.py b/metadata-ingestion/src/datahub/specific/dataset.py index b171dc4cc2939f..6332386684bbf0 100644 --- a/metadata-ingestion/src/datahub/specific/dataset.py +++ b/metadata-ingestion/src/datahub/specific/dataset.py @@ -1,27 +1,27 @@ -from typing import Dict, Generic, List, Optional, Tuple, TypeVar, Union +from typing import Generic, List, Optional, Tuple, TypeVar, Union -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal +from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath from datahub.metadata.com.linkedin.pegasus2avro.common import TimeStamp from datahub.metadata.schema_classes import ( DatasetPropertiesClass as DatasetProperties, EditableDatasetPropertiesClass as EditableDatasetProperties, EditableSchemaMetadataClass as EditableSchemaMetadata, FineGrainedLineageClass as FineGrainedLineage, - GlobalTagsClass as GlobalTags, GlossaryTermAssociationClass as Term, - GlossaryTermsClass as GlossaryTerms, KafkaAuditHeaderClass, - OwnerClass as Owner, - OwnershipTypeClass, SchemaMetadataClass, SystemMetadataClass, TagAssociationClass as Tag, UpstreamClass as Upstream, UpstreamLineageClass as UpstreamLineage, ) -from datahub.specific.custom_properties import CustomPropertiesPatchHelper -from datahub.specific.ownership import OwnershipPatchHelper -from datahub.specific.structured_properties import 
StructuredPropertiesPatchHelper +from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch +from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch +from datahub.specific.aspect_helpers.structured_properties import ( + HasStructuredPropertiesPatch, +) +from datahub.specific.aspect_helpers.tags import HasTagsPatch +from datahub.specific.aspect_helpers.terms import HasTermsPatch from datahub.utilities.urns.tag_urn import TagUrn from datahub.utilities.urns.urn import Urn @@ -48,7 +48,7 @@ def add_tag(self, tag: Tag) -> "FieldPatchHelper": self._parent._add_patch( self.aspect_name, "add", - path=f"/{self.aspect_field}/{self.field_path}/globalTags/tags/{tag.tag}", + path=(self.aspect_field, self.field_path, "globalTags", "tags", tag.tag), value=tag, ) return self @@ -59,7 +59,7 @@ def remove_tag(self, tag: Union[str, Urn]) -> "FieldPatchHelper": self._parent._add_patch( self.aspect_name, "remove", - path=f"/{self.aspect_field}/{self.field_path}/globalTags/tags/{tag}", + path=(self.aspect_field, self.field_path, "globalTags", "tags", tag), value={}, ) return self @@ -68,7 +68,13 @@ def add_term(self, term: Term) -> "FieldPatchHelper": self._parent._add_patch( self.aspect_name, "add", - path=f"/{self.aspect_field}/{self.field_path}/glossaryTerms/terms/{term.urn}", + path=( + self.aspect_field, + self.field_path, + "glossaryTerms", + "terms", + term.urn, + ), value=term, ) return self @@ -79,7 +85,7 @@ def remove_term(self, term: Union[str, Urn]) -> "FieldPatchHelper": self._parent._add_patch( self.aspect_name, "remove", - path=f"/{self.aspect_field}/{self.field_path}/glossaryTerms/terms/{term}", + path=(self.aspect_field, self.field_path, "glossaryTerms", "terms", term), value={}, ) return self @@ -88,7 +94,14 @@ def parent(self) -> _Parent: return self._parent -class DatasetPatchBuilder(MetadataPatchProposal): +class DatasetPatchBuilder( + HasOwnershipPatch, + HasCustomPropertiesPatch, + HasStructuredPropertiesPatch, + 
HasTagsPatch, + HasTermsPatch, + MetadataPatchProposal, +): def __init__( self, urn: str, @@ -98,34 +111,16 @@ def __init__( super().__init__( urn, system_metadata=system_metadata, audit_header=audit_header ) - self.custom_properties_patch_helper = CustomPropertiesPatchHelper( - self, DatasetProperties.ASPECT_NAME - ) - self.ownership_patch_helper = OwnershipPatchHelper(self) - self.structured_properties_patch_helper = StructuredPropertiesPatchHelper(self) - - def add_owner(self, owner: Owner) -> "DatasetPatchBuilder": - self.ownership_patch_helper.add_owner(owner) - return self - def remove_owner( - self, owner: str, owner_type: Optional[OwnershipTypeClass] = None - ) -> "DatasetPatchBuilder": - """ - param: owner_type is optional - """ - self.ownership_patch_helper.remove_owner(owner, owner_type) - return self - - def set_owners(self, owners: List[Owner]) -> "DatasetPatchBuilder": - self.ownership_patch_helper.set_owners(owners) - return self + @classmethod + def _custom_properties_location(cls) -> Tuple[str, PatchPath]: + return DatasetProperties.ASPECT_NAME, ("customProperties",) def add_upstream_lineage(self, upstream: Upstream) -> "DatasetPatchBuilder": self._add_patch( UpstreamLineage.ASPECT_NAME, "add", - path=f"/upstreams/{self.quote(upstream.dataset)}", + path=("upstreams", upstream.dataset), value=upstream, ) return self @@ -136,14 +131,14 @@ def remove_upstream_lineage( self._add_patch( UpstreamLineage.ASPECT_NAME, "remove", - path=f"/upstreams/{dataset}", + path=("upstreams", dataset), value={}, ) return self def set_upstream_lineages(self, upstreams: List[Upstream]) -> "DatasetPatchBuilder": self._add_patch( - UpstreamLineage.ASPECT_NAME, "add", path="/upstreams", value=upstreams + UpstreamLineage.ASPECT_NAME, "add", path=("upstreams",), value=upstreams ) return self @@ -159,7 +154,7 @@ def add_fine_grained_upstream_lineage( self._add_patch( UpstreamLineage.ASPECT_NAME, "add", - path=DatasetPatchBuilder.quote_fine_grained_path( + 
path=self._build_fine_grained_path( transform_op, downstream_urn, query_id, upstream_urn ), value={"confidenceScore": fine_grained_lineage.confidenceScore}, @@ -179,12 +174,15 @@ def get_fine_grained_key( return transform_op, downstream_urn, query_id @classmethod - def quote_fine_grained_path( + def _build_fine_grained_path( cls, transform_op: str, downstream_urn: str, query_id: str, upstream_urn: str - ) -> str: + ) -> PatchPath: return ( - f"/fineGrainedLineages/{cls.quote(transform_op)}/" - f"{cls.quote(downstream_urn)}/{cls.quote(query_id)}/{cls.quote(upstream_urn)}" + "fineGrainedLineages", + transform_op, + downstream_urn, + query_id, + upstream_urn, ) def remove_fine_grained_upstream_lineage( @@ -199,7 +197,7 @@ def remove_fine_grained_upstream_lineage( self._add_patch( UpstreamLineage.ASPECT_NAME, "remove", - path=DatasetPatchBuilder.quote_fine_grained_path( + path=self._build_fine_grained_path( transform_op, downstream_urn, query_id, upstream_urn ), value={}, @@ -212,37 +210,11 @@ def set_fine_grained_upstream_lineages( self._add_patch( UpstreamLineage.ASPECT_NAME, "add", - path="/fineGrainedLineages", + path=("fineGrainedLineages",), value=fine_grained_lineages, ) return self - def add_tag(self, tag: Tag) -> "DatasetPatchBuilder": - self._add_patch( - GlobalTags.ASPECT_NAME, "add", path=f"/tags/{tag.tag}", value=tag - ) - return self - - def remove_tag(self, tag: Union[str, Urn]) -> "DatasetPatchBuilder": - if isinstance(tag, str) and not tag.startswith("urn:li:tag:"): - tag = TagUrn.create_from_id(tag) - self._add_patch(GlobalTags.ASPECT_NAME, "remove", path=f"/tags/{tag}", value={}) - return self - - def add_term(self, term: Term) -> "DatasetPatchBuilder": - self._add_patch( - GlossaryTerms.ASPECT_NAME, "add", path=f"/terms/{term.urn}", value=term - ) - return self - - def remove_term(self, term: Union[str, Urn]) -> "DatasetPatchBuilder": - if isinstance(term, str) and not term.startswith("urn:li:glossaryTerm:"): - term = "urn:li:glossaryTerm:" + term - 
self._add_patch( - GlossaryTerms.ASPECT_NAME, "remove", path=f"/terms/{term}", value={} - ) - return self - def for_field( self, field_path: str, editable: bool = True ) -> FieldPatchHelper["DatasetPatchBuilder"]: @@ -269,38 +241,11 @@ def set_description( else EditableDatasetProperties.ASPECT_NAME ), "add", - path="/description", + path=("description",), value=description, ) return self - def set_custom_properties( - self, custom_properties: Dict[str, str] - ) -> "DatasetPatchBuilder": - self._add_patch( - DatasetProperties.ASPECT_NAME, - "add", - path="/customProperties", - value=custom_properties, - ) - return self - - def add_custom_property(self, key: str, value: str) -> "DatasetPatchBuilder": - self.custom_properties_patch_helper.add_property(key, value) - return self - - def add_custom_properties( - self, custom_properties: Optional[Dict[str, str]] = None - ) -> "DatasetPatchBuilder": - if custom_properties is not None: - for key, value in custom_properties.items(): - self.custom_properties_patch_helper.add_property(key, value) - return self - - def remove_custom_property(self, key: str) -> "DatasetPatchBuilder": - self.custom_properties_patch_helper.remove_property(key) - return self - def set_display_name( self, display_name: Optional[str] = None ) -> "DatasetPatchBuilder": @@ -308,7 +253,7 @@ def set_display_name( self._add_patch( DatasetProperties.ASPECT_NAME, "add", - path="/name", + path=("name",), value=display_name, ) return self @@ -320,7 +265,7 @@ def set_qualified_name( self._add_patch( DatasetProperties.ASPECT_NAME, "add", - path="/qualifiedName", + path=("qualifiedName",), value=qualified_name, ) return self @@ -332,7 +277,7 @@ def set_created( self._add_patch( DatasetProperties.ASPECT_NAME, "add", - path="/created", + path=("created",), value=timestamp, ) return self @@ -344,37 +289,7 @@ def set_last_modified( self._add_patch( DatasetProperties.ASPECT_NAME, "add", - path="/lastModified", + path=("lastModified",), value=timestamp, ) return self 
- - def set_structured_property( - self, property_name: str, value: Union[str, float, List[Union[str, float]]] - ) -> "DatasetPatchBuilder": - """ - This is a helper method to set a structured property. - @param property_name: the name of the property (either bare or urn form) - @param value: the value of the property (for multi-valued properties, this can be a list) - """ - self.structured_properties_patch_helper.set_property(property_name, value) - return self - - def add_structured_property( - self, property_name: str, value: Union[str, float] - ) -> "DatasetPatchBuilder": - """ - This is a helper method to add a structured property. - @param property_name: the name of the property (either bare or urn form) - @param value: the value of the property (for multi-valued properties, this value will be appended to the list) - """ - self.structured_properties_patch_helper.add_property(property_name, value) - return self - - def remove_structured_property(self, property_name: str) -> "DatasetPatchBuilder": - """ - This is a helper method to remove a structured property. 
- @param property_name: the name of the property (either bare or urn form) - """ - self.structured_properties_patch_helper.remove_property(property_name) - return self diff --git a/metadata-ingestion/src/datahub/specific/form.py b/metadata-ingestion/src/datahub/specific/form.py index 78182c202f7162..281b3cac99b2c1 100644 --- a/metadata-ingestion/src/datahub/specific/form.py +++ b/metadata-ingestion/src/datahub/specific/form.py @@ -5,15 +5,13 @@ FormInfoClass as FormInfo, FormPromptClass, KafkaAuditHeaderClass, - OwnerClass as Owner, - OwnershipTypeClass, SystemMetadataClass, ) -from datahub.specific.ownership import OwnershipPatchHelper +from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch from datahub.utilities.urns.urn import Urn -class FormPatchBuilder(MetadataPatchProposal): +class FormPatchBuilder(HasOwnershipPatch, MetadataPatchProposal): def __init__( self, urn: str, @@ -23,31 +21,13 @@ def __init__( super().__init__( urn, system_metadata=system_metadata, audit_header=audit_header ) - self.ownership_patch_helper = OwnershipPatchHelper(self) - - def add_owner(self, owner: Owner) -> "FormPatchBuilder": - self.ownership_patch_helper.add_owner(owner) - return self - - def remove_owner( - self, owner: str, owner_type: Optional[OwnershipTypeClass] = None - ) -> "FormPatchBuilder": - """ - param: owner_type is optional - """ - self.ownership_patch_helper.remove_owner(owner, owner_type) - return self - - def set_owners(self, owners: List[Owner]) -> "FormPatchBuilder": - self.ownership_patch_helper.set_owners(owners) - return self def set_name(self, name: Optional[str] = None) -> "FormPatchBuilder": if name is not None: self._add_patch( FormInfo.ASPECT_NAME, "add", - path="/name", + path=("name",), value=name, ) return self @@ -57,7 +37,7 @@ def set_description(self, description: Optional[str] = None) -> "FormPatchBuilde self._add_patch( FormInfo.ASPECT_NAME, "add", - path="/description", + path=("description",), value=description, ) return self @@ 
-67,7 +47,7 @@ def set_type(self, type: Optional[str] = None) -> "FormPatchBuilder": self._add_patch( FormInfo.ASPECT_NAME, "add", - path="/type", + path=("type",), value=type, ) return self @@ -76,7 +56,7 @@ def add_prompt(self, prompt: FormPromptClass) -> "FormPatchBuilder": self._add_patch( FormInfo.ASPECT_NAME, "add", - path=f"/prompts/{self.quote(prompt.id)}", + path=("prompts", prompt.id), value=prompt, ) return self @@ -90,7 +70,7 @@ def remove_prompt(self, prompt_id: str) -> "FormPatchBuilder": self._add_patch( FormInfo.ASPECT_NAME, "remove", - path=f"/prompts/{self.quote(prompt_id)}", + path=("prompts", prompt_id), value=prompt_id, ) return self @@ -104,7 +84,7 @@ def set_ownership_form(self, is_ownership: bool) -> "FormPatchBuilder": self._add_patch( FormInfo.ASPECT_NAME, "add", - path="/actors/owners", + path=("actors", "owners"), value=is_ownership, ) return self @@ -113,7 +93,7 @@ def add_assigned_user(self, user_urn: Union[str, Urn]) -> "FormPatchBuilder": self._add_patch( FormInfo.ASPECT_NAME, "add", - path=f"/actors/users/{self.quote(str(user_urn))}", + path=("actors", "users", user_urn), value=user_urn, ) return self @@ -122,7 +102,7 @@ def remove_assigned_user(self, user_urn: Union[str, Urn]) -> "FormPatchBuilder": self._add_patch( FormInfo.ASPECT_NAME, "remove", - path=f"/actors/users/{self.quote(str(user_urn))}", + path=("actors", "users", user_urn), value=user_urn, ) return self @@ -131,7 +111,7 @@ def add_assigned_group(self, group_urn: Union[str, Urn]) -> "FormPatchBuilder": self._add_patch( FormInfo.ASPECT_NAME, "add", - path=f"/actors/groups/{self.quote(str(group_urn))}", + path=("actors", "groups", group_urn), value=group_urn, ) return self @@ -140,7 +120,7 @@ def remove_assigned_group(self, group_urn: Union[str, Urn]) -> "FormPatchBuilder self._add_patch( FormInfo.ASPECT_NAME, "remove", - path=f"/actors/groups/{self.quote(str(group_urn))}", + path=("actors", "groups", group_urn), value=group_urn, ) return self diff --git 
a/metadata-ingestion/src/datahub/specific/ownership.py b/metadata-ingestion/src/datahub/specific/ownership.py deleted file mode 100644 index b377a8814f38a0..00000000000000 --- a/metadata-ingestion/src/datahub/specific/ownership.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import Generic, List, Optional, TypeVar - -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal -from datahub.metadata.schema_classes import ( - OwnerClass, - OwnershipClass, - OwnershipTypeClass, -) - -_Parent = TypeVar("_Parent", bound=MetadataPatchProposal) - - -class OwnershipPatchHelper(Generic[_Parent]): - def __init__(self, parent: _Parent) -> None: - self._parent = parent - self.aspect_field = OwnershipClass.ASPECT_NAME - - def parent(self) -> _Parent: - return self._parent - - def add_owner(self, owner: OwnerClass) -> "OwnershipPatchHelper": - self._parent._add_patch( - OwnershipClass.ASPECT_NAME, - "add", - path=f"/owners/{owner.owner}/{owner.type}", - value=owner, - ) - return self - - def remove_owner( - self, owner: str, owner_type: Optional[OwnershipTypeClass] = None - ) -> "OwnershipPatchHelper": - """ - param: owner_type is optional - """ - self._parent._add_patch( - OwnershipClass.ASPECT_NAME, - "remove", - path=f"/owners/{owner}" + (f"/{owner_type}" if owner_type else ""), - value=owner, - ) - return self - - def set_owners(self, owners: List[OwnerClass]) -> "OwnershipPatchHelper": - self._parent._add_patch( - OwnershipClass.ASPECT_NAME, "add", path="/owners", value=owners - ) - return self diff --git a/metadata-ingestion/src/datahub/specific/structured_properties.py b/metadata-ingestion/src/datahub/specific/structured_properties.py deleted file mode 100644 index 17d896249c4746..00000000000000 --- a/metadata-ingestion/src/datahub/specific/structured_properties.py +++ /dev/null @@ -1,53 +0,0 @@ -from typing import Generic, List, TypeVar, Union - -from datahub.emitter.mcp_patch_builder import MetadataPatchProposal -from datahub.metadata.schema_classes import 
StructuredPropertyValueAssignmentClass -from datahub.utilities.urns.structured_properties_urn import ( - make_structured_property_urn, -) - -_Parent = TypeVar("_Parent", bound=MetadataPatchProposal) - - -class StructuredPropertiesPatchHelper(Generic[_Parent]): - def __init__( - self, - parent: _Parent, - aspect_name: str = "structuredProperties", - ) -> None: - self.aspect_name = aspect_name - self._parent = parent - self.aspect_field = "properties" - - def parent(self) -> _Parent: - return self._parent - - def set_property( - self, key: str, value: Union[str, float, List[Union[str, float]]] - ) -> "StructuredPropertiesPatchHelper": - self.remove_property(key) - self.add_property(key, value) - return self - - def remove_property(self, key: str) -> "StructuredPropertiesPatchHelper": - self._parent._add_patch( - self.aspect_name, - "remove", - path=(self.aspect_field, make_structured_property_urn(key)), - value={}, - ) - return self - - def add_property( - self, key: str, value: Union[str, float, List[Union[str, float]]] - ) -> "StructuredPropertiesPatchHelper": - self._parent._add_patch( - self.aspect_name, - "add", - path=(self.aspect_field, make_structured_property_urn(key)), - value=StructuredPropertyValueAssignmentClass( - propertyUrn=make_structured_property_urn(key), - values=value if isinstance(value, list) else [value], - ), - ) - return self diff --git a/metadata-ingestion/src/datahub/specific/structured_property.py b/metadata-ingestion/src/datahub/specific/structured_property.py index 50f1f079c2aa72..bcae174ed3c4f4 100644 --- a/metadata-ingestion/src/datahub/specific/structured_property.py +++ b/metadata-ingestion/src/datahub/specific/structured_property.py @@ -29,7 +29,7 @@ def set_qualified_name( self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path="/qualifiedName", + path=("qualifiedName",), value=qualified_name, ) return self @@ -41,7 +41,7 @@ def set_display_name( self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - 
path="/displayName", + path=("displayName",), value=display_name, ) return self @@ -53,7 +53,7 @@ def set_value_type( self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path="/valueType", + path=("valueType",), value=value_type, ) return self @@ -66,7 +66,7 @@ def set_type_qualifier( self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path="/typeQualifier", + path=("typeQualifier",), value=type_qualifier, ) return self @@ -78,7 +78,7 @@ def add_allowed_value( self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path=f"/allowedValues/{str(allowed_value.get('value'))}", + path=("allowedValues", str(allowed_value.get("value"))), value=allowed_value, ) return self @@ -87,7 +87,7 @@ def set_cardinality(self, cardinality: str) -> "StructuredPropertyPatchBuilder": self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path="/cardinality", + path=("cardinality",), value=cardinality, ) return self @@ -98,7 +98,7 @@ def add_entity_type( self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path=f"/entityTypes/{self.quote(str(entity_type))}", + path=("entityTypes", str(entity_type)), value=entity_type, ) return self @@ -110,7 +110,7 @@ def set_description( self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path="/description", + path=("description",), value=description, ) return self @@ -119,7 +119,7 @@ def set_immutable(self, immutable: bool) -> "StructuredPropertyPatchBuilder": self._add_patch( StructuredPropertyDefinition.ASPECT_NAME, "add", - path="/immutable", + path=("immutable",), value=immutable, ) return self From 4a898e15945eff826e5e4cf3cce86bb237c8e5ea Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 2 Jan 2025 17:25:23 -0600 Subject: [PATCH 015/249] feat(auth): user.props authentication (#12259) --- datahub-frontend/app/auth/AuthModule.java | 11 +++++-- .../app/config/ConfigurationProvider.java | 4 +++ 
.../upgrade/config/SystemUpdateConfig.java | 3 +- docs/authentication/guides/add-users.md | 30 +++++++++++++++++++ docs/how/updating-datahub.md | 1 + .../SampleDataFixtureConfiguration.java | 4 +-- .../SearchLineageFixtureConfiguration.java | 2 +- .../MCLSpringCommonTestConfiguration.java | 3 +- .../metadata/context/ActorContext.java | 16 +++++++--- .../metadata/context/OperationContext.java | 25 +++++++++++----- .../context/TestOperationContexts.java | 3 +- .../metadata/context/ActorContextTest.java | 25 ++++++++-------- .../context/OperationContextTest.java | 5 ++-- .../AuthenticationConfiguration.java | 3 ++ .../authorization/DataHubAuthorizerTest.java | 3 +- .../src/main/resources/application.yaml | 3 ++ .../SystemOperationContextFactory.java | 6 ++-- .../IngestDataPlatformInstancesStepTest.java | 2 +- 18 files changed, 112 insertions(+), 37 deletions(-) diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index b95515684f01fc..3de0170fc70389 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -181,7 +181,12 @@ protected OperationContext provideOperationContext( final Authentication systemAuthentication, final ConfigurationProvider configurationProvider) { ActorContext systemActorContext = - ActorContext.builder().systemAuth(true).authentication(systemAuthentication).build(); + ActorContext.builder() + .systemAuth(true) + .authentication(systemAuthentication) + .enforceExistenceEnabled( + configurationProvider.getAuthentication().isEnforceExistenceEnabled()) + .build(); OperationContextConfig systemConfig = OperationContextConfig.builder() .viewAuthorizationConfiguration(configurationProvider.getAuthorization().getView()) @@ -197,7 +202,9 @@ protected OperationContext provideOperationContext( .entityRegistryContext(EntityRegistryContext.builder().build(EmptyEntityRegistry.EMPTY)) .validationContext(ValidationContext.builder().alternateValidation(false).build()) 
.retrieverContext(RetrieverContext.EMPTY) - .build(systemAuthentication); + .build( + systemAuthentication, + configurationProvider.getAuthentication().isEnforceExistenceEnabled()); } @Provides diff --git a/datahub-frontend/app/config/ConfigurationProvider.java b/datahub-frontend/app/config/ConfigurationProvider.java index 97e916769a6c45..9bc28be1bfc89f 100644 --- a/datahub-frontend/app/config/ConfigurationProvider.java +++ b/datahub-frontend/app/config/ConfigurationProvider.java @@ -1,5 +1,6 @@ package config; +import com.datahub.authentication.AuthenticationConfiguration; import com.datahub.authorization.AuthorizationConfiguration; import com.linkedin.metadata.config.VisualConfiguration; import com.linkedin.metadata.config.cache.CacheConfiguration; @@ -30,4 +31,7 @@ public class ConfigurationProvider { /** Configuration for authorization */ private AuthorizationConfiguration authorization; + + /** Configuration for authentication */ + private AuthenticationConfiguration authentication; } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index fdd84da6044f73..d0493019a40af2 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -194,7 +194,8 @@ protected OperationContext javaSystemOperationContext( ValidationContext.builder() .alternateValidation( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) - .build()); + .build(), + true); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); systemGraphRetriever.setSystemOperationContext(systemOperationContext); diff --git a/docs/authentication/guides/add-users.md b/docs/authentication/guides/add-users.md index 30da5c9f229f94..dbd44b63086783 100644 --- a/docs/authentication/guides/add-users.md 
+++ b/docs/authentication/guides/add-users.md @@ -1,3 +1,6 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Onboarding Users to DataHub New user accounts can be provisioned on DataHub in 3 ways: @@ -94,6 +97,11 @@ using this mechanism. It is highly recommended that admins change or remove the ## Adding new users using a user.props file +:::NOTE +Adding users via the `user.props` will require disabling existence checks on GMS using the `METADATA_SERVICE_AUTH_ENFORCE_EXISTENCE_ENABLED=false` environment variable or using the API to enable the user prior to login. +The directions below demonstrate using the API to enable the user. +::: + To define a set of username / password combinations that should be allowed to log in to DataHub (in addition to the root 'datahub' user), create a new file called `user.props` at the file path `${HOME}/.datahub/plugins/frontend/auth/user.props` within the `datahub-frontend-react` container or pod. @@ -107,6 +115,28 @@ janesmith:janespassword johndoe:johnspassword ``` +In order to enable the user access with the credential defined in `user.props`, set the `status` aspect on the user with an Admin user. This can be done using an API call or via the [OpenAPI UI interface](/docs/api/openapi/openapi-usage-guide.md). + + + + +Example enabling login for the `janesmith` user from the example above. Make sure to update the example with your access token. + +```shell +curl -X 'POST' \ + 'http://localhost:9002/openapi/v3/entity/corpuser/urn%3Ali%3Acorpuser%3Ajanesmith/status?async=false&systemMetadata=false&createIfEntityNotExists=false&createIfNotExists=true' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer ' \ + -d '{ + "value": { + "removed": false + } +}' +``` + + + Once you've saved the file, simply start the DataHub containers & navigate to `http://localhost:9002/login` to verify that your new credentials work. 
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 19261da23bcf96..07577079d66d12 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -66,6 +66,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe changed to NOT fill out `created` and `lastModified` auditstamps by default for input and output dataset edges. This should not have any user-observable impact (time-based lineage viz will still continue working based on observed time), but could break assumptions previously being made by clients. +- #12158 - Users provisioned with `user.props` will need to be enabled before login in order to be granted access to DataHub. ### Potential Downtime diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 5e387d7d88292a..968f0dd4dd61ef 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -137,7 +137,7 @@ protected OperationContext sampleDataOperationContext( return testOpContext.toBuilder() .searchContext(SearchContext.builder().indexConvention(indexConvention).build()) - .build(testOpContext.getSessionAuthentication()); + .build(testOpContext.getSessionAuthentication(), true); } @Bean(name = "longTailOperationContext") @@ -148,7 +148,7 @@ protected OperationContext longTailOperationContext( return testOpContext.toBuilder() .searchContext(SearchContext.builder().indexConvention(indexConvention).build()) - .build(testOpContext.getSessionAuthentication()); + .build(testOpContext.getSessionAuthentication(), true); } protected EntityIndexBuilders entityIndexBuildersHelper(OperationContext opContext) { diff --git 
a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index b7b698c73ddac3..26443e019829bf 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -162,7 +162,7 @@ protected OperationContext searchLineageOperationContext( return testOpContext.toBuilder() .searchContext(SearchContext.builder().indexConvention(indexConvention).build()) - .build(testOpContext.getSessionAuthentication()); + .build(testOpContext.getSessionAuthentication(), true); } @Bean(name = "searchLineageESIndexBuilder") diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index f16c9dbd82e749..c92749385145de 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -95,7 +95,8 @@ public OperationContext operationContext( mock(ServicesRegistryContext.class), indexConvention, TestOperationContexts.emptyActiveUsersRetrieverContext(() -> entityRegistry), - mock(ValidationContext.class)); + mock(ValidationContext.class), + true); } @MockBean SpringStandardPluginConfiguration springStandardPluginConfiguration; diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java index c08b7fad4dee32..11e38dfb179e0c 100644 --- 
a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ActorContext.java @@ -29,23 +29,31 @@ @EqualsAndHashCode public class ActorContext implements ContextInterface { - public static ActorContext asSystem(Authentication systemAuthentication) { - return ActorContext.builder().systemAuth(true).authentication(systemAuthentication).build(); + public static ActorContext asSystem( + Authentication systemAuthentication, boolean enforceExistenceEnabled) { + return ActorContext.builder() + .systemAuth(true) + .authentication(systemAuthentication) + .enforceExistenceEnabled(enforceExistenceEnabled) + .build(); } public static ActorContext asSessionRestricted( Authentication authentication, Set dataHubPolicySet, - Collection groupMembership) { + Collection groupMembership, + boolean enforceExistenceEnabled) { return ActorContext.builder() .systemAuth(false) .authentication(authentication) .policyInfoSet(dataHubPolicySet) .groupMembership(groupMembership) + .enforceExistenceEnabled(enforceExistenceEnabled) .build(); } private final Authentication authentication; + private final boolean enforceExistenceEnabled; @EqualsAndHashCode.Exclude @Builder.Default private final Set policyInfoSet = Collections.emptySet(); @@ -79,7 +87,7 @@ public boolean isActive(AspectRetriever aspectRetriever) { Map aspectMap = urnAspectMap.getOrDefault(selfUrn, Map.of()); - if (!aspectMap.containsKey(CORP_USER_KEY_ASPECT_NAME)) { + if (enforceExistenceEnabled && !aspectMap.containsKey(CORP_USER_KEY_ASPECT_NAME)) { // user is hard deleted return false; } diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 9158129235b39e..30255f7ebcac36 100644 --- 
a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -152,7 +152,8 @@ public static OperationContext asSystem( @Nullable ServicesRegistryContext servicesRegistryContext, @Nullable IndexConvention indexConvention, @Nullable RetrieverContext retrieverContext, - @Nonnull ValidationContext validationContext) { + @Nonnull ValidationContext validationContext, + boolean enforceExistenceEnabled) { return asSystem( config, systemAuthentication, @@ -161,7 +162,8 @@ public static OperationContext asSystem( indexConvention, retrieverContext, validationContext, - ObjectMapperContext.DEFAULT); + ObjectMapperContext.DEFAULT, + enforceExistenceEnabled); } public static OperationContext asSystem( @@ -172,10 +174,15 @@ public static OperationContext asSystem( @Nullable IndexConvention indexConvention, @Nullable RetrieverContext retrieverContext, @Nonnull ValidationContext validationContext, - @Nonnull ObjectMapperContext objectMapperContext) { + @Nonnull ObjectMapperContext objectMapperContext, + boolean enforceExistenceEnabled) { ActorContext systemActorContext = - ActorContext.builder().systemAuth(true).authentication(systemAuthentication).build(); + ActorContext.builder() + .systemAuth(true) + .authentication(systemAuthentication) + .enforceExistenceEnabled(enforceExistenceEnabled) + .build(); OperationContextConfig systemConfig = config.toBuilder().allowSystemAuthentication(true).build(); SearchContext systemSearchContext = @@ -457,13 +464,16 @@ public int hashCode() { public static class OperationContextBuilder { @Nonnull - public OperationContext build(@Nonnull Authentication sessionAuthentication) { - return build(sessionAuthentication, false); + public OperationContext build( + @Nonnull Authentication sessionAuthentication, boolean enforceExistenceEnabled) { + return build(sessionAuthentication, false, 
enforceExistenceEnabled); } @Nonnull public OperationContext build( - @Nonnull Authentication sessionAuthentication, boolean skipCache) { + @Nonnull Authentication sessionAuthentication, + boolean skipCache, + boolean enforceExistenceEnabled) { final Urn actorUrn = UrnUtils.getUrn(sessionAuthentication.getActor().toUrnStr()); final ActorContext sessionActor = ActorContext.builder() @@ -476,6 +486,7 @@ public OperationContext build( .equals(sessionAuthentication.getActor())) .policyInfoSet(this.authorizationContext.getAuthorizer().getActorPolicies(actorUrn)) .groupMembership(this.authorizationContext.getAuthorizer().getActorGroups(actorUrn)) + .enforceExistenceEnabled(enforceExistenceEnabled) .build(); return build(sessionActor, skipCache); } diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 4abfbb196f067c..92d62d42295b92 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -260,7 +260,8 @@ public static OperationContext systemContext( servicesRegistryContext, indexConvention, retrieverContext, - validationContext); + validationContext, + true); if (postConstruct != null) { postConstruct.accept(operationContext); diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/ActorContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/ActorContextTest.java index 15fe2bc277b9b9..de6f71408e2589 100644 --- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/ActorContextTest.java +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/ActorContextTest.java @@ -87,42 +87,43 @@ public void 
actorContextId() { Authentication userAuth = new Authentication(new Actor(ActorType.USER, "USER"), ""); assertEquals( - ActorContext.asSessionRestricted(userAuth, Set.of(), Set.of()).getCacheKeyComponent(), - ActorContext.asSessionRestricted(userAuth, Set.of(), Set.of()).getCacheKeyComponent(), + ActorContext.asSessionRestricted(userAuth, Set.of(), Set.of(), true).getCacheKeyComponent(), + ActorContext.asSessionRestricted(userAuth, Set.of(), Set.of(), true).getCacheKeyComponent(), "Expected equality across instances"); assertEquals( - ActorContext.asSessionRestricted(userAuth, Set.of(), Set.of()).getCacheKeyComponent(), + ActorContext.asSessionRestricted(userAuth, Set.of(), Set.of(), true).getCacheKeyComponent(), ActorContext.asSessionRestricted( - userAuth, Set.of(), Set.of(UrnUtils.getUrn("urn:li:corpGroup:group1"))) + userAuth, Set.of(), Set.of(UrnUtils.getUrn("urn:li:corpGroup:group1")), true) .getCacheKeyComponent(), "Expected no impact to cache context from group membership"); assertEquals( - ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_ABC, POLICY_D), Set.of()) + ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_ABC, POLICY_D), Set.of(), true) .getCacheKeyComponent(), - ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_ABC, POLICY_D), Set.of()) + ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_ABC, POLICY_D), Set.of(), true) .getCacheKeyComponent(), "Expected equality when non-ownership policies are identical"); assertNotEquals( - ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_ABC_RESOURCE, POLICY_D), Set.of()) + ActorContext.asSessionRestricted( + userAuth, Set.of(POLICY_ABC_RESOURCE, POLICY_D), Set.of(), true) .getCacheKeyComponent(), - ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_ABC, POLICY_D), Set.of()) + ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_ABC, POLICY_D), Set.of(), true) .getCacheKeyComponent(), "Expected differences with non-identical resource policy"); assertNotEquals( - 
ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D_OWNER), Set.of()) + ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D_OWNER), Set.of(), true) .getCacheKeyComponent(), - ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D), Set.of()) + ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D), Set.of(), true) .getCacheKeyComponent(), "Expected differences with ownership policy"); assertNotEquals( - ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D_OWNER_TYPE), Set.of()) + ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D_OWNER_TYPE), Set.of(), true) .getCacheKeyComponent(), - ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D), Set.of()) + ActorContext.asSessionRestricted(userAuth, Set.of(POLICY_D), Set.of(), true) .getCacheKeyComponent(), "Expected differences with ownership type policy"); } diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java index f77b244d8f2d86..a2575c1c562209 100644 --- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java @@ -27,7 +27,8 @@ public void testSystemPrivilegeEscalation() { mock(ServicesRegistryContext.class), null, TestOperationContexts.emptyActiveUsersRetrieverContext(null), - mock(ValidationContext.class)); + mock(ValidationContext.class), + true); OperationContext opContext = systemOpContext.asSession(RequestContext.TEST, Authorizer.EMPTY, userAuth); @@ -51,7 +52,7 @@ public void testSystemPrivilegeEscalation() { systemOpContext.getOperationContextConfig().toBuilder() .allowSystemAuthentication(false) .build()) - .build(userAuth); + .build(userAuth, true); assertEquals( opContextNoSystem.getAuthentication(), diff --git 
a/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java b/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java index 442263bbd6b43e..81cc5e60552a77 100644 --- a/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java +++ b/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java @@ -9,6 +9,9 @@ public class AuthenticationConfiguration { /** Whether authentication is enabled */ private boolean enabled; + /** Whether user existence is enforced */ + private boolean enforceExistenceEnabled; + /** * List of configurations for {@link com.datahub.plugins.auth.authentication.Authenticator}s to be * registered diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index 4437682bfeb0a1..ce9c636be16ac7 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -320,7 +320,8 @@ public void setupTest() throws Exception { mock(ServicesRegistryContext.class), mock(IndexConvention.class), mock(RetrieverContext.class), - mock(ValidationContext.class)); + mock(ValidationContext.class), + true); _dataHubAuthorizer = new DataHubAuthorizer( diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index f6fa4a37fdadbc..c029cb4648d012 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -6,6 +6,9 @@ authentication: # Enable if you want all requests to the Metadata Service to be authenticated. 
enabled: ${METADATA_SERVICE_AUTH_ENABLED:true} + # Disable if you want to skip validation of deleted user's tokens + enforceExistenceEnabled: ${METADATA_SERVICE_AUTH_ENFORCE_EXISTENCE_ENABLED:true} + # Required if enabled is true! A configurable chain of Authenticators authenticators: # Required for authenticating requests with DataHub-issued Access Tokens - best not to remove. diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java index 3e2823591e168c..78107cc0ecc900 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java @@ -79,7 +79,8 @@ protected OperationContext javaSystemOperationContext( ValidationContext.builder() .alternateValidation( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) - .build()); + .build(), + configurationProvider.getAuthentication().isEnforceExistenceEnabled()); entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); @@ -134,7 +135,8 @@ protected OperationContext restliSystemOperationContext( ValidationContext.builder() .alternateValidation( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) - .build()); + .build(), + configurationProvider.getAuthentication().isEnforceExistenceEnabled()); entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); systemGraphRetriever.setSystemOperationContext(systemOperationContext); diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java 
b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java index cc21819cf4ab58..b47c779f768a9b 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java @@ -87,7 +87,7 @@ public void testExecuteChecksKeySpecForAllUrns() throws Exception { mockOpContext = mockOpContext.toBuilder() .entityRegistryContext(spyEntityRegistryContext) - .build(mockOpContext.getSessionAuthentication()); + .build(mockOpContext.getSessionAuthentication(), true); mockDBWithWorkToDo(migrationsDao, countOfCorpUserEntities, countOfChartEntities); From 539f521388a9ad86ce9565a819b31d0da8f8d5b2 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Thu, 2 Jan 2025 15:56:54 -0800 Subject: [PATCH 016/249] docs(delete): Document un-soft-delete commands in delete-metadata.md (#12251) --- docs/how/delete-metadata.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/how/delete-metadata.md b/docs/how/delete-metadata.md index e36940bf398356..1b1a9952f78983 100644 --- a/docs/how/delete-metadata.md +++ b/docs/how/delete-metadata.md @@ -97,6 +97,21 @@ The start and end time fields filter on the `timestampMillis` field of the times - `ddddddddd` (e.g. `1684384045`): a unix timestamp - `min`, `max`, `now`: special keywords +#### Undo-ing soft deletion of entities + +You can restore soft-deleted entities using the `undo-by-filter` command. This reverts the effect of a soft delete. 
+ +```shell +# Restore (un-soft-delete) a single soft-deleted entity +datahub delete undo-by-filter --urn "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)" + +# Restore all soft-deleted entities from a specific platform +datahub delete undo-by-filter --platform snowflake + +# You can adjust the batch size (default 3000, max 10000) for better performance +datahub delete undo-by-filter --platform snowflake --batch-size 5000 +``` + ## Delete CLI Examples :::note From 1190dd95b2cedadbd2a5e8295d47497333b2288b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Fri, 3 Jan 2025 09:15:53 +0100 Subject: [PATCH 017/249] fix(tableau): fixes some aspects being emitted multiple times (#12258) --- .../ingestion/source/tableau/tableau.py | 3 ++ .../source/tableau/tableau_common.py | 18 ++++++++ .../tests/unit/test_tableau_source.py | 46 ++++++++++++++++++- 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index d47e10c9eb5c62..008216fea89508 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -109,6 +109,7 @@ make_filter, make_fine_grained_lineage_class, make_upstream_class, + optimize_query_filter, published_datasource_graphql_query, query_metadata_cursor_based_pagination, sheet_graphql_query, @@ -1363,6 +1364,8 @@ def get_connection_objects( query_filter: dict = {}, page_size_override: Optional[int] = None, ) -> Iterable[dict]: + query_filter = optimize_query_filter(query_filter) + # Calls the get_connection_object_page function to get the objects, # and automatically handles pagination. 
page_size = page_size_override or self.config.page_size diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py index 61b56c4bee5bda..8f9d81eb9a18c1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py @@ -1,3 +1,4 @@ +import copy import html import json import logging @@ -35,6 +36,7 @@ UpstreamClass, ) from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult +from datahub.utilities.ordered_set import OrderedSet logger = logging.getLogger(__name__) @@ -1000,3 +1002,19 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]: ] return filter_pages + + +def optimize_query_filter(query_filter: dict) -> dict: + """ + Duplicates in the filter cause duplicates in the result, + leading to entities/aspects being emitted multiple times unnecessarily + """ + optimized_query = copy.deepcopy(query_filter) + + if query_filter.get(c.ID_WITH_IN): + optimized_query[c.ID_WITH_IN] = list(OrderedSet(query_filter[c.ID_WITH_IN])) + if query_filter.get(c.PROJECT_NAME_WITH_IN): + optimized_query[c.PROJECT_NAME_WITH_IN] = list( + OrderedSet(query_filter[c.PROJECT_NAME_WITH_IN]) + ) + return optimized_query diff --git a/metadata-ingestion/tests/unit/test_tableau_source.py b/metadata-ingestion/tests/unit/test_tableau_source.py index 44e59decaecbd7..227519fdb464a8 100644 --- a/metadata-ingestion/tests/unit/test_tableau_source.py +++ b/metadata-ingestion/tests/unit/test_tableau_source.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any, Dict, List import pytest @@ -7,6 +7,7 @@ from datahub.ingestion.source.tableau.tableau_common import ( get_filter_pages, make_filter, + optimize_query_filter, tableau_field_to_schema_field, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField @@ -203,3 
+204,46 @@ def test_get_filter_pages_id_filter_splits_into_multiple_filters(): {c.ID_WITH_IN: filter_dict[c.ID_WITH_IN][i : i + page_size]} for i in range(0, num_ids, page_size) ] + + +def test_optimize_query_filter_removes_duplicates(): + query_filter = { + c.ID_WITH_IN: ["id1", "id2", "id1"], + c.PROJECT_NAME_WITH_IN: ["project1", "project2", "project1"], + } + result = optimize_query_filter(query_filter) + assert len(result) == 2 + assert result[c.ID_WITH_IN] == ["id1", "id2"] + assert result[c.PROJECT_NAME_WITH_IN] == ["project1", "project2"] + + +def test_optimize_query_filter_handles_empty_lists(): + query_filter: Dict[str, List[str]] = {c.ID_WITH_IN: [], c.PROJECT_NAME_WITH_IN: []} + result = optimize_query_filter(query_filter) + assert len(result) == 2 + assert result[c.ID_WITH_IN] == [] + assert result[c.PROJECT_NAME_WITH_IN] == [] + + +def test_optimize_query_filter_handles_missing_keys(): + query_filter: Dict[str, List[str]] = {} + result = optimize_query_filter(query_filter) + assert result == {} + + +def test_optimize_query_filter_handles_other_keys(): + query_filter = {"any_other_key": ["id1", "id2", "id1"]} + result = optimize_query_filter(query_filter) + assert len(result) == 1 + assert result["any_other_key"] == ["id1", "id2", "id1"] + + +def test_optimize_query_filter_handles_no_duplicates(): + query_filter = { + c.ID_WITH_IN: ["id1", "id2"], + c.PROJECT_NAME_WITH_IN: ["project1", "project2"], + } + result = optimize_query_filter(query_filter) + assert len(result) == 2 + assert result[c.ID_WITH_IN] == ["id1", "id2"] + assert result[c.PROJECT_NAME_WITH_IN] == ["project1", "project2"] From b76db335a2f33775a1d0d48c56018a9765ddf458 Mon Sep 17 00:00:00 2001 From: skrydal Date: Fri, 3 Jan 2025 13:33:45 +0100 Subject: [PATCH 018/249] fix(ingestion/redshift): Bumped redshift-connector dependency due to CVE-2024-12745 (#12265) --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py 
b/metadata-ingestion/setup.py index 8357262537bcf8..5a48f8b7918dce 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -207,7 +207,7 @@ # Clickhouse 0.8.3 adds support for SQLAlchemy 1.4.x "sqlalchemy-redshift>=0.8.3", "GeoAlchemy2", - "redshift-connector>=2.1.0", + "redshift-connector>=2.1.5", *path_spec_common, } From f9e2c4939139c05307c9dbf7830d3373d1bc1ad3 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 3 Jan 2025 22:15:52 +0530 Subject: [PATCH 019/249] fix(ingest/gc): logging and stopping fix (#12266) --- .../source/gc/execution_request_cleanup.py | 18 +++++-- .../source/gc/soft_deleted_entity_cleanup.py | 49 +++++++++++-------- metadata-ingestion/tests/unit/test_gc.py | 28 +++++++++++ 3 files changed, 69 insertions(+), 26 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py index 170a6ada3e336f..f9a00d7f009058 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py @@ -141,7 +141,9 @@ def _scroll_execution_requests( break if self.report.ergc_read_errors >= self.config.max_read_errors: self.report.failure( - f"ergc({self.instance_id}): too many read errors, aborting." 
+ title="Too many read errors, aborting", + message="Too many read errors, aborting", + context=str(self.instance_id), ) break try: @@ -158,8 +160,11 @@ def _scroll_execution_requests( break params["scrollId"] = document["scrollId"] except Exception as e: - logger.error( - f"ergc({self.instance_id}): failed to fetch next batch of execution requests: {e}" + self.report.failure( + title="Failed to fetch next batch of execution requests", + message="Failed to fetch next batch of execution requests", + context=str(self.instance_id), + exc=e, ) self.report.ergc_read_errors += 1 @@ -231,8 +236,11 @@ def _delete_entry(self, entry: CleanupRecord) -> None: self.graph.delete_entity(entry.urn, True) except Exception as e: self.report.ergc_delete_errors += 1 - logger.error( - f"ergc({self.instance_id}): failed to delete ExecutionRequest {entry.request_id}: {e}" + self.report.failure( + title="Failed to delete ExecutionRequest", + message="Failed to delete ExecutionRequest", + context=str(self.instance_id), + exc=e, ) def _reached_runtime_limit(self) -> bool: diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py index 4c0355834f9b4f..cf810d05aa2ca1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py @@ -105,6 +105,8 @@ class SoftDeletedEntitiesReport(SourceReport): sample_hard_deleted_aspects_by_type: TopKDict[str, LossyList[str]] = field( default_factory=TopKDict ) + runtime_limit_reached: bool = False + deletion_limit_reached: bool = False class SoftDeletedEntitiesCleanup: @@ -163,6 +165,8 @@ def delete_entity(self, urn: str) -> None: f"Dry run is on otherwise it would have deleted {urn} with hard deletion" ) return + if self._deletion_limit_reached() or self._times_up(): + return self._increment_removal_started_count() 
self.ctx.graph.delete_entity(urn=urn, hard=True) self.ctx.graph.delete_references_to_urn( @@ -203,11 +207,10 @@ def _process_futures(self, futures: Dict[Future, str]) -> Dict[Future, str]: for future in done: self._print_report() if future.exception(): - logger.error( - f"Failed to delete entity {futures[future]}: {future.exception()}" - ) self.report.failure( - f"Failed to delete entity {futures[future]}", + title="Failed to delete entity", + message="Failed to delete entity", + context=futures[future], exc=future.exception(), ) self.report.num_soft_deleted_entity_processed += 1 @@ -274,6 +277,26 @@ def _get_urns(self) -> Iterable[str]: ) yield from self._get_soft_deleted_queries() + def _times_up(self) -> bool: + if ( + self.config.runtime_limit_seconds + and time.time() - self.start_time > self.config.runtime_limit_seconds + ): + with self._report_lock: + self.report.runtime_limit_reached = True + return True + return False + + def _deletion_limit_reached(self) -> bool: + if ( + self.config.limit_entities_delete + and self.report.num_hard_deleted > self.config.limit_entities_delete + ): + with self._report_lock: + self.report.deletion_limit_reached = True + return True + return False + def cleanup_soft_deleted_entities(self) -> None: if not self.config.enabled: return @@ -285,24 +308,8 @@ def cleanup_soft_deleted_entities(self) -> None: self._print_report() while len(futures) >= self.config.futures_max_at_time: futures = self._process_futures(futures) - if ( - self.config.limit_entities_delete - and self.report.num_hard_deleted > self.config.limit_entities_delete - ): - logger.info( - f"Limit of {self.config.limit_entities_delete} entities reached. Stopped adding more." - ) + if self._deletion_limit_reached() or self._times_up(): break - if ( - self.config.runtime_limit_seconds - and time.time() - self.start_time - > self.config.runtime_limit_seconds - ): - logger.info( - f"Runtime limit of {self.config.runtime_limit_seconds} seconds reached. 
Not submitting more futures." - ) - break - future = executor.submit(self.delete_soft_deleted_entity, urn) futures[future] = urn diff --git a/metadata-ingestion/tests/unit/test_gc.py b/metadata-ingestion/tests/unit/test_gc.py index 8f00d5e064db85..fde9a3f2e0cf03 100644 --- a/metadata-ingestion/tests/unit/test_gc.py +++ b/metadata-ingestion/tests/unit/test_gc.py @@ -9,6 +9,34 @@ DataProcessCleanupConfig, DataProcessCleanupReport, ) +from datahub.ingestion.source.gc.soft_deleted_entity_cleanup import ( + SoftDeletedEntitiesCleanup, + SoftDeletedEntitiesCleanupConfig, + SoftDeletedEntitiesReport, +) + + +class TestSoftDeletedEntitiesCleanup(unittest.TestCase): + def setUp(self): + self.ctx = PipelineContext(run_id="test_run") + self.ctx.graph = MagicMock() + self.config = SoftDeletedEntitiesCleanupConfig() + self.report = SoftDeletedEntitiesReport() + self.cleanup = SoftDeletedEntitiesCleanup( + self.ctx, self.config, self.report, dry_run=True + ) + + def test_update_report(self): + self.cleanup._update_report( + urn="urn:li:dataset:1", + entity_type="dataset", + ) + self.assertEqual(1, self.report.num_hard_deleted) + self.assertEqual(1, self.report.num_hard_deleted_by_type["dataset"]) + + def test_increment_retained_count(self): + self.cleanup._increment_retained_count() + self.assertEqual(1, self.report.num_soft_deleted_retained_due_to_age) class TestDataProcessCleanup(unittest.TestCase): From fba09966f36d86e33b19ee423706e0f79ee2ad7e Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Sat, 4 Jan 2025 00:19:42 +0530 Subject: [PATCH 020/249] fix(ingest): consistent fingerprint for sql parsing aggregator (#12239) --- .../source/snowflake/snowflake_queries.py | 10 ++-- .../ingestion/source/usage/usage_common.py | 16 ++++++- .../sql_parsing/sql_parsing_aggregator.py | 4 +- .../test_add_known_query_lineage.json | 16 +++---- .../aggregator_goldens/test_table_rename.json | 14 +++--- .../test_table_rename_with_temp.json | 12 
++--- .../aggregator_goldens/test_table_swap.json | 46 +++++++++---------- .../test_table_swap_with_temp.json | 24 +++++----- .../unit/sql_parsing/test_sqlglot_utils.py | 12 +++++ .../tests/unit/test_usage_common.py | 6 +++ 10 files changed, 96 insertions(+), 64 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py index 36825dc33fe7dc..b82734cbbe84ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_queries.py @@ -61,6 +61,7 @@ ColumnRef, DownstreamColumnRef, ) +from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList from datahub.utilities.perf_timer import PerfTimer @@ -475,10 +476,11 @@ def _parse_audit_log_row( entry = PreparsedQuery( # Despite having Snowflake's fingerprints available, our own fingerprinting logic does a better - # job at eliminating redundant / repetitive queries. As such, we don't include the fingerprint - # here so that the aggregator auto-generates one. - # query_id=res["query_fingerprint"], - query_id=None, + # job at eliminating redundant / repetitive queries. 
As such, we include the fast fingerprint + # here + query_id=get_query_fingerprint( + res["query_text"], self.identifiers.platform, fast=True + ), query_text=res["query_text"], upstreams=upstreams, downstream=downstream, diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py index 2b7aae8330905e..95c2345232a1ee 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py @@ -54,6 +54,20 @@ def default_user_urn_builder(email: str) -> str: return builder.make_user_urn(email.split("@")[0]) +def extract_user_email(user: str) -> Optional[str]: + """Extracts user email from user input + + >>> extract_user_email('urn:li:corpuser:abc@xyz.com') + 'abc@xyz.com' + >>> extract_user_email('urn:li:corpuser:abc') + >>> extract_user_email('abc@xyz.com') + 'abc@xyz.com' + """ + if user.startswith(("urn:li:corpuser:", "urn:li:corpGroup:")): + user = user.split(":")[-1] + return user if "@" in user else None + + def make_usage_workunit( bucket_start_time: datetime, resource: ResourceType, @@ -104,7 +118,7 @@ def make_usage_workunit( DatasetUserUsageCountsClass( user=user_urn_builder(user), count=count, - userEmail=user if "@" in user else None, + userEmail=extract_user_email(user), ) for user, count in user_freq ], diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index f81eb291e89e1d..a4a49f77882168 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -198,7 +198,7 @@ def id(self) -> str: @dataclasses.dataclass class PreparsedQuery: - # If not provided, we will generate one using the fast fingerprint generator. 
+ # If not provided, we will generate one using the fingerprint generator. query_id: Optional[QueryId] query_text: str @@ -622,7 +622,6 @@ def add_known_query_lineage( query_fingerprint = get_query_fingerprint( known_query_lineage.query_text, platform=self.platform.platform_name, - fast=True, ) formatted_query = self._maybe_format_query(known_query_lineage.query_text) @@ -848,7 +847,6 @@ def add_preparsed_query( query_fingerprint = get_query_fingerprint( parsed.query_text, platform=self.platform.platform_name, - fast=True, ) # Format the query. diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json index 0d8822736c95eb..31d7419b2c8cca 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_add_known_query_lineage.json @@ -18,7 +18,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.bar,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f" + "query": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" } ], "fineGrainedLineages": [ @@ -32,7 +32,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f" + "query": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" }, { "upstreamType": "FIELD_SET", @@ -44,7 +44,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" ], "confidenceScore": 1.0, - "query": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f" + "query": 
"urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" }, { "upstreamType": "FIELD_SET", @@ -56,7 +56,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),c)" ], "confidenceScore": 1.0, - "query": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f" + "query": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" } ] } @@ -64,7 +64,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f", + "entityUrn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -87,7 +87,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f", + "entityUrn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -114,7 +114,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f", + "entityUrn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -137,7 +137,7 @@ }, "operationType": "INSERT", "customProperties": { - "query_urn": "urn:li:query:6ed1d12fbf2ccc8138ceec08cc35b981030d6d004bfad9743c7afd84260fa63f" + "query_urn": "urn:li:query:02e2ec36678bea2a8c4c855fed5255d087cfeb2710d326e95fd9b48a9c4fc0ae" }, "lastUpdatedTimestamp": 20000 } diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json index fd8475090f009e..e22947fd96ce45 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json +++ 
b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json @@ -133,7 +133,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" + "query": "urn:li:query:07a307ad99d3c4a7e54d20c004a4f2d52496f3f5283b33013f80e6323700d97b" } ], "fineGrainedLineages": [ @@ -147,7 +147,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" + "query": "urn:li:query:07a307ad99d3c4a7e54d20c004a4f2d52496f3f5283b33013f80e6323700d97b" }, { "upstreamType": "FIELD_SET", @@ -159,7 +159,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" ], "confidenceScore": 1.0, - "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" + "query": "urn:li:query:07a307ad99d3c4a7e54d20c004a4f2d52496f3f5283b33013f80e6323700d97b" }, { "upstreamType": "FIELD_SET", @@ -171,7 +171,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),c)" ], "confidenceScore": 1.0, - "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" + "query": "urn:li:query:07a307ad99d3c4a7e54d20c004a4f2d52496f3f5283b33013f80e6323700d97b" } ] } @@ -179,7 +179,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4", + "entityUrn": "urn:li:query:07a307ad99d3c4a7e54d20c004a4f2d52496f3f5283b33013f80e6323700d97b", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -202,7 +202,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4", + "entityUrn": 
"urn:li:query:07a307ad99d3c4a7e54d20c004a4f2d52496f3f5283b33013f80e6323700d97b", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -229,7 +229,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4", + "entityUrn": "urn:li:query:07a307ad99d3c4a7e54d20c004a4f2d52496f3f5283b33013f80e6323700d97b", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json index a4ac349c3c455c..b657b46476cbbd 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename_with_temp.json @@ -133,7 +133,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_dep,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:composite_2efc2a13ee673ccf7b195f8f2c0e4ba0570194d8200c3c20b1eb7e8ca4fb4332" + "query": "urn:li:query:composite_c035c933cc4ce5cf8a111bcaf419b8e66a1e41853bb154ff9aaa24cd00ecf51e" } ], "fineGrainedLineages": [ @@ -147,7 +147,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" ], "confidenceScore": 0.2, - "query": "urn:li:query:composite_2efc2a13ee673ccf7b195f8f2c0e4ba0570194d8200c3c20b1eb7e8ca4fb4332" + "query": "urn:li:query:composite_c035c933cc4ce5cf8a111bcaf419b8e66a1e41853bb154ff9aaa24cd00ecf51e" }, { "upstreamType": "FIELD_SET", @@ -159,7 +159,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" ], "confidenceScore": 0.2, - "query": "urn:li:query:composite_2efc2a13ee673ccf7b195f8f2c0e4ba0570194d8200c3c20b1eb7e8ca4fb4332" + "query": "urn:li:query:composite_c035c933cc4ce5cf8a111bcaf419b8e66a1e41853bb154ff9aaa24cd00ecf51e" } ] } @@ 
-167,7 +167,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_2efc2a13ee673ccf7b195f8f2c0e4ba0570194d8200c3c20b1eb7e8ca4fb4332", + "entityUrn": "urn:li:query:composite_c035c933cc4ce5cf8a111bcaf419b8e66a1e41853bb154ff9aaa24cd00ecf51e", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -190,7 +190,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_2efc2a13ee673ccf7b195f8f2c0e4ba0570194d8200c3c20b1eb7e8ca4fb4332", + "entityUrn": "urn:li:query:composite_c035c933cc4ce5cf8a111bcaf419b8e66a1e41853bb154ff9aaa24cd00ecf51e", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -217,7 +217,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_2efc2a13ee673ccf7b195f8f2c0e4ba0570194d8200c3c20b1eb7e8ca4fb4332", + "entityUrn": "urn:li:query:composite_c035c933cc4ce5cf8a111bcaf419b8e66a1e41853bb154ff9aaa24cd00ecf51e", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json index d9d46a4b14a146..09a98a81f2602e 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json @@ -133,7 +133,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" + "query": "urn:li:query:1ed34195f33514203e8359ca22772e03a3588b669e0db00b1681e1a8d0862300" } ], "fineGrainedLineages": [ @@ -147,7 +147,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" + "query": 
"urn:li:query:1ed34195f33514203e8359ca22772e03a3588b669e0db00b1681e1a8d0862300" }, { "upstreamType": "FIELD_SET", @@ -159,7 +159,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),b)" ], "confidenceScore": 1.0, - "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" + "query": "urn:li:query:1ed34195f33514203e8359ca22772e03a3588b669e0db00b1681e1a8d0862300" }, { "upstreamType": "FIELD_SET", @@ -171,7 +171,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),c)" ], "confidenceScore": 1.0, - "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" + "query": "urn:li:query:1ed34195f33514203e8359ca22772e03a3588b669e0db00b1681e1a8d0862300" } ] } @@ -179,7 +179,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405", + "entityUrn": "urn:li:query:1ed34195f33514203e8359ca22772e03a3588b669e0db00b1681e1a8d0862300", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -202,7 +202,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405", + "entityUrn": "urn:li:query:1ed34195f33514203e8359ca22772e03a3588b669e0db00b1681e1a8d0862300", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -229,7 +229,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405", + "entityUrn": "urn:li:query:1ed34195f33514203e8359ca22772e03a3588b669e0db00b1681e1a8d0862300", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -257,7 +257,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559" + "query": 
"urn:li:query:76f0a8e1da90c4d33b5741c6e1014251ce2d1650ba0f58ab136ebaf1bb64dc8c" } ] } @@ -265,7 +265,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559", + "entityUrn": "urn:li:query:76f0a8e1da90c4d33b5741c6e1014251ce2d1650ba0f58ab136ebaf1bb64dc8c", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -288,7 +288,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559", + "entityUrn": "urn:li:query:76f0a8e1da90c4d33b5741c6e1014251ce2d1650ba0f58ab136ebaf1bb64dc8c", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -306,7 +306,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:6f71602f39d01a39b3f8bd411c74c5ac08dc4b90bc3d49b257089acb19fa8559", + "entityUrn": "urn:li:query:76f0a8e1da90c4d33b5741c6e1014251ce2d1650ba0f58ab136ebaf1bb64dc8c", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -334,7 +334,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_dep,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae" + "query": "urn:li:query:37c14a3bbb67360d19d1666fa4e11b67ef81926e1e2bcd46b87ea239d27a549d" } ] } @@ -342,7 +342,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae", + "entityUrn": "urn:li:query:37c14a3bbb67360d19d1666fa4e11b67ef81926e1e2bcd46b87ea239d27a549d", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -365,7 +365,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae", + "entityUrn": "urn:li:query:37c14a3bbb67360d19d1666fa4e11b67ef81926e1e2bcd46b87ea239d27a549d", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -383,7 +383,7 @@ }, { "entityType": 
"query", - "entityUrn": "urn:li:query:4b1fad909083e1ed5c47c146bd01247ed4d6295d175c34f9065b8fc6000fc7ae", + "entityUrn": "urn:li:query:37c14a3bbb67360d19d1666fa4e11b67ef81926e1e2bcd46b87ea239d27a549d", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -411,7 +411,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904" + "query": "urn:li:query:f4eb748a53291bbea59e080f6d415b08dfd7003d0b7c3d538d02f4e404b30943" }, { "auditStamp": { @@ -424,7 +424,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_incremental,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f" + "query": "urn:li:query:29935c31db1f06edf50d62a59d2874a86c51570256ab3b3102984439c03be1f2" } ] } @@ -432,7 +432,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904", + "entityUrn": "urn:li:query:f4eb748a53291bbea59e080f6d415b08dfd7003d0b7c3d538d02f4e404b30943", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -455,7 +455,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904", + "entityUrn": "urn:li:query:f4eb748a53291bbea59e080f6d415b08dfd7003d0b7c3d538d02f4e404b30943", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -473,7 +473,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904", + "entityUrn": "urn:li:query:f4eb748a53291bbea59e080f6d415b08dfd7003d0b7c3d538d02f4e404b30943", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -484,7 +484,7 @@ }, { "entityType": "query", - "entityUrn": 
"urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f", + "entityUrn": "urn:li:query:29935c31db1f06edf50d62a59d2874a86c51570256ab3b3102984439c03be1f2", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -507,7 +507,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f", + "entityUrn": "urn:li:query:29935c31db1f06edf50d62a59d2874a86c51570256ab3b3102984439c03be1f2", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -525,7 +525,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:481d0392ffeffdafd198d94e0a9f778dd722b60daa47083a32800b99ea21f86f", + "entityUrn": "urn:li:query:29935c31db1f06edf50d62a59d2874a86c51570256ab3b3102984439c03be1f2", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json index b4eaf76a149337..69bcd8eb10e951 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json @@ -133,7 +133,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3" + "query": "urn:li:query:composite_a10e266957d5007837642526d09f058ca461e42e2159ff45c328ebd069c112df" }, { "auditStamp": { @@ -146,7 +146,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_dep,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3" + "query": 
"urn:li:query:composite_a10e266957d5007837642526d09f058ca461e42e2159ff45c328ebd069c112df" } ], "fineGrainedLineages": [ @@ -161,7 +161,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3" + "query": "urn:li:query:composite_a10e266957d5007837642526d09f058ca461e42e2159ff45c328ebd069c112df" } ] } @@ -169,7 +169,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3", + "entityUrn": "urn:li:query:composite_a10e266957d5007837642526d09f058ca461e42e2159ff45c328ebd069c112df", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -192,7 +192,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3", + "entityUrn": "urn:li:query:composite_a10e266957d5007837642526d09f058ca461e42e2159ff45c328ebd069c112df", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -219,7 +219,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3", + "entityUrn": "urn:li:query:composite_a10e266957d5007837642526d09f058ca461e42e2159ff45c328ebd069c112df", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -247,7 +247,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:composite_49daa72ac1d22734879a6bed1224daa7f8c1293750d6d7b8a24a0aa0e9f74d80" + "query": "urn:li:query:composite_5d8360cfc2f57f023d9945749848ad52227674fefc9fec568e7fbb1787cfd544" }, { "auditStamp": { @@ -260,7 +260,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_dep,PROD)", "type": "TRANSFORMED", - "query": 
"urn:li:query:composite_49daa72ac1d22734879a6bed1224daa7f8c1293750d6d7b8a24a0aa0e9f74d80" + "query": "urn:li:query:composite_5d8360cfc2f57f023d9945749848ad52227674fefc9fec568e7fbb1787cfd544" } ], "fineGrainedLineages": [ @@ -275,7 +275,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_backup,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:composite_49daa72ac1d22734879a6bed1224daa7f8c1293750d6d7b8a24a0aa0e9f74d80" + "query": "urn:li:query:composite_5d8360cfc2f57f023d9945749848ad52227674fefc9fec568e7fbb1787cfd544" } ] } @@ -283,7 +283,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_49daa72ac1d22734879a6bed1224daa7f8c1293750d6d7b8a24a0aa0e9f74d80", + "entityUrn": "urn:li:query:composite_5d8360cfc2f57f023d9945749848ad52227674fefc9fec568e7fbb1787cfd544", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -306,7 +306,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_49daa72ac1d22734879a6bed1224daa7f8c1293750d6d7b8a24a0aa0e9f74d80", + "entityUrn": "urn:li:query:composite_5d8360cfc2f57f023d9945749848ad52227674fefc9fec568e7fbb1787cfd544", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -330,7 +330,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_49daa72ac1d22734879a6bed1224daa7f8c1293750d6d7b8a24a0aa0e9f74d80", + "entityUrn": "urn:li:query:composite_5d8360cfc2f57f023d9945749848ad52227674fefc9fec568e7fbb1787cfd544", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py index dbe24ade6944f6..c3c3a4a15d915b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py @@ -186,3 +186,15 @@ def test_query_fingerprint(): assert get_query_fingerprint( "select 1 + 1", 
platform="postgres" ) != get_query_fingerprint("select 2", platform="postgres") + + +def test_redshift_query_fingerprint(): + query1 = "insert into insert_into_table (select * from base_table);" + query2 = "INSERT INTO insert_into_table (SELECT * FROM base_table)" + + assert get_query_fingerprint(query1, "redshift") == get_query_fingerprint( + query2, "redshift" + ) + assert get_query_fingerprint(query1, "redshift", True) != get_query_fingerprint( + query2, "redshift", True + ) diff --git a/metadata-ingestion/tests/unit/test_usage_common.py b/metadata-ingestion/tests/unit/test_usage_common.py index e01f0ea77df837..bd6d194835dd96 100644 --- a/metadata-ingestion/tests/unit/test_usage_common.py +++ b/metadata-ingestion/tests/unit/test_usage_common.py @@ -5,6 +5,7 @@ from freezegun import freeze_time from pydantic import ValidationError +import datahub.ingestion.source.usage.usage_common from datahub.configuration.common import AllowDenyPattern from datahub.configuration.time_window_config import BucketDuration, get_time_bucket from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance @@ -28,6 +29,7 @@ UserUsageCountsClass, WindowDurationClass, ) +from datahub.testing.doctest import assert_doctest _TestTableRef = str @@ -373,3 +375,7 @@ def test_convert_usage_aggregation_class(): eventGranularity=TimeWindowSizeClass(unit=CalendarIntervalClass.MONTH), ), ) + + +def test_extract_user_email(): + assert_doctest(datahub.ingestion.source.usage.usage_common) From d2b67cab6d1fa2e30accac59f833bb6366307352 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Fri, 3 Jan 2025 11:57:34 -0800 Subject: [PATCH 021/249] docs(queries_v2): set use_queries_v2 to true in snowflake_recipe.yml (#12269) --- metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml b/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml index 
7e8dbcff88e1c0..3226f23c963ddf 100644 --- a/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml +++ b/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml @@ -4,6 +4,9 @@ source: # This option is recommended to be used to ingest all lineage ignore_start_time_lineage: true + # This flag tells the snowflake ingestion to use the more advanced query parsing. This will become the default eventually. + use_queries_v2: true + # Coordinates account_id: "abc48144" warehouse: "COMPUTE_WH" From 8093882d45c821090ec58c9c499517158588c1b9 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Sun, 5 Jan 2025 12:40:58 +0530 Subject: [PATCH 022/249] feat(ingest/gc): truncate query usage statistics aspect (#12268) --- .../src/datahub/ingestion/source/gc/datahub_gc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py index 168b787b85e8be..443368e6d8b4fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py @@ -188,6 +188,9 @@ def truncate_indices(self) -> None: self._truncate_timeseries_helper( aspect_name="dashboardUsageStatistics", entity_type="dashboard" ) + self._truncate_timeseries_helper( + aspect_name="queryusagestatistics", entity_type="query" + ) def _truncate_timeseries_helper(self, aspect_name: str, entity_type: str) -> None: self._truncate_timeseries_with_watch_optional( From b86bbf726275be256331c01707704626035f3067 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 6 Jan 2025 12:14:59 +0530 Subject: [PATCH 023/249] fix(ingest/tableau): retry on auth error for special case (#12264) --- .../ingestion/source/tableau/tableau.py | 68 ++++++++++++++----- .../tableau/test_tableau_ingest.py | 64 ++++++++++++++++- 2 files changed, 112 insertions(+), 20 deletions(-) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 008216fea89508..d149402741e82f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -2,9 +2,9 @@ import logging import re import time -from collections import OrderedDict -from dataclasses import dataclass -from datetime import datetime +from collections import OrderedDict, defaultdict +from dataclasses import dataclass, field as dataclass_field +from datetime import datetime, timedelta, timezone from functools import lru_cache from typing import ( Any, @@ -196,6 +196,11 @@ 504, # Gateway Timeout ] +# From experience, this expiry time typically ranges from 50 minutes +# to 2 hours but might as well be configurable. We will allow upto +# 10 minutes of such expiry time +REGULAR_AUTH_EXPIRY_PERIOD = timedelta(minutes=10) + logger: logging.Logger = logging.getLogger(__name__) # Replace / with | @@ -637,6 +642,7 @@ class SiteIdContentUrl: site_content_url: str +@dataclass class TableauSourceReport(StaleEntityRemovalSourceReport): get_all_datasources_query_failed: bool = False num_get_datasource_query_failures: int = 0 @@ -653,7 +659,14 @@ class TableauSourceReport(StaleEntityRemovalSourceReport): num_upstream_table_lineage_failed_parse_sql: int = 0 num_upstream_fine_grained_lineage_failed_parse_sql: int = 0 num_hidden_assets_skipped: int = 0 - logged_in_user: List[UserInfo] = [] + logged_in_user: List[UserInfo] = dataclass_field(default_factory=list) + last_authenticated_at: Optional[datetime] = None + + num_expected_tableau_metadata_queries: int = 0 + num_actual_tableau_metadata_queries: int = 0 + tableau_server_error_stats: Dict[str, int] = dataclass_field( + default_factory=(lambda: defaultdict(int)) + ) def report_user_role(report: TableauSourceReport, server: Server) -> None: @@ -724,6 +737,7 @@ def _authenticate(self, 
site_content_url: str) -> None: try: logger.info(f"Authenticated to Tableau site: '{site_content_url}'") self.server = self.config.make_tableau_client(site_content_url) + self.report.last_authenticated_at = datetime.now(timezone.utc) report_user_role(report=self.report, server=self.server) # Note that we're not catching ConfigurationError, since we want that to throw. except ValueError as e: @@ -807,10 +821,13 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: site_source = TableauSiteSource( config=self.config, ctx=self.ctx, - site=site - if site - else SiteIdContentUrl( - site_id=self.server.site_id, site_content_url=self.config.site + site=( + site + if site + else SiteIdContentUrl( + site_id=self.server.site_id, + site_content_url=self.config.site, + ) ), report=self.report, server=self.server, @@ -925,6 +942,7 @@ def _re_authenticate(self) -> None: # Sign-in again may not be enough because Tableau sometimes caches invalid sessions # so we need to recreate the Tableau Server object self.server = self.config.make_tableau_client(self.site_content_url) + self.report.last_authenticated_at = datetime.now(timezone.utc) def _populate_usage_stat_registry(self) -> None: if self.server is None: @@ -1190,6 +1208,7 @@ def get_connection_object_page( ) try: assert self.server is not None + self.report.num_actual_tableau_metadata_queries += 1 query_data = query_metadata_cursor_based_pagination( server=self.server, main_query=query, @@ -1199,25 +1218,36 @@ def get_connection_object_page( qry_filter=query_filter, ) - except REAUTHENTICATE_ERRORS: - if not retry_on_auth_error: + except REAUTHENTICATE_ERRORS as e: + self.report.tableau_server_error_stats[e.__class__.__name__] += 1 + if not retry_on_auth_error or retries_remaining <= 0: raise - # If ingestion has been running for over 2 hours, the Tableau - # temporary credentials will expire. If this happens, this exception - # will be thrown, and we need to re-authenticate and retry. 
- self._re_authenticate() + # We have been getting some irregular authorization errors like below well before the expected expiry time + # - within few seconds of initial authentication . We'll retry without re-auth for such cases. + # : + # b'{"timestamp":"xxx","status":401,"error":"Unauthorized","path":"/relationship-service-war/graphql"}' + if self.report.last_authenticated_at and ( + datetime.now(timezone.utc) - self.report.last_authenticated_at + > REGULAR_AUTH_EXPIRY_PERIOD + ): + # If ingestion has been running for over 2 hours, the Tableau + # temporary credentials will expire. If this happens, this exception + # will be thrown, and we need to re-authenticate and retry. + self._re_authenticate() + return self.get_connection_object_page( query=query, connection_type=connection_type, query_filter=query_filter, fetch_size=fetch_size, current_cursor=current_cursor, - retry_on_auth_error=False, + retry_on_auth_error=True, retries_remaining=retries_remaining - 1, ) except InternalServerError as ise: + self.report.tableau_server_error_stats[InternalServerError.__name__] += 1 # In some cases Tableau Server returns 504 error, which is a timeout error, so it worths to retry. # Extended with other retryable errors. if ise.code in RETRIABLE_ERROR_CODES: @@ -1230,13 +1260,14 @@ def get_connection_object_page( query_filter=query_filter, fetch_size=fetch_size, current_cursor=current_cursor, - retry_on_auth_error=False, + retry_on_auth_error=True, retries_remaining=retries_remaining - 1, ) else: raise ise except OSError: + self.report.tableau_server_error_stats[OSError.__name__] += 1 # In tableauseverclient 0.26 (which was yanked and released in 0.28 on 2023-10-04), # the request logic was changed to use threads. 
# https://github.com/tableau/server-client-python/commit/307d8a20a30f32c1ce615cca7c6a78b9b9bff081 @@ -1251,7 +1282,7 @@ def get_connection_object_page( query_filter=query_filter, fetch_size=fetch_size, current_cursor=current_cursor, - retry_on_auth_error=False, + retry_on_auth_error=True, retries_remaining=retries_remaining - 1, ) @@ -1339,7 +1370,7 @@ def get_connection_object_page( query_filter=query_filter, fetch_size=fetch_size, current_cursor=current_cursor, - retry_on_auth_error=False, + retry_on_auth_error=True, retries_remaining=retries_remaining, ) raise RuntimeError(f"Query {connection_type} error: {errors}") @@ -1377,6 +1408,7 @@ def get_connection_objects( while has_next_page: filter_: str = make_filter(filter_page) + self.report.num_expected_tableau_metadata_queries += 1 ( connection_objects, current_cursor, diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 71e5ad10c2fc5e..d7868038a40aa1 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1,6 +1,6 @@ import json import pathlib -from typing import Any, Dict, List, cast +from typing import Any, Dict, List, Union, cast from unittest import mock import pytest @@ -13,10 +13,15 @@ GroupItem, ProjectItem, SiteItem, + UserItem, ViewItem, WorkbookItem, ) from tableauserverclient.models.reference_item import ResourceReference +from tableauserverclient.server.endpoint.exceptions import ( + NonXMLResponseError, + TableauError, +) from datahub.emitter.mce_builder import DEFAULT_ENV, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -270,7 +275,7 @@ def side_effect_site_get_by_id(id, *arg, **kwargs): def mock_sdk_client( - side_effect_query_metadata_response: List[dict], + side_effect_query_metadata_response: List[Union[dict, TableauError]], datasources_side_effect: 
List[dict], sign_out_side_effect: List[dict], ) -> mock.MagicMock: @@ -1312,6 +1317,61 @@ def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph): ) +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_retry_on_error(pytestconfig, tmp_path, mock_datahub_graph): + with mock.patch( + "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", + mock_datahub_graph, + ) as mock_checkpoint: + mock_checkpoint.return_value = mock_datahub_graph + + with mock.patch("datahub.ingestion.source.tableau.tableau.Server") as mock_sdk: + mock_client = mock_sdk_client( + side_effect_query_metadata_response=[ + NonXMLResponseError( + """{"timestamp":"xxx","status":401,"error":"Unauthorized","path":"/relationship-service-war/graphql"}""" + ), + *mock_data(), + ], + sign_out_side_effect=[{}], + datasources_side_effect=[{}], + ) + mock_client.users = mock.Mock() + mock_client.users.get_by_id.side_effect = [ + UserItem( + name="name", site_role=UserItem.Roles.SiteAdministratorExplorer + ) + ] + mock_sdk.return_value = mock_client + + reporter = TableauSourceReport() + tableau_source = TableauSiteSource( + platform="tableau", + config=mock.MagicMock(), + ctx=mock.MagicMock(), + site=mock.MagicMock(spec=SiteItem, id="Site1", content_url="site1"), + server=mock_sdk.return_value, + report=reporter, + ) + + tableau_source.get_connection_object_page( + query=mock.MagicMock(), + connection_type=mock.MagicMock(), + query_filter=mock.MagicMock(), + current_cursor=None, + retries_remaining=1, + fetch_size=10, + ) + + assert reporter.num_actual_tableau_metadata_queries == 2 + assert reporter.tableau_server_error_stats + assert reporter.tableau_server_error_stats["NonXMLResponseError"] == 1 + + assert reporter.warnings == [] + assert reporter.failures == [] + + @freeze_time(FROZEN_TIME) @pytest.mark.parametrize( "extract_project_hierarchy, allowed_projects", From 842c8f94a5177d7053bf596d571db9c99df2e4b6 Mon Sep 17 00:00:00 2001 From: 
Aseem Bansal Date: Mon, 6 Jan 2025 16:39:02 +0530 Subject: [PATCH 024/249] fix(ingest/gc): infinite loop query entities (#12274) --- .../datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py index cf810d05aa2ca1..32243106bb53f6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py @@ -258,7 +258,7 @@ def _get_soft_deleted_queries(self) -> Iterable[str]: ) break scroll_across_entities = result.get("scrollAcrossEntities") - if not scroll_across_entities: + if not scroll_across_entities or not scroll_across_entities.get("count"): break scroll_id = scroll_across_entities.get("nextScrollId") self.report.num_queries_found += scroll_across_entities.get("count") From 91c1c6bf77247e8a37855aecd77919cefbe6d0d7 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:48:56 +0530 Subject: [PATCH 025/249] fix(ingest/snowflake): use fast query fingerprint for lineage (#12275) --- .../source/snowflake/snowflake_lineage_v2.py | 4 ++++ .../datahub/sql_parsing/sql_parsing_aggregator.py | 13 ++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 6b200590d7ab63..e93ecf30171f65 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -40,6 +40,7 @@ ColumnRef, DownstreamColumnRef, ) +from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint 
from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.time import ts_millis_to_datetime @@ -239,6 +240,9 @@ def get_known_query_lineage( downstream_table_urn = self.identifiers.gen_dataset_urn(dataset_name) known_lineage = KnownQueryLineageInfo( + query_id=get_query_fingerprint( + query.query_text, self.identifiers.platform, fast=True + ), query_text=query.query_text, downstream=downstream_table_urn, upstreams=self.map_query_result_upstreams( diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index a4a49f77882168..25b63ffac45f96 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -165,6 +165,7 @@ class KnownQueryLineageInfo: timestamp: Optional[datetime] = None session_id: Optional[str] = None query_type: QueryType = QueryType.UNKNOWN + query_id: Optional[str] = None @dataclasses.dataclass @@ -618,11 +619,13 @@ def add_known_query_lineage( self.report.num_known_query_lineage += 1 # Generate a fingerprint for the query. - with self.report.sql_fingerprinting_timer: - query_fingerprint = get_query_fingerprint( - known_query_lineage.query_text, - platform=self.platform.platform_name, - ) + query_fingerprint = known_query_lineage.query_id + if not query_fingerprint: + with self.report.sql_fingerprinting_timer: + query_fingerprint = get_query_fingerprint( + known_query_lineage.query_text, + platform=self.platform.platform_name, + ) formatted_query = self._maybe_format_query(known_query_lineage.query_text) # Register the query. From 3316d407f74486655ae57209c7377903d2e498d4 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Mon, 6 Jan 2025 18:21:24 +0100 Subject: [PATCH 026/249] fix(spark): Finegrained lineage is emitted on the DataJob and not on the emitted Datasets. 
(#11956) --- .../java/acryl-spark-lineage/README.md | 101 +++++++------ .../datahub/spark/DatahubEventEmitter.java | 26 +++- .../datahub/spark/conf/SparkConfigParser.java | 19 +++ .../datahub/spark/conf/SparkLineageConf.java | 2 + .../spark/OpenLineageEventToDatahubTest.java | 28 ++++ .../ol_events/map_partition_job.json | 66 +++++++++ .../config/DatahubOpenlineageConfig.java | 2 + .../converter/OpenLineageToDataHub.java | 23 ++- .../openlineage/dataset/DatahubJob.java | 136 +++++++++++++----- 9 files changed, 319 insertions(+), 84 deletions(-) create mode 100644 metadata-integration/java/acryl-spark-lineage/src/test/resources/ol_events/map_partition_job.json diff --git a/metadata-integration/java/acryl-spark-lineage/README.md b/metadata-integration/java/acryl-spark-lineage/README.md index 97851e90e860ed..e51c884c297d7e 100644 --- a/metadata-integration/java/acryl-spark-lineage/README.md +++ b/metadata-integration/java/acryl-spark-lineage/README.md @@ -24,7 +24,7 @@ When running jobs using spark-submit, the agent needs to be configured in the co ```text #Configuring DataHub spark agent jar -spark.jars.packages io.acryl:acryl-spark-lineage:0.2.16 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.17 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server http://localhost:8080 ``` @@ -32,7 +32,7 @@ spark.datahub.rest.server http://localhost:8080 ## spark-submit command line ```sh -spark-submit --packages io.acryl:acryl-spark-lineage:0.2.16 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py +spark-submit --packages io.acryl:acryl-spark-lineage:0.2.17 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py ``` ### Configuration Instructions: Amazon EMR @@ -41,7 +41,7 @@ Set the following spark-defaults configuration properties as it stated [here](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html) ```text -spark.jars.packages 
io.acryl:acryl-spark-lineage:0.2.16 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.17 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server https://your_datahub_host/gms #If you have authentication set up then you also need to specify the Datahub access token @@ -56,7 +56,7 @@ When running interactive jobs from a notebook, the listener can be configured wh spark = SparkSession.builder .master("spark://spark-master:7077") .appName("test-application") -.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.16") +.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.17") .config("spark.extraListeners", "datahub.spark.DatahubSparkListener") .config("spark.datahub.rest.server", "http://localhost:8080") .enableHiveSupport() @@ -79,7 +79,7 @@ appName("test-application") config("spark.master","spark://spark-master:7077") . -config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.16") +config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.17") . config("spark.extraListeners","datahub.spark.DatahubSparkListener") @@ -158,45 +158,47 @@ information like tokens. ## Configuration Options -| Field | Required | Default | Description | -|--------------------------------------------------------|----------|-----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| spark.jars.packages | ✅ | | Set with latest/required version io.acryl:acryl-spark-lineage:0.2.15 | -| spark.extraListeners | ✅ | | datahub.spark.DatahubSparkListener | -| spark.datahub.emitter | | rest | Specify the ways to emit metadata. By default it sends to DataHub using REST emitter. Valid options are rest, kafka or file | -| spark.datahub.rest.server | | http://localhost:8080 | Datahub server url eg: | -| spark.datahub.rest.token | | | Authentication token. 
| -| spark.datahub.rest.disable_ssl_verification | | false | Disable SSL certificate validation. Caution: Only use this if you know what you are doing! | -| spark.datahub.rest.disable_chunked_encoding | | false | Disable Chunked Transfer Encoding. In some environment chunked encoding causes issues. With this config option it can be disabled. || -| spark.datahub.rest.max_retries | | 0 | Number of times a request retried if failed | -| spark.datahub.rest.retry_interval | | 10 | Number of seconds to wait between retries | -| spark.datahub.file.filename | | | The file where metadata will be written if file emitter is set | -| spark.datahub.kafka.bootstrap | | | The Kafka bootstrap server url to use if the Kafka emitter is set | -| spark.datahub.kafka.schema_registry_url | | | The Schema registry url to use if the Kafka emitter is set | -| spark.datahub.kafka.schema_registry_config. | | | Additional config to pass in to the Schema Registry Client | -| spark.datahub.kafka.producer_config. | | | Additional config to pass in to the Kafka producer. For example: `--conf "spark.datahub.kafka.producer_config.client.id=my_client_id"` | -| spark.datahub.metadata.pipeline.platformInstance | | | Pipeline level platform instance | -| spark.datahub.metadata.dataset.platformInstance | | | dataset level platform instance (it is usefult to set if you have it in your glue ingestion) | -| spark.datahub.metadata.dataset.env | | PROD | [Supported values](https://datahubproject.io/docs/graphql/enums#fabrictype). In all other cases, will fallback to PROD | -| spark.datahub.metadata.dataset.hivePlatformAlias | | hive | By default, datahub assigns Hive-like tables to the Hive platform. 
If you are using Glue as your Hive metastore, set this config flag to `glue` | +| Field | Required | Default | Description | +|--------------------------------------------------------|----------|-----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| spark.jars.packages | ✅ | | Set with latest/required version io.acryl:acryl-spark-lineage:0.2.15 | +| spark.extraListeners | ✅ | | datahub.spark.DatahubSparkListener | +| spark.datahub.emitter | | rest | Specify the ways to emit metadata. By default it sends to DataHub using REST emitter. Valid options are rest, kafka or file | +| spark.datahub.rest.server | | http://localhost:8080 | Datahub server url eg: | +| spark.datahub.rest.token | | | Authentication token. | +| spark.datahub.rest.disable_ssl_verification | | false | Disable SSL certificate validation. Caution: Only use this if you know what you are doing! | +| spark.datahub.rest.disable_chunked_encoding | | false | Disable Chunked Transfer Encoding. In some environment chunked encoding causes issues. With this config option it can be disabled. || +| spark.datahub.rest.max_retries | | 0 | Number of times a request retried if failed | +| spark.datahub.rest.retry_interval | | 10 | Number of seconds to wait between retries | +| spark.datahub.file.filename | | | The file where metadata will be written if file emitter is set | +| spark.datahub.kafka.bootstrap | | | The Kafka bootstrap server url to use if the Kafka emitter is set | +| spark.datahub.kafka.schema_registry_url | | | The Schema registry url to use if the Kafka emitter is set | +| spark.datahub.kafka.schema_registry_config. | | | Additional config to pass in to the Schema Registry Client | +| spark.datahub.kafka.producer_config. | | | Additional config to pass in to the Kafka producer. 
For example: `--conf "spark.datahub.kafka.producer_config.client.id=my_client_id"` | +| spark.datahub.metadata.pipeline.platformInstance | | | Pipeline level platform instance | +| spark.datahub.metadata.dataset.platformInstance | | | dataset level platform instance (it is usefult to set if you have it in your glue ingestion) | +| spark.datahub.metadata.dataset.env | | PROD | [Supported values](https://datahubproject.io/docs/graphql/enums#fabrictype). In all other cases, will fallback to PROD | +| spark.datahub.metadata.dataset.hivePlatformAlias | | hive | By default, datahub assigns Hive-like tables to the Hive platform. If you are using Glue as your Hive metastore, set this config flag to `glue` | | spark.datahub.metadata.include_scheme | | true | Include scheme from the path URI (e.g. hdfs://, s3://) in the dataset URN. We recommend setting this value to false, it is set to true for backwards compatibility with previous versions | -| spark.datahub.metadata.remove_partition_pattern | | | Remove partition pattern. (e.g. /partition=\d+) It change database/table/partition=123 to database/table | -| spark.datahub.coalesce_jobs | | true | Only one datajob(task) will be emitted containing all input and output datasets for the spark application | -| spark.datahub.parent.datajob_urn | | | Specified dataset will be set as upstream dataset for datajob created. Effective only when spark.datahub.coalesce_jobs is set to true | -| spark.datahub.metadata.dataset.materialize | | false | Materialize Datasets in DataHub | -| spark.datahub.platform.s3.path_spec_list | | | List of pathspec per platform | -| spark.datahub.metadata.dataset.include_schema_metadata | false | | Emit dataset schema metadata based on the spark execution. 
It is recommended to get schema information from platform specific DataHub sources as this is less reliable | -| spark.datahub.flow_name | | | If it is set it will be used as the DataFlow name otherwise it uses spark app name as flow_name | -| spark.datahub.file_partition_regexp | | | Strip partition part from the path if path end matches with the specified regexp. Example `year=.*/month=.*/day=.*` | -| spark.datahub.tags | | | Comma separated list of tags to attach to the DataFlow | -| spark.datahub.domains | | | Comma separated list of domain urns to attach to the DataFlow | -| spark.datahub.stage_metadata_coalescing | | | Normally it coalesces and sends metadata at the onApplicationEnd event which is never called on Databricks or on Glue. You should enable this on Databricks if you want coalesced run. | -| spark.datahub.patch.enabled | | false | Set this to true to send lineage as a patch, which appends rather than overwrites existing Dataset lineage edges. By default, it is disabled. | -| spark.datahub.metadata.dataset.lowerCaseUrns | | false | Set this to true to lowercase dataset urns. By default, it is disabled. | -| spark.datahub.disableSymlinkResolution | | false | Set this to true if you prefer using the s3 location instead of the Hive table. By default, it is disabled. | -| spark.datahub.s3.bucket | | | The name of the bucket where metadata will be written if s3 emitter is set | -| spark.datahub.s3.prefix | | | The prefix for the file where metadata will be written on s3 if s3 emitter is set | -| spark.datahub.s3.filename | | | The name of the file where metadata will be written if it is not set random filename will be used on s3 if s3 emitter is set | - +| spark.datahub.metadata.remove_partition_pattern | | | Remove partition pattern. (e.g. 
/partition=\d+) It change database/table/partition=123 to database/table | +| spark.datahub.coalesce_jobs | | true | Only one datajob(task) will be emitted containing all input and output datasets for the spark application | +| spark.datahub.parent.datajob_urn | | | Specified dataset will be set as upstream dataset for datajob created. Effective only when spark.datahub.coalesce_jobs is set to true | +| spark.datahub.metadata.dataset.materialize | | false | Materialize Datasets in DataHub | +| spark.datahub.platform.s3.path_spec_list | | | List of pathspec per platform | +| spark.datahub.metadata.dataset.include_schema_metadata | false | | Emit dataset schema metadata based on the spark execution. It is recommended to get schema information from platform specific DataHub sources as this is less reliable | +| spark.datahub.flow_name | | | If it is set it will be used as the DataFlow name otherwise it uses spark app name as flow_name | +| spark.datahub.file_partition_regexp | | | Strip partition part from the path if path end matches with the specified regexp. Example `year=.*/month=.*/day=.*` | +| spark.datahub.tags | | | Comma separated list of tags to attach to the DataFlow | +| spark.datahub.domains | | | Comma separated list of domain urns to attach to the DataFlow | +| spark.datahub.stage_metadata_coalescing | | | Normally it coalesces and sends metadata at the onApplicationEnd event which is never called on Databricks or on Glue. You should enable this on Databricks if you want coalesced run. | +| spark.datahub.patch.enabled | | false | Set this to true to send lineage as a patch, which appends rather than overwrites existing Dataset lineage edges. By default, it is disabled. | +| spark.datahub.metadata.dataset.lowerCaseUrns | | false | Set this to true to lowercase dataset urns. By default, it is disabled. | +| spark.datahub.disableSymlinkResolution | | false | Set this to true if you prefer using the s3 location instead of the Hive table. 
By default, it is disabled. | +| spark.datahub.s3.bucket | | | The name of the bucket where metadata will be written if s3 emitter is set | +| spark.datahub.s3.prefix | | | The prefix for the file where metadata will be written on s3 if s3 emitter is set | +| spark.datahub.s3.filename | | | The name of the file where metadata will be written if it is not set random filename will be used on s3 if s3 emitter is set | +| spark.datahub.s3.filename | | | The name of the file where metadata will be written if it is not set random filename will be used on s3 if s3 emitter is set | +|spark.datahub.log.mcps | | true | Set this to true to log MCPS to the log. By default, it is enabled. | +|spark.datahub.legacyLineageCleanup.enabled| | false | Set this to true to remove legacy lineages from older Spark Plugin runs. This will remove those lineages from the Datasets which it adds to DataJob. By default, it is disabled. | ## What to Expect: The Metadata Model @@ -358,6 +360,19 @@ Use Java 8 to build the project. The project uses Gradle as the build tool. To b + ## Changelog +### Version 0.2.17 +- *Major changes*: + - Finegrained lineage is emitted on the DataJob and not on the emitted Datasets. This is the correct behaviour which was not correct earlier. This causes earlier emitted finegrained lineages won't be overwritten by the new ones. + You can remove the old lineages by setting `spark.datahub.legacyLineageCleanup.enabled=true`. Make sure you have the latest server if you enable with patch support. (this was introduced since 0.2.17-rc5) + +- *Changes*: + - OpenLineage 1.25.0 upgrade + - Add option to disable chunked encoding in the datahub rest sink -> `spark.datahub.rest.disable_chunked_encoding` + - Add option to specify the mcp kafka topic for the datahub kafka sink -> `spark.datahub.kafka.mcp_topic` + - Add option to remove legacy lineages from older Spark Plugin runs. 
This will remove those lineages from the Datasets which it adds to DataJob -> `spark.datahub.legacyLineageCleanup.enabled` +- *Fixes*: + - Fix handling map transformation in the lineage. Earlier it generated wrong lineage for map transformation. + ### Version 0.2.16 - Remove logging DataHub config into logs diff --git a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubEventEmitter.java b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubEventEmitter.java index 0bcc7db9e87408..84f397226ce912 100644 --- a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubEventEmitter.java +++ b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubEventEmitter.java @@ -1,12 +1,18 @@ package datahub.spark; +import static com.linkedin.metadata.Constants.*; import static datahub.spark.converter.SparkStreamingEventToDatahub.*; import static io.datahubproject.openlineage.converter.OpenLineageToDataHub.*; import static io.datahubproject.openlineage.utils.DatahubUtils.*; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.StreamReadConstraints; +import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.GlobalTags; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.DataJobUrn; +import com.linkedin.data.DataMap; +import com.linkedin.data.template.JacksonDataTemplateCodec; import com.linkedin.data.template.StringMap; import com.linkedin.dataprocess.DataProcessInstanceRelationships; import com.linkedin.dataprocess.RunResultType; @@ -62,12 +68,23 @@ public class DatahubEventEmitter extends EventEmitter { private final Map schemaMap = new HashMap<>(); private SparkLineageConf datahubConf; private static final int DEFAULT_TIMEOUT_SEC = 10; + private final ObjectMapper objectMapper; + private final JacksonDataTemplateCodec dataTemplateCodec; private final EventFormatter eventFormatter = new 
EventFormatter(); public DatahubEventEmitter(SparkOpenLineageConfig config, String applicationJobName) throws URISyntaxException { super(config, applicationJobName); + objectMapper = new ObjectMapper().setSerializationInclusion(JsonInclude.Include.NON_NULL); + int maxSize = + Integer.parseInt( + System.getenv() + .getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); + objectMapper + .getFactory() + .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); + dataTemplateCodec = new JacksonDataTemplateCodec(objectMapper.getFactory()); } private Optional getEmitter() { @@ -407,7 +424,14 @@ protected void emitMcps(List mcps) { .map( mcp -> { try { - log.info("emitting mcpw: " + mcp); + if (this.datahubConf.isLogMcps()) { + DataMap map = mcp.data(); + String serializedMCP = dataTemplateCodec.mapToString(map); + log.info("emitting mcpw: {}", serializedMCP); + } else { + log.info( + "emitting aspect: {} for urn: {}", mcp.getAspectName(), mcp.getEntityUrn()); + } return emitter.get().emit(mcp); } catch (IOException ioException) { log.error("Failed to emit metadata to DataHub", ioException); diff --git a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkConfigParser.java b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkConfigParser.java index 3860285083c4bb..824cd1a687b264 100644 --- a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkConfigParser.java +++ b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkConfigParser.java @@ -31,6 +31,7 @@ public class SparkConfigParser { public static final String FILE_EMITTER_FILE_NAME = "file.filename"; public static final String DISABLE_SSL_VERIFICATION_KEY = "rest.disable_ssl_verification"; public static final String REST_DISABLE_CHUNKED_ENCODING = "rest.disable_chunked_encoding"; + public static final String CONFIG_LOG_MCPS = 
"log.mcps"; public static final String MAX_RETRIES = "rest.max_retries"; public static final String RETRY_INTERVAL_IN_SEC = "rest.retry_interval_in_sec"; @@ -51,6 +52,7 @@ public class SparkConfigParser { public static final String COALESCE_KEY = "coalesce_jobs"; public static final String PATCH_ENABLED = "patch.enabled"; + public static final String LEGACY_LINEAGE_CLEANUP = "legacyLineageCleanup.enabled"; public static final String DISABLE_SYMLINK_RESOLUTION = "disableSymlinkResolution"; public static final String STAGE_METADATA_COALESCING = "stage_metadata_coalescing"; @@ -158,6 +160,7 @@ public static DatahubOpenlineageConfig sparkConfigToDatahubOpenlineageConf( Config sparkConfig, SparkAppContext sparkAppContext) { DatahubOpenlineageConfig.DatahubOpenlineageConfigBuilder builder = DatahubOpenlineageConfig.builder(); + builder.isSpark(true); builder.filePartitionRegexpPattern( SparkConfigParser.getFilePartitionRegexpPattern(sparkConfig)); builder.fabricType(SparkConfigParser.getCommonFabricType(sparkConfig)); @@ -172,6 +175,7 @@ public static DatahubOpenlineageConfig sparkConfigToDatahubOpenlineageConf( builder.commonDatasetPlatformInstance(SparkConfigParser.getCommonPlatformInstance(sparkConfig)); builder.hivePlatformAlias(SparkConfigParser.getHivePlatformAlias(sparkConfig)); builder.usePatch(SparkConfigParser.isPatchEnabled(sparkConfig)); + builder.removeLegacyLineage(SparkConfigParser.isLegacyLineageCleanupEnabled(sparkConfig)); builder.disableSymlinkResolution(SparkConfigParser.isDisableSymlinkResolution(sparkConfig)); builder.lowerCaseDatasetUrns(SparkConfigParser.isLowerCaseDatasetUrns(sparkConfig)); try { @@ -311,6 +315,13 @@ public static boolean isDatasetMaterialize(Config datahubConfig) { && datahubConfig.getBoolean(DATASET_MATERIALIZE_KEY); } + public static boolean isLogMcps(Config datahubConfig) { + if (datahubConfig.hasPath(CONFIG_LOG_MCPS)) { + return datahubConfig.getBoolean(CONFIG_LOG_MCPS); + } + return true; + } + public static boolean 
isIncludeSchemaMetadata(Config datahubConfig) { if (datahubConfig.hasPath(DATASET_INCLUDE_SCHEMA_METADATA)) { return datahubConfig.getBoolean(DATASET_INCLUDE_SCHEMA_METADATA); @@ -352,6 +363,14 @@ public static boolean isPatchEnabled(Config datahubConfig) { return datahubConfig.hasPath(PATCH_ENABLED) && datahubConfig.getBoolean(PATCH_ENABLED); } + public static boolean isLegacyLineageCleanupEnabled(Config datahubConfig) { + if (!datahubConfig.hasPath(LEGACY_LINEAGE_CLEANUP)) { + return false; + } + return datahubConfig.hasPath(LEGACY_LINEAGE_CLEANUP) + && datahubConfig.getBoolean(LEGACY_LINEAGE_CLEANUP); + } + public static boolean isDisableSymlinkResolution(Config datahubConfig) { if (!datahubConfig.hasPath(DISABLE_SYMLINK_RESOLUTION)) { return false; diff --git a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkLineageConf.java b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkLineageConf.java index 014cff873bbde9..96afe729b82c00 100644 --- a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkLineageConf.java +++ b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/conf/SparkLineageConf.java @@ -17,6 +17,7 @@ public class SparkLineageConf { final DatahubOpenlineageConfig openLineageConf; @Builder.Default final boolean coalesceEnabled = true; @Builder.Default final boolean emitCoalescePeriodically = false; + @Builder.Default final boolean logMcps = true; final SparkAppContext sparkAppContext; final DatahubEmitterConfig datahubEmitterConfig; @Builder.Default final List tags = new LinkedList<>(); @@ -32,6 +33,7 @@ public static SparkLineageConf toSparkLineageConf( SparkConfigParser.sparkConfigToDatahubOpenlineageConf(sparkConfig, sparkAppContext); builder.openLineageConf(datahubOpenlineageConfig); builder.coalesceEnabled(SparkConfigParser.isCoalesceEnabled(sparkConfig)); + builder.logMcps(SparkConfigParser.isLogMcps(sparkConfig)); if 
(SparkConfigParser.getTags(sparkConfig) != null) { builder.tags(Arrays.asList(Objects.requireNonNull(SparkConfigParser.getTags(sparkConfig)))); } diff --git a/metadata-integration/java/acryl-spark-lineage/src/test/java/datahub/spark/OpenLineageEventToDatahubTest.java b/metadata-integration/java/acryl-spark-lineage/src/test/java/datahub/spark/OpenLineageEventToDatahubTest.java index ef2b17e9932f2f..b9a142364d4e89 100644 --- a/metadata-integration/java/acryl-spark-lineage/src/test/java/datahub/spark/OpenLineageEventToDatahubTest.java +++ b/metadata-integration/java/acryl-spark-lineage/src/test/java/datahub/spark/OpenLineageEventToDatahubTest.java @@ -814,4 +814,32 @@ public void testProcessGCSInputsOutputs() throws URISyntaxException, IOException dataset.getUrn().toString()); } } + + public void testProcessMappartitionJob() throws URISyntaxException, IOException { + DatahubOpenlineageConfig.DatahubOpenlineageConfigBuilder builder = + DatahubOpenlineageConfig.builder(); + builder.fabricType(FabricType.DEV); + builder.lowerCaseDatasetUrns(true); + builder.materializeDataset(true); + builder.includeSchemaMetadata(true); + builder.isSpark(true); + + String olEvent = + IOUtils.toString( + this.getClass().getResourceAsStream("/ol_events/map_partition_job.json"), + StandardCharsets.UTF_8); + + OpenLineage.RunEvent runEvent = OpenLineageClientUtils.runEventFromJson(olEvent); + DatahubJob datahubJob = OpenLineageToDataHub.convertRunEventToJob(runEvent, builder.build()); + + assertNotNull(datahubJob); + + assertEquals(1, datahubJob.getInSet().size()); + for (DatahubDataset dataset : datahubJob.getInSet()) { + assertEquals( + "urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket/my_dir/my_file.csv,DEV)", + dataset.getUrn().toString()); + } + assertEquals(0, datahubJob.getOutSet().size()); + } } diff --git a/metadata-integration/java/acryl-spark-lineage/src/test/resources/ol_events/map_partition_job.json 
b/metadata-integration/java/acryl-spark-lineage/src/test/resources/ol_events/map_partition_job.json new file mode 100644 index 00000000000000..39560a782840ce --- /dev/null +++ b/metadata-integration/java/acryl-spark-lineage/src/test/resources/ol_events/map_partition_job.json @@ -0,0 +1,66 @@ +{ + "eventTime": "2024-11-20T12:59:29.059Z", + "producer": "https://github.com/OpenLineage/OpenLineage/tree/1.24.2/integration/spark", + "schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunEvent", + "eventType": "START", + "run": { + "runId": "01902a1e-0b05-750e-b38d-439998f7a853", + "facets": { + "parent": { + "_producer": "https://github.com/OpenLineage/OpenLineage/tree/1.24.2/integration/spark", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ParentRunFacet.json#/$defs/ParentRunFacet", + "run": { + "runId": "01902a1e-0b05-750e-b38d-439998f7a853" + }, + "job": { + "namespace": "default", + "name": "spark_context_session" + } + }, + "processing_engine": { + "_producer": "https://github.com/OpenLineage/OpenLineage/tree/1.24.2/integration/spark", + "_schemaURL": "https://openlineage.io/spec/facets/1-1-1/ProcessingEngineRunFacet.json#/$defs/ProcessingEngineRunFacet", + "version": "3.4.2", + "name": "spark" + }, + "spark_jobDetails": { + "_producer": "https://github.com/OpenLineage/OpenLineage/tree/1.24.2/integration/spark", + "_schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet", + "jobId": 0 + }, + "spark_properties": { + "_producer": "https://github.com/OpenLineage/OpenLineage/tree/1.24.2/integration/spark", + "_schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/RunFacet", + "properties": { + "spark.master": "yarn", + "spark.app.name": "SparkContextSession" + } + } + } + }, + "job": { + "namespace": "default", + "name": "spark_context_session.map_partitions_parallel_collection", + "facets": { + "jobType": { + "_producer": "https://github.com/OpenLineage/OpenLineage/tree/1.24.2/integration/spark", 
+ "_schemaURL": "https://openlineage.io/spec/facets/2-0-3/JobTypeJobFacet.json#/$defs/JobTypeJobFacet", + "processingType": "BATCH", + "integration": "SPARK", + "jobType": "RDD_JOB" + } + } + }, + "inputs": [ + { + "namespace": "s3://my-bucket", + "name": "my_dir/my_file.csv" + } + ], + "outputs": [ + { + "namespace": "s3://my-bucket", + "name": "my_dir/my_file.csv" + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java index 5abb3c90d232bd..c725673eae47b5 100644 --- a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java +++ b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/config/DatahubOpenlineageConfig.java @@ -16,6 +16,7 @@ @Getter @ToString public class DatahubOpenlineageConfig { + @Builder.Default private final boolean isSpark = false; @Builder.Default private final boolean isStreaming = false; @Builder.Default private final String pipelineName = null; private final String platformInstance; @@ -34,6 +35,7 @@ public class DatahubOpenlineageConfig { @Builder.Default private Map urnAliases = new HashMap<>(); @Builder.Default private final boolean disableSymlinkResolution = false; @Builder.Default private final boolean lowerCaseDatasetUrns = false; + @Builder.Default private final boolean removeLegacyLineage = false; public List getPathSpecsForPlatform(String platform) { if ((pathSpecs == null) || (pathSpecs.isEmpty())) { diff --git a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java index 
9237ee60f473b4..9fcfc68bd03f55 100644 --- a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java +++ b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java @@ -675,9 +675,30 @@ private static void convertJobToDataJob( datahubJob.setJobInfo(dji); DataJobInputOutput inputOutput = new DataJobInputOutput(); + boolean inputsEqualOutputs = false; + if ((datahubConf.isSpark()) + && ((event.getInputs() != null && event.getOutputs() != null) + && (event.getInputs().size() == event.getOutputs().size()))) { + inputsEqualOutputs = + event.getInputs().stream() + .map(OpenLineage.Dataset::getName) + .collect(Collectors.toSet()) + .equals( + event.getOutputs().stream() + .map(OpenLineage.Dataset::getName) + .collect(Collectors.toSet())); + if (inputsEqualOutputs) { + log.info( + "Inputs equals Outputs: {}. This is most probably because of an rdd map operation and we only process Inputs", + inputsEqualOutputs); + } + } + processJobInputs(datahubJob, event, datahubConf); - processJobOutputs(datahubJob, event, datahubConf); + if (!inputsEqualOutputs) { + processJobOutputs(datahubJob, event, datahubConf); + } DataProcessInstanceRunEvent dpire = processDataProcessInstanceResult(event); datahubJob.setDataProcessInstanceRunEvent(dpire); diff --git a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java index 60caaae359677f..e2aa2c3a04c406 100644 --- a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java +++ b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java @@ -28,7 +28,10 @@ import 
com.linkedin.dataprocess.DataProcessInstanceRelationships; import com.linkedin.dataprocess.DataProcessInstanceRunEvent; import com.linkedin.dataset.FineGrainedLineage; +import com.linkedin.dataset.FineGrainedLineageArray; import com.linkedin.dataset.Upstream; +import com.linkedin.dataset.UpstreamArray; +import com.linkedin.dataset.UpstreamLineage; import com.linkedin.domain.Domains; import com.linkedin.metadata.aspect.patch.builder.DataJobInputOutputPatchBuilder; import com.linkedin.metadata.aspect.patch.builder.GlobalTagsPatchBuilder; @@ -167,11 +170,34 @@ public List toMcps(DatahubOpenlineageConfig config) thro return mcps; } + private FineGrainedLineageArray mergeFinegrainedLineages() { + FineGrainedLineageArray fgls = new FineGrainedLineageArray(); + + for (DatahubDataset dataset : inSet) { + if (dataset.lineage != null && dataset.lineage.getFineGrainedLineages() != null) { + dataset.lineage.getFineGrainedLineages().stream() + .filter(Objects::nonNull) + .forEach(fgls::add); + } + } + + for (DatahubDataset dataset : outSet) { + if (dataset.lineage != null && dataset.lineage.getFineGrainedLineages() != null) { + dataset.lineage.getFineGrainedLineages().stream() + .filter(Objects::nonNull) + .forEach(fgls::add); + } + } + + return fgls; + } + private void generateDataJobInputOutputMcp( EdgeArray inputEdges, EdgeArray outputEdges, DatahubOpenlineageConfig config, List mcps) { + DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(); log.info("Adding DataJob edges to {}", jobUrn); if (config.isUsePatch() && (!parentJobs.isEmpty() || !inSet.isEmpty() || !outSet.isEmpty())) { @@ -186,6 +212,27 @@ private void generateDataJobInputOutputMcp( for (DataJobUrn parentJob : parentJobs) { dataJobInputOutputPatchBuilder.addInputDatajobEdge(parentJob); } + + FineGrainedLineageArray fgls = mergeFinegrainedLineages(); + fgls.forEach( + fgl -> { + Objects.requireNonNull(fgl.getUpstreams()) + .forEach( + upstream -> { + Objects.requireNonNull(fgl.getDownstreams()) + 
.forEach( + downstream -> { + dataJobInputOutputPatchBuilder.addFineGrainedUpstreamField( + upstream, + fgl.getConfidenceScore(), + StringUtils.defaultIfEmpty( + fgl.getTransformOperation(), "TRANSFORM"), + downstream, + fgl.getQuery()); + }); + }); + }); + MetadataChangeProposal dataJobInputOutputMcp = dataJobInputOutputPatchBuilder.build(); log.info( "dataJobInputOutputMcp: {}", @@ -195,6 +242,8 @@ private void generateDataJobInputOutputMcp( mcps.add(dataJobInputOutputPatchBuilder.build()); } else { + FineGrainedLineageArray fgls = mergeFinegrainedLineages(); + dataJobInputOutput.setFineGrainedLineages(fgls); dataJobInputOutput.setInputDatasetEdges(inputEdges); dataJobInputOutput.setInputDatasets(new DatasetUrnArray()); dataJobInputOutput.setOutputDatasetEdges(outputEdges); @@ -235,6 +284,49 @@ private void generateDataProcessInstanceMcp( generateDataProcessInstanceRelationship(mcps); } + private void deleteOldDatasetLineage( + DatahubDataset dataset, DatahubOpenlineageConfig config, List mcps) { + if (dataset.getLineage() != null) { + if (config.isUsePatch()) { + if (!dataset.getLineage().getUpstreams().isEmpty()) { + UpstreamLineagePatchBuilder upstreamLineagePatchBuilder = + new UpstreamLineagePatchBuilder().urn(dataset.getUrn()); + for (Upstream upstream : dataset.getLineage().getUpstreams()) { + upstreamLineagePatchBuilder.removeUpstream(upstream.getDataset()); + } + + log.info("Removing FineGrainedLineage to {}", dataset.getUrn()); + for (FineGrainedLineage fineGrainedLineage : + Objects.requireNonNull(dataset.getLineage().getFineGrainedLineages())) { + for (Urn upstream : Objects.requireNonNull(fineGrainedLineage.getUpstreams())) { + for (Urn downstream : Objects.requireNonNull(fineGrainedLineage.getDownstreams())) { + upstreamLineagePatchBuilder.removeFineGrainedUpstreamField( + upstream, + StringUtils.defaultIfEmpty( + fineGrainedLineage.getTransformOperation(), "TRANSFORM"), + downstream, + null); + } + } + } + MetadataChangeProposal mcp = 
upstreamLineagePatchBuilder.build(); + log.info( + "upstreamLineagePatch: {}", + mcp.getAspect().getValue().asString(Charset.defaultCharset())); + mcps.add(mcp); + } + } else { + if (!dataset.getLineage().getUpstreams().isEmpty()) { + // Remove earlier created UpstreamLineage which most probably was created by the plugin. + UpstreamLineage upstreamLineage = new UpstreamLineage(); + upstreamLineage.setUpstreams(new UpstreamArray()); + upstreamLineage.setFineGrainedLineages(new FineGrainedLineageArray()); + addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, upstreamLineage, mcps); + } + } + } + } + private Pair processDownstreams( DatahubOpenlineageConfig config, List mcps) { UrnArray outputUrnArray = new UrnArray(); @@ -263,43 +355,13 @@ private Pair processDownstreams( dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getSchemaMetadata(), mcps); } - if (dataset.getLineage() != null) { - if (config.isUsePatch()) { - if (!dataset.getLineage().getUpstreams().isEmpty()) { - UpstreamLineagePatchBuilder upstreamLineagePatchBuilder = - new UpstreamLineagePatchBuilder().urn(dataset.getUrn()); - for (Upstream upstream : dataset.getLineage().getUpstreams()) { - upstreamLineagePatchBuilder.addUpstream( - upstream.getDataset(), upstream.getType()); - } - - log.info("Adding FineGrainedLineage to {}", dataset.getUrn()); - for (FineGrainedLineage fineGrainedLineage : - Objects.requireNonNull(dataset.getLineage().getFineGrainedLineages())) { - for (Urn upstream : Objects.requireNonNull(fineGrainedLineage.getUpstreams())) { - for (Urn downstream : - Objects.requireNonNull(fineGrainedLineage.getDownstreams())) { - upstreamLineagePatchBuilder.addFineGrainedUpstreamField( - upstream, - fineGrainedLineage.getConfidenceScore(), - StringUtils.defaultIfEmpty( - fineGrainedLineage.getTransformOperation(), "TRANSFORM"), - downstream, - null); - } - } - } - MetadataChangeProposal mcp = upstreamLineagePatchBuilder.build(); - log.info( - "upstreamLineagePatch: {}", - 
mcp.getAspect().getValue().asString(Charset.defaultCharset())); - mcps.add(mcp); - } - } else { - addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getLineage(), mcps); - } + // Remove lineage which was added by older plugin that set lineage on Datasets and not on + // DataJobs + if (config.isRemoveLegacyLineage()) { + deleteOldDatasetLineage(dataset, config, mcps); } }); + return Pair.of(outputUrnArray, outputEdges); } @@ -330,10 +392,6 @@ private Pair processUpstreams( addAspectToMcps( dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getSchemaMetadata(), mcps); } - - if (dataset.getLineage() != null) { - addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getLineage(), mcps); - } }); return Pair.of(inputUrnArray, inputEdges); } From fb471f112e93c90ec1c02b2bd52976ea5d68aa06 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 6 Jan 2025 13:32:46 -0500 Subject: [PATCH 027/249] docs(tableau): clarify docs around tableau permissions (#12270) --- .../docs/sources/tableau/tableau_pre.md | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/docs/sources/tableau/tableau_pre.md b/metadata-ingestion/docs/sources/tableau/tableau_pre.md index aeb67f85b241b9..65ff08367fdc8f 100644 --- a/metadata-ingestion/docs/sources/tableau/tableau_pre.md +++ b/metadata-ingestion/docs/sources/tableau/tableau_pre.md @@ -3,9 +3,24 @@ In order to ingest metadata from Tableau, you will need: - Tableau Server Version 2021.1.10 and above. It may also work for older versions. -- [Enable the Tableau Metadata API](https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html#enable-the-tableau-metadata-api-for-tableau-server) for Tableau Server, if its not already enabled. 
-- Tableau Credentials (Username/Password or [Personal Access Token](https://help.tableau.com/current/pro/desktop/en-us/useracct.htm#create-and-revoke-personal-access-tokens)) -- The user or token must have **Site Administrator Explorer** permissions. +- [Enable the Tableau Metadata API](https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html#enable-the-tableau-metadata-api-for-tableau-server) for Tableau Server, if its not already enabled. This is always enabled for Tableau Cloud. + +### Authentication + +DataHub supports two authentication methods: + +1. Username/Password +2. [Personal Access Token](https://help.tableau.com/current/pro/desktop/en-us/useracct.htm#create-and-revoke-personal-access-tokens) + +Either way, the user/token must have the **Site Administrator Explorer** site role. + +:::info + +We need the `Site Administrator Explorer` site role in order to get complete metadata from Tableau. + +With any lower role, the Tableau Metadata API returns missing/partial metadata. This particularly affects data source fields and definitions, which impacts our ability to extract columns and generate column lineage. As such, other site roles like `Viewer` are insufficient with the current Tableau Metadata API. 
+ +::: ### Ingestion through UI @@ -46,8 +61,8 @@ This ingestion source maps the following Source System Concepts to DataHub Conce | Source Concept | DataHub Concept | Notes | | --------------------------- | ------------------------------------------------------------- | --------------------------------- | -| `"Tableau"` | [Data Platform](../../metamodel/entities/dataPlatform.md) | -| Project | [Container](../../metamodel/entities/container.md) | SubType `"Project"` | +| `"Tableau"` | [Data Platform](../../metamodel/entities/dataPlatform.md) | +| Project | [Container](../../metamodel/entities/container.md) | SubType `"Project"` | | Embedded DataSource | [Dataset](../../metamodel/entities/dataset.md) | SubType `"Embedded Data Source"` | | Published DataSource | [Dataset](../../metamodel/entities/dataset.md) | SubType `"Published Data Source"` | | Custom SQL Table | [Dataset](../../metamodel/entities/dataset.md) | SubTypes `"View"`, `"Custom SQL"` | @@ -75,14 +90,15 @@ Lineage is emitted as received from Tableau's metadata API for ### Troubleshooting -### Why are only some workbooks/custom SQLs/published datasources ingested from the specified project? +#### Why are only some workbooks/custom SQLs/published datasources ingested from the specified project? This may happen when the Tableau API returns NODE_LIMIT_EXCEEDED error in response to metadata query and returns partial results with message "Showing partial results. , The request exceeded the ‘n’ node limit. Use pagination, additional filtering, or both in the query to adjust results." To resolve this, consider - reducing the page size using the `page_size` config param in datahub recipe (Defaults to 10). - increasing tableau configuration [metadata query node limit](https://help.tableau.com/current/server/en-us/cli_configuration-set_tsm.htm#metadata_nodelimit) to higher value. 
-### `PERMISSIONS_MODE_SWITCHED` error in ingestion report +#### `PERMISSIONS_MODE_SWITCHED` error in ingestion report + This error occurs if the Tableau site is using external assets. For more detail, refer to the Tableau documentation [Manage Permissions for External Assets](https://help.tableau.com/current/online/en-us/dm_perms_assets.htm). Follow the below steps to enable the derived permissions: From ba8bf538ca3308143058264d1f0fbaa6978f36fe Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 6 Jan 2025 13:52:35 -0500 Subject: [PATCH 028/249] feat(ingest): enable `EnsureAspectSizeProcessor` for all sources (#12262) --- .../src/datahub/emitter/rest_emitter.py | 23 ++++++++++++------- .../auto_ensure_aspect_size.py | 8 ++++--- .../src/datahub/ingestion/api/source.py | 4 ++++ .../datahub/ingestion/source/unity/source.py | 4 ---- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index 04242c8bf45d2b..7c67349c74db10 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -13,6 +13,7 @@ from datahub import nice_version_name from datahub.cli import config_utils from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url +from datahub.cli.env_utils import get_boolean_env_variable from datahub.configuration.common import ConfigurationError, OperationalError from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -46,6 +47,8 @@ os.getenv("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "4") ) +_DATAHUB_EMITTER_TRACE = get_boolean_env_variable("DATAHUB_EMITTER_TRACE", False) + # The limit is 16mb. We will use a max of 15mb to have some space # for overhead like request headers. # This applies to pretty much all calls to GMS. 
@@ -291,7 +294,8 @@ def emit_mcps( mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]], async_flag: Optional[bool] = None, ) -> int: - logger.debug("Attempting to emit batch mcps") + if _DATAHUB_EMITTER_TRACE: + logger.debug(f"Attempting to emit MCP batch of size {len(mcps)}") url = f"{self._gms_server}/aspects?action=ingestProposalBatch" for mcp in mcps: ensure_has_system_metadata(mcp) @@ -304,22 +308,25 @@ def emit_mcps( current_chunk_size = INGEST_MAX_PAYLOAD_BYTES for mcp_obj in mcp_objs: mcp_obj_size = len(json.dumps(mcp_obj)) - logger.debug( - f"Iterating through object with size {mcp_obj_size} (type: {mcp_obj.get('aspectName')}" - ) + if _DATAHUB_EMITTER_TRACE: + logger.debug( + f"Iterating through object with size {mcp_obj_size} (type: {mcp_obj.get('aspectName')}" + ) if ( mcp_obj_size + current_chunk_size > INGEST_MAX_PAYLOAD_BYTES or len(mcp_obj_chunks[-1]) >= BATCH_INGEST_MAX_PAYLOAD_LENGTH ): - logger.debug("Decided to create new chunk") + if _DATAHUB_EMITTER_TRACE: + logger.debug("Decided to create new chunk") mcp_obj_chunks.append([]) current_chunk_size = 0 mcp_obj_chunks[-1].append(mcp_obj) current_chunk_size += mcp_obj_size - logger.debug( - f"Decided to send {len(mcps)} mcps in {len(mcp_obj_chunks)} chunks" - ) + if len(mcp_obj_chunks) > 0: + logger.debug( + f"Decided to send {len(mcps)} MCP batch in {len(mcp_obj_chunks)} chunks" + ) for mcp_obj_chunk in mcp_obj_chunks: # TODO: We're calling json.dumps on each MCP object twice, once to estimate diff --git a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py index 559f0b77f59dfa..b63c96b617ff06 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +++ b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py @@ -1,10 +1,9 @@ import json import logging -from typing import 
Iterable, List +from typing import TYPE_CHECKING, Iterable, List from datahub.emitter.rest_emitter import INGEST_MAX_PAYLOAD_BYTES from datahub.emitter.serialization_helper import pre_json_transform -from datahub.ingestion.api.source import SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.schema_classes import ( DatasetProfileClass, @@ -12,12 +11,15 @@ SchemaMetadataClass, ) +if TYPE_CHECKING: + from datahub.ingestion.api.source import SourceReport + logger = logging.getLogger(__name__) class EnsureAspectSizeProcessor: def __init__( - self, report: SourceReport, payload_constraint: int = INGEST_MAX_PAYLOAD_BYTES + self, report: "SourceReport", payload_constraint: int = INGEST_MAX_PAYLOAD_BYTES ): self.report = report self.payload_constraint = payload_constraint diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index c3638635b19aac..75dc980e234ac8 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -31,6 +31,9 @@ from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import ( auto_patch_last_modified, ) +from datahub.ingestion.api.auto_work_units.auto_ensure_aspect_size import ( + EnsureAspectSizeProcessor, +) from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit from datahub.ingestion.api.report import Report @@ -450,6 +453,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: browse_path_processor, partial(auto_workunit_reporter, self.get_report()), auto_patch_last_modified, + EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size, ] @staticmethod diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 7bfa7fdb28aaf8..9d9a746580f939 
100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -26,9 +26,6 @@ gen_containers, ) from datahub.emitter.sql_parsing_builder import SqlParsingBuilder -from datahub.ingestion.api.auto_work_units.auto_ensure_aspect_size import ( - EnsureAspectSizeProcessor, -) from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -263,7 +260,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, - EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size, ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: From 30a77c022a6cc65e4d487301349389f5361efa1b Mon Sep 17 00:00:00 2001 From: skrydal Date: Mon, 6 Jan 2025 22:12:56 +0100 Subject: [PATCH 029/249] fix(ingestion/classifier): temporary measure to avoid deadlocks for classifier (#12261) --- metadata-ingestion/docs/dev_guides/classification.md | 4 ++-- .../src/datahub/ingestion/glossary/classifier.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/docs/dev_guides/classification.md b/metadata-ingestion/docs/dev_guides/classification.md index 39eac229a66013..457725b6783e52 100644 --- a/metadata-ingestion/docs/dev_guides/classification.md +++ b/metadata-ingestion/docs/dev_guides/classification.md @@ -7,10 +7,10 @@ The classification feature enables sources to be configured to automatically pre Note that a `.` is used to denote nested fields in the YAML recipe. 
| Field | Required | Type | Description | Default | -| ------------------------- | -------- | --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- | +| ------------------------- | -------- | --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |------------------------------------------------------------| | enabled | | boolean | Whether classification should be used to auto-detect glossary terms | False | | sample_size | | int | Number of sample values used for classification. | 100 | -| max_workers | | int | Number of worker processes to use for classification. Set to 1 to disable. | Number of cpu cores or 4 | +| max_workers | | int | Number of worker processes to use for classification. Note that any number above 1 might lead to a deadlock. Set to 1 to disable. | 1 | | info_type_to_term | | Dict[str,string] | Optional mapping to provide glossary term identifier for info type. | By default, info type is used as glossary term identifier. | | classifiers | | Array of object | Classifiers to use to auto-detect glossary terms. If more than one classifier, infotype predictions from the classifier defined later in sequence take precedance. 
| [{'type': 'datahub', 'config': None}] | | table_pattern | | AllowDenyPattern (see below for fields) | Regex patterns to filter tables for classification. This is used in combination with other patterns in parent config. Specify regex to match the entire table name in `database.schema.table` format. e.g. to match all tables starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*' | {'allow': ['.*'], 'deny': [], 'ignoreCase': True} | diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/classifier.py index ddcb74e354613a..bdcdcb8990eba7 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/classifier.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/classifier.py @@ -1,4 +1,3 @@ -import os from abc import ABCMeta, abstractmethod from dataclasses import dataclass from typing import Any, Dict, List, Optional @@ -38,8 +37,8 @@ class ClassificationConfig(ConfigModel): ) max_workers: int = Field( - default=(os.cpu_count() or 4), - description="Number of worker processes to use for classification. Set to 1 to disable.", + default=1, + description="Number of worker processes to use for classification. Note that any number above 1 might lead to a deadlock. 
Set to 1 to disable.", ) table_pattern: AllowDenyPattern = Field( From a06a229499bfd88eb800ae2808abc97e3b42a360 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 6 Jan 2025 18:29:51 -0500 Subject: [PATCH 030/249] feat(ingest/datahub): use stream_results with mysql (#12278) --- metadata-ingestion/setup.py | 2 +- .../ingestion/source/datahub/config.py | 10 ++++++++++ .../source/datahub/datahub_database_reader.py | 20 +++---------------- .../source/datahub/datahub_source.py | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 5a48f8b7918dce..d5dbb98d3cb17b 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -461,7 +461,7 @@ "mssql-odbc": sql_common | mssql_common | {"pyodbc"}, "mysql": mysql, # mariadb should have same dependency as mysql - "mariadb": sql_common | {"pymysql>=1.0.2"}, + "mariadb": sql_common | mysql, "okta": {"okta~=1.7.0", "nest-asyncio"}, "oracle": sql_common | {"oracledb"}, "postgres": sql_common | postgres_common, diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index cd3c2146e6d848..09f38913f11b19 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -1,6 +1,7 @@ import os from typing import Optional, Set +import pydantic from pydantic import Field, root_validator from datahub.configuration.common import AllowDenyPattern @@ -119,3 +120,12 @@ def check_ingesting_data(cls, values): " Please specify at least one of `database_connection` or `kafka_connection`, ideally both." 
) return values + + @pydantic.validator("database_connection") + def validate_mysql_scheme( + cls, v: SQLAlchemyConnectionConfig + ) -> SQLAlchemyConnectionConfig: + if "mysql" in v.scheme: + if v.scheme != "mysql+pymysql": + raise ValueError("For MySQL, the scheme must be mysql+pymysql.") + return v diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py index 80906ca63115f5..ee105f4862caba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py @@ -151,8 +151,10 @@ def execute_server_cursor( self, query: str, params: Dict[str, Any] ) -> Iterable[Dict[str, Any]]: with self.engine.connect() as conn: - if self.engine.dialect.name == "postgresql": + if self.engine.dialect.name in ["postgresql", "mysql", "mariadb"]: with conn.begin(): # Transaction required for PostgreSQL server-side cursor + # Note that stream_results=True is mainly supported by PostgreSQL and MySQL-based dialects. 
+ # https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.execution_options.params.stream_results conn = conn.execution_options( stream_results=True, yield_per=self.config.database_query_batch_size, @@ -160,22 +162,6 @@ def execute_server_cursor( result = conn.execute(query, params) for row in result: yield dict(row) - elif self.engine.dialect.name == "mysql": # MySQL - import MySQLdb - - with contextlib.closing( - conn.connection.cursor(MySQLdb.cursors.SSCursor) - ) as cursor: - logger.debug(f"Using Cursor type: {cursor.__class__.__name__}") - cursor.execute(query, params) - - columns = [desc[0] for desc in cursor.description] - while True: - rows = cursor.fetchmany(self.config.database_query_batch_size) - if not rows: - break # Use break instead of return in generator - for row in rows: - yield dict(zip(columns, row)) else: raise ValueError(f"Unsupported dialect: {self.engine.dialect.name}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py index cb72441344088c..12daba298a2014 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py @@ -130,7 +130,7 @@ def _get_database_workunits( self._commit_progress(i) def _get_kafka_workunits( - self, from_offsets: Dict[int, int], soft_deleted_urns: List[str] = [] + self, from_offsets: Dict[int, int], soft_deleted_urns: List[str] ) -> Iterable[MetadataWorkUnit]: if self.config.kafka_connection is None: return From 124e2006e11e3f94f131187c848877d65bb810b2 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 7 Jan 2025 16:42:08 +0530 Subject: [PATCH 031/249] ci: fix shellcheck warnings, update actions (#12281) --- .github/workflows/build-and-test.yml | 4 ++-- .github/workflows/close-stale-issues.yml | 2 +- .github/workflows/contributor-open-pr-comment.yml | 6 +++--- 
.github/workflows/docker-unified.yml | 6 +++--- .github/workflows/metadata-io.yml | 4 ++-- .github/workflows/spark-smoke-test.yml | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 784dce0f11b2b5..0cca80c8fdf982 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -113,7 +113,7 @@ jobs: if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }} run: | ./gradlew -PjavaClassVersionDefault=8 :metadata-integration:java:spark-lineage:compileJava - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (build) @@ -152,7 +152,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/close-stale-issues.yml b/.github/workflows/close-stale-issues.yml index 98e3041f288040..005f41b767ea6d 100644 --- a/.github/workflows/close-stale-issues.yml +++ b/.github/workflows/close-stale-issues.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v6 + - uses: actions/stale@v9 with: ascending: true operations-per-run: 100 diff --git a/.github/workflows/contributor-open-pr-comment.yml b/.github/workflows/contributor-open-pr-comment.yml index decc7ab27a411d..fe60601b0159bd 100644 --- a/.github/workflows/contributor-open-pr-comment.yml +++ b/.github/workflows/contributor-open-pr-comment.yml @@ -17,12 +17,12 @@ jobs: - name: Get and Format Username (PR only) if: github.event_name == 'pull_request' run: | - formatted_username=$(echo "${{ github.event.pull_request.user.login }}" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g') - echo "FORMATTED_USERNAME=$formatted_username" >> $GITHUB_ENV + formatted_username="$(echo "${{ github.event.pull_request.user.login }}" | 
tr '[:upper:]' '[:lower:]' | sed 's/ /-/g')" + echo "FORMATTED_USERNAME=${formatted_username}" >> "$GITHUB_ENV" - name: Create Comment (PR only) if: github.event_name == 'pull_request' - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: script: | if (context.payload.pull_request) { diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index a5200c7e917d81..e44e6b11c6d057 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -1253,19 +1253,19 @@ jobs: TEST_STRATEGY="-${{ matrix.test_strategy }}-${{ matrix.batch }}" source .github/scripts/docker_logs.sh - name: Upload logs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: docker-logs-${{ matrix.test_strategy }}-${{ matrix.batch }} path: "docker_logs/*.log" retention-days: 5 - name: Upload screenshots - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: cypress-snapshots-${{ matrix.test_strategy }}-${{ matrix.batch }} path: smoke-test/tests/cypress/cypress/screenshots/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (smoke tests) ${{ matrix.test_strategy }} ${{ matrix.batch }} diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 2225baecde64c6..aedcd9257d83ba 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -70,7 +70,7 @@ jobs: - name: Gradle build (and test) run: | ./gradlew :metadata-io:test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (metadata-io) @@ -95,7 +95,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml 
index 23413336404f2b..e6a6705a72879c 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -72,14 +72,14 @@ jobs: docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true - name: Upload logs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: docker logs path: | "**/build/container-logs/*.log" "*.log" - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (smoke tests) From f940c70c73acedc625257eb2a1a4aa5164738c02 Mon Sep 17 00:00:00 2001 From: skrydal Date: Tue, 7 Jan 2025 14:34:09 +0100 Subject: [PATCH 032/249] docs(business attribute): clarify support (#12260) --- docs/businessattributes.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/businessattributes.md b/docs/businessattributes.md index 3e912e7e609805..2359c2ac85b585 100644 --- a/docs/businessattributes.md +++ b/docs/businessattributes.md @@ -1,5 +1,10 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + # Business Attributes + + +>**Note:** This is BETA feature ## What are Business Attributes A Business Attribute, as its name implies, is an attribute with a business focus. It embodies the traits or properties of an entity within a business framework. This attribute is a crucial piece of data for a business, utilised to define or control the entity throughout the organisation. If a business process or concept is depicted as a comprehensive logical model, then each Business Attribute can be considered as an individual component within that model. 
While business names and descriptions are generally managed through glossary terms, Business Attributes encompass additional characteristics such as data quality rules/assertions, data privacy markers, data usage protocols, standard tags, and supplementary documentation, alongside Names and Descriptions. @@ -70,9 +75,11 @@ Description inherited from business attribute is greyed out to differentiate bet

### Enable Business Attributes Feature -By default, business attribute is disabled. To enable Business Attributes feature, set the following configuration in [application.yaml](../metadata-service/configuration/src/main/resources/application.yaml) - -businessAttributeEntityEnabled : true +By default, business attribute is disabled. To enable Business Attributes feature, export environmental variable +(may be done via `extraEnvs` for GMS deployment): +```shell +BUSINESS_ATTRIBUTE_ENTITY_ENABLED=true +``` ### What updates are planned for the Business Attributes feature? From 03e3f46175df71b83f2c3adcffd97a9962747698 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 7 Jan 2025 10:46:35 -0500 Subject: [PATCH 033/249] fix(airflow): fix tests with Airflow 2.4 (#12279) --- metadata-ingestion-modules/airflow-plugin/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 2693aab0700da3..d07063dbffc5c4 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -119,6 +119,7 @@ def get_long_description(): "pendulum<3.0", "Flask-Session<0.6.0", "connexion<3.0", + "marshmallow<3.24.0", }, } From afa94a588754c28c8e11f5aa0963808ba5ee6599 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 7 Jan 2025 17:00:13 -0500 Subject: [PATCH 034/249] fix(ingest): better correctness on the emitter -> graph conversion (#12272) --- .../src/datahub/cli/cli_utils.py | 11 +- .../src/datahub/emitter/rest_emitter.py | 209 +++++++++++------- .../src/datahub/ingestion/graph/client.py | 25 ++- .../src/datahub/ingestion/graph/config.py | 2 +- .../tests/unit/sdk/test_rest_emitter.py | 32 +-- 5 files changed, 167 insertions(+), 112 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index f80181192ba583..ca4a11b41925e5 100644 --- 
a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -3,7 +3,7 @@ import time import typing from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union import click import requests @@ -33,6 +33,15 @@ def first_non_null(ls: List[Optional[str]]) -> Optional[str]: return next((el for el in ls if el is not None and el.strip() != ""), None) +_T = TypeVar("_T") + + +def get_or_else(value: Optional[_T], default: _T) -> _T: + # Normally we'd use `value or default`. However, that runs into issues + # when value is falsey but not None. + return value if value is not None else default + + def parse_run_restli_response(response: requests.Response) -> dict: response_json = response.json() if response.status_code != 200: diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index 7c67349c74db10..74b8ade7da445b 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -1,9 +1,21 @@ +from __future__ import annotations + import functools import json import logging import os from json.decoder import JSONDecodeError -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Optional, + Sequence, + Tuple, + Union, +) import requests from deprecated import deprecated @@ -12,9 +24,13 @@ from datahub import nice_version_name from datahub.cli import config_utils -from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url +from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url, get_or_else from datahub.cli.env_utils import get_boolean_env_variable -from datahub.configuration.common import ConfigurationError, OperationalError +from 
datahub.configuration.common import ( + ConfigModel, + ConfigurationError, + OperationalError, +) from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.request_helper import make_curl_command @@ -31,10 +47,8 @@ logger = logging.getLogger(__name__) -_DEFAULT_CONNECT_TIMEOUT_SEC = 30 # 30 seconds should be plenty to connect -_DEFAULT_READ_TIMEOUT_SEC = ( - 30 # Any ingest call taking longer than 30 seconds should be abandoned -) +_DEFAULT_TIMEOUT_SEC = 30 # 30 seconds should be plenty to connect +_TIMEOUT_LOWER_BOUND_SEC = 1 # if below this, we log a warning _DEFAULT_RETRY_STATUS_CODES = [ # Additional status codes to retry on 429, 500, @@ -63,15 +77,76 @@ ) +class RequestsSessionConfig(ConfigModel): + timeout: Union[float, Tuple[float, float], None] = _DEFAULT_TIMEOUT_SEC + + retry_status_codes: List[int] = _DEFAULT_RETRY_STATUS_CODES + retry_methods: List[str] = _DEFAULT_RETRY_METHODS + retry_max_times: int = _DEFAULT_RETRY_MAX_TIMES + + extra_headers: Dict[str, str] = {} + + ca_certificate_path: Optional[str] = None + client_certificate_path: Optional[str] = None + disable_ssl_verification: bool = False + + def build_session(self) -> requests.Session: + session = requests.Session() + + if self.extra_headers: + session.headers.update(self.extra_headers) + + if self.client_certificate_path: + session.cert = self.client_certificate_path + + if self.ca_certificate_path: + session.verify = self.ca_certificate_path + + if self.disable_ssl_verification: + session.verify = False + + try: + # Set raise_on_status to False to propagate errors: + # https://stackoverflow.com/questions/70189330/determine-status-code-from-python-retry-exception + # Must call `raise_for_status` after making a request, which we do + retry_strategy = Retry( + total=self.retry_max_times, + status_forcelist=self.retry_status_codes, + backoff_factor=2, + allowed_methods=self.retry_methods, + raise_on_status=False, + ) + 
except TypeError: + # Prior to urllib3 1.26, the Retry class used `method_whitelist` instead of `allowed_methods`. + retry_strategy = Retry( + total=self.retry_max_times, + status_forcelist=self.retry_status_codes, + backoff_factor=2, + method_whitelist=self.retry_methods, + raise_on_status=False, + ) + + adapter = HTTPAdapter( + pool_connections=100, pool_maxsize=100, max_retries=retry_strategy + ) + session.mount("http://", adapter) + session.mount("https://", adapter) + + if self.timeout is not None: + # Shim session.request to apply default timeout values. + # Via https://stackoverflow.com/a/59317604. + session.request = functools.partial( # type: ignore + session.request, + timeout=self.timeout, + ) + + return session + + class DataHubRestEmitter(Closeable, Emitter): _gms_server: str _token: Optional[str] _session: requests.Session - _connect_timeout_sec: float = _DEFAULT_CONNECT_TIMEOUT_SEC - _read_timeout_sec: float = _DEFAULT_READ_TIMEOUT_SEC - _retry_status_codes: List[int] = _DEFAULT_RETRY_STATUS_CODES - _retry_methods: List[str] = _DEFAULT_RETRY_METHODS - _retry_max_times: int = _DEFAULT_RETRY_MAX_TIMES def __init__( self, @@ -102,15 +177,13 @@ def __init__( self._session = requests.Session() - self._session.headers.update( - { - "X-RestLi-Protocol-Version": "2.0.0", - "X-DataHub-Py-Cli-Version": nice_version_name(), - "Content-Type": "application/json", - } - ) + headers = { + "X-RestLi-Protocol-Version": "2.0.0", + "X-DataHub-Py-Cli-Version": nice_version_name(), + "Content-Type": "application/json", + } if token: - self._session.headers.update({"Authorization": f"Bearer {token}"}) + headers["Authorization"] = f"Bearer {token}" else: # HACK: When no token is provided but system auth env variables are set, we use them. # Ideally this should simply get passed in as config, instead of being sneakily injected @@ -119,75 +192,43 @@ def __init__( # rest emitter, and the rest sink uses the rest emitter under the hood. 
system_auth = config_utils.get_system_auth() if system_auth is not None: - self._session.headers.update({"Authorization": system_auth}) - - if extra_headers: - self._session.headers.update(extra_headers) - - if client_certificate_path: - self._session.cert = client_certificate_path - - if ca_certificate_path: - self._session.verify = ca_certificate_path - - if disable_ssl_verification: - self._session.verify = False - - self._connect_timeout_sec = ( - connect_timeout_sec or timeout_sec or _DEFAULT_CONNECT_TIMEOUT_SEC - ) - self._read_timeout_sec = ( - read_timeout_sec or timeout_sec or _DEFAULT_READ_TIMEOUT_SEC - ) - - if self._connect_timeout_sec < 1 or self._read_timeout_sec < 1: - logger.warning( - f"Setting timeout values lower than 1 second is not recommended. Your configuration is connect_timeout:{self._connect_timeout_sec}s, read_timeout:{self._read_timeout_sec}s" - ) - - if retry_status_codes is not None: # Only if missing. Empty list is allowed - self._retry_status_codes = retry_status_codes - - if retry_methods is not None: - self._retry_methods = retry_methods - - if retry_max_times: - self._retry_max_times = retry_max_times + headers["Authorization"] = system_auth - try: - # Set raise_on_status to False to propagate errors: - # https://stackoverflow.com/questions/70189330/determine-status-code-from-python-retry-exception - # Must call `raise_for_status` after making a request, which we do - retry_strategy = Retry( - total=self._retry_max_times, - status_forcelist=self._retry_status_codes, - backoff_factor=2, - allowed_methods=self._retry_methods, - raise_on_status=False, - ) - except TypeError: - # Prior to urllib3 1.26, the Retry class used `method_whitelist` instead of `allowed_methods`. 
- retry_strategy = Retry( - total=self._retry_max_times, - status_forcelist=self._retry_status_codes, - backoff_factor=2, - method_whitelist=self._retry_methods, - raise_on_status=False, + timeout: float | tuple[float, float] + if connect_timeout_sec is not None or read_timeout_sec is not None: + timeout = ( + connect_timeout_sec or timeout_sec or _DEFAULT_TIMEOUT_SEC, + read_timeout_sec or timeout_sec or _DEFAULT_TIMEOUT_SEC, ) + if ( + timeout[0] < _TIMEOUT_LOWER_BOUND_SEC + or timeout[1] < _TIMEOUT_LOWER_BOUND_SEC + ): + logger.warning( + f"Setting timeout values lower than {_TIMEOUT_LOWER_BOUND_SEC} second is not recommended. Your configuration is (connect_timeout, read_timeout) = {timeout} seconds" + ) + else: + timeout = get_or_else(timeout_sec, _DEFAULT_TIMEOUT_SEC) + if timeout < _TIMEOUT_LOWER_BOUND_SEC: + logger.warning( + f"Setting timeout values lower than {_TIMEOUT_LOWER_BOUND_SEC} second is not recommended. Your configuration is timeout = {timeout} seconds" + ) - adapter = HTTPAdapter( - pool_connections=100, pool_maxsize=100, max_retries=retry_strategy - ) - self._session.mount("http://", adapter) - self._session.mount("https://", adapter) - - # Shim session.request to apply default timeout values. - # Via https://stackoverflow.com/a/59317604. 
- self._session.request = functools.partial( # type: ignore - self._session.request, - timeout=(self._connect_timeout_sec, self._read_timeout_sec), + self._session_config = RequestsSessionConfig( + timeout=timeout, + retry_status_codes=get_or_else( + retry_status_codes, _DEFAULT_RETRY_STATUS_CODES + ), + retry_methods=get_or_else(retry_methods, _DEFAULT_RETRY_METHODS), + retry_max_times=get_or_else(retry_max_times, _DEFAULT_RETRY_MAX_TIMES), + extra_headers={**headers, **(extra_headers or {})}, + ca_certificate_path=ca_certificate_path, + client_certificate_path=client_certificate_path, + disable_ssl_verification=disable_ssl_verification, ) + self._session = self._session_config.build_session() + def test_connection(self) -> None: url = f"{self._gms_server}/config" response = self._session.get(url) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index ca9a41172e5b6e..7de6e8130a7ab6 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -179,21 +179,24 @@ def frontend_base_url(self) -> str: @classmethod def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph": + session_config = emitter._session_config + if isinstance(session_config.timeout, tuple): + # TODO: This is slightly lossy. Eventually, we want to modify the emitter + # to accept a tuple for timeout_sec, and then we'll be able to remove this. 
+ timeout_sec: Optional[float] = session_config.timeout[0] + else: + timeout_sec = session_config.timeout return cls( DatahubClientConfig( server=emitter._gms_server, token=emitter._token, - timeout_sec=emitter._read_timeout_sec, - retry_status_codes=emitter._retry_status_codes, - retry_max_times=emitter._retry_max_times, - extra_headers=emitter._session.headers, - disable_ssl_verification=emitter._session.verify is False, - ca_certificate_path=( - emitter._session.verify - if isinstance(emitter._session.verify, str) - else None - ), - client_certificate_path=emitter._session.cert, + timeout_sec=timeout_sec, + retry_status_codes=session_config.retry_status_codes, + retry_max_times=session_config.retry_max_times, + extra_headers=session_config.extra_headers, + disable_ssl_verification=session_config.disable_ssl_verification, + ca_certificate_path=session_config.ca_certificate_path, + client_certificate_path=session_config.client_certificate_path, ) ) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/config.py b/metadata-ingestion/src/datahub/ingestion/graph/config.py index 5f269e14e1a4af..8f0a5844c97c4b 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/config.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/config.py @@ -10,7 +10,7 @@ class DatahubClientConfig(ConfigModel): # by callers / the CLI, but the actual client should not have any magic. 
server: str token: Optional[str] = None - timeout_sec: Optional[int] = None + timeout_sec: Optional[float] = None retry_status_codes: Optional[List[int]] = None retry_max_times: Optional[int] = None extra_headers: Optional[Dict[str, str]] = None diff --git a/metadata-ingestion/tests/unit/sdk/test_rest_emitter.py b/metadata-ingestion/tests/unit/sdk/test_rest_emitter.py index b4d7cb17b66f5c..81120dfc87aba3 100644 --- a/metadata-ingestion/tests/unit/sdk/test_rest_emitter.py +++ b/metadata-ingestion/tests/unit/sdk/test_rest_emitter.py @@ -4,39 +4,41 @@ MOCK_GMS_ENDPOINT = "http://fakegmshost:8080" -def test_datahub_rest_emitter_construction(): +def test_datahub_rest_emitter_construction() -> None: emitter = DatahubRestEmitter(MOCK_GMS_ENDPOINT) - assert emitter._connect_timeout_sec == rest_emitter._DEFAULT_CONNECT_TIMEOUT_SEC - assert emitter._read_timeout_sec == rest_emitter._DEFAULT_READ_TIMEOUT_SEC - assert emitter._retry_status_codes == rest_emitter._DEFAULT_RETRY_STATUS_CODES - assert emitter._retry_max_times == rest_emitter._DEFAULT_RETRY_MAX_TIMES + assert emitter._session_config.timeout == rest_emitter._DEFAULT_TIMEOUT_SEC + assert ( + emitter._session_config.retry_status_codes + == rest_emitter._DEFAULT_RETRY_STATUS_CODES + ) + assert ( + emitter._session_config.retry_max_times == rest_emitter._DEFAULT_RETRY_MAX_TIMES + ) -def test_datahub_rest_emitter_timeout_construction(): +def test_datahub_rest_emitter_timeout_construction() -> None: emitter = DatahubRestEmitter( MOCK_GMS_ENDPOINT, connect_timeout_sec=2, read_timeout_sec=4 ) - assert emitter._connect_timeout_sec == 2 - assert emitter._read_timeout_sec == 4 + assert emitter._session_config.timeout == (2, 4) -def test_datahub_rest_emitter_general_timeout_construction(): +def test_datahub_rest_emitter_general_timeout_construction() -> None: emitter = DatahubRestEmitter(MOCK_GMS_ENDPOINT, timeout_sec=2, read_timeout_sec=4) - assert emitter._connect_timeout_sec == 2 - assert emitter._read_timeout_sec == 4 + 
assert emitter._session_config.timeout == (2, 4) -def test_datahub_rest_emitter_retry_construction(): +def test_datahub_rest_emitter_retry_construction() -> None: emitter = DatahubRestEmitter( MOCK_GMS_ENDPOINT, retry_status_codes=[418], retry_max_times=42, ) - assert emitter._retry_status_codes == [418] - assert emitter._retry_max_times == 42 + assert emitter._session_config.retry_status_codes == [418] + assert emitter._session_config.retry_max_times == 42 -def test_datahub_rest_emitter_extra_params(): +def test_datahub_rest_emitter_extra_params() -> None: emitter = DatahubRestEmitter( MOCK_GMS_ENDPOINT, extra_headers={"key1": "value1", "key2": "value2"} ) From cbb36bbe590812b525e6f92608279c624123333c Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 7 Jan 2025 19:23:58 -0500 Subject: [PATCH 035/249] feat(ingest): configurable query generation in combined sources (#12284) --- .../src/datahub/ingestion/source/bigquery_v2/bigquery.py | 2 ++ .../ingestion/source/bigquery_v2/bigquery_config.py | 8 ++++++++ .../ingestion/source/snowflake/snowflake_config.py | 8 ++++++++ .../datahub/ingestion/source/snowflake/snowflake_v2.py | 2 ++ 4 files changed, 20 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 38eab3606b7e95..db7b0540e49e71 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -281,6 +281,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: include_lineage=self.config.include_table_lineage, include_usage_statistics=self.config.include_usage_statistics, include_operations=self.config.usage.include_operational_stats, + include_queries=self.config.include_queries, + include_query_usage_statistics=self.config.include_query_usage_statistics, top_n_queries=self.config.usage.top_n_queries, 
region_qualifiers=self.config.region_qualifiers, ), diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index ef323260b014e6..afbe919df4dcae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -447,6 +447,14 @@ class BigQueryV2Config( default=False, description="If enabled, uses the new queries extractor to extract queries from bigquery.", ) + include_queries: bool = Field( + default=True, + description="If enabled, generate query entities associated with lineage edges. Only applicable if `use_queries_v2` is enabled.", + ) + include_query_usage_statistics: bool = Field( + default=True, + description="If enabled, generate query popularity statistics. Only applicable if `use_queries_v2` is enabled.", + ) @property def have_table_data_read_permission(self) -> bool: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 12e5fb72b00de8..2d61ce59857778 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -221,6 +221,14 @@ class SnowflakeV2Config( default=False, description="If enabled, uses the new queries extractor to extract queries from snowflake.", ) + include_queries: bool = Field( + default=True, + description="If enabled, generate query entities associated with lineage edges. Only applicable if `use_queries_v2` is enabled.", + ) + include_query_usage_statistics: bool = Field( + default=True, + description="If enabled, generate query popularity statistics. 
Only applicable if `use_queries_v2` is enabled.", + ) lazy_schema_resolver: bool = Field( default=True, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 954e8a29c1a1bd..aede3d056709a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -528,6 +528,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: include_lineage=self.config.include_table_lineage, include_usage_statistics=self.config.include_usage_stats, include_operations=self.config.include_operational_stats, + include_queries=self.config.include_queries, + include_query_usage_statistics=self.config.include_query_usage_statistics, user_email_pattern=self.config.user_email_pattern, ), structured_report=self.report, From 98a5a2c086df1667a1b669410efaeafbeb5e3d8b Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 8 Jan 2025 06:34:10 -0600 Subject: [PATCH 036/249] fix(javaEntityClient): correct config parameter (#12287) --- .../java/com/linkedin/metadata/client/JavaEntityClient.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 3d35f5956b0f4f..35d133c74c0692 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -775,7 +775,8 @@ public List batchIngestProposals( List updatedUrns = new ArrayList<>(); Iterators.partition( - metadataChangeProposals.iterator(), Math.max(1, entityClientConfig.getBatchGetV2Size())) + metadataChangeProposals.iterator(), + Math.max(1, entityClientConfig.getBatchIngestSize())) 
.forEachRemaining( batch -> { AspectsBatch aspectsBatch = From c0b13f087aaff9898ab8377259fb0b691b128ca0 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 8 Jan 2025 18:40:19 +0530 Subject: [PATCH 037/249] ci: upload test coverage to codecov (#12291) --- .github/workflows/airflow-plugin.yml | 5 +++++ .github/workflows/build-and-test.yml | 5 +++++ .github/workflows/dagster-plugin.yml | 5 +++++ .github/workflows/gx-plugin.yml | 5 +++++ .github/workflows/metadata-ingestion.yml | 5 +++++ .github/workflows/metadata-io.yml | 5 +++++ .github/workflows/prefect-plugin.yml | 5 +++++ 7 files changed, 35 insertions(+) diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index b824a21be63f8f..89e0c9e2513d8b 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -87,6 +87,11 @@ jobs: flags: airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_extras }} name: pytest-airflow verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} event-file: runs-on: ubuntu-latest diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 0cca80c8fdf982..058ac4a5c9b1e5 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -134,6 +134,11 @@ jobs: flags: ${{ matrix.timezone }} name: ${{ matrix.command }} verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} quickstart-compose-validation: runs-on: ubuntu-latest diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index ae9a0b1605cdf3..c29e72367c53c5 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -74,6 +74,11 @@ jobs: flags: dagster-${{ matrix.python-version }}-${{ 
matrix.extraPythonRequirement }} name: pytest-dagster verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} event-file: runs-on: ubuntu-latest diff --git a/.github/workflows/gx-plugin.yml b/.github/workflows/gx-plugin.yml index 2fd814a0764858..825f8beda2f561 100644 --- a/.github/workflows/gx-plugin.yml +++ b/.github/workflows/gx-plugin.yml @@ -78,6 +78,11 @@ jobs: flags: gx-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }} name: pytest-gx verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} event-file: runs-on: ubuntu-latest diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index f4d87b361b5edc..aa404c4c35c505 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -98,6 +98,11 @@ jobs: flags: ingestion-${{ matrix.python-version }}-${{ matrix.command }} name: pytest-ingestion verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} event-file: runs-on: ubuntu-latest diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index aedcd9257d83ba..bcadc641ee2f7c 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -90,6 +90,11 @@ jobs: fail_ci_if_error: false name: metadata-io-test verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} event-file: runs-on: ubuntu-latest diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index 879df032409f28..0bce4d5ef19f31 100644 --- a/.github/workflows/prefect-plugin.yml +++ 
b/.github/workflows/prefect-plugin.yml @@ -70,6 +70,11 @@ jobs: flags: prefect-${{ matrix.python-version }} name: pytest-prefect verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} event-file: runs-on: ubuntu-latest From 333445326a627a28353f1def04955f5812dc17bb Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 8 Jan 2025 18:50:07 +0530 Subject: [PATCH 038/249] log(elastic/index builder): add est time remaining (#12280) --- .../indexbuilder/ESIndexBuilder.java | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index 6de79b6c4b181e..792e67e69f2da6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -411,6 +411,8 @@ private void reindex(ReindexConfig indexState) throws Throwable { boolean reindexTaskCompleted = false; Pair documentCounts = getDocumentCounts(indexState.name(), tempIndexName); long documentCountsLastUpdated = System.currentTimeMillis(); + long previousDocCount = documentCounts.getSecond(); + long estimatedMinutesRemaining = 0; while (System.currentTimeMillis() < timeoutAt) { log.info( @@ -421,8 +423,22 @@ private void reindex(ReindexConfig indexState) throws Throwable { Pair tempDocumentsCount = getDocumentCounts(indexState.name(), tempIndexName); if (!tempDocumentsCount.equals(documentCounts)) { - documentCountsLastUpdated = System.currentTimeMillis(); + long currentTime = System.currentTimeMillis(); + long timeElapsed = currentTime - documentCountsLastUpdated; + long docsIndexed = tempDocumentsCount.getSecond() - 
previousDocCount; + + // Calculate indexing rate (docs per millisecond) + double indexingRate = timeElapsed > 0 ? (double) docsIndexed / timeElapsed : 0; + + // Calculate remaining docs and estimated time + long remainingDocs = tempDocumentsCount.getFirst() - tempDocumentsCount.getSecond(); + long estimatedMillisRemaining = + indexingRate > 0 ? (long) (remainingDocs / indexingRate) : 0; + estimatedMinutesRemaining = estimatedMillisRemaining / (1000 * 60); + + documentCountsLastUpdated = currentTime; documentCounts = tempDocumentsCount; + previousDocCount = documentCounts.getSecond(); } if (documentCounts.getFirst().equals(documentCounts.getSecond())) { @@ -435,12 +451,15 @@ private void reindex(ReindexConfig indexState) throws Throwable { break; } else { + float progressPercentage = + 100 * (1.0f * documentCounts.getSecond()) / documentCounts.getFirst(); log.warn( - "Task: {} - Document counts do not match {} != {}. Complete: {}%", + "Task: {} - Document counts do not match {} != {}. Complete: {}%. 
Estimated time remaining: {} minutes", parentTaskId, documentCounts.getFirst(), documentCounts.getSecond(), - 100 * (1.0f * documentCounts.getSecond()) / documentCounts.getFirst()); + progressPercentage, + estimatedMinutesRemaining); long lastUpdateDelta = System.currentTimeMillis() - documentCountsLastUpdated; if (lastUpdateDelta > (300 * 1000)) { From 99c30f2b3c80ed55a7c39448ffc8fad3bfc010f3 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 8 Jan 2025 19:04:19 +0530 Subject: [PATCH 039/249] fix(ingest/glue): don't fail on profile (#12288) --- .../src/datahub/ingestion/source/aws/glue.py | 87 +++++++++++-------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 7a5ed154d40bc7..a0bed4ae9a7581 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -1054,49 +1054,66 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield from self.gen_database_containers(database) for table in tables: - database_name = table["DatabaseName"] table_name = table["Name"] - full_table_name = f"{database_name}.{table_name}" - self.report.report_table_scanned() - if not self.source_config.database_pattern.allowed( - database_name - ) or not self.source_config.table_pattern.allowed(full_table_name): - self.report.report_table_dropped(full_table_name) - continue + try: + yield from self._gen_table_wu(table=table) + except KeyError as e: + self.report.report_failure( + message="Failed to extract workunit for table", + context=f"Table: {table_name}", + exc=e, + ) + if self.extract_transforms: + yield from self._transform_extraction() - dataset_urn = make_dataset_urn_with_platform_instance( - platform=self.platform, - name=full_table_name, - env=self.env, - platform_instance=self.source_config.platform_instance, - ) + def _gen_table_wu(self, table: 
Dict) -> Iterable[MetadataWorkUnit]: + database_name = table["DatabaseName"] + table_name = table["Name"] + full_table_name = f"{database_name}.{table_name}" + self.report.report_table_scanned() + if not self.source_config.database_pattern.allowed( + database_name + ) or not self.source_config.table_pattern.allowed(full_table_name): + self.report.report_table_dropped(full_table_name) + return + + dataset_urn = make_dataset_urn_with_platform_instance( + platform=self.platform, + name=full_table_name, + env=self.env, + platform_instance=self.source_config.platform_instance, + ) - mce = self._extract_record(dataset_urn, table, full_table_name) - yield MetadataWorkUnit(full_table_name, mce=mce) + mce = self._extract_record(dataset_urn, table, full_table_name) + yield MetadataWorkUnit(full_table_name, mce=mce) - # We also want to assign "table" subType to the dataset representing glue table - unfortunately it is not - # possible via Dataset snapshot embedded in a mce, so we have to generate a mcp. - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, - aspect=SubTypes(typeNames=[DatasetSubTypes.TABLE]), - ).as_workunit() + # We also want to assign "table" subType to the dataset representing glue table - unfortunately it is not + # possible via Dataset snapshot embedded in a mce, so we have to generate a mcp. 
+ yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=SubTypes(typeNames=[DatasetSubTypes.TABLE]), + ).as_workunit() - yield from self._get_domain_wu( - dataset_name=full_table_name, - entity_urn=dataset_urn, - ) - yield from self.add_table_to_database_container( - dataset_urn=dataset_urn, db_name=database_name - ) + yield from self._get_domain_wu( + dataset_name=full_table_name, + entity_urn=dataset_urn, + ) + yield from self.add_table_to_database_container( + dataset_urn=dataset_urn, db_name=database_name + ) - wu = self.get_lineage_if_enabled(mce) - if wu: - yield wu + wu = self.get_lineage_if_enabled(mce) + if wu: + yield wu + try: yield from self.get_profile_if_enabled(mce, database_name, table_name) - - if self.extract_transforms: - yield from self._transform_extraction() + except KeyError as e: + self.report.report_failure( + message="Failed to extract profile for table", + context=f"Table: {dataset_urn}", + exc=e, + ) def _transform_extraction(self) -> Iterable[MetadataWorkUnit]: dags: Dict[str, Optional[Dict[str, Any]]] = {} From 0fe4163332eec5cf527ee0a0110507eb8934c4c9 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 8 Jan 2025 19:13:31 +0530 Subject: [PATCH 040/249] fix(ingest/gc): also query data process instance (#12292) --- .../source/gc/soft_deleted_entity_cleanup.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py index 32243106bb53f6..0a52b7e17bf714 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py @@ -19,8 +19,8 @@ logger = logging.getLogger(__name__) -QUERY_QUERY_ENTITY = """ -query listQueries($input: ScrollAcrossEntitiesInput!) 
{ +QUERY_ENTITIES = """ +query listEntities($input: ScrollAcrossEntitiesInput!) { scrollAcrossEntities(input: $input) { nextScrollId count @@ -29,6 +29,9 @@ ... on QueryEntity { urn } + ... on DataProcessInstance { + urn + } } } } @@ -225,16 +228,16 @@ def _process_futures(self, futures: Dict[Future, str]) -> Dict[Future, str]: time.sleep(self.config.delay) return futures - def _get_soft_deleted_queries(self) -> Iterable[str]: + def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[str]: assert self.ctx.graph scroll_id: Optional[str] = None while True: try: result = self.ctx.graph.execute_graphql( - QUERY_QUERY_ENTITY, + graphql_query, { "input": { - "types": ["QUERY"], + "types": [entity_type], "query": "*", "scrollId": scroll_id if scroll_id else None, "count": self.config.batch_size, @@ -254,7 +257,7 @@ def _get_soft_deleted_queries(self) -> Iterable[str]: ) except Exception as e: self.report.failure( - f"While trying to get queries with {scroll_id}", exc=e + f"While trying to get {entity_type} with {scroll_id}", exc=e ) break scroll_across_entities = result.get("scrollAcrossEntities") @@ -275,7 +278,8 @@ def _get_urns(self) -> Iterable[str]: status=RemovedStatusFilter.ONLY_SOFT_DELETED, batch_size=self.config.batch_size, ) - yield from self._get_soft_deleted_queries() + yield from self._get_soft_deleted(QUERY_ENTITIES, "QUERY") + yield from self._get_soft_deleted(QUERY_ENTITIES, "DATA_PROCESS_INSTANCE") def _times_up(self) -> bool: if ( From a4c47fa343cec4e6bc7addc11c553bace0a852a9 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 8 Jan 2025 19:46:57 +0530 Subject: [PATCH 041/249] fix(cli): correct url ending with acryl.io:8080 (#12289) --- metadata-ingestion/src/datahub/cli/cli_utils.py | 2 ++ metadata-ingestion/tests/unit/cli/test_cli_utils.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index ca4a11b41925e5..f6b5ba6176c59d 
100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -330,6 +330,8 @@ def get_frontend_session_login_as( def _ensure_valid_gms_url_acryl_cloud(url: str) -> str: if "acryl.io" not in url: return url + if url.endswith(":8080"): + url = url.replace(":8080", "") if url.startswith("http://"): url = url.replace("http://", "https://") if url.endswith("acryl.io"): diff --git a/metadata-ingestion/tests/unit/cli/test_cli_utils.py b/metadata-ingestion/tests/unit/cli/test_cli_utils.py index c9693c75d96fe9..c430f585200e5a 100644 --- a/metadata-ingestion/tests/unit/cli/test_cli_utils.py +++ b/metadata-ingestion/tests/unit/cli/test_cli_utils.py @@ -70,6 +70,10 @@ def test_fixup_gms_url(): cli_utils.fixup_gms_url("http://abc.acryl.io/api/gms") == "https://abc.acryl.io/gms" ) + assert ( + cli_utils.fixup_gms_url("http://abcd.acryl.io:8080") + == "https://abcd.acryl.io/gms" + ) def test_guess_frontend_url_from_gms_url(): From 58b6a5bee54f23a6ea1801d9f1f92d3f3600b763 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 8 Jan 2025 21:57:55 +0530 Subject: [PATCH 042/249] dev: add pre-commit hooks installed by default (#12293) --- .github/scripts/generate_pre_commit.py | 265 +++++++++++++++ .github/scripts/pre-commit-override.yaml | 8 + .pre-commit-config.yaml | 402 +++++++++++++++++++++-- build.gradle | 4 - settings.gradle | 49 +++ 5 files changed, 700 insertions(+), 28 deletions(-) create mode 100755 .github/scripts/generate_pre_commit.py create mode 100644 .github/scripts/pre-commit-override.yaml diff --git a/.github/scripts/generate_pre_commit.py b/.github/scripts/generate_pre_commit.py new file mode 100755 index 00000000000000..740d3c20d263b0 --- /dev/null +++ b/.github/scripts/generate_pre_commit.py @@ -0,0 +1,265 @@ +"""Generate pre-commit hooks for Java and Python projects. + +This script scans a repository for Java and Python projects and generates appropriate +pre-commit hooks for linting and formatting. 
It also merges in additional hooks from +an override file. +""" + +import os +from dataclasses import dataclass +from enum import Enum, auto +from pathlib import Path + +import yaml + + +class ProjectType(Enum): + """Types of projects supported for hook generation.""" + + JAVA = auto() + PYTHON = auto() + + +@dataclass +class Project: + """Represents a project found in the repository.""" + + path: str + type: ProjectType + + @property + def gradle_path(self) -> str: + """Convert path to Gradle task format.""" + return ":" + self.path.replace("/", ":") + + @property + def project_id(self) -> str: + """Generate a unique identifier for the project.""" + return self.path.replace("/", "-").replace(".", "-") + + +class ProjectFinder: + """Find Java and Python projects in a repository.""" + + JAVA_PATTERNS = [ + "plugins.hasPlugin('java')", + "apply plugin: 'java'", + "id 'java'", + "id 'java-library'", + "plugins.hasPlugin('java-library')", + "apply plugin: 'java-library'", + "plugins.hasPlugin('pegasus')", + "org.springframework.boot", + ] + + EXCLUDED_DIRS = {".git", "build", "node_modules", ".tox", "venv"} + SOURCE_EXTENSIONS = {".java", ".kt", ".groovy"} + + def __init__(self, root_dir: str): + self.root_path = Path(root_dir) + + def find_all_projects(self) -> list[Project]: + """Find all Java and Python projects in the repository.""" + java_projects = self._find_java_projects() + python_projects = self._find_python_projects() + + all_projects = [] + all_projects.extend( + Project(path=p, type=ProjectType.JAVA) for p in java_projects + ) + all_projects.extend( + Project(path=p, type=ProjectType.PYTHON) for p in python_projects + ) + + return sorted(all_projects, key=lambda p: p.path) + + def _find_java_projects(self) -> set[str]: + """Find all Java projects by checking build.gradle files.""" + java_projects = set() + + # Search both build.gradle and build.gradle.kts + for pattern in ["build.gradle", "build.gradle.kts"]: + for gradle_file in 
self.root_path.rglob(pattern): + if self._should_skip_directory(gradle_file.parent): + continue + + if self._is_java_project(gradle_file): + java_projects.add(self._get_relative_path(gradle_file.parent)) + + return { + p + for p in java_projects + if "buildSrc" not in p and "spark-smoke-test" not in p and p != "." + } + + def _find_python_projects(self) -> set[str]: + """Find all Python projects by checking for setup.py or pyproject.toml.""" + python_projects = set() + + for file_name in ["setup.py", "pyproject.toml"]: + for path in self.root_path.rglob(file_name): + if self._should_skip_directory(path.parent): + continue + + rel_path = self._get_relative_path(path.parent) + if "examples" not in rel_path: + python_projects.add(rel_path) + + return python_projects + + def _should_skip_directory(self, path: Path) -> bool: + """Check if directory should be skipped.""" + return any( + part in self.EXCLUDED_DIRS or part.startswith(".") for part in path.parts + ) + + def _is_java_project(self, gradle_file: Path) -> bool: + """Check if a Gradle file represents a Java project.""" + try: + content = gradle_file.read_text() + has_java_plugin = any(pattern in content for pattern in self.JAVA_PATTERNS) + + if has_java_plugin: + # Verify presence of source files + return any( + list(gradle_file.parent.rglob(f"*{ext}")) + for ext in self.SOURCE_EXTENSIONS + ) + return False + + except Exception as e: + print(f"Warning: Error reading {gradle_file}: {e}") + return False + + def _get_relative_path(self, path: Path) -> str: + """Get relative path from root, normalized with forward slashes.""" + return str(path.relative_to(self.root_path)).replace("\\", "/") + + +class HookGenerator: + """Generate pre-commit hooks for projects.""" + + def __init__(self, projects: list[Project], override_file: str = None): + self.projects = projects + self.override_file = override_file + + def generate_config(self) -> dict: + """Generate the complete pre-commit config.""" + hooks = [] + + for project 
in self.projects: + if project.type == ProjectType.PYTHON: + hooks.append(self._generate_lint_fix_hook(project)) + else: # ProjectType.JAVA + hooks.append(self._generate_spotless_hook(project)) + + config = {"repos": [{"repo": "local", "hooks": hooks}]} + + # Merge override hooks if they exist + if self.override_file and os.path.exists(self.override_file): + try: + with open(self.override_file, 'r') as f: + override_config = yaml.safe_load(f) + + if override_config and 'repos' in override_config: + for override_repo in override_config['repos']: + matching_repo = next( + (repo for repo in config['repos'] + if repo['repo'] == override_repo['repo']), + None + ) + + if matching_repo: + matching_repo['hooks'].extend(override_repo.get('hooks', [])) + else: + config['repos'].append(override_repo) + + print(f"Merged additional hooks from {self.override_file}") + except Exception as e: + print(f"Warning: Error reading override file {self.override_file}: {e}") + + return config + + def _generate_lint_fix_hook(self, project: Project) -> dict: + """Generate a lint-fix hook for Python projects.""" + return { + "id": f"{project.project_id}-lint-fix", + "name": f"{project.path} Lint Fix", + "entry": f"./gradlew {project.gradle_path}:lintFix", + "language": "system", + "files": f"^{project.path}/.*\\.py$", + } + + def _generate_spotless_hook(self, project: Project) -> dict: + """Generate a spotless hook for Java projects.""" + return { + "id": f"{project.project_id}-spotless", + "name": f"{project.path} Spotless Apply", + "entry": f"./gradlew {project.gradle_path}:spotlessApply", + "language": "system", + "files": f"^{project.path}/.*\\.java$", + } + + +class PrecommitDumper(yaml.Dumper): + """Custom YAML dumper that maintains proper indentation.""" + + def increase_indent(self, flow=False, *args, **kwargs): + return super().increase_indent(flow=flow, indentless=False) + + +def write_yaml_with_spaces(file_path: str, data: dict): + """Write YAML file with extra spacing between 
hooks.""" + with open(file_path, "w") as f: + yaml_str = yaml.dump( + data, Dumper=PrecommitDumper, sort_keys=False, default_flow_style=False + ) + + # Add extra newline between hooks + lines = yaml_str.split("\n") + result = [] + in_hook = False + + for line in lines: + if line.strip().startswith("- id:"): + if in_hook: # If we were already in a hook, add extra newline + result.append("") + in_hook = True + elif not line.strip() and in_hook: + in_hook = False + + result.append(line) + + f.write("\n".join(result)) + + +def main(): + root_dir = os.path.abspath(os.curdir) + override_file = ".github/scripts/pre-commit-override.yaml" + + # Find projects + finder = ProjectFinder(root_dir) + projects = finder.find_all_projects() + + # Print summary + print("Found projects:") + print("\nJava projects:") + for project in projects: + if project.type == ProjectType.JAVA: + print(f" - {project.path}") + + print("\nPython projects:") + for project in projects: + if project.type == ProjectType.PYTHON: + print(f" - {project.path}") + + # Generate and write config + generator = HookGenerator(projects, override_file) + config = generator.generate_config() + write_yaml_with_spaces(".pre-commit-config.yaml", config) + + print("\nGenerated .pre-commit-config.yaml") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/.github/scripts/pre-commit-override.yaml b/.github/scripts/pre-commit-override.yaml new file mode 100644 index 00000000000000..a085d9ea3ee93b --- /dev/null +++ b/.github/scripts/pre-commit-override.yaml @@ -0,0 +1,8 @@ +repos: + - repo: local + hooks: + - id: smoke-test-cypress-lint-fix + name: smoke-test cypress Lint Fix + entry: ./gradlew :smoke-test:cypressLintFix + language: system + files: ^smoke-test/tests/cypress/.*$ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 898e3d262b3941..c4edc2cc176355 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,26 +1,380 @@ 
-exclude: ^$ -files: ^(docs/|docs-website/|metadata-ingestion/) repos: - - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.10.1 + - repo: local hooks: - - id: isort - - repo: https://github.com/ambv/black - rev: 23.1.0 - hooks: - - id: black - - repo: https://github.com/myint/autoflake - rev: v1.4 - hooks: - - id: autoflake - args: - - --in-place - - --remove-unused-variables - - --remove-all-unused-imports - - --expand-star-imports - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.0.0-alpha.6" # Use the sha or tag you want to point at - hooks: - - id: prettier - args: - - --write \ No newline at end of file + - id: datahub-graphql-core-spotless + name: datahub-graphql-core Spotless Apply + entry: ./gradlew :datahub-graphql-core:spotlessApply + language: system + files: ^datahub-graphql-core/.*\.java$ + + - id: datahub-upgrade-spotless + name: datahub-upgrade Spotless Apply + entry: ./gradlew :datahub-upgrade:spotlessApply + language: system + files: ^datahub-upgrade/.*\.java$ + + - id: entity-registry-spotless + name: entity-registry Spotless Apply + entry: ./gradlew :entity-registry:spotlessApply + language: system + files: ^entity-registry/.*\.java$ + + - id: ingestion-scheduler-spotless + name: ingestion-scheduler Spotless Apply + entry: ./gradlew :ingestion-scheduler:spotlessApply + language: system + files: ^ingestion-scheduler/.*\.java$ + + - id: li-utils-spotless + name: li-utils Spotless Apply + entry: ./gradlew :li-utils:spotlessApply + language: system + files: ^li-utils/.*\.java$ + + - id: metadata-auth-auth-api-spotless + name: metadata-auth/auth-api Spotless Apply + entry: ./gradlew :metadata-auth:auth-api:spotlessApply + language: system + files: ^metadata-auth/auth-api/.*\.java$ + + - id: metadata-dao-impl-kafka-producer-spotless + name: metadata-dao-impl/kafka-producer Spotless Apply + entry: ./gradlew :metadata-dao-impl:kafka-producer:spotlessApply + language: system + files: 
^metadata-dao-impl/kafka-producer/.*\.java$ + + - id: metadata-events-mxe-avro-spotless + name: metadata-events/mxe-avro Spotless Apply + entry: ./gradlew :metadata-events:mxe-avro:spotlessApply + language: system + files: ^metadata-events/mxe-avro/.*\.java$ + + - id: metadata-events-mxe-registration-spotless + name: metadata-events/mxe-registration Spotless Apply + entry: ./gradlew :metadata-events:mxe-registration:spotlessApply + language: system + files: ^metadata-events/mxe-registration/.*\.java$ + + - id: metadata-events-mxe-schemas-spotless + name: metadata-events/mxe-schemas Spotless Apply + entry: ./gradlew :metadata-events:mxe-schemas:spotlessApply + language: system + files: ^metadata-events/mxe-schemas/.*\.java$ + + - id: metadata-events-mxe-utils-avro-spotless + name: metadata-events/mxe-utils-avro Spotless Apply + entry: ./gradlew :metadata-events:mxe-utils-avro:spotlessApply + language: system + files: ^metadata-events/mxe-utils-avro/.*\.java$ + + - id: metadata-ingestion-lint-fix + name: metadata-ingestion Lint Fix + entry: ./gradlew :metadata-ingestion:lintFix + language: system + files: ^metadata-ingestion/.*\.py$ + + - id: metadata-ingestion-modules-airflow-plugin-lint-fix + name: metadata-ingestion-modules/airflow-plugin Lint Fix + entry: ./gradlew :metadata-ingestion-modules:airflow-plugin:lintFix + language: system + files: ^metadata-ingestion-modules/airflow-plugin/.*\.py$ + + - id: metadata-ingestion-modules-dagster-plugin-lint-fix + name: metadata-ingestion-modules/dagster-plugin Lint Fix + entry: ./gradlew :metadata-ingestion-modules:dagster-plugin:lintFix + language: system + files: ^metadata-ingestion-modules/dagster-plugin/.*\.py$ + + - id: metadata-ingestion-modules-gx-plugin-lint-fix + name: metadata-ingestion-modules/gx-plugin Lint Fix + entry: ./gradlew :metadata-ingestion-modules:gx-plugin:lintFix + language: system + files: ^metadata-ingestion-modules/gx-plugin/.*\.py$ + + - id: metadata-ingestion-modules-prefect-plugin-lint-fix + 
name: metadata-ingestion-modules/prefect-plugin Lint Fix + entry: ./gradlew :metadata-ingestion-modules:prefect-plugin:lintFix + language: system + files: ^metadata-ingestion-modules/prefect-plugin/.*\.py$ + + - id: metadata-integration-java-acryl-spark-lineage-spotless + name: metadata-integration/java/acryl-spark-lineage Spotless Apply + entry: ./gradlew :metadata-integration:java:acryl-spark-lineage:spotlessApply + language: system + files: ^metadata-integration/java/acryl-spark-lineage/.*\.java$ + + - id: metadata-integration-java-datahub-client-spotless + name: metadata-integration/java/datahub-client Spotless Apply + entry: ./gradlew :metadata-integration:java:datahub-client:spotlessApply + language: system + files: ^metadata-integration/java/datahub-client/.*\.java$ + + - id: metadata-integration-java-datahub-event-spotless + name: metadata-integration/java/datahub-event Spotless Apply + entry: ./gradlew :metadata-integration:java:datahub-event:spotlessApply + language: system + files: ^metadata-integration/java/datahub-event/.*\.java$ + + - id: metadata-integration-java-datahub-protobuf-spotless + name: metadata-integration/java/datahub-protobuf Spotless Apply + entry: ./gradlew :metadata-integration:java:datahub-protobuf:spotlessApply + language: system + files: ^metadata-integration/java/datahub-protobuf/.*\.java$ + + - id: metadata-integration-java-datahub-schematron-cli-spotless + name: metadata-integration/java/datahub-schematron/cli Spotless Apply + entry: ./gradlew :metadata-integration:java:datahub-schematron:cli:spotlessApply + language: system + files: ^metadata-integration/java/datahub-schematron/cli/.*\.java$ + + - id: metadata-integration-java-datahub-schematron-lib-spotless + name: metadata-integration/java/datahub-schematron/lib Spotless Apply + entry: ./gradlew :metadata-integration:java:datahub-schematron:lib:spotlessApply + language: system + files: ^metadata-integration/java/datahub-schematron/lib/.*\.java$ + + - id: 
metadata-integration-java-examples-spotless + name: metadata-integration/java/examples Spotless Apply + entry: ./gradlew :metadata-integration:java:examples:spotlessApply + language: system + files: ^metadata-integration/java/examples/.*\.java$ + + - id: metadata-integration-java-openlineage-converter-spotless + name: metadata-integration/java/openlineage-converter Spotless Apply + entry: ./gradlew :metadata-integration:java:openlineage-converter:spotlessApply + language: system + files: ^metadata-integration/java/openlineage-converter/.*\.java$ + + - id: metadata-integration-java-spark-lineage-legacy-spotless + name: metadata-integration/java/spark-lineage-legacy Spotless Apply + entry: ./gradlew :metadata-integration:java:spark-lineage-legacy:spotlessApply + language: system + files: ^metadata-integration/java/spark-lineage-legacy/.*\.java$ + + - id: metadata-io-spotless + name: metadata-io Spotless Apply + entry: ./gradlew :metadata-io:spotlessApply + language: system + files: ^metadata-io/.*\.java$ + + - id: metadata-io-metadata-io-api-spotless + name: metadata-io/metadata-io-api Spotless Apply + entry: ./gradlew :metadata-io:metadata-io-api:spotlessApply + language: system + files: ^metadata-io/metadata-io-api/.*\.java$ + + - id: metadata-jobs-common-spotless + name: metadata-jobs/common Spotless Apply + entry: ./gradlew :metadata-jobs:common:spotlessApply + language: system + files: ^metadata-jobs/common/.*\.java$ + + - id: metadata-jobs-mae-consumer-spotless + name: metadata-jobs/mae-consumer Spotless Apply + entry: ./gradlew :metadata-jobs:mae-consumer:spotlessApply + language: system + files: ^metadata-jobs/mae-consumer/.*\.java$ + + - id: metadata-jobs-mae-consumer-job-spotless + name: metadata-jobs/mae-consumer-job Spotless Apply + entry: ./gradlew :metadata-jobs:mae-consumer-job:spotlessApply + language: system + files: ^metadata-jobs/mae-consumer-job/.*\.java$ + + - id: metadata-jobs-mce-consumer-spotless + name: metadata-jobs/mce-consumer Spotless 
Apply + entry: ./gradlew :metadata-jobs:mce-consumer:spotlessApply + language: system + files: ^metadata-jobs/mce-consumer/.*\.java$ + + - id: metadata-jobs-mce-consumer-job-spotless + name: metadata-jobs/mce-consumer-job Spotless Apply + entry: ./gradlew :metadata-jobs:mce-consumer-job:spotlessApply + language: system + files: ^metadata-jobs/mce-consumer-job/.*\.java$ + + - id: metadata-jobs-pe-consumer-spotless + name: metadata-jobs/pe-consumer Spotless Apply + entry: ./gradlew :metadata-jobs:pe-consumer:spotlessApply + language: system + files: ^metadata-jobs/pe-consumer/.*\.java$ + + - id: metadata-models-spotless + name: metadata-models Spotless Apply + entry: ./gradlew :metadata-models:spotlessApply + language: system + files: ^metadata-models/.*\.java$ + + - id: metadata-models-custom-spotless + name: metadata-models-custom Spotless Apply + entry: ./gradlew :metadata-models-custom:spotlessApply + language: system + files: ^metadata-models-custom/.*\.java$ + + - id: metadata-models-validator-spotless + name: metadata-models-validator Spotless Apply + entry: ./gradlew :metadata-models-validator:spotlessApply + language: system + files: ^metadata-models-validator/.*\.java$ + + - id: metadata-operation-context-spotless + name: metadata-operation-context Spotless Apply + entry: ./gradlew :metadata-operation-context:spotlessApply + language: system + files: ^metadata-operation-context/.*\.java$ + + - id: metadata-service-auth-config-spotless + name: metadata-service/auth-config Spotless Apply + entry: ./gradlew :metadata-service:auth-config:spotlessApply + language: system + files: ^metadata-service/auth-config/.*\.java$ + + - id: metadata-service-auth-filter-spotless + name: metadata-service/auth-filter Spotless Apply + entry: ./gradlew :metadata-service:auth-filter:spotlessApply + language: system + files: ^metadata-service/auth-filter/.*\.java$ + + - id: metadata-service-auth-impl-spotless + name: metadata-service/auth-impl Spotless Apply + entry: ./gradlew 
:metadata-service:auth-impl:spotlessApply + language: system + files: ^metadata-service/auth-impl/.*\.java$ + + - id: metadata-service-auth-servlet-impl-spotless + name: metadata-service/auth-servlet-impl Spotless Apply + entry: ./gradlew :metadata-service:auth-servlet-impl:spotlessApply + language: system + files: ^metadata-service/auth-servlet-impl/.*\.java$ + + - id: metadata-service-configuration-spotless + name: metadata-service/configuration Spotless Apply + entry: ./gradlew :metadata-service:configuration:spotlessApply + language: system + files: ^metadata-service/configuration/.*\.java$ + + - id: metadata-service-factories-spotless + name: metadata-service/factories Spotless Apply + entry: ./gradlew :metadata-service:factories:spotlessApply + language: system + files: ^metadata-service/factories/.*\.java$ + + - id: metadata-service-graphql-servlet-impl-spotless + name: metadata-service/graphql-servlet-impl Spotless Apply + entry: ./gradlew :metadata-service:graphql-servlet-impl:spotlessApply + language: system + files: ^metadata-service/graphql-servlet-impl/.*\.java$ + + - id: metadata-service-openapi-analytics-servlet-spotless + name: metadata-service/openapi-analytics-servlet Spotless Apply + entry: ./gradlew :metadata-service:openapi-analytics-servlet:spotlessApply + language: system + files: ^metadata-service/openapi-analytics-servlet/.*\.java$ + + - id: metadata-service-openapi-entity-servlet-spotless + name: metadata-service/openapi-entity-servlet Spotless Apply + entry: ./gradlew :metadata-service:openapi-entity-servlet:spotlessApply + language: system + files: ^metadata-service/openapi-entity-servlet/.*\.java$ + + - id: metadata-service-openapi-entity-servlet-generators-spotless + name: metadata-service/openapi-entity-servlet/generators Spotless Apply + entry: ./gradlew :metadata-service:openapi-entity-servlet:generators:spotlessApply + language: system + files: ^metadata-service/openapi-entity-servlet/generators/.*\.java$ + + - id: 
metadata-service-openapi-servlet-spotless + name: metadata-service/openapi-servlet Spotless Apply + entry: ./gradlew :metadata-service:openapi-servlet:spotlessApply + language: system + files: ^metadata-service/openapi-servlet/.*\.java$ + + - id: metadata-service-openapi-servlet-models-spotless + name: metadata-service/openapi-servlet/models Spotless Apply + entry: ./gradlew :metadata-service:openapi-servlet:models:spotlessApply + language: system + files: ^metadata-service/openapi-servlet/models/.*\.java$ + + - id: metadata-service-plugin-spotless + name: metadata-service/plugin Spotless Apply + entry: ./gradlew :metadata-service:plugin:spotlessApply + language: system + files: ^metadata-service/plugin/.*\.java$ + + - id: metadata-service-plugin-src-test-sample-test-plugins-spotless + name: metadata-service/plugin/src/test/sample-test-plugins Spotless Apply + entry: ./gradlew :metadata-service:plugin:src:test:sample-test-plugins:spotlessApply + language: system + files: ^metadata-service/plugin/src/test/sample-test-plugins/.*\.java$ + + - id: metadata-service-restli-client-spotless + name: metadata-service/restli-client Spotless Apply + entry: ./gradlew :metadata-service:restli-client:spotlessApply + language: system + files: ^metadata-service/restli-client/.*\.java$ + + - id: metadata-service-restli-client-api-spotless + name: metadata-service/restli-client-api Spotless Apply + entry: ./gradlew :metadata-service:restli-client-api:spotlessApply + language: system + files: ^metadata-service/restli-client-api/.*\.java$ + + - id: metadata-service-restli-servlet-impl-spotless + name: metadata-service/restli-servlet-impl Spotless Apply + entry: ./gradlew :metadata-service:restli-servlet-impl:spotlessApply + language: system + files: ^metadata-service/restli-servlet-impl/.*\.java$ + + - id: metadata-service-schema-registry-api-spotless + name: metadata-service/schema-registry-api Spotless Apply + entry: ./gradlew :metadata-service:schema-registry-api:spotlessApply + 
language: system + files: ^metadata-service/schema-registry-api/.*\.java$ + + - id: metadata-service-schema-registry-servlet-spotless + name: metadata-service/schema-registry-servlet Spotless Apply + entry: ./gradlew :metadata-service:schema-registry-servlet:spotlessApply + language: system + files: ^metadata-service/schema-registry-servlet/.*\.java$ + + - id: metadata-service-services-spotless + name: metadata-service/services Spotless Apply + entry: ./gradlew :metadata-service:services:spotlessApply + language: system + files: ^metadata-service/services/.*\.java$ + + - id: metadata-service-servlet-spotless + name: metadata-service/servlet Spotless Apply + entry: ./gradlew :metadata-service:servlet:spotlessApply + language: system + files: ^metadata-service/servlet/.*\.java$ + + - id: metadata-utils-spotless + name: metadata-utils Spotless Apply + entry: ./gradlew :metadata-utils:spotlessApply + language: system + files: ^metadata-utils/.*\.java$ + + - id: mock-entity-registry-spotless + name: mock-entity-registry Spotless Apply + entry: ./gradlew :mock-entity-registry:spotlessApply + language: system + files: ^mock-entity-registry/.*\.java$ + + - id: smoke-test-lint-fix + name: smoke-test Lint Fix + entry: ./gradlew :smoke-test:lintFix + language: system + files: ^smoke-test/.*\.py$ + + - id: test-models-spotless + name: test-models Spotless Apply + entry: ./gradlew :test-models:spotlessApply + language: system + files: ^test-models/.*\.java$ + + - id: smoke-test-cypress-lint-fix + name: smoke-test cypress Lint Fix + entry: ./gradlew :smoke-test:cypressLintFix + language: system + files: ^smoke-test/tests/cypress/.*$ diff --git a/build.gradle b/build.gradle index 8929b4e644972c..3c36feadc5f4bb 100644 --- a/build.gradle +++ b/build.gradle @@ -474,10 +474,6 @@ subprojects { if (compileJavaTask != null) { spotlessJavaTask.dependsOn compileJavaTask } - // TODO - Do not run this in CI. How? 
- // tasks.withType(JavaCompile) { - // finalizedBy(tasks.findByName('spotlessApply')) - // } } } diff --git a/settings.gradle b/settings.gradle index b0c2c707d566c0..77d0706549a439 100644 --- a/settings.gradle +++ b/settings.gradle @@ -78,3 +78,52 @@ include ':metadata-operation-context' include ':metadata-service:openapi-servlet:models' include ':metadata-integration:java:datahub-schematron:lib' include ':metadata-integration:java:datahub-schematron:cli' + +def installPreCommitHooks() { + def preCommitInstalled = false + try { + def process = ["which", "pre-commit"].execute() + def stdout = new StringBuilder() + def stderr = new StringBuilder() + process.waitForProcessOutput(stdout, stderr) + preCommitInstalled = (process.exitValue() == 0) + println "Pre-commit check: ${stdout}" + } catch (Exception e) { + println "Error checking pre-commit: ${e.message}" + return + } + + if (!preCommitInstalled) { + try { + def installProcess = ["python", "-m", "pip", "install", "pre-commit"].execute() + def stdout = new StringBuilder() + def stderr = new StringBuilder() + installProcess.waitForProcessOutput(stdout, stderr) + if (installProcess.exitValue() != 0) { + println "Failed to install pre-commit: ${stderr}" + return + } + println "Install output: ${stdout}" + } catch (Exception e) { + println "Error installing pre-commit: ${e.message}" + return + } + } + + try { + def installHooksProcess = ["python", "-m", "pre_commit", "install"].execute() + def stdout = new StringBuilder() + def stderr = new StringBuilder() + installHooksProcess.waitForProcessOutput(stdout, stderr) + if (installHooksProcess.exitValue() != 0) { + println "Failed to install hooks: ${stderr}" + return + } + println "Hooks output: ${stdout}" + } catch (Exception e) { + println "Error installing hooks: ${e.message}" + return + } +} + +installPreCommitHooks() \ No newline at end of file From 92f013e6e179a63b8877ad7344f428a9869ae1d7 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 8 Jan 2025 11:40:02 
-0800 Subject: [PATCH 043/249] fix(ingest/file-backed-collections): Properly set _use_sqlite_on_conflict (#12297) --- .../utilities/file_backed_collections.py | 2 +- .../utilities/test_file_backed_collections.py | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index b8c27666d7f538..fb028605c35b77 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -243,7 +243,7 @@ def __post_init__(self) -> None: # This was added in 3.24.0 from 2018-06-04. # See https://www.sqlite.org/lang_conflict.html if OVERRIDE_SQLITE_VERSION_REQUIREMENT: - self.use_sqlite_on_conflict = False + self._use_sqlite_on_conflict = False else: raise RuntimeError("SQLite version 3.24.0 or later is required") diff --git a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py index 6230c2e37edc6a..7e1627151c6ebf 100644 --- a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py +++ b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py @@ -5,6 +5,7 @@ import sqlite3 from dataclasses import dataclass from typing import Counter, Dict +from unittest.mock import patch import pytest @@ -15,6 +16,36 @@ ) +def test_set_use_sqlite_on_conflict(): + with patch("sqlite3.sqlite_version_info", (3, 24, 0)): + cache = FileBackedDict[int]( + tablename="cache", + cache_max_size=10, + cache_eviction_batch_size=10, + ) + assert cache._use_sqlite_on_conflict is True + + with pytest.raises(RuntimeError): + with patch("sqlite3.sqlite_version_info", (3, 23, 1)): + cache = FileBackedDict[int]( + tablename="cache", + cache_max_size=10, + cache_eviction_batch_size=10, + ) + assert cache._use_sqlite_on_conflict is False + + 
with patch("sqlite3.sqlite_version_info", (3, 23, 1)), patch( + "datahub.utilities.file_backed_collections.OVERRIDE_SQLITE_VERSION_REQUIREMENT", + True, + ): + cache = FileBackedDict[int]( + tablename="cache", + cache_max_size=10, + cache_eviction_batch_size=10, + ) + assert cache._use_sqlite_on_conflict is False + + @pytest.mark.parametrize("use_sqlite_on_conflict", [True, False]) def test_file_dict(use_sqlite_on_conflict: bool) -> None: cache = FileBackedDict[int]( From ea4d40e5353f4061fa02c4af229fdfc5a58af9d3 Mon Sep 17 00:00:00 2001 From: kevinkarchacryl Date: Wed, 8 Jan 2025 17:51:51 -0500 Subject: [PATCH 044/249] fix(doc): make folder_path_pattern usage more clear (#12298) --- .../datahub/ingestion/source/looker/looker_config.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index bfae3060013d59..4e9d0f68928a45 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -300,11 +300,16 @@ class LookerDashboardSourceConfig( folder_path_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Allow or deny dashboards from specific folders. " + description="Allow or deny dashboards from specific folders using their fully qualified paths. " "For example: \n" "deny: \n" - " - sales/deprecated \n" - "This pattern will deny the ingestion of all dashboards and looks within the sales/deprecated folder. \n" + " - Shared/deprecated \n" + "This pattern will deny the ingestion of all dashboards and looks within the Shared/deprecated folder. \n" + "allow: \n" + " - Shared/sales \n" + "This pattern will allow only the ingestion of dashboards within the Shared/sales folder. 
\n" + "To get the correct path from Looker, take the folder hierarchy shown in the UI and join it with slashes. " + "For example, Shared -> Customer Reports -> Sales becomes Shared/Customer Reports/Sales. " "Dashboards will only be ingested if they're allowed by both this config and dashboard_pattern.", ) From 42b2cd3e7d95a84cbd1b078e2fa81a5ecc8b9fa8 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Thu, 9 Jan 2025 18:28:19 +0530 Subject: [PATCH 045/249] dev: fix pre-commit passing filenames incorrectly (#12304) --- .github/scripts/generate_pre_commit.py | 16 +++++- .github/scripts/pre-commit-override.yaml | 3 +- .pre-commit-config.yaml | 65 ++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/.github/scripts/generate_pre_commit.py b/.github/scripts/generate_pre_commit.py index 740d3c20d263b0..2db73fd357ff5f 100755 --- a/.github/scripts/generate_pre_commit.py +++ b/.github/scripts/generate_pre_commit.py @@ -9,6 +9,7 @@ from dataclasses import dataclass from enum import Enum, auto from pathlib import Path +import datetime import yaml @@ -188,6 +189,7 @@ def _generate_lint_fix_hook(self, project: Project) -> dict: "entry": f"./gradlew {project.gradle_path}:lintFix", "language": "system", "files": f"^{project.path}/.*\\.py$", + "pass_filenames": False, } def _generate_spotless_hook(self, project: Project) -> dict: @@ -198,6 +200,7 @@ def _generate_spotless_hook(self, project: Project) -> dict: "entry": f"./gradlew {project.gradle_path}:spotlessApply", "language": "system", "files": f"^{project.path}/.*\\.java$", + "pass_filenames": False, } @@ -209,8 +212,19 @@ def increase_indent(self, flow=False, *args, **kwargs): def write_yaml_with_spaces(file_path: str, data: dict): - """Write YAML file with extra spacing between hooks.""" + """Write YAML file with extra spacing between hooks and a timestamp header.""" with open(file_path, "w") as f: + # Add timestamp header + current_time = datetime.datetime.now(datetime.timezone.utc) + 
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S %Z") + header = f"# Auto-generated by .github/scripts/generate_pre_commit.py at {formatted_time}\n" + f.write(header) + header = f"# Do not edit this file directly. Run the script to regenerate.\n" + f.write(header) + header = f"# Add additional hooks in .github/scripts/pre-commit-override.yaml\n" + f.write(header) + + # Write the YAML content yaml_str = yaml.dump( data, Dumper=PrecommitDumper, sort_keys=False, default_flow_style=False ) diff --git a/.github/scripts/pre-commit-override.yaml b/.github/scripts/pre-commit-override.yaml index a085d9ea3ee93b..961134bebe2c98 100644 --- a/.github/scripts/pre-commit-override.yaml +++ b/.github/scripts/pre-commit-override.yaml @@ -5,4 +5,5 @@ repos: name: smoke-test cypress Lint Fix entry: ./gradlew :smoke-test:cypressLintFix language: system - files: ^smoke-test/tests/cypress/.*$ \ No newline at end of file + files: ^smoke-test/tests/cypress/.*$ + pass_filenames: false \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4edc2cc176355..3697efa37770e7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,6 @@ +# Auto-generated by .github/scripts/generate_pre_commit.py at 2025-01-09 10:08:09 UTC +# Do not edit this file directly. Run the script to regenerate. 
+# Add additional hooks in .github/scripts/pre-commit-override.yaml repos: - repo: local hooks: @@ -6,372 +9,434 @@ repos: entry: ./gradlew :datahub-graphql-core:spotlessApply language: system files: ^datahub-graphql-core/.*\.java$ + pass_filenames: false - id: datahub-upgrade-spotless name: datahub-upgrade Spotless Apply entry: ./gradlew :datahub-upgrade:spotlessApply language: system files: ^datahub-upgrade/.*\.java$ + pass_filenames: false - id: entity-registry-spotless name: entity-registry Spotless Apply entry: ./gradlew :entity-registry:spotlessApply language: system files: ^entity-registry/.*\.java$ + pass_filenames: false - id: ingestion-scheduler-spotless name: ingestion-scheduler Spotless Apply entry: ./gradlew :ingestion-scheduler:spotlessApply language: system files: ^ingestion-scheduler/.*\.java$ + pass_filenames: false - id: li-utils-spotless name: li-utils Spotless Apply entry: ./gradlew :li-utils:spotlessApply language: system files: ^li-utils/.*\.java$ + pass_filenames: false - id: metadata-auth-auth-api-spotless name: metadata-auth/auth-api Spotless Apply entry: ./gradlew :metadata-auth:auth-api:spotlessApply language: system files: ^metadata-auth/auth-api/.*\.java$ + pass_filenames: false - id: metadata-dao-impl-kafka-producer-spotless name: metadata-dao-impl/kafka-producer Spotless Apply entry: ./gradlew :metadata-dao-impl:kafka-producer:spotlessApply language: system files: ^metadata-dao-impl/kafka-producer/.*\.java$ + pass_filenames: false - id: metadata-events-mxe-avro-spotless name: metadata-events/mxe-avro Spotless Apply entry: ./gradlew :metadata-events:mxe-avro:spotlessApply language: system files: ^metadata-events/mxe-avro/.*\.java$ + pass_filenames: false - id: metadata-events-mxe-registration-spotless name: metadata-events/mxe-registration Spotless Apply entry: ./gradlew :metadata-events:mxe-registration:spotlessApply language: system files: ^metadata-events/mxe-registration/.*\.java$ + pass_filenames: false - id: 
metadata-events-mxe-schemas-spotless name: metadata-events/mxe-schemas Spotless Apply entry: ./gradlew :metadata-events:mxe-schemas:spotlessApply language: system files: ^metadata-events/mxe-schemas/.*\.java$ + pass_filenames: false - id: metadata-events-mxe-utils-avro-spotless name: metadata-events/mxe-utils-avro Spotless Apply entry: ./gradlew :metadata-events:mxe-utils-avro:spotlessApply language: system files: ^metadata-events/mxe-utils-avro/.*\.java$ + pass_filenames: false - id: metadata-ingestion-lint-fix name: metadata-ingestion Lint Fix entry: ./gradlew :metadata-ingestion:lintFix language: system files: ^metadata-ingestion/.*\.py$ + pass_filenames: false - id: metadata-ingestion-modules-airflow-plugin-lint-fix name: metadata-ingestion-modules/airflow-plugin Lint Fix entry: ./gradlew :metadata-ingestion-modules:airflow-plugin:lintFix language: system files: ^metadata-ingestion-modules/airflow-plugin/.*\.py$ + pass_filenames: false - id: metadata-ingestion-modules-dagster-plugin-lint-fix name: metadata-ingestion-modules/dagster-plugin Lint Fix entry: ./gradlew :metadata-ingestion-modules:dagster-plugin:lintFix language: system files: ^metadata-ingestion-modules/dagster-plugin/.*\.py$ + pass_filenames: false - id: metadata-ingestion-modules-gx-plugin-lint-fix name: metadata-ingestion-modules/gx-plugin Lint Fix entry: ./gradlew :metadata-ingestion-modules:gx-plugin:lintFix language: system files: ^metadata-ingestion-modules/gx-plugin/.*\.py$ + pass_filenames: false - id: metadata-ingestion-modules-prefect-plugin-lint-fix name: metadata-ingestion-modules/prefect-plugin Lint Fix entry: ./gradlew :metadata-ingestion-modules:prefect-plugin:lintFix language: system files: ^metadata-ingestion-modules/prefect-plugin/.*\.py$ + pass_filenames: false - id: metadata-integration-java-acryl-spark-lineage-spotless name: metadata-integration/java/acryl-spark-lineage Spotless Apply entry: ./gradlew :metadata-integration:java:acryl-spark-lineage:spotlessApply language: system 
files: ^metadata-integration/java/acryl-spark-lineage/.*\.java$ + pass_filenames: false - id: metadata-integration-java-datahub-client-spotless name: metadata-integration/java/datahub-client Spotless Apply entry: ./gradlew :metadata-integration:java:datahub-client:spotlessApply language: system files: ^metadata-integration/java/datahub-client/.*\.java$ + pass_filenames: false - id: metadata-integration-java-datahub-event-spotless name: metadata-integration/java/datahub-event Spotless Apply entry: ./gradlew :metadata-integration:java:datahub-event:spotlessApply language: system files: ^metadata-integration/java/datahub-event/.*\.java$ + pass_filenames: false - id: metadata-integration-java-datahub-protobuf-spotless name: metadata-integration/java/datahub-protobuf Spotless Apply entry: ./gradlew :metadata-integration:java:datahub-protobuf:spotlessApply language: system files: ^metadata-integration/java/datahub-protobuf/.*\.java$ + pass_filenames: false - id: metadata-integration-java-datahub-schematron-cli-spotless name: metadata-integration/java/datahub-schematron/cli Spotless Apply entry: ./gradlew :metadata-integration:java:datahub-schematron:cli:spotlessApply language: system files: ^metadata-integration/java/datahub-schematron/cli/.*\.java$ + pass_filenames: false - id: metadata-integration-java-datahub-schematron-lib-spotless name: metadata-integration/java/datahub-schematron/lib Spotless Apply entry: ./gradlew :metadata-integration:java:datahub-schematron:lib:spotlessApply language: system files: ^metadata-integration/java/datahub-schematron/lib/.*\.java$ + pass_filenames: false - id: metadata-integration-java-examples-spotless name: metadata-integration/java/examples Spotless Apply entry: ./gradlew :metadata-integration:java:examples:spotlessApply language: system files: ^metadata-integration/java/examples/.*\.java$ + pass_filenames: false - id: metadata-integration-java-openlineage-converter-spotless name: metadata-integration/java/openlineage-converter 
Spotless Apply entry: ./gradlew :metadata-integration:java:openlineage-converter:spotlessApply language: system files: ^metadata-integration/java/openlineage-converter/.*\.java$ + pass_filenames: false - id: metadata-integration-java-spark-lineage-legacy-spotless name: metadata-integration/java/spark-lineage-legacy Spotless Apply entry: ./gradlew :metadata-integration:java:spark-lineage-legacy:spotlessApply language: system files: ^metadata-integration/java/spark-lineage-legacy/.*\.java$ + pass_filenames: false - id: metadata-io-spotless name: metadata-io Spotless Apply entry: ./gradlew :metadata-io:spotlessApply language: system files: ^metadata-io/.*\.java$ + pass_filenames: false - id: metadata-io-metadata-io-api-spotless name: metadata-io/metadata-io-api Spotless Apply entry: ./gradlew :metadata-io:metadata-io-api:spotlessApply language: system files: ^metadata-io/metadata-io-api/.*\.java$ + pass_filenames: false - id: metadata-jobs-common-spotless name: metadata-jobs/common Spotless Apply entry: ./gradlew :metadata-jobs:common:spotlessApply language: system files: ^metadata-jobs/common/.*\.java$ + pass_filenames: false - id: metadata-jobs-mae-consumer-spotless name: metadata-jobs/mae-consumer Spotless Apply entry: ./gradlew :metadata-jobs:mae-consumer:spotlessApply language: system files: ^metadata-jobs/mae-consumer/.*\.java$ + pass_filenames: false - id: metadata-jobs-mae-consumer-job-spotless name: metadata-jobs/mae-consumer-job Spotless Apply entry: ./gradlew :metadata-jobs:mae-consumer-job:spotlessApply language: system files: ^metadata-jobs/mae-consumer-job/.*\.java$ + pass_filenames: false - id: metadata-jobs-mce-consumer-spotless name: metadata-jobs/mce-consumer Spotless Apply entry: ./gradlew :metadata-jobs:mce-consumer:spotlessApply language: system files: ^metadata-jobs/mce-consumer/.*\.java$ + pass_filenames: false - id: metadata-jobs-mce-consumer-job-spotless name: metadata-jobs/mce-consumer-job Spotless Apply entry: ./gradlew 
:metadata-jobs:mce-consumer-job:spotlessApply language: system files: ^metadata-jobs/mce-consumer-job/.*\.java$ + pass_filenames: false - id: metadata-jobs-pe-consumer-spotless name: metadata-jobs/pe-consumer Spotless Apply entry: ./gradlew :metadata-jobs:pe-consumer:spotlessApply language: system files: ^metadata-jobs/pe-consumer/.*\.java$ + pass_filenames: false - id: metadata-models-spotless name: metadata-models Spotless Apply entry: ./gradlew :metadata-models:spotlessApply language: system files: ^metadata-models/.*\.java$ + pass_filenames: false - id: metadata-models-custom-spotless name: metadata-models-custom Spotless Apply entry: ./gradlew :metadata-models-custom:spotlessApply language: system files: ^metadata-models-custom/.*\.java$ + pass_filenames: false - id: metadata-models-validator-spotless name: metadata-models-validator Spotless Apply entry: ./gradlew :metadata-models-validator:spotlessApply language: system files: ^metadata-models-validator/.*\.java$ + pass_filenames: false - id: metadata-operation-context-spotless name: metadata-operation-context Spotless Apply entry: ./gradlew :metadata-operation-context:spotlessApply language: system files: ^metadata-operation-context/.*\.java$ + pass_filenames: false - id: metadata-service-auth-config-spotless name: metadata-service/auth-config Spotless Apply entry: ./gradlew :metadata-service:auth-config:spotlessApply language: system files: ^metadata-service/auth-config/.*\.java$ + pass_filenames: false - id: metadata-service-auth-filter-spotless name: metadata-service/auth-filter Spotless Apply entry: ./gradlew :metadata-service:auth-filter:spotlessApply language: system files: ^metadata-service/auth-filter/.*\.java$ + pass_filenames: false - id: metadata-service-auth-impl-spotless name: metadata-service/auth-impl Spotless Apply entry: ./gradlew :metadata-service:auth-impl:spotlessApply language: system files: ^metadata-service/auth-impl/.*\.java$ + pass_filenames: false - id: 
metadata-service-auth-servlet-impl-spotless name: metadata-service/auth-servlet-impl Spotless Apply entry: ./gradlew :metadata-service:auth-servlet-impl:spotlessApply language: system files: ^metadata-service/auth-servlet-impl/.*\.java$ + pass_filenames: false - id: metadata-service-configuration-spotless name: metadata-service/configuration Spotless Apply entry: ./gradlew :metadata-service:configuration:spotlessApply language: system files: ^metadata-service/configuration/.*\.java$ + pass_filenames: false - id: metadata-service-factories-spotless name: metadata-service/factories Spotless Apply entry: ./gradlew :metadata-service:factories:spotlessApply language: system files: ^metadata-service/factories/.*\.java$ + pass_filenames: false - id: metadata-service-graphql-servlet-impl-spotless name: metadata-service/graphql-servlet-impl Spotless Apply entry: ./gradlew :metadata-service:graphql-servlet-impl:spotlessApply language: system files: ^metadata-service/graphql-servlet-impl/.*\.java$ + pass_filenames: false - id: metadata-service-openapi-analytics-servlet-spotless name: metadata-service/openapi-analytics-servlet Spotless Apply entry: ./gradlew :metadata-service:openapi-analytics-servlet:spotlessApply language: system files: ^metadata-service/openapi-analytics-servlet/.*\.java$ + pass_filenames: false - id: metadata-service-openapi-entity-servlet-spotless name: metadata-service/openapi-entity-servlet Spotless Apply entry: ./gradlew :metadata-service:openapi-entity-servlet:spotlessApply language: system files: ^metadata-service/openapi-entity-servlet/.*\.java$ + pass_filenames: false - id: metadata-service-openapi-entity-servlet-generators-spotless name: metadata-service/openapi-entity-servlet/generators Spotless Apply entry: ./gradlew :metadata-service:openapi-entity-servlet:generators:spotlessApply language: system files: ^metadata-service/openapi-entity-servlet/generators/.*\.java$ + pass_filenames: false - id: metadata-service-openapi-servlet-spotless name: 
metadata-service/openapi-servlet Spotless Apply entry: ./gradlew :metadata-service:openapi-servlet:spotlessApply language: system files: ^metadata-service/openapi-servlet/.*\.java$ + pass_filenames: false - id: metadata-service-openapi-servlet-models-spotless name: metadata-service/openapi-servlet/models Spotless Apply entry: ./gradlew :metadata-service:openapi-servlet:models:spotlessApply language: system files: ^metadata-service/openapi-servlet/models/.*\.java$ + pass_filenames: false - id: metadata-service-plugin-spotless name: metadata-service/plugin Spotless Apply entry: ./gradlew :metadata-service:plugin:spotlessApply language: system files: ^metadata-service/plugin/.*\.java$ + pass_filenames: false - id: metadata-service-plugin-src-test-sample-test-plugins-spotless name: metadata-service/plugin/src/test/sample-test-plugins Spotless Apply entry: ./gradlew :metadata-service:plugin:src:test:sample-test-plugins:spotlessApply language: system files: ^metadata-service/plugin/src/test/sample-test-plugins/.*\.java$ + pass_filenames: false - id: metadata-service-restli-client-spotless name: metadata-service/restli-client Spotless Apply entry: ./gradlew :metadata-service:restli-client:spotlessApply language: system files: ^metadata-service/restli-client/.*\.java$ + pass_filenames: false - id: metadata-service-restli-client-api-spotless name: metadata-service/restli-client-api Spotless Apply entry: ./gradlew :metadata-service:restli-client-api:spotlessApply language: system files: ^metadata-service/restli-client-api/.*\.java$ + pass_filenames: false - id: metadata-service-restli-servlet-impl-spotless name: metadata-service/restli-servlet-impl Spotless Apply entry: ./gradlew :metadata-service:restli-servlet-impl:spotlessApply language: system files: ^metadata-service/restli-servlet-impl/.*\.java$ + pass_filenames: false - id: metadata-service-schema-registry-api-spotless name: metadata-service/schema-registry-api Spotless Apply entry: ./gradlew 
:metadata-service:schema-registry-api:spotlessApply language: system files: ^metadata-service/schema-registry-api/.*\.java$ + pass_filenames: false - id: metadata-service-schema-registry-servlet-spotless name: metadata-service/schema-registry-servlet Spotless Apply entry: ./gradlew :metadata-service:schema-registry-servlet:spotlessApply language: system files: ^metadata-service/schema-registry-servlet/.*\.java$ + pass_filenames: false - id: metadata-service-services-spotless name: metadata-service/services Spotless Apply entry: ./gradlew :metadata-service:services:spotlessApply language: system files: ^metadata-service/services/.*\.java$ + pass_filenames: false - id: metadata-service-servlet-spotless name: metadata-service/servlet Spotless Apply entry: ./gradlew :metadata-service:servlet:spotlessApply language: system files: ^metadata-service/servlet/.*\.java$ + pass_filenames: false - id: metadata-utils-spotless name: metadata-utils Spotless Apply entry: ./gradlew :metadata-utils:spotlessApply language: system files: ^metadata-utils/.*\.java$ + pass_filenames: false - id: mock-entity-registry-spotless name: mock-entity-registry Spotless Apply entry: ./gradlew :mock-entity-registry:spotlessApply language: system files: ^mock-entity-registry/.*\.java$ + pass_filenames: false - id: smoke-test-lint-fix name: smoke-test Lint Fix entry: ./gradlew :smoke-test:lintFix language: system files: ^smoke-test/.*\.py$ + pass_filenames: false - id: test-models-spotless name: test-models Spotless Apply entry: ./gradlew :test-models:spotlessApply language: system files: ^test-models/.*\.java$ + pass_filenames: false - id: smoke-test-cypress-lint-fix name: smoke-test cypress Lint Fix From 45450f19a02e38f5dd155c10635520d29fd1a91f Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Thu, 9 Jan 2025 07:32:22 -0800 Subject: [PATCH 046/249] feat(sdk): structured properties - add support for listing (#12283) --- docs/api/tutorials/structured-properties.md | 329 ++++++++++++++++-- 
.../list_structured_properties.py | 12 + .../structuredproperties.py | 13 +- .../cli/specific/structuredproperties_cli.py | 84 +++++ .../test_structured_properties.py | 213 ++++++++++++ .../test_structured_properties.py | 46 +++ 6 files changed, 663 insertions(+), 34 deletions(-) create mode 100644 metadata-ingestion/examples/structured_properties/list_structured_properties.py create mode 100644 metadata-ingestion/tests/unit/structured_properties/test_structured_properties.py diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md index 2caa015e206595..ed270811b82e92 100644 --- a/docs/api/tutorials/structured-properties.md +++ b/docs/api/tutorials/structured-properties.md @@ -6,7 +6,7 @@ import TabItem from '@theme/TabItem'; ## Why Would You Use Structured Properties? Structured properties are a structured, named set of properties that can be attached to logical entities like Datasets, DataJobs, etc. -Structured properties have values that are types. Conceptually, they are like “field definitions”. +Structured properties have values that are typed and support constraints. Learn more about structured properties in the [Structured Properties Feature Guide](../../../docs/features/feature-guides/properties/overview.md). @@ -15,6 +15,7 @@ Learn more about structured properties in the [Structured Properties Feature Gui This guide will show you how to execute the following actions with structured properties. - Create structured properties +- List structured properties - Read structured properties - Delete structured properties - Add structured properties to a dataset @@ -32,7 +33,8 @@ Additionally, you need to have the following tools installed according to the me -Install the relevant CLI version. Forms are available as of CLI version `0.13.1`. The corresponding DataHub Cloud release version is `v0.2.16.5` +Install the relevant CLI version. 
+Structured Properties were introduced in version `0.13.1`, but we continuously improve and add new functionality, so you should always [upgrade](https://datahubproject.io/docs/cli/#installation) to the latest cli for best results. Connect to your instance via [init](https://datahubproject.io/docs/cli/#init): - Run `datahub init` to update the instance you want to load into. @@ -56,33 +58,8 @@ Requirements for OpenAPI are: The following code will create a structured property `io.acryl.privacy.retentionTime`. - -```graphql -mutation createStructuredProperty { - createStructuredProperty( - input: { - id: "retentionTime", - qualifiedName:"retentionTime", - displayName: "Retention Time", - description: "Retention Time is used to figure out how long to retain records in a dataset", - valueType: "urn:li:dataType:datahub.number", - allowedValues: [ - {numberValue: 30, description: "30 days, usually reserved for datasets that are ephemeral and contain pii"}, - {numberValue: 90, description:"description: Use this for datasets that drive monthly reporting but contain pii"}, - {numberValue: 365, description:"Use this for non-sensitive data that can be retained for longer"} - ], - cardinality: SINGLE, - entityTypes: ["urn:li:entityType:datahub.dataset", "urn:li:entityType:datahub.dataFlow"], - } - ) { - urn - } -} -``` - - - + Create a yaml file representing the properties you’d like to load. For example, below file represents a property `io.acryl.privacy.retentionTime`. You can see the full example [here](https://github.com/datahub-project/datahub/blob/example-yaml-sp/metadata-ingestion/examples/structured_properties/struct_props.yaml). @@ -108,13 +85,41 @@ For example, below file represents a property `io.acryl.privacy.retentionTime`. 
``` Use the CLI to create your properties: -```commandline +```shell datahub properties upsert -f {properties_yaml} ``` If successful, you should see `Created structured property urn:li:structuredProperty:...` + + + +```graphql +mutation createStructuredProperty { + createStructuredProperty( + input: { + id: "retentionTime", + qualifiedName:"retentionTime", + displayName: "Retention Time", + description: "Retention Time is used to figure out how long to retain records in a dataset", + valueType: "urn:li:dataType:datahub.number", + allowedValues: [ + {numberValue: 30, description: "30 days, usually reserved for datasets that are ephemeral and contain pii"}, + {numberValue: 90, description:"description: Use this for datasets that drive monthly reporting but contain pii"}, + {numberValue: 365, description:"Use this for non-sensitive data that can be retained for longer"} + ], + cardinality: SINGLE, + entityTypes: ["urn:li:entityType:datahub.dataset", "urn:li:entityType:datahub.dataFlow"], + } + ) { + urn + } +} +``` + + + ```shell @@ -236,9 +241,182 @@ Example Response: -## Read Structured Properties +## List Structured Properties + +You can list all structured properties in your DataHub instance using the following methods: + + + + +```shell +datahub properties list +``` + +This will show all properties with their full details. 
+ +Example Response: +```json +{ + "urn": "urn:li:structuredProperty:clusterName", + "qualified_name": "clusterName", + "type": "urn:li:dataType:datahub.string", + "description": "Test Cluster Name Property", + "display_name": "Cluster's name", + "entity_types": [ + "urn:li:entityType:datahub.dataset" + ], + "cardinality": "SINGLE" +} +{ + "urn": "urn:li:structuredProperty:projectNames", + "qualified_name": "projectNames", + "type": "urn:li:dataType:datahub.string", + "description": "Test property for project name", + "display_name": "Project Name", + "entity_types": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "cardinality": "MULTIPLE", + "allowed_values": [ + { + "value": "Tracking", + "description": "test value 1 for project" + }, + { + "value": "DataHub", + "description": "test value 2 for project" + } + ] +} +``` + + +If you only want to see the URNs, you can use: + +```shell +datahub properties list --no-details +``` + +Example Response: +``` +[2025-01-08 22:23:00,625] INFO {datahub.cli.specific.structuredproperties_cli:134} - Listing structured property urns only, use --details for more information +urn:li:structuredProperty:clusterName +urn:li:structuredProperty:clusterType +urn:li:structuredProperty:io.acryl.dataManagement.deprecationDate +urn:li:structuredProperty:projectNames +``` + +To download all the structured property definitions into a single file that you can use with the `upsert` command as described in the [create section](#create-structured-properties), you can run the list command with the `--to-file` option. 
+ +```shell +datahub properties list --to-file structured_properties.yaml +``` + +Example Response: +```yaml + - urn: urn:li:structuredProperty:clusterName + qualified_name: clusterName + type: urn:li:dataType:datahub.string + description: Test Cluster Name Property + display_name: Cluster's name + entity_types: + - urn:li:entityType:datahub.dataset + cardinality: SINGLE + - urn: urn:li:structuredProperty:clusterType + qualified_name: clusterType + type: urn:li:dataType:datahub.string + description: Test Cluster Type Property + display_name: Cluster's type + entity_types: + - urn:li:entityType:datahub.dataset + cardinality: SINGLE + - urn: urn:li:structuredProperty:io.acryl.dataManagement.deprecationDate + qualified_name: io.acryl.dataManagement.deprecationDate + type: urn:li:dataType:datahub.date + display_name: Deprecation Date + entity_types: + - urn:li:entityType:datahub.dataset + - urn:li:entityType:datahub.dataFlow + - urn:li:entityType:datahub.dataJob + - urn:li:entityType:datahub.schemaField + cardinality: SINGLE + - urn: urn:li:structuredProperty:io.acryl.privacy.enumProperty5712 + qualified_name: io.acryl.privacy.enumProperty5712 + type: urn:li:dataType:datahub.string + description: The retention policy for the dataset + entity_types: + - urn:li:entityType:datahub.dataset + cardinality: MULTIPLE + allowed_values: + - value: foo + - value: bar +... etc. 
+``` + + + + + +Example Request: +```bash +curl -X 'GET' \ + 'http://localhost:9002/openapi/v3/entity/structuredproperty?systemMetadata=false&includeSoftDelete=false&skipCache=false&aspects=structuredPropertySettings&aspects=propertyDefinition&aspects=institutionalMemory&aspects=structuredPropertyKey&aspects=status&count=10&sortCriteria=urn&sortOrder=ASCENDING&query=*' \ + -H 'accept: application/json' +``` + +Example Response: +```json +{ + "scrollId": "...", + "entities": [ + { + "urn": "urn:li:structuredProperty:clusterName", + "propertyDefinition": { + "value": { + "immutable": false, + "qualifiedName": "clusterName", + "displayName": "Cluster's name", + "valueType": "urn:li:dataType:datahub.string", + "description": "Test Cluster Name Property", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "cardinality": "SINGLE" + } + }, + "structuredPropertyKey": { + "value": { + "id": "clusterName" + } + } + } + ] +} +``` + +Key Query Parameters: +- `count`: Number of results to return per page (default: 10) +- `sortCriteria`: Field to sort by (default: urn) +- `sortOrder`: Sort order (ASCENDING or DESCENDING) +- `query`: Search query to filter properties (* for all) + + + + +The list endpoint returns all structured properties in your DataHub instance. Each property includes: +- URN: Unique identifier for the property +- Qualified Name: The property's qualified name +- Type: The data type of the property (string, number, date, etc.) 
+- Description: A description of the property's purpose +- Display Name: Human-readable name for the property +- Entity Types: The types of entities this property can be applied to +- Cardinality: Whether the property accepts single (SINGLE) or multiple (MULTIPLE) values +- Allowed Values: If specified, the list of allowed values for this property -You can see the properties you created by running the following command: +## Read a single Structured Property + +You can read an individual property you created by running the following command: @@ -279,6 +457,91 @@ If successful, you should see metadata about your properties returned. } ``` + + + +Example Request: +```graphql +query { + structuredProperty(urn: "urn:li:structuredProperty:projectNames") { + urn + type + definition { + qualifiedName + displayName + description + cardinality + allowedValues { + value { + ... on StringValue { + stringValue + } + ... on NumberValue { + numberValue + } + } + description + } + entityTypes { + urn + info { + type + qualifiedName + } + } + } + } +} +``` + +Example Response: +```json +{ + "data": { + "structuredProperty": { + "urn": "urn:li:structuredProperty:projectNames", + "type": "STRUCTURED_PROPERTY", + "definition": { + "qualifiedName": "projectNames", + "displayName": "Project Name", + "description": "Test property for project name", + "cardinality": "MULTIPLE", + "allowedValues": [ + { + "value": { + "stringValue": "Tracking" + }, + "description": "test value 1 for project" + }, + { + "value": { + "stringValue": "DataHub" + }, + "description": "test value 2 for project" + } + ], + "entityTypes": [ + { + "urn": "urn:li:entityType:datahub.dataset", + "info": { + "type": "DATASET", + "qualifiedName": "datahub.dataset" + } + }, + { + "urn": "urn:li:entityType:datahub.dataFlow", + "info": { + "type": "DATA_FLOW", + "qualifiedName": "datahub.dataFlow" + } + } + ] + } + } + }, + "extensions": {} +} +``` @@ -389,7 +652,7 @@ Example Response: This action will set/replace all 
structured properties on the entity. See PATCH operations to add/remove a single property. - + ```graphql mutation upsertStructuredProperties { @@ -537,7 +800,7 @@ datahub dataset get --urn {urn} For reading all structured properties from a dataset: - + ```graphql query getDataset { diff --git a/metadata-ingestion/examples/structured_properties/list_structured_properties.py b/metadata-ingestion/examples/structured_properties/list_structured_properties.py new file mode 100644 index 00000000000000..66ac90c1228a37 --- /dev/null +++ b/metadata-ingestion/examples/structured_properties/list_structured_properties.py @@ -0,0 +1,12 @@ +# Usage: python3 list_structured_properties.py +# Expected Output: List of structured properties +# This script lists all structured properties in DataHub +from datahub.api.entities.structuredproperties.structuredproperties import ( + StructuredProperties, +) +from datahub.ingestion.graph.client import get_default_graph + +with get_default_graph() as graph: + structuredproperties = StructuredProperties.list(graph) + for structuredproperty in structuredproperties: + print(structuredproperty.dict()) diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index 619f69b016262d..179dbdb231c912 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -1,7 +1,7 @@ import logging from enum import Enum from pathlib import Path -from typing import List, Optional +from typing import Iterable, List, Optional import yaml from pydantic import validator @@ -226,3 +226,14 @@ def to_yaml( yaml.indent(mapping=2, sequence=4, offset=2) yaml.default_flow_style = False yaml.dump(self.dict(), fp) + + @staticmethod + def list_urns(graph: DataHubGraph) -> Iterable[str]: + return 
graph.get_urns_by_filter( + entity_types=["structuredProperty"], + ) + + @staticmethod + def list(graph: DataHubGraph) -> Iterable["StructuredProperties"]: + for urn in StructuredProperties.list_urns(graph): + yield StructuredProperties.from_datahub(graph, urn) diff --git a/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py b/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py index 42285cf13a5ddc..5cd28516a076d9 100644 --- a/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/structuredproperties_cli.py @@ -1,9 +1,11 @@ import json import logging from pathlib import Path +from typing import Iterable import click from click_default_group import DefaultGroup +from ruamel.yaml import YAML from datahub.api.entities.structuredproperties.structuredproperties import ( StructuredProperties, @@ -61,3 +63,85 @@ def get(urn: str, to_file: str) -> None: ) else: click.secho(f"Structured property {urn} does not exist") + + +@properties.command( + name="list", +) +@click.option("--details/--no-details", is_flag=True, default=True) +@click.option("--to-file", required=False, type=str) +@telemetry.with_telemetry() +def list(details: bool, to_file: str) -> None: + """List structured properties in DataHub""" + + def to_yaml_list( + objects: Iterable[StructuredProperties], # iterable of objects to dump + file: Path, + ) -> None: + # if file exists, first we read it + yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) + yaml.indent(mapping=2, sequence=4, offset=2) + yaml.default_flow_style = False + serialized_objects = [] + if file.exists(): + with open(file, "r") as fp: + existing_objects = yaml.load(fp) # this is a list of dicts + existing_objects = [ + StructuredProperties.parse_obj(obj) for obj in existing_objects + ] + objects = [obj for obj in objects] + # do a positional update of the existing objects + existing_urns = {obj.urn for obj in 
existing_objects} + # existing_urns = {obj["urn"] if "urn" in obj else f"urn:li:structuredProperty:{obj['id']}" for obj in existing_objects} + for i, obj in enumerate(existing_objects): + # existing_urn = obj["urn"] if "urn" in obj else f"urn:li:structuredProperty:{obj['id']}" + existing_urn = obj.urn + # breakpoint() + if existing_urn in {obj.urn for obj in objects}: + existing_objects[i] = next( + obj.dict(exclude_unset=True, exclude_none=True) + for obj in objects + if obj.urn == existing_urn + ) + new_objects = [ + obj.dict(exclude_unset=True, exclude_none=True) + for obj in objects + if obj.urn not in existing_urns + ] + serialized_objects = existing_objects + new_objects + else: + serialized_objects = [ + obj.dict(exclude_unset=True, exclude_none=True) for obj in objects + ] + + with open(file, "w") as fp: + yaml.dump(serialized_objects, fp) + + with get_default_graph() as graph: + if details: + logger.info( + "Listing structured properties with details. Use --no-details for urns only" + ) + structuredproperties = StructuredProperties.list(graph) + if to_file: + to_yaml_list(structuredproperties, Path(to_file)) + else: + for structuredproperty in structuredproperties: + click.secho( + f"{json.dumps(structuredproperty.dict(exclude_unset=True, exclude_none=True), indent=2)}" + ) + else: + logger.info( + "Listing structured property urns only, use --details for more information" + ) + structured_property_urns = StructuredProperties.list_urns(graph) + if to_file: + with open(to_file, "w") as f: + for urn in structured_property_urns: + f.write(f"{urn}\n") + click.secho( + f"Structured property urns written to {to_file}", fg="green" + ) + else: + for urn in structured_property_urns: + click.secho(f"{urn}") diff --git a/metadata-ingestion/tests/unit/structured_properties/test_structured_properties.py b/metadata-ingestion/tests/unit/structured_properties/test_structured_properties.py new file mode 100644 index 00000000000000..d03b08b77d5a96 --- /dev/null +++ 
b/metadata-ingestion/tests/unit/structured_properties/test_structured_properties.py @@ -0,0 +1,213 @@ +from unittest.mock import Mock + +import pytest +import yaml + +from datahub.api.entities.structuredproperties.structuredproperties import ( + AllowedValue, + StructuredProperties, + TypeQualifierAllowedTypes, +) +from datahub.ingestion.graph.client import DataHubGraph +from datahub.metadata.schema_classes import ( + PropertyValueClass, + StructuredPropertyDefinitionClass, +) + + +@pytest.fixture +def sample_yaml_content(): + return """ +- id: test_property + type: string + description: Test description + display_name: Test Property + entity_types: + - dataset + cardinality: SINGLE + allowed_values: + - value: test_value + description: Test value description +""" + + +@pytest.fixture +def sample_yaml_file(tmp_path, sample_yaml_content): + yaml_file = tmp_path / "test_properties.yaml" + yaml_file.write_text(sample_yaml_content) + return str(yaml_file) + + +@pytest.fixture +def mock_graph(): + return Mock(spec=DataHubGraph) + + +def test_structured_properties_basic_creation(): + props = StructuredProperties( + id="test_prop", type="string", description="Test description" + ) + assert props.id == "test_prop" + assert props.type == "urn:li:dataType:datahub.string" + assert props.description == "Test description" + assert props.urn == "urn:li:structuredProperty:test_prop" + + +def test_structured_properties_validate_type(): + # Test valid types + props = StructuredProperties(id="test", type="string") + assert props.type == "urn:li:dataType:datahub.string" + + # Test invalid type + with pytest.raises(ValueError, match="Type .* is not allowed"): + StructuredProperties(id="test", type="invalid_type") + + +def test_structured_properties_validate_entity_types(): + # Test valid entity type + props = StructuredProperties(id="test", type="string", entity_types=["dataset"]) + assert props.entity_types + assert "urn:li:entityType:datahub.dataset" in props.entity_types + + # Test 
invalid entity type + with pytest.raises(ValueError, match="not a valid entity type"): + StructuredProperties(id="test", type="string", entity_types=["invalid_entity"]) + + +def test_structured_properties_from_yaml(sample_yaml_file): + props = StructuredProperties.from_yaml(sample_yaml_file) + assert len(props) == 1 + assert props[0].id == "test_property" + assert props[0].type == "urn:li:dataType:datahub.string" + assert props[0].description == "Test description" + assert props[0].display_name + assert props[0].display_name == "Test Property" + assert props[0].allowed_values + assert len(props[0].allowed_values) == 1 + assert props[0].allowed_values[0].value == "test_value" + + +def test_structured_properties_generate_mcps(): + props = StructuredProperties( + id="test_prop", + type="string", + description="Test description", + display_name="Test Property", + entity_types=["dataset"], + allowed_values=[ + AllowedValue(value="test_value", description="Test value description") + ], + ) + + mcps = props.generate_mcps() + assert len(mcps) == 1 + mcp = mcps[0] + + assert mcp.entityUrn == "urn:li:structuredProperty:test_prop" + assert isinstance(mcp.aspect, StructuredPropertyDefinitionClass) + assert mcp.aspect.valueType == "urn:li:dataType:datahub.string" + assert mcp.aspect.description == "Test description" + assert mcp.aspect.allowedValues + assert len(mcp.aspect.allowedValues) == 1 + assert mcp.aspect.allowedValues[0].value == "test_value" + + +def test_structured_properties_from_datahub(mock_graph): + mock_aspect = StructuredPropertyDefinitionClass( + qualifiedName="test_prop", + valueType="urn:li:dataType:datahub.string", + displayName="Test Property", + description="Test description", + entityTypes=["urn:li:entityType:datahub.dataset"], + cardinality="SINGLE", + allowedValues=[ + PropertyValueClass(value="test_value", description="Test description") + ], + ) + + mock_graph.get_aspect.return_value = mock_aspect + + props = StructuredProperties.from_datahub( + 
mock_graph, "urn:li:structuredProperty:test_prop" + ) + + assert props.qualified_name == "test_prop" + assert props.type == "urn:li:dataType:datahub.string" + assert props.display_name == "Test Property" + assert props.allowed_values + assert len(props.allowed_values) == 1 + assert props.allowed_values[0].value == "test_value" + + +def test_structured_properties_to_yaml(tmp_path): + props = StructuredProperties( + id="test_prop", + type="string", + description="Test description", + allowed_values=[ + AllowedValue(value="test_value", description="Test value description") + ], + ) + + yaml_file = tmp_path / "output.yaml" + props.to_yaml(yaml_file) + + # Verify the yaml file was created and contains expected content + assert yaml_file.exists() + with open(yaml_file) as f: + content = yaml.safe_load(f) + assert content["id"] == "test_prop" + assert content["type"] == "urn:li:dataType:datahub.string" + assert content["description"] == "Test description" + + +@pytest.mark.parametrize( + "input_type,expected_type", + [ + ("string", "urn:li:dataType:datahub.string"), + ("STRING", "urn:li:dataType:datahub.string"), + ("number", "urn:li:dataType:datahub.number"), + ("date", "urn:li:dataType:datahub.date"), + ], +) +def test_structured_properties_type_normalization(input_type, expected_type): + props = StructuredProperties(id="test_prop", type=input_type) + assert props.type == expected_type + + +def test_structured_properties_type_qualifier(): + props = StructuredProperties( + id="test_prop", + type="urn", + type_qualifier=TypeQualifierAllowedTypes(allowed_types=["dataset"]), + ) + + mcps = props.generate_mcps() + assert mcps[0].aspect + assert mcps[0].aspect.typeQualifier["allowedTypes"] == [ # type: ignore + "urn:li:entityType:datahub.dataset" + ] + + +def test_structured_properties_list(mock_graph): + mock_graph.get_urns_by_filter.return_value = [ + "urn:li:structuredProperty:prop1", + "urn:li:structuredProperty:prop2", + ] + + mock_aspect = 
StructuredPropertyDefinitionClass( + qualifiedName="test_prop", + valueType="urn:li:dataType:string", + entityTypes=["urn:li:entityType:datahub.dataset"], + ) + mock_graph.get_aspect.return_value = mock_aspect + + props = list(StructuredProperties.list(mock_graph)) + + # Verify get_urns_by_filter was called with correct arguments + mock_graph.get_urns_by_filter.assert_called_once_with( + entity_types=["structuredProperty"] + ) + + assert len(props) == 2 + assert all(isinstance(prop, StructuredProperties) for prop in props) diff --git a/smoke-test/tests/structured_properties/test_structured_properties.py b/smoke-test/tests/structured_properties/test_structured_properties.py index 533a03a55735a1..e3c33aa406efc4 100644 --- a/smoke-test/tests/structured_properties/test_structured_properties.py +++ b/smoke-test/tests/structured_properties/test_structured_properties.py @@ -839,3 +839,49 @@ def validate_search(qualified_name, expected): # Validate search works for property #1 & #2 validate_search(property1.qualified_name, expected=[]) validate_search(property2.qualified_name, expected=[dataset_urns[0]]) + + +def test_structured_properties_list(ingest_cleanup_data, graph_client, caplog): + # Create property, assign value to target dataset urn + def create_property(): + property_name = f"listTest{randint(10, 10000)}Property" + value_type = "string" + property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" + + create_property_definition( + property_name=property_name, + graph=graph_client, + value_type=value_type, + cardinality="SINGLE", + ) + + test_property = StructuredProperties.from_datahub( + graph=graph_client, urn=property_urn + ) + assert test_property is not None + + return test_property + + # create 2 structured properties + property1 = create_property() + property2 = create_property() + wait_for_writes_to_sync() + + # validate that urns are in the list + structured_properties_urns = [ + u for u in 
StructuredProperties.list_urns(graph_client) + ] + assert property1.urn in structured_properties_urns + assert property2.urn in structured_properties_urns + + # list structured properties (full) + structured_properties = StructuredProperties.list(graph_client) + matched_properties = [ + p for p in structured_properties if p.urn in [property1.urn, property2.urn] + ] + assert len(matched_properties) == 2 + retrieved_property1 = next(p for p in matched_properties if p.urn == property1.urn) + retrieved_property2 = next(p for p in matched_properties if p.urn == property2.urn) + + assert property1.dict() == retrieved_property1.dict() + assert property2.dict() == retrieved_property2.dict() From 9d9a368deaf14410c8d56f36e878d137f9088932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Thu, 9 Jan 2025 19:04:37 +0100 Subject: [PATCH 047/249] chore(tableau): set ingestion stage report and perftimers (#12234) --- .../src/datahub/ingestion/api/source.py | 2 + .../ingestion/source/bigquery_v2/bigquery.py | 64 ++++---- .../source/bigquery_v2/bigquery_report.py | 3 - .../source/bigquery_v2/bigquery_schema_gen.py | 22 +-- .../ingestion/source/bigquery_v2/lineage.py | 14 +- .../ingestion/source/bigquery_v2/usage.py | 114 ++++++------- .../source/cassandra/cassandra_profiling.py | 48 +++--- .../source/cassandra/cassandra_utils.py | 3 - .../source/dremio/dremio_reporting.py | 3 - .../ingestion/source/dremio/dremio_source.py | 4 +- .../datahub/ingestion/source/gc/datahub_gc.py | 24 ++- .../ingestion/source/redshift/redshift.py | 66 ++++---- .../ingestion/source/redshift/usage.py | 58 +++---- .../source/snowflake/snowflake_report.py | 3 - .../source/snowflake/snowflake_schema_gen.py | 34 ++-- .../source/snowflake/snowflake_usage_v2.py | 93 ++++++----- .../source/snowflake/snowflake_v2.py | 77 +++++---- .../datahub/ingestion/source/sql/teradata.py | 4 +- .../ingestion/source/tableau/tableau.py | 150 ++++++++++++++---- 
.../datahub/ingestion/source/unity/source.py | 142 ++++++++--------- .../source_report/ingestion_stage.py | 44 ++--- .../src/datahub/utilities/perf_timer.py | 17 +- .../bigquery/test_bigquery_usage.py | 83 +++++----- .../performance/databricks/test_unity.py | 2 +- .../performance/snowflake/test_snowflake.py | 2 +- .../performance/sql/test_sql_formatter.py | 6 +- .../unit/reporting/test_ingestion_stage.py | 42 +++++ 27 files changed, 625 insertions(+), 499 deletions(-) create mode 100644 metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 75dc980e234ac8..53cb1b0ecad4ee 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -334,6 +334,8 @@ def as_obj(self) -> dict: } def compute_stats(self) -> None: + super().compute_stats() + duration = datetime.datetime.now() - self.start_time workunits_produced = self.events_produced if duration.total_seconds() > 0: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index db7b0540e49e71..508b4bbaa277dc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -253,14 +253,14 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: for project in projects: yield from self.bq_schema_extractor.get_project_workunits(project) - self.report.set_ingestion_stage("*", "View and Snapshot Lineage") - yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots( - [p.id for p in projects], - self.bq_schema_extractor.view_refs_by_project, - self.bq_schema_extractor.view_definitions, - self.bq_schema_extractor.snapshot_refs_by_project, - self.bq_schema_extractor.snapshots_by_ref, - ) + 
with self.report.new_stage("*: View and Snapshot Lineage"): + yield from self.lineage_extractor.get_lineage_workunits_for_views_and_snapshots( + [p.id for p in projects], + self.bq_schema_extractor.view_refs_by_project, + self.bq_schema_extractor.view_definitions, + self.bq_schema_extractor.snapshot_refs_by_project, + self.bq_schema_extractor.snapshots_by_ref, + ) if self.config.use_queries_v2: # if both usage and lineage are disabled then skip queries extractor piece @@ -270,31 +270,29 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ): return - self.report.set_ingestion_stage("*", QUERIES_EXTRACTION) - - with BigQueryQueriesExtractor( - connection=self.config.get_bigquery_client(), - schema_api=self.bq_schema_extractor.schema_api, - config=BigQueryQueriesExtractorConfig( - window=self.config, - user_email_pattern=self.config.usage.user_email_pattern, - include_lineage=self.config.include_table_lineage, - include_usage_statistics=self.config.include_usage_statistics, - include_operations=self.config.usage.include_operational_stats, - include_queries=self.config.include_queries, - include_query_usage_statistics=self.config.include_query_usage_statistics, - top_n_queries=self.config.usage.top_n_queries, - region_qualifiers=self.config.region_qualifiers, - ), - structured_report=self.report, - filters=self.filters, - identifiers=self.identifiers, - schema_resolver=self.sql_parser_schema_resolver, - discovered_tables=self.bq_schema_extractor.table_refs, - ) as queries_extractor: - self.report.queries_extractor = queries_extractor.report - yield from queries_extractor.get_workunits_internal() - + with self.report.new_stage(f"*: {QUERIES_EXTRACTION}"): + with BigQueryQueriesExtractor( + connection=self.config.get_bigquery_client(), + schema_api=self.bq_schema_extractor.schema_api, + config=BigQueryQueriesExtractorConfig( + window=self.config, + user_email_pattern=self.config.usage.user_email_pattern, + include_lineage=self.config.include_table_lineage, 
+ include_usage_statistics=self.config.include_usage_statistics, + include_operations=self.config.usage.include_operational_stats, + include_queries=self.config.include_queries, + include_query_usage_statistics=self.config.include_query_usage_statistics, + top_n_queries=self.config.usage.top_n_queries, + region_qualifiers=self.config.region_qualifiers, + ), + structured_report=self.report, + filters=self.filters, + identifiers=self.identifiers, + schema_resolver=self.sql_parser_schema_resolver, + discovered_tables=self.bq_schema_extractor.table_refs, + ) as queries_extractor: + self.report.queries_extractor = queries_extractor.report + yield from queries_extractor.get_workunits_internal() else: if self.config.include_usage_statistics: yield from self.usage_extractor.get_usage_workunits( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 06842da67f76ca..8e55d81aac5fe3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -190,6 +190,3 @@ class BigQueryV2Report( num_skipped_external_table_lineage: int = 0 queries_extractor: Optional[BigQueryQueriesExtractorReport] = None - - def set_ingestion_stage(self, project_id: str, stage: str) -> None: - self.report_ingestion_stage_start(f"{project_id}: {stage}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index bc2688e6b481ab..56e930dfb811f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -248,9 +248,9 @@ def modified_base32decode(self, text_to_decode: str) -> str: def get_project_workunits( self, project: 
BigqueryProject ) -> Iterable[MetadataWorkUnit]: - self.report.set_ingestion_stage(project.id, METADATA_EXTRACTION) - logger.info(f"Processing project: {project.id}") - yield from self._process_project(project) + with self.report.new_stage(f"{project.id}: {METADATA_EXTRACTION}"): + logger.info(f"Processing project: {project.id}") + yield from self._process_project(project) def get_dataplatform_instance_aspect( self, dataset_urn: str, project_id: str @@ -405,11 +405,11 @@ def _process_project( if self.config.is_profiling_enabled(): logger.info(f"Starting profiling project {project_id}") - self.report.set_ingestion_stage(project_id, PROFILING) - yield from self.profiler.get_workunits( - project_id=project_id, - tables=db_tables, - ) + with self.report.new_stage(f"{project_id}: {PROFILING}"): + yield from self.profiler.get_workunits( + project_id=project_id, + tables=db_tables, + ) def _process_project_datasets( self, @@ -1203,9 +1203,9 @@ def get_tables_for_dataset( report=self.report, ) - self.report.metadata_extraction_sec[f"{project_id}.{dataset.name}"] = round( - timer.elapsed_seconds(), 2 - ) + self.report.metadata_extraction_sec[ + f"{project_id}.{dataset.name}" + ] = timer.elapsed_seconds(digits=2) def get_core_table_details( self, dataset_name: str, project_id: str, temp_table_dataset_prefix: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index ba3357aa8ca20c..433282a21fdb66 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -330,11 +330,11 @@ def get_lineage_workunits( projects = ["*"] # project_id not used when using exported metadata for project in projects: - self.report.set_ingestion_stage(project, LINEAGE_EXTRACTION) - yield from self.generate_lineage( - project, - table_refs, - ) + with self.report.new_stage(f"{project}: 
{LINEAGE_EXTRACTION}"): + yield from self.generate_lineage( + project, + table_refs, + ) if self.redundant_run_skip_handler: # Update the checkpoint state for this run. @@ -368,8 +368,8 @@ def generate_lineage( self.report.lineage_metadata_entries[project_id] = len(lineage) logger.info(f"Built lineage map containing {len(lineage)} entries.") logger.debug(f"lineage metadata is {lineage}") - self.report.lineage_extraction_sec[project_id] = round( - timer.elapsed_seconds(), 2 + self.report.lineage_extraction_sec[project_id] = timer.elapsed_seconds( + digits=2 ) self.report.lineage_mem_size[project_id] = humanfriendly.format_size( memory_footprint.total_size(lineage) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 876ffab85ba311..f2f6cc731858d1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -495,62 +495,62 @@ def _ingest_events( def _generate_operational_workunits( self, usage_state: BigQueryUsageState, table_refs: Collection[str] ) -> Iterable[MetadataWorkUnit]: - self.report.set_ingestion_stage("*", USAGE_EXTRACTION_OPERATIONAL_STATS) - for audit_event in usage_state.standalone_events(): - try: - operational_wu = self._create_operation_workunit( - audit_event, table_refs - ) - if operational_wu: - yield operational_wu - self.report.num_operational_stats_workunits_emitted += 1 - except Exception as e: - self.report.warning( - message="Unable to generate operation workunit", - context=f"{audit_event}", - exc=e, - ) + with self.report.new_stage(f"*: {USAGE_EXTRACTION_OPERATIONAL_STATS}"): + for audit_event in usage_state.standalone_events(): + try: + operational_wu = self._create_operation_workunit( + audit_event, table_refs + ) + if operational_wu: + yield operational_wu + self.report.num_operational_stats_workunits_emitted += 1 + except Exception 
as e: + self.report.warning( + message="Unable to generate operation workunit", + context=f"{audit_event}", + exc=e, + ) def _generate_usage_workunits( self, usage_state: BigQueryUsageState ) -> Iterable[MetadataWorkUnit]: - self.report.set_ingestion_stage("*", USAGE_EXTRACTION_USAGE_AGGREGATION) - top_n = ( - self.config.usage.top_n_queries - if self.config.usage.include_top_n_queries - else 0 - ) - for entry in usage_state.usage_statistics(top_n=top_n): - try: - query_freq = [ - ( - self.uuid_to_query.get( - query_hash, usage_state.queries[query_hash] - ), - count, + with self.report.new_stage(f"*: {USAGE_EXTRACTION_USAGE_AGGREGATION}"): + top_n = ( + self.config.usage.top_n_queries + if self.config.usage.include_top_n_queries + else 0 + ) + for entry in usage_state.usage_statistics(top_n=top_n): + try: + query_freq = [ + ( + self.uuid_to_query.get( + query_hash, usage_state.queries[query_hash] + ), + count, + ) + for query_hash, count in entry.query_freq + ] + yield make_usage_workunit( + bucket_start_time=datetime.fromisoformat(entry.timestamp), + resource=BigQueryTableRef.from_string_name(entry.resource), + query_count=entry.query_count, + query_freq=query_freq, + user_freq=entry.user_freq, + column_freq=entry.column_freq, + bucket_duration=self.config.bucket_duration, + resource_urn_builder=self.identifiers.gen_dataset_urn_from_raw_ref, + top_n_queries=self.config.usage.top_n_queries, + format_sql_queries=self.config.usage.format_sql_queries, + queries_character_limit=self.config.usage.queries_character_limit, + ) + self.report.num_usage_workunits_emitted += 1 + except Exception as e: + self.report.warning( + message="Unable to generate usage statistics workunit", + context=f"{entry.timestamp}, {entry.resource}", + exc=e, ) - for query_hash, count in entry.query_freq - ] - yield make_usage_workunit( - bucket_start_time=datetime.fromisoformat(entry.timestamp), - resource=BigQueryTableRef.from_string_name(entry.resource), - query_count=entry.query_count, - 
query_freq=query_freq, - user_freq=entry.user_freq, - column_freq=entry.column_freq, - bucket_duration=self.config.bucket_duration, - resource_urn_builder=self.identifiers.gen_dataset_urn_from_raw_ref, - top_n_queries=self.config.usage.top_n_queries, - format_sql_queries=self.config.usage.format_sql_queries, - queries_character_limit=self.config.usage.queries_character_limit, - ) - self.report.num_usage_workunits_emitted += 1 - except Exception as e: - self.report.warning( - message="Unable to generate usage statistics workunit", - context=f"{entry.timestamp}, {entry.resource}", - exc=e, - ) def _get_usage_events(self, projects: Iterable[str]) -> Iterable[AuditEvent]: if self.config.use_exported_bigquery_audit_metadata: @@ -559,10 +559,10 @@ def _get_usage_events(self, projects: Iterable[str]) -> Iterable[AuditEvent]: for project_id in projects: with PerfTimer() as timer: try: - self.report.set_ingestion_stage( - project_id, USAGE_EXTRACTION_INGESTION - ) - yield from self._get_parsed_bigquery_log_events(project_id) + with self.report.new_stage( + f"{project_id}: {USAGE_EXTRACTION_INGESTION}" + ): + yield from self._get_parsed_bigquery_log_events(project_id) except Exception as e: self.report.usage_failed_extraction.append(project_id) self.report.warning( @@ -572,8 +572,8 @@ def _get_usage_events(self, projects: Iterable[str]) -> Iterable[AuditEvent]: ) self.report_status(f"usage-extraction-{project_id}", False) - self.report.usage_extraction_sec[project_id] = round( - timer.elapsed_seconds(), 2 + self.report.usage_extraction_sec[project_id] = timer.elapsed_seconds( + digits=2 ) def _store_usage_event( diff --git a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_profiling.py b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_profiling.py index d8ab62f1d6d91f..7bf1d66f618a4b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_profiling.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_profiling.py @@ -70,30 +70,30 @@ def get_workunits( ) -> Iterable[MetadataWorkUnit]: for keyspace_name in cassandra_data.keyspaces: tables = cassandra_data.tables.get(keyspace_name, []) - self.report.set_ingestion_stage(keyspace_name, PROFILING) - with ThreadPoolExecutor( - max_workers=self.config.profiling.max_workers - ) as executor: - future_to_dataset = { - executor.submit( - self.generate_profile, - keyspace_name, - table_name, - cassandra_data.columns.get(table_name, []), - ): table_name - for table_name in tables - } - for future in as_completed(future_to_dataset): - table_name = future_to_dataset[future] - try: - yield from future.result() - except Exception as exc: - self.report.profiling_skipped_other[table_name] += 1 - self.report.failure( - message="Failed to profile for table", - context=f"{keyspace_name}.{table_name}", - exc=exc, - ) + with self.report.new_stage(f"{keyspace_name}: {PROFILING}"): + with ThreadPoolExecutor( + max_workers=self.config.profiling.max_workers + ) as executor: + future_to_dataset = { + executor.submit( + self.generate_profile, + keyspace_name, + table_name, + cassandra_data.columns.get(table_name, []), + ): table_name + for table_name in tables + } + for future in as_completed(future_to_dataset): + table_name = future_to_dataset[future] + try: + yield from future.result() + except Exception as exc: + self.report.profiling_skipped_other[table_name] += 1 + self.report.failure( + message="Failed to profile for table", + context=f"{keyspace_name}.{table_name}", + exc=exc, + ) def generate_profile( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py index 41d4ac7ced6035..75a0ba0c617734 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/cassandra/cassandra_utils.py @@ -54,9 +54,6 @@ def report_entity_scanned(self, name: str, ent_type: str = "View") -> None: else: raise KeyError(f"Unknown entity {ent_type}.") - def set_ingestion_stage(self, keyspace: str, stage: str) -> None: - self.report_ingestion_stage_start(f"{keyspace}: {stage}") - # TODO Need to create seperate common config for profiling report profiling_skipped_other: TopKDict[str, int] = field(default_factory=int_top_k_dict) profiling_skipped_table_profile_pattern: TopKDict[str, int] = field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py index c8eb035461ca16..9712d4ddc67998 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_reporting.py @@ -45,6 +45,3 @@ def report_entity_scanned(self, name: str, ent_type: str = "View") -> None: self.views_scanned += 1 else: raise KeyError(f"Unknown entity {ent_type}.") - - def set_ingestion_stage(self, dataset: str, stage: str) -> None: - self.report_ingestion_stage_start(f"{dataset}: {stage}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py index 319290d25169af..6d34e86be6282e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_source.py @@ -472,8 +472,8 @@ def generate_profiles( env=self.config.env, platform_instance=self.config.platform_instance, ) - self.report.set_ingestion_stage(dataset_info.resource_name, PROFILING) - yield from self.profiler.get_workunits(dataset_info, dataset_urn) + with self.report.new_stage(f"{dataset_info.resource_name}: {PROFILING}"): + yield from self.profiler.get_workunits(dataset_info, 
dataset_urn) def generate_view_lineage( self, dataset_urn: str, parents: List[str] diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py index 443368e6d8b4fb..b4cc5423277c5a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/datahub_gc.py @@ -141,40 +141,36 @@ def get_workunits_internal( ) -> Iterable[MetadataWorkUnit]: if self.config.cleanup_expired_tokens: try: - self.report.report_ingestion_stage_start("Expired Token Cleanup") - self.revoke_expired_tokens() + with self.report.new_stage("Expired Token Cleanup"): + self.revoke_expired_tokens() except Exception as e: self.report.failure("While trying to cleanup expired token ", exc=e) if self.config.truncate_indices: try: - self.report.report_ingestion_stage_start("Truncate Indices") - self.truncate_indices() + with self.report.new_stage("Truncate Indices"): + self.truncate_indices() except Exception as e: self.report.failure("While trying to truncate indices ", exc=e) if self.config.soft_deleted_entities_cleanup.enabled: try: - self.report.report_ingestion_stage_start( - "Soft Deleted Entities Cleanup" - ) - self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities() + with self.report.new_stage("Soft Deleted Entities Cleanup"): + self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities() except Exception as e: self.report.failure( "While trying to cleanup soft deleted entities ", exc=e ) if self.config.dataprocess_cleanup.enabled: try: - self.report.report_ingestion_stage_start("Data Process Cleanup") - yield from self.dataprocess_cleanup.get_workunits_internal() + with self.report.new_stage("Data Process Cleanup"): + yield from self.dataprocess_cleanup.get_workunits_internal() except Exception as e: self.report.failure("While trying to cleanup data process ", exc=e) if self.config.execution_request_cleanup.enabled: try: - 
self.report.report_ingestion_stage_start("Execution request Cleanup") - self.execution_request_cleanup.run() + with self.report.new_stage("Execution request Cleanup"): + self.execution_request_cleanup.run() except Exception as e: self.report.failure("While trying to cleanup execution request ", exc=e) - # Otherwise last stage's duration does not get calculated. - self.report.report_ingestion_stage_start("End") yield from [] def truncate_indices(self) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 49f7941563c1a7..5371017a2a3212 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -423,10 +423,10 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit database = self.config.database logger.info(f"Processing db {database}") - self.report.report_ingestion_stage_start(METADATA_EXTRACTION) - self.db_tables[database] = defaultdict() - self.db_views[database] = defaultdict() - self.db_schemas.setdefault(database, {}) + with self.report.new_stage(METADATA_EXTRACTION): + self.db_tables[database] = defaultdict() + self.db_views[database] = defaultdict() + self.db_schemas.setdefault(database, {}) # TODO: Ideally, we'd push down exception handling to the place where the connection is used, as opposed to keeping # this fallback. For now, this gets us broad coverage quickly. 
@@ -462,12 +462,12 @@ def _extract_metadata( self.process_schemas(connection, database) ) - self.report.report_ingestion_stage_start(LINEAGE_EXTRACTION) - yield from self.extract_lineage_v2( - connection=connection, - database=database, - lineage_extractor=lineage_extractor, - ) + with self.report.new_stage(LINEAGE_EXTRACTION): + yield from self.extract_lineage_v2( + connection=connection, + database=database, + lineage_extractor=lineage_extractor, + ) all_tables = self.get_all_tables() else: @@ -480,25 +480,25 @@ def _extract_metadata( or self.config.include_view_lineage or self.config.include_copy_lineage ): - self.report.report_ingestion_stage_start(LINEAGE_EXTRACTION) - yield from self.extract_lineage( - connection=connection, all_tables=all_tables, database=database - ) + with self.report.new_stage(LINEAGE_EXTRACTION): + yield from self.extract_lineage( + connection=connection, all_tables=all_tables, database=database + ) - self.report.report_ingestion_stage_start(USAGE_EXTRACTION_INGESTION) if self.config.include_usage_statistics: - yield from self.extract_usage( - connection=connection, all_tables=all_tables, database=database - ) + with self.report.new_stage(USAGE_EXTRACTION_INGESTION): + yield from self.extract_usage( + connection=connection, all_tables=all_tables, database=database + ) if self.config.is_profiling_enabled(): - self.report.report_ingestion_stage_start(PROFILING) - profiler = RedshiftProfiler( - config=self.config, - report=self.report, - state_handler=self.profiling_state_handler, - ) - yield from profiler.get_workunits(self.db_tables) + with self.report.new_stage(PROFILING): + profiler = RedshiftProfiler( + config=self.config, + report=self.report, + state_handler=self.profiling_state_handler, + ) + yield from profiler.get_workunits(self.db_tables) def process_schemas(self, connection, database): for schema in self.data_dictionary.get_schemas( @@ -633,8 +633,8 @@ def process_schema( else: logger.info("View processing disabled, skipping") - 
self.report.metadata_extraction_sec[report_key] = round( - timer.elapsed_seconds(), 2 + self.report.metadata_extraction_sec[report_key] = timer.elapsed_seconds( + digits=2 ) def _process_table( @@ -986,9 +986,7 @@ def extract_usage( yield from usage_extractor.get_usage_workunits(all_tables=all_tables) - self.report.usage_extraction_sec[database] = round( - timer.elapsed_seconds(), 2 - ) + self.report.usage_extraction_sec[database] = timer.elapsed_seconds(digits=2) def extract_lineage( self, @@ -1011,8 +1009,8 @@ def extract_lineage( database=database, connection=connection, all_tables=all_tables ) - self.report.lineage_extraction_sec[f"{database}"] = round( - timer.elapsed_seconds(), 2 + self.report.lineage_extraction_sec[f"{database}"] = timer.elapsed_seconds( + digits=2 ) yield from self.generate_lineage( database, lineage_extractor=lineage_extractor @@ -1042,8 +1040,8 @@ def extract_lineage_v2( yield from lineage_extractor.generate() - self.report.lineage_extraction_sec[f"{database}"] = round( - timer.elapsed_seconds(), 2 + self.report.lineage_extraction_sec[f"{database}"] = timer.elapsed_seconds( + digits=2 ) if self.redundant_lineage_run_skip_handler: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py index e0bf8b23dd0f7d..d66a1ee18be40f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py @@ -182,38 +182,38 @@ def _get_workunits_internal( self.report.num_operational_stats_filtered = 0 if self.config.include_operational_stats: - self.report.report_ingestion_stage_start(USAGE_EXTRACTION_OPERATIONAL_STATS) - with PerfTimer() as timer: - # Generate operation aspect workunits - yield from self._gen_operation_aspect_workunits( - self.connection, all_tables - ) - self.report.operational_metadata_extraction_sec[ - self.config.database - ] = round(timer.elapsed_seconds(), 2) + 
with self.report.new_stage(USAGE_EXTRACTION_OPERATIONAL_STATS): + with PerfTimer() as timer: + # Generate operation aspect workunits + yield from self._gen_operation_aspect_workunits( + self.connection, all_tables + ) + self.report.operational_metadata_extraction_sec[ + self.config.database + ] = timer.elapsed_seconds(digits=2) # Generate aggregate events - self.report.report_ingestion_stage_start(USAGE_EXTRACTION_USAGE_AGGREGATION) - query: str = self.queries.usage_query( - start_time=self.start_time.strftime(REDSHIFT_DATETIME_FORMAT), - end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT), - database=self.config.database, - ) - access_events_iterable: Iterable[ - RedshiftAccessEvent - ] = self._gen_access_events_from_history_query( - query, connection=self.connection, all_tables=all_tables - ) + with self.report.new_stage(USAGE_EXTRACTION_USAGE_AGGREGATION): + query: str = self.queries.usage_query( + start_time=self.start_time.strftime(REDSHIFT_DATETIME_FORMAT), + end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT), + database=self.config.database, + ) + access_events_iterable: Iterable[ + RedshiftAccessEvent + ] = self._gen_access_events_from_history_query( + query, connection=self.connection, all_tables=all_tables + ) - aggregated_events: AggregatedAccessEvents = self._aggregate_access_events( - access_events_iterable - ) - # Generate usage workunits from aggregated events. - for time_bucket in aggregated_events.values(): - for aggregate in time_bucket.values(): - wu: MetadataWorkUnit = self._make_usage_stat(aggregate) - self.report.num_usage_workunits_emitted += 1 - yield wu + aggregated_events: AggregatedAccessEvents = self._aggregate_access_events( + access_events_iterable + ) + # Generate usage workunits from aggregated events. 
+ for time_bucket in aggregated_events.values(): + for aggregate in time_bucket.values(): + wu: MetadataWorkUnit = self._make_usage_stat(aggregate) + self.report.num_usage_workunits_emitted += 1 + yield wu def _gen_operation_aspect_workunits( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index 030b2d43be81f9..b24471f8666afa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -166,6 +166,3 @@ def _is_tag_scanned(self, tag_name: str) -> bool: def report_tag_processed(self, tag_name: str) -> None: self._processed_tags.add(tag_name) - - def set_ingestion_stage(self, database: str, stage: str) -> None: - self.report_ingestion_stage_start(f"{database}: {stage}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 8a1bf15b7a7bc4..6f09c26b08da2d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -216,21 +216,23 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: try: for snowflake_db in self.databases: - self.report.set_ingestion_stage(snowflake_db.name, METADATA_EXTRACTION) - yield from self._process_database(snowflake_db) + with self.report.new_stage( + f"{snowflake_db.name}: {METADATA_EXTRACTION}" + ): + yield from self._process_database(snowflake_db) - self.report.set_ingestion_stage("*", EXTERNAL_TABLE_DDL_LINEAGE) - discovered_tables: List[str] = [ - self.identifiers.get_dataset_identifier( - table_name, schema.name, db.name - ) - for db in self.databases - for schema in db.schemas - for table_name in schema.tables - ] - if 
self.aggregator: - for entry in self._external_tables_ddl_lineage(discovered_tables): - self.aggregator.add(entry) + with self.report.new_stage(f"*: {EXTERNAL_TABLE_DDL_LINEAGE}"): + discovered_tables: List[str] = [ + self.identifiers.get_dataset_identifier( + table_name, schema.name, db.name + ) + for db in self.databases + for schema in db.schemas + for table_name in schema.tables + ] + if self.aggregator: + for entry in self._external_tables_ddl_lineage(discovered_tables): + self.aggregator.add(entry) except SnowflakePermissionError as e: self.structured_reporter.failure( @@ -332,8 +334,8 @@ def _process_database( yield from self._process_db_schemas(snowflake_db, db_tables) if self.profiler and db_tables: - self.report.set_ingestion_stage(snowflake_db.name, PROFILING) - yield from self.profiler.get_workunits(snowflake_db, db_tables) + with self.report.new_stage(f"{snowflake_db.name}: {PROFILING}"): + yield from self.profiler.get_workunits(snowflake_db, db_tables) def _process_db_schemas( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index 4bdf559f293b51..85e4071aec07df 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -146,59 +146,58 @@ def get_usage_workunits( if not self._should_ingest_usage(): return - self.report.set_ingestion_stage("*", USAGE_EXTRACTION_USAGE_AGGREGATION) - if self.report.edition == SnowflakeEdition.STANDARD.value: - logger.info( - "Snowflake Account is Standard Edition. Usage and Operation History Feature is not supported." - ) - return + with self.report.new_stage(f"*: {USAGE_EXTRACTION_USAGE_AGGREGATION}"): + if self.report.edition == SnowflakeEdition.STANDARD.value: + logger.info( + "Snowflake Account is Standard Edition. 
Usage and Operation History Feature is not supported." + ) + return - logger.info("Checking usage date ranges") + logger.info("Checking usage date ranges") - self._check_usage_date_ranges() + self._check_usage_date_ranges() - # If permission error, execution returns from here - if ( - self.report.min_access_history_time is None - or self.report.max_access_history_time is None - ): - return + # If permission error, execution returns from here + if ( + self.report.min_access_history_time is None + or self.report.max_access_history_time is None + ): + return - # NOTE: In earlier `snowflake-usage` connector, users with no email were not considered in usage counts as well as in operation - # Now, we report the usage as well as operation metadata even if user email is absent + # NOTE: In earlier `snowflake-usage` connector, users with no email were not considered in usage counts as well as in operation + # Now, we report the usage as well as operation metadata even if user email is absent - if self.config.include_usage_stats: - yield from auto_empty_dataset_usage_statistics( - self._get_workunits_internal(discovered_datasets), - config=BaseTimeWindowConfig( - start_time=self.start_time, - end_time=self.end_time, - bucket_duration=self.config.bucket_duration, - ), - dataset_urns={ - self.identifiers.gen_dataset_urn(dataset_identifier) - for dataset_identifier in discovered_datasets - }, - ) + if self.config.include_usage_stats: + yield from auto_empty_dataset_usage_statistics( + self._get_workunits_internal(discovered_datasets), + config=BaseTimeWindowConfig( + start_time=self.start_time, + end_time=self.end_time, + bucket_duration=self.config.bucket_duration, + ), + dataset_urns={ + self.identifiers.gen_dataset_urn(dataset_identifier) + for dataset_identifier in discovered_datasets + }, + ) - self.report.set_ingestion_stage("*", USAGE_EXTRACTION_OPERATIONAL_STATS) + with self.report.new_stage(f"*: {USAGE_EXTRACTION_OPERATIONAL_STATS}"): + if 
self.config.include_operational_stats: + # Generate the operation workunits. + access_events = self._get_snowflake_history() + for event in access_events: + yield from self._get_operation_aspect_work_unit( + event, discovered_datasets + ) - if self.config.include_operational_stats: - # Generate the operation workunits. - access_events = self._get_snowflake_history() - for event in access_events: - yield from self._get_operation_aspect_work_unit( - event, discovered_datasets + if self.redundant_run_skip_handler: + # Update the checkpoint state for this run. + self.redundant_run_skip_handler.update_state( + self.config.start_time, + self.config.end_time, + self.config.bucket_duration, ) - if self.redundant_run_skip_handler: - # Update the checkpoint state for this run. - self.redundant_run_skip_handler.update_state( - self.config.start_time, - self.config.end_time, - self.config.bucket_duration, - ) - def _get_workunits_internal( self, discovered_datasets: List[str] ) -> Iterable[MetadataWorkUnit]: @@ -386,7 +385,7 @@ def _get_snowflake_history(self) -> Iterable[SnowflakeJoinedAccessEvent]: ) self.report_status(USAGE_EXTRACTION_OPERATIONAL_STATS, False) return - self.report.access_history_query_secs = round(timer.elapsed_seconds(), 2) + self.report.access_history_query_secs = timer.elapsed_seconds(digits=2) for row in results: yield from self._process_snowflake_history_row(row) @@ -434,8 +433,8 @@ def _check_usage_date_ranges(self) -> None: self.report.max_access_history_time = db_row["MAX_TIME"].astimezone( tz=timezone.utc ) - self.report.access_history_range_query_secs = round( - timer.elapsed_seconds(), 2 + self.report.access_history_range_query_secs = timer.elapsed_seconds( + digits=2 ) def _get_operation_aspect_work_unit( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index aede3d056709a2..c0385a8d5af30a 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -480,8 +480,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: identifiers=self.identifiers, ) - self.report.set_ingestion_stage("*", METADATA_EXTRACTION) - yield from schema_extractor.get_workunits_internal() + with self.report.new_stage(f"*: {METADATA_EXTRACTION}"): + yield from schema_extractor.get_workunits_internal() databases = schema_extractor.databases @@ -513,47 +513,46 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: discovered_datasets = discovered_tables + discovered_views if self.config.use_queries_v2: - self.report.set_ingestion_stage("*", VIEW_PARSING) - yield from auto_workunit(self.aggregator.gen_metadata()) - - self.report.set_ingestion_stage("*", QUERIES_EXTRACTION) - - schema_resolver = self.aggregator._schema_resolver - - queries_extractor = SnowflakeQueriesExtractor( - connection=self.connection, - config=SnowflakeQueriesExtractorConfig( - window=self.config, - temporary_tables_pattern=self.config.temporary_tables_pattern, - include_lineage=self.config.include_table_lineage, - include_usage_statistics=self.config.include_usage_stats, - include_operations=self.config.include_operational_stats, - include_queries=self.config.include_queries, - include_query_usage_statistics=self.config.include_query_usage_statistics, - user_email_pattern=self.config.user_email_pattern, - ), - structured_report=self.report, - filters=self.filters, - identifiers=self.identifiers, - schema_resolver=schema_resolver, - discovered_tables=discovered_datasets, - graph=self.ctx.graph, - ) + with self.report.new_stage(f"*: {VIEW_PARSING}"): + yield from auto_workunit(self.aggregator.gen_metadata()) - # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs - # but a shared schema resolver. 
That's fine for now though - once we remove the old lineage/usage extractors, - # it should be pretty straightforward to refactor this and only initialize the aggregator once. - self.report.queries_extractor = queries_extractor.report - yield from queries_extractor.get_workunits_internal() - queries_extractor.close() + with self.report.new_stage(f"*: {QUERIES_EXTRACTION}"): + schema_resolver = self.aggregator._schema_resolver + + queries_extractor = SnowflakeQueriesExtractor( + connection=self.connection, + config=SnowflakeQueriesExtractorConfig( + window=self.config, + temporary_tables_pattern=self.config.temporary_tables_pattern, + include_lineage=self.config.include_table_lineage, + include_usage_statistics=self.config.include_usage_stats, + include_operations=self.config.include_operational_stats, + include_queries=self.config.include_queries, + include_query_usage_statistics=self.config.include_query_usage_statistics, + user_email_pattern=self.config.user_email_pattern, + ), + structured_report=self.report, + filters=self.filters, + identifiers=self.identifiers, + schema_resolver=schema_resolver, + discovered_tables=discovered_datasets, + graph=self.ctx.graph, + ) + + # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs + # but a shared schema resolver. That's fine for now though - once we remove the old lineage/usage extractors, + # it should be pretty straightforward to refactor this and only initialize the aggregator once. 
+ self.report.queries_extractor = queries_extractor.report + yield from queries_extractor.get_workunits_internal() + queries_extractor.close() else: if self.lineage_extractor: - self.report.set_ingestion_stage("*", LINEAGE_EXTRACTION) - self.lineage_extractor.add_time_based_lineage_to_aggregator( - discovered_tables=discovered_tables, - discovered_views=discovered_views, - ) + with self.report.new_stage(f"*: {LINEAGE_EXTRACTION}"): + self.lineage_extractor.add_time_based_lineage_to_aggregator( + discovered_tables=discovered_tables, + discovered_views=discovered_views, + ) # This would emit view and external table ddl lineage # as well as query lineage via lineage_extractor diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index e42564975c3d19..5b76fe41d92e97 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -878,7 +878,7 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit urns = self.schema_resolver.get_urns() if self.config.include_table_lineage or self.config.include_usage_statistics: - self.report.report_ingestion_stage_start("audit log extraction") - yield from self.get_audit_log_mcps(urns=urns) + with self.report.new_stage("Audit log extraction"): + yield from self.get_audit_log_mcps(urns=urns) yield from self.builder.gen_workunits() diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index d149402741e82f..2543cbe653ba72 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -118,6 +118,7 @@ ) from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo from datahub.ingestion.source.tableau.tableau_validation import 
check_user_role +from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, ChangeAuditStamps, @@ -170,6 +171,8 @@ create_lineage_sql_parsed_result, ) from datahub.utilities import config_clean +from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.stats_collections import TopKDict from datahub.utilities.urns.dataset_urn import DatasetUrn try: @@ -643,12 +646,41 @@ class SiteIdContentUrl: @dataclass -class TableauSourceReport(StaleEntityRemovalSourceReport): +class TableauSourceReport( + StaleEntityRemovalSourceReport, + IngestionStageReport, +): get_all_datasources_query_failed: bool = False num_get_datasource_query_failures: int = 0 num_datasource_field_skipped_no_name: int = 0 num_csql_field_skipped_no_name: int = 0 num_table_field_skipped_no_name: int = 0 + # timers + extract_usage_stats_timer: Dict[str, float] = dataclass_field( + default_factory=TopKDict + ) + fetch_groups_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict) + populate_database_server_hostname_map_timer: Dict[str, float] = dataclass_field( + default_factory=TopKDict + ) + populate_projects_registry_timer: Dict[str, float] = dataclass_field( + default_factory=TopKDict + ) + emit_workbooks_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict) + emit_sheets_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict) + emit_dashboards_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict) + emit_embedded_datasources_timer: Dict[str, float] = dataclass_field( + default_factory=TopKDict + ) + emit_published_datasources_timer: Dict[str, float] = dataclass_field( + default_factory=TopKDict + ) + emit_custom_sql_datasources_timer: Dict[str, float] = dataclass_field( + default_factory=TopKDict + ) + emit_upstream_tables_timer: Dict[str, float] = dataclass_field( + default_factory=TopKDict + ) # lineage num_tables_with_upstream_lineage: 
int = 0 num_upstream_table_lineage: int = 0 @@ -660,6 +692,7 @@ class TableauSourceReport(StaleEntityRemovalSourceReport): num_upstream_fine_grained_lineage_failed_parse_sql: int = 0 num_hidden_assets_skipped: int = 0 logged_in_user: List[UserInfo] = dataclass_field(default_factory=list) + last_authenticated_at: Optional[datetime] = None num_expected_tableau_metadata_queries: int = 0 @@ -834,6 +867,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: platform=self.platform, ) yield from site_source.ingest_tableau_site() + except MetadataQueryException as md_exception: self.report.failure( title="Failed to Retrieve Tableau Metadata", @@ -3489,33 +3523,87 @@ def _create_workbook_properties( return {"permissions": json.dumps(groups)} if len(groups) > 0 else None def ingest_tableau_site(self): - # Initialise the dictionary to later look-up for chart and dashboard stat - if self.config.extract_usage_stats: - self._populate_usage_stat_registry() - - if self.config.permission_ingestion: - self._fetch_groups() - - # Populate the map of database names and database hostnames to be used later to map - # databases to platform instances. 
- if self.config.database_hostname_to_platform_instance_map: - self._populate_database_server_hostname_map() - - self._populate_projects_registry() - - if self.config.add_site_container: - yield from self.emit_site_container() - yield from self.emit_project_containers() - yield from self.emit_workbooks() - if self.sheet_ids: - yield from self.emit_sheets() - if self.dashboard_ids: - yield from self.emit_dashboards() - if self.embedded_datasource_ids_being_used: - yield from self.emit_embedded_datasources() - if self.datasource_ids_being_used: - yield from self.emit_published_datasources() - if self.custom_sql_ids_being_used: - yield from self.emit_custom_sql_datasources() - if self.database_tables: - yield from self.emit_upstream_tables() + with self.report.new_stage( + f"Ingesting Tableau Site: {self.site_id} {self.site_content_url}" + ): + # Initialise the dictionary to later look-up for chart and dashboard stat + if self.config.extract_usage_stats: + with PerfTimer() as timer: + self._populate_usage_stat_registry() + self.report.extract_usage_stats_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.config.permission_ingestion: + with PerfTimer() as timer: + self._fetch_groups() + self.report.fetch_groups_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + # Populate the map of database names and database hostnames to be used later to map + # databases to platform instances. 
+ if self.config.database_hostname_to_platform_instance_map: + with PerfTimer() as timer: + self._populate_database_server_hostname_map() + self.report.populate_database_server_hostname_map_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + with PerfTimer() as timer: + self._populate_projects_registry() + self.report.populate_projects_registry_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.config.add_site_container: + yield from self.emit_site_container() + yield from self.emit_project_containers() + + with PerfTimer() as timer: + yield from self.emit_workbooks() + self.report.emit_workbooks_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.sheet_ids: + with PerfTimer() as timer: + yield from self.emit_sheets() + self.report.emit_sheets_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.dashboard_ids: + with PerfTimer() as timer: + yield from self.emit_dashboards() + self.report.emit_dashboards_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.embedded_datasource_ids_being_used: + with PerfTimer() as timer: + yield from self.emit_embedded_datasources() + self.report.emit_embedded_datasources_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.datasource_ids_being_used: + with PerfTimer() as timer: + yield from self.emit_published_datasources() + self.report.emit_published_datasources_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.custom_sql_ids_being_used: + with PerfTimer() as timer: + yield from self.emit_custom_sql_datasources() + self.report.emit_custom_sql_datasources_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) + + if self.database_tables: + with PerfTimer() as timer: + yield from self.emit_upstream_tables() + self.report.emit_upstream_tables_timer[ + self.site_content_url + ] = timer.elapsed_seconds(digits=2) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 9d9a746580f939..43bd788f809c3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -263,86 +263,86 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - self.report.report_ingestion_stage_start("Ingestion Setup") - wait_on_warehouse = None - if self.config.include_hive_metastore: - self.report.report_ingestion_stage_start("Start warehouse") - # Can take several minutes, so start now and wait later - wait_on_warehouse = self.unity_catalog_api_proxy.start_warehouse() - if wait_on_warehouse is None: - self.report.report_failure( - "initialization", - f"SQL warehouse {self.config.profiling.warehouse_id} not found", - ) - return - else: - # wait until warehouse is started - wait_on_warehouse.result() + with self.report.new_stage("Ingestion Setup"): + wait_on_warehouse = None + if self.config.include_hive_metastore: + with self.report.new_stage("Start warehouse"): + # Can take several minutes, so start now and wait later + wait_on_warehouse = self.unity_catalog_api_proxy.start_warehouse() + if wait_on_warehouse is None: + self.report.report_failure( + "initialization", + f"SQL warehouse {self.config.profiling.warehouse_id} not found", + ) + return + else: + # wait until warehouse is started + wait_on_warehouse.result() if self.config.include_ownership: - self.report.report_ingestion_stage_start("Ingest service principals") - self.build_service_principal_map() - self.build_groups_map() + with self.report.new_stage("Ingest service principals"): + self.build_service_principal_map() + self.build_groups_map() if self.config.include_notebooks: - self.report.report_ingestion_stage_start("Ingest notebooks") - yield from self.process_notebooks() + with 
self.report.new_stage("Ingest notebooks"): + yield from self.process_notebooks() yield from self.process_metastores() yield from self.get_view_lineage() if self.config.include_notebooks: - self.report.report_ingestion_stage_start("Notebook lineage") - for notebook in self.notebooks.values(): - wu = self._gen_notebook_lineage(notebook) - if wu: - yield wu + with self.report.new_stage("Notebook lineage"): + for notebook in self.notebooks.values(): + wu = self._gen_notebook_lineage(notebook) + if wu: + yield wu if self.config.include_usage_statistics: - self.report.report_ingestion_stage_start("Ingest usage") - usage_extractor = UnityCatalogUsageExtractor( - config=self.config, - report=self.report, - proxy=self.unity_catalog_api_proxy, - table_urn_builder=self.gen_dataset_urn, - user_urn_builder=self.gen_user_urn, - ) - yield from usage_extractor.get_usage_workunits( - self.table_refs | self.view_refs - ) - - if self.config.is_profiling_enabled(): - self.report.report_ingestion_stage_start("Start warehouse") - # Need to start the warehouse again for profiling, - # as it may have been stopped after ingestion might take - # longer time to complete - wait_on_warehouse = self.unity_catalog_api_proxy.start_warehouse() - if wait_on_warehouse is None: - self.report.report_failure( - "initialization", - f"SQL warehouse {self.config.profiling.warehouse_id} not found", + with self.report.new_stage("Ingest usage"): + usage_extractor = UnityCatalogUsageExtractor( + config=self.config, + report=self.report, + proxy=self.unity_catalog_api_proxy, + table_urn_builder=self.gen_dataset_urn, + user_urn_builder=self.gen_user_urn, + ) + yield from usage_extractor.get_usage_workunits( + self.table_refs | self.view_refs ) - return - else: - # wait until warehouse is started - wait_on_warehouse.result() - self.report.report_ingestion_stage_start("Profiling") - if isinstance(self.config.profiling, UnityCatalogAnalyzeProfilerConfig): - yield from UnityCatalogAnalyzeProfiler( - 
self.config.profiling, - self.report, - self.unity_catalog_api_proxy, - self.gen_dataset_urn, - ).get_workunits(self.table_refs) - elif isinstance(self.config.profiling, UnityCatalogGEProfilerConfig): - yield from UnityCatalogGEProfiler( - sql_common_config=self.config, - profiling_config=self.config.profiling, - report=self.report, - ).get_workunits(list(self.tables.values())) - else: - raise ValueError("Unknown profiling config method") + if self.config.is_profiling_enabled(): + with self.report.new_stage("Start warehouse"): + # Need to start the warehouse again for profiling, + # as it may have been stopped after ingestion might take + # longer time to complete + wait_on_warehouse = self.unity_catalog_api_proxy.start_warehouse() + if wait_on_warehouse is None: + self.report.report_failure( + "initialization", + f"SQL warehouse {self.config.profiling.warehouse_id} not found", + ) + return + else: + # wait until warehouse is started + wait_on_warehouse.result() + + with self.report.new_stage("Profiling"): + if isinstance(self.config.profiling, UnityCatalogAnalyzeProfilerConfig): + yield from UnityCatalogAnalyzeProfiler( + self.config.profiling, + self.report, + self.unity_catalog_api_proxy, + self.gen_dataset_urn, + ).get_workunits(self.table_refs) + elif isinstance(self.config.profiling, UnityCatalogGEProfilerConfig): + yield from UnityCatalogGEProfiler( + sql_common_config=self.config, + profiling_config=self.config.profiling, + report=self.report, + ).get_workunits(list(self.tables.values())) + else: + raise ValueError("Unknown profiling config method") def build_service_principal_map(self) -> None: try: @@ -462,11 +462,11 @@ def process_schemas(self, catalog: Catalog) -> Iterable[MetadataWorkUnit]: self.report.schemas.dropped(schema.id) continue - self.report.report_ingestion_stage_start(f"Ingest schema {schema.id}") - yield from self.gen_schema_containers(schema) - yield from self.process_tables(schema) + with self.report.new_stage(f"Ingest schema 
{schema.id}"): + yield from self.gen_schema_containers(schema) + yield from self.process_tables(schema) - self.report.schemas.processed(schema.id) + self.report.schemas.processed(schema.id) def process_tables(self, schema: Schema) -> Iterable[MetadataWorkUnit]: for table in self.unity_catalog_api_proxy.tables(schema=schema): diff --git a/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py b/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py index ce683e64b3f468..130a36e254fefd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py +++ b/metadata-ingestion/src/datahub/ingestion/source_report/ingestion_stage.py @@ -1,7 +1,7 @@ import logging +from contextlib import AbstractContextManager from dataclasses import dataclass, field from datetime import datetime, timezone -from typing import Optional from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.stats_collections import TopKDict @@ -22,25 +22,29 @@ @dataclass class IngestionStageReport: - ingestion_stage: Optional[str] = None ingestion_stage_durations: TopKDict[str, float] = field(default_factory=TopKDict) - _timer: Optional[PerfTimer] = field( - default=None, init=False, repr=False, compare=False - ) - - def report_ingestion_stage_start(self, stage: str) -> None: - if self._timer: - elapsed = round(self._timer.elapsed_seconds(), 2) - logger.info( - f"Time spent in stage <{self.ingestion_stage}>: {elapsed} seconds", - stacklevel=2, - ) - if self.ingestion_stage: - self.ingestion_stage_durations[self.ingestion_stage] = elapsed - else: - self._timer = PerfTimer() - - self.ingestion_stage = f"{stage} at {datetime.now(timezone.utc)}" - logger.info(f"Stage started: {self.ingestion_stage}") + def new_stage(self, stage: str) -> "IngestionStageContext": + return IngestionStageContext(stage, self) + + +@dataclass +class IngestionStageContext(AbstractContextManager): + def __init__(self, stage: str, report: 
IngestionStageReport): + self._ingestion_stage = f"{stage} at {datetime.now(timezone.utc)}" + self._timer: PerfTimer = PerfTimer() + self._report = report + + def __enter__(self) -> "IngestionStageContext": + logger.info(f"Stage started: {self._ingestion_stage}") self._timer.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + elapsed = self._timer.elapsed_seconds(digits=2) + logger.info( + f"Time spent in stage <{self._ingestion_stage}>: {elapsed} seconds", + stacklevel=2, + ) + self._report.ingestion_stage_durations[self._ingestion_stage] = elapsed + return None diff --git a/metadata-ingestion/src/datahub/utilities/perf_timer.py b/metadata-ingestion/src/datahub/utilities/perf_timer.py index 9488683d6d8cac..fc1b1ed58244c3 100644 --- a/metadata-ingestion/src/datahub/utilities/perf_timer.py +++ b/metadata-ingestion/src/datahub/utilities/perf_timer.py @@ -57,7 +57,7 @@ def __exit__( self.finish() return None - def elapsed_seconds(self) -> float: + def elapsed_seconds(self, digits: int = 4) -> float: """ Returns the elapsed time in seconds. """ @@ -65,11 +65,18 @@ def elapsed_seconds(self) -> float: return self._past_active_time if self.end_time is None: - return (time.perf_counter() - self.start_time) + (self._past_active_time) + elapsed = (time.perf_counter() - self.start_time) + (self._past_active_time) else: - return (self.end_time - self.start_time) + self._past_active_time + elapsed = (self.end_time - self.start_time) + self._past_active_time + + return round(elapsed, digits) def assert_timer_is_running(self) -> None: + if not self.is_running(): + self._error_state = True + logger.warning("Did you forget to start the timer ?") + + def is_running(self) -> bool: """ Returns true if timer is in running state. Timer is in NOT in running state if @@ -77,9 +84,7 @@ def assert_timer_is_running(self) -> None: 2. it is in paused state. 3. it had been started and finished in the past but not started again. 
""" - if self.start_time is None or self.paused or self.end_time: - self._error_state = True - logger.warning("Did you forget to start the timer ?") + return self.start_time is not None and not self.paused and self.end_time is None def __repr__(self) -> str: return repr(self.as_obj()) diff --git a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py index 9cb80ff02657bb..24460f38298069 100644 --- a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py +++ b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py @@ -26,14 +26,14 @@ def run_test(): report = BigQueryV2Report() - report.set_ingestion_stage("All", "Seed Data Generation") - seed_metadata = generate_data( - num_containers=2000, - num_tables=20000, - num_views=2000, - time_range=timedelta(days=7), - ) - all_tables = seed_metadata.all_tables + with report.new_stage("All: Seed Data Generation"): + seed_metadata = generate_data( + num_containers=2000, + num_tables=20000, + num_views=2000, + time_range=timedelta(days=7), + ) + all_tables = seed_metadata.all_tables config = BigQueryV2Config( start_time=seed_metadata.start_time, @@ -51,42 +51,45 @@ def run_test(): schema_resolver=SchemaResolver(platform="bigquery"), identifiers=BigQueryIdentifierBuilder(config, report), ) - report.set_ingestion_stage("All", "Event Generation") - - num_projects = 100 - projects = [f"project-{i}" for i in range(num_projects)] - table_to_project = {table.name: random.choice(projects) for table in all_tables} - table_refs = {str(ref_from_table(table, table_to_project)) for table in all_tables} + with report.new_stage("All: Event Generation"): + num_projects = 100 + projects = [f"project-{i}" for i in range(num_projects)] + table_to_project = {table.name: random.choice(projects) for table in all_tables} + table_refs = { + str(ref_from_table(table, table_to_project)) for table in all_tables + } - queries = list( - 
generate_queries( - seed_metadata, - num_selects=240_000, - num_operations=800_000, - num_unique_queries=50_000, - num_users=2000, - query_length=NormalDistribution(2000, 500), + queries = list( + generate_queries( + seed_metadata, + num_selects=240_000, + num_operations=800_000, + num_unique_queries=50_000, + num_users=2000, + query_length=NormalDistribution(2000, 500), + ) ) - ) - queries.sort(key=lambda q: q.timestamp) - events = list(generate_events(queries, projects, table_to_project, config=config)) - print(f"Events generated: {len(events)}") - pre_mem_usage = psutil.Process(os.getpid()).memory_info().rss - print(f"Test data size: {humanfriendly.format_size(pre_mem_usage)}") + queries.sort(key=lambda q: q.timestamp) + events = list( + generate_events(queries, projects, table_to_project, config=config) + ) + print(f"Events generated: {len(events)}") + pre_mem_usage = psutil.Process(os.getpid()).memory_info().rss + print(f"Test data size: {humanfriendly.format_size(pre_mem_usage)}") - report.set_ingestion_stage("All", "Event Ingestion") - with PerfTimer() as timer: - workunits = usage_extractor._get_workunits_internal(events, table_refs) - num_workunits, peak_memory_usage = workunit_sink(workunits) - report.set_ingestion_stage("All", "Done") - print(f"Workunits Generated: {num_workunits}") - print(f"Seconds Elapsed: {timer.elapsed_seconds():.2f} seconds") + with report.new_stage("All: Event Ingestion"): + with PerfTimer() as timer: + workunits = usage_extractor._get_workunits_internal(events, table_refs) + num_workunits, peak_memory_usage = workunit_sink(workunits) + with report.new_stage("All: Done"): + print(f"Workunits Generated: {num_workunits}") + print(f"Seconds Elapsed: {timer.elapsed_seconds(digits=2)} seconds") - print( - f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" - ) - print(f"Disk Used: {report.processing_perf.usage_state_size}") - print(f"Hash collisions: {report.num_usage_query_hash_collisions}") + print( + 
f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" + ) + print(f"Disk Used: {report.processing_perf.usage_state_size}") + print(f"Hash collisions: {report.num_usage_query_hash_collisions}") if __name__ == "__main__": diff --git a/metadata-ingestion/tests/performance/databricks/test_unity.py b/metadata-ingestion/tests/performance/databricks/test_unity.py index ddd19804ba1841..71192dc5b509bc 100644 --- a/metadata-ingestion/tests/performance/databricks/test_unity.py +++ b/metadata-ingestion/tests/performance/databricks/test_unity.py @@ -59,7 +59,7 @@ def run_test(): workunits = source.get_workunits() num_workunits, peak_memory_usage = workunit_sink(workunits) print(f"Workunits Generated: {num_workunits}") - print(f"Seconds Elapsed: {timer.elapsed_seconds():.2f} seconds") + print(f"Seconds Elapsed: {timer.elapsed_seconds(digits=2)} seconds") print( f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" diff --git a/metadata-ingestion/tests/performance/snowflake/test_snowflake.py b/metadata-ingestion/tests/performance/snowflake/test_snowflake.py index 984d9e42957452..a940cce46a8f74 100644 --- a/metadata-ingestion/tests/performance/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/performance/snowflake/test_snowflake.py @@ -53,7 +53,7 @@ def run_test(): workunits = source.get_workunits() num_workunits, peak_memory_usage = workunit_sink(workunits) logging.info(f"Workunits Generated: {num_workunits}") - logging.info(f"Seconds Elapsed: {timer.elapsed_seconds():.2f} seconds") + logging.info(f"Seconds Elapsed: {timer.elapsed_seconds(digits=2)} seconds") logging.info(source.get_report().as_string()) logging.info( diff --git a/metadata-ingestion/tests/performance/sql/test_sql_formatter.py b/metadata-ingestion/tests/performance/sql/test_sql_formatter.py index 5f783efc559bc9..f09047c0ec4a4f 100644 --- a/metadata-ingestion/tests/performance/sql/test_sql_formatter.py +++ 
b/metadata-ingestion/tests/performance/sql/test_sql_formatter.py @@ -12,12 +12,14 @@ def run_test() -> None: for i in range(N): if i % 50 == 0: print( - f"Running iteration {i}, elapsed time: {timer.elapsed_seconds():.2f} seconds" + f"Running iteration {i}, elapsed time: {timer.elapsed_seconds(digits=2)} seconds" ) try_format_query.__wrapped__(large_sql_query, platform="snowflake") - print(f"Total time taken for {N} iterations: {timer.elapsed_seconds():.2f} seconds") + print( + f"Total time taken for {N} iterations: {timer.elapsed_seconds(digits=2)} seconds" + ) if __name__ == "__main__": diff --git a/metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py b/metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py new file mode 100644 index 00000000000000..8bae38eaa74446 --- /dev/null +++ b/metadata-ingestion/tests/unit/reporting/test_ingestion_stage.py @@ -0,0 +1,42 @@ +import time + +from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport + + +def test_ingestion_stage_context_records_duration(): + report = IngestionStageReport() + with report.new_stage(stage="Test Stage"): + pass + assert len(report.ingestion_stage_durations) == 1 + assert "Test Stage" in next(iter(report.ingestion_stage_durations.keys())) + + +def test_ingestion_stage_context_handles_exceptions(): + report = IngestionStageReport() + try: + with report.new_stage(stage="Test Stage"): + raise ValueError("Test Exception") + except ValueError: + pass + assert len(report.ingestion_stage_durations) == 1 + assert "Test Stage" in next(iter(report.ingestion_stage_durations)) + + +def test_ingestion_stage_context_report_handles_multiple_stages(): + report = IngestionStageReport() + with report.new_stage(stage="Test Stage 1"): + time.sleep(0.1) + with report.new_stage(stage="Test Stage 2"): + time.sleep(0.1) + with report.new_stage(stage="Test Stage 3"): + time.sleep(0.1) + assert len(report.ingestion_stage_durations) == 3 + assert all( + isinstance(duration, float) 
and duration > 0.0 + for duration in report.ingestion_stage_durations.values() + ) + + sorted_stages = list(sorted(report.ingestion_stage_durations.keys())) + assert "Test Stage 1" in sorted_stages[0] + assert "Test Stage 2" in sorted_stages[1] + assert "Test Stage 3" in sorted_stages[2] From 0d328f77ab2d1f3f73640d69782253c5eb2c3747 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 9 Jan 2025 12:08:30 -0600 Subject: [PATCH 048/249] chore(version): bump jdbc drivers (#12301) --- build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 3c36feadc5f4bb..eff36ee3a79775 100644 --- a/build.gradle +++ b/build.gradle @@ -211,7 +211,7 @@ project.ext.externalDependency = [ 'mockitoInline': 'org.mockito:mockito-inline:4.11.0', 'mockServer': 'org.mock-server:mockserver-netty:5.11.2', 'mockServerClient': 'org.mock-server:mockserver-client-java:5.11.2', - 'mysqlConnector': 'mysql:mysql-connector-java:8.0.28', + 'mysqlConnector': 'com.mysql:mysql-connector-j:8.4.0', 'neo4jHarness': 'org.neo4j.test:neo4j-harness:' + neo4jTestVersion, 'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jVersion, 'neo4jTestJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jTestVersion, @@ -235,7 +235,7 @@ project.ext.externalDependency = [ 'playFilters': "com.typesafe.play:filters-helpers_$playScalaVersion:$playVersion", 'pac4j': 'org.pac4j:pac4j-oidc:6.0.6', 'playPac4j': "org.pac4j:play-pac4j_$playScalaVersion:12.0.0-PLAY2.8", - 'postgresql': 'org.postgresql:postgresql:42.3.9', + 'postgresql': 'org.postgresql:postgresql:42.7.4', 'protobuf': 'com.google.protobuf:protobuf-java:3.25.5', 'grpcProtobuf': 'io.grpc:grpc-protobuf:1.53.0', 'rangerCommons': 'org.apache.ranger:ranger-plugins-common:2.3.0', From d3ac112d2851e4a17074cffff6aec08706e798ee Mon Sep 17 00:00:00 2001 From: Chakru <161002324+chakru-r@users.noreply.github.com> Date: Fri, 10 Jan 2025 00:17:35 +0530 
Subject: [PATCH 049/249] build(coverage): fix carry-forward coverage (#12306) --- .github/.codecov.yml | 65 ++++++++++++++++++++++++ .github/workflows/airflow-plugin.yml | 8 +-- .github/workflows/build-and-test.yml | 26 ++++++++-- .github/workflows/dagster-plugin.yml | 6 +-- .github/workflows/gx-plugin.yml | 6 +-- .github/workflows/metadata-ingestion.yml | 10 ++-- .github/workflows/metadata-io.yml | 3 +- .github/workflows/prefect-plugin.yml | 8 +-- gradle/coverage/java-coverage.gradle | 2 +- gradle/coverage/python-coverage.gradle | 2 +- 10 files changed, 110 insertions(+), 26 deletions(-) create mode 100644 .github/.codecov.yml diff --git a/.github/.codecov.yml b/.github/.codecov.yml new file mode 100644 index 00000000000000..1faf5a6bab4644 --- /dev/null +++ b/.github/.codecov.yml @@ -0,0 +1,65 @@ +comment: + layout: "header, files, footer" # remove "new" from "header" and "footer" + hide_project_coverage: true # set to false + require_changes: false # if true: only post the comment if coverage changes + +codecov: + #due to ci-optimization, reports for modules that have not changed may be quite old + max_report_age: off + +flag_management: + default_rules: # the rules that will be followed for any flag added, generally + carryforward: true + statuses: + - type: project + target: auto + threshold: 0% #Not enforcing project coverage yet. 
+ - type: patch + target: 90% + individual_flags: # exceptions to the default rules above, stated flag by flag + - name: frontend + paths: + - "datahub-frontend/**" + - "datahub-web-react/**" + - name: backend + paths: + - "metadata-models/**" + - "datahub-upgrade/**" + - "entity-registry/**" + - "li-utils/**" + - "metadata-auth/**" + - "metadata-dao-impl/**" + - "metadata-events/**" + - "metadata-jobs/**" + - "metadata-service/**" + - "metadata-utils/**" + - "metadata-operation-context/**" + - "datahub-graphql-core/**" + - name: metadata-io + paths: + - "metadata-io/**" + - name: ingestion + paths: + - "metadata-ingestion/**" + - name: ingestion-airflow + paths: + - "metadata-ingestion-modules/airflow-plugin/**" + - name: ingestion-dagster + paths: + - "metadata-ingestion-modules/dagster-plugin/**" + - name: ingestion-gx-plugin + paths: + - "metadata-ingestion-modules/gx-plugin/**" + - name: ingestion-prefect + paths: + - "metadata-ingestion-modules/prefect-plugin/**" +coverage: + status: + project: + default: + target: 0% # no threshold enforcement yet + only_pulls: true + patch: + default: + target: 90% # for new code added in the patch + only_pulls: true diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 89e0c9e2513d8b..c1eba45609fd52 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -77,15 +77,15 @@ jobs: **/build/test-results/test/** **/junit.*.xml !**/binary/** - - name: Upload coverage to Codecov + - name: Upload coverage to Codecov with ingestion flag if: always() uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - directory: ./build/coverage-reports/ + directory: ./build/coverage-reports/metadata-ingestion-modules/airflow-plugin/ fail_ci_if_error: false - flags: airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_extras }} - name: pytest-airflow + flags: ingestion-airflow + name: pytest-airflow-${{ matrix.python-version }}-${{ 
matrix.extra_pip_requirements }} verbose: true - name: Upload test results to Codecov if: ${{ !cancelled() }} diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 058ac4a5c9b1e5..923abac5ef34af 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -113,6 +113,10 @@ jobs: if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }} run: | ./gradlew -PjavaClassVersionDefault=8 :metadata-integration:java:spark-lineage:compileJava + - name: Gather coverage files + run: | + echo "BACKEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(metadata-models|entity-registry|datahuyb-graphql-core|metadata-io|metadata-jobs|metadata-utils|metadata-service|medata-dao-impl|metadata-operation|li-utils|metadata-integration|metadata-events|metadata-auth|ingestion-scheduler|notifications|datahub-upgrade)' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV + echo "FRONTEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(datahub-frontend|datahub-web-react).*\.(xml|json)$' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV - uses: actions/upload-artifact@v4 if: always() with: @@ -124,14 +128,28 @@ jobs: !**/binary/** - name: Ensure codegen is updated uses: ./.github/actions/ensure-codegen-updated - - name: Upload coverage to Codecov - if: always() + - name: Upload backend coverage to Codecov + if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }} + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ${{ env.BACKEND_FILES }} + disable_search: true + #handle_no_reports_found: true + fail_ci_if_error: false + flags: backend + name: ${{ matrix.command }} + verbose: true + - name: Upload frontend coverage to Codecov + if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }} uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - 
directory: ./build/coverage-reports/ + files: ${{ env.FRONTEND_FILES }} + disable_search: true + #handle_no_reports_found: true fail_ci_if_error: false - flags: ${{ matrix.timezone }} + flags: frontend name: ${{ matrix.command }} verbose: true - name: Upload test results to Codecov diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index c29e72367c53c5..fa15a280c9d39f 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -64,14 +64,14 @@ jobs: **/build/reports/tests/test/** **/build/test-results/test/** **/junit.*.xml - - name: Upload coverage to Codecov + - name: Upload coverage to Codecov with ingestion flag if: always() uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - directory: ./build/coverage-reports/ + directory: ./build/coverage-reports/metadata-ingestion-modules/dagster-plugin/ fail_ci_if_error: false - flags: dagster-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }} + flags: ingestion-dagster-plugin name: pytest-dagster verbose: true - name: Upload test results to Codecov diff --git a/.github/workflows/gx-plugin.yml b/.github/workflows/gx-plugin.yml index 825f8beda2f561..eb0ca9a7dbbb97 100644 --- a/.github/workflows/gx-plugin.yml +++ b/.github/workflows/gx-plugin.yml @@ -68,14 +68,14 @@ jobs: **/build/reports/tests/test/** **/build/test-results/test/** **/junit.*.xml - - name: Upload coverage to Codecov + - name: Upload coverage to Codecov with ingestion flag if: always() uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - directory: ./build/coverage-reports/ + directory: ./build/coverage-reports/metadata-ingestion-modules/gx-plugin/ fail_ci_if_error: false - flags: gx-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }} + flags: ingestion-gx-plugin name: pytest-gx verbose: true - name: Upload test results to Codecov diff --git a/.github/workflows/metadata-ingestion.yml 
b/.github/workflows/metadata-ingestion.yml index aa404c4c35c505..8cfc2d396badd2 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -88,15 +88,15 @@ jobs: **/build/test-results/test/** **/junit.*.xml !**/binary/** - - name: Upload coverage to Codecov - if: ${{ always() }} + - name: Upload coverage to Codecov with ingestion flag + if: ${{ always() && matrix.python-version == '3.11' }} uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - directory: ./build/coverage-reports/ + directory: ./build/coverage-reports/metadata-ingestion/ fail_ci_if_error: false - flags: ingestion-${{ matrix.python-version }}-${{ matrix.command }} - name: pytest-ingestion + flags: ingestion + name: pytest-${{ matrix.python-version }}-${{ matrix.command }} verbose: true - name: Upload test results to Codecov if: ${{ !cancelled() }} diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index bcadc641ee2f7c..6efcf58c700b1f 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -86,8 +86,9 @@ jobs: uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - directory: ./build/coverage-reports/ + directory: ./build/coverage-reports/metadata-io/ fail_ci_if_error: false + flags: metadata-io name: metadata-io-test verbose: true - name: Upload test results to Codecov diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index 0bce4d5ef19f31..68736f9fd10156 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -60,15 +60,15 @@ jobs: **/build/test-results/test/** **/junit.*.xml !**/binary/** - - name: Upload coverage to Codecov + - name: Upload coverage to Codecov with ingestion flag if: always() uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - directory: ./build/coverage-reports/ + directory: 
./build/coverage-reports/metadata-ingestion-modules/prefect-plugin/ fail_ci_if_error: false - flags: prefect-${{ matrix.python-version }} - name: pytest-prefect + flags: ingestion-prefect-plugin + name: pytest-prefect-${{ matrix.python-version }} verbose: true - name: Upload test results to Codecov if: ${{ !cancelled() }} diff --git a/gradle/coverage/java-coverage.gradle b/gradle/coverage/java-coverage.gradle index 17260c1a309788..fe8bc65336a983 100644 --- a/gradle/coverage/java-coverage.gradle +++ b/gradle/coverage/java-coverage.gradle @@ -22,7 +22,7 @@ afterEvaluate { Tools that aggregate and analyse coverage tools search for the coverage result files. Keeping them under one folder will minimize the time spent searching through the full source tree. */ - outputLocation = rootProject.layout.buildDirectory.file("coverage-reports/jacoco-${project.name}.xml") + outputLocation = rootProject.layout.buildDirectory.file("coverage-reports/${rootProject.relativePath(project.projectDir)}/jacoco-${project.name}.xml") } csv.required = false html.required = false diff --git a/gradle/coverage/python-coverage.gradle b/gradle/coverage/python-coverage.gradle index 23d6e37387ed83..05eb79cf5659e2 100644 --- a/gradle/coverage/python-coverage.gradle +++ b/gradle/coverage/python-coverage.gradle @@ -7,7 +7,7 @@ ext.get_coverage_args = { test_name = "" -> Tools that aggregate and analyse coverage tools search for the coverage result files. Keeping them under one folder will minimize the time spent searching through the full source tree. 
*/ - def base_path = "${rootProject.buildDir}/coverage-reports" + def base_path = "${rootProject.buildDir}/coverage-reports/${rootProject.relativePath(project.projectDir)}/" /* --cov=src was added via setup.cfg in many of the python projects but for some reason, was not getting picked up From f6c86df1f5ad310698ca13e4e5b5833af08c96eb Mon Sep 17 00:00:00 2001 From: pankajmahato-visa <154867659+pankajmahato-visa@users.noreply.github.com> Date: Fri, 10 Jan 2025 00:45:09 +0530 Subject: [PATCH 050/249] chore(deps): Migrate EOL vulnerability of javax.mail to jakarta.mail (#12282) --- build.gradle | 4 +++- .../java/com/linkedin/entity/client/RestliEntityClient.java | 4 ++-- metadata-utils/build.gradle | 4 ++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/build.gradle b/build.gradle index eff36ee3a79775..5b6613d3057f3e 100644 --- a/build.gradle +++ b/build.gradle @@ -286,7 +286,8 @@ project.ext.externalDependency = [ 'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2', 'jakartaAnnotationApi': 'jakarta.annotation:jakarta.annotation-api:3.0.0', 'classGraph': 'io.github.classgraph:classgraph:4.8.172', - 'mustache': 'com.github.spullara.mustache.java:compiler:0.9.14' + 'mustache': 'com.github.spullara.mustache.java:compiler:0.9.14', + 'javaxMail': 'com.sun.mail:jakarta.mail:1.6.7' ] allprojects { @@ -374,6 +375,7 @@ configure(subprojects.findAll {! 
it.name.startsWith('spark-lineage')}) { exclude group: "org.slf4j", module: "slf4j-nop" exclude group: "org.slf4j", module: "slf4j-ext" exclude group: "org.codehaus.jackson", module: "jackson-mapper-asl" + exclude group: "javax.mail", module: "mail" resolutionStrategy.force externalDependency.antlr4Runtime resolutionStrategy.force externalDependency.antlr4 diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 8d4c5e9228a71c..ca775619220831 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -103,10 +103,10 @@ import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import javax.mail.MethodNotSupportedException; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.NotImplementedException; +import org.apache.http.MethodNotSupportedException; import org.opensearch.core.common.util.CollectionUtils; @Slf4j @@ -1195,7 +1195,7 @@ public DataMap getRawAspect( @Nonnull String aspect, @Nonnull Long version) throws RemoteInvocationException { - throw new MethodNotSupportedException(); + throw new MethodNotSupportedException("Method not supported"); } @Override diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 07ce50993655d2..4b24eeac50b0b7 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -26,6 +26,7 @@ dependencies { implementation externalDependency.slf4jApi compileOnly externalDependency.lombok + runtimeOnly externalDependency.javaxMail annotationProcessor externalDependency.lombok @@ -40,6 +41,9 @@ dependencies { implementation(externalDependency.log4jApi) { because("previous versions are vulnerable to 
CVE-2021-45105") } + implementation(externalDependency.javaxMail) { + because("previous versions are vulnerable") + } } implementation externalDependency.logbackClassic From 210e2c1fa206ed3901edebf9e654d21c90583149 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:32:18 -0600 Subject: [PATCH 051/249] chore(alpine): bump alpine images 3.21 (#12302) --- docker/datahub-frontend/Dockerfile | 2 +- docker/datahub-gms/Dockerfile | 2 +- docker/datahub-mae-consumer/Dockerfile | 2 +- docker/datahub-mce-consumer/Dockerfile | 2 +- docker/datahub-upgrade/Dockerfile | 2 +- docker/elasticsearch-setup/Dockerfile | 2 +- docker/mysql-setup/Dockerfile | 2 +- docker/mysql-setup/init.sh | 3 ++- docker/postgres-setup/Dockerfile | 2 +- 9 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile index 89974e56575b07..16e6477c37ce69 100644 --- a/docker/datahub-frontend/Dockerfile +++ b/docker/datahub-frontend/Dockerfile @@ -1,7 +1,7 @@ # Defining environment ARG APP_ENV=prod -FROM alpine:3.20 AS base +FROM alpine:3.21 AS base # Configurable repositories ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index 47b10535f8deea..52cc507f9268d1 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -23,7 +23,7 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION -FROM alpine:3.20 AS base +FROM alpine:3.21 AS base ENV JMX_VERSION=0.18.0 ENV JETTY_VERSION=11.0.21 diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index 74375072761d89..4ddec56311fb96 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -23,7 +23,7 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install 
github.com/jwilder/dockerize@$DOCKERIZE_VERSION -FROM alpine:3.20 AS base +FROM alpine:3.21 AS base # Re-declaring args from above to make them available in this stage (will inherit default values) ARG ALPINE_REPO_URL diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile index 3adef53cd06068..42e40cd5942144 100644 --- a/docker/datahub-mce-consumer/Dockerfile +++ b/docker/datahub-mce-consumer/Dockerfile @@ -23,7 +23,7 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION -FROM alpine:3.20 AS base +FROM alpine:3.21 AS base # Re-declaring args from above to make them available in this stage (will inherit default values) ARG ALPINE_REPO_URL diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index a8ef4e8034fdd5..488cb46c94cf28 100644 --- a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -23,7 +23,7 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION -FROM alpine:3.20 AS base +FROM alpine:3.21 AS base # Re-declaring args from above to make them available in this stage (will inherit default values) ARG ALPINE_REPO_URL diff --git a/docker/elasticsearch-setup/Dockerfile b/docker/elasticsearch-setup/Dockerfile index 1a6fe5bee6c840..584007a5fb0a9c 100644 --- a/docker/elasticsearch-setup/Dockerfile +++ b/docker/elasticsearch-setup/Dockerfile @@ -23,7 +23,7 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION -FROM alpine:3.20 AS base +FROM alpine:3.21 AS base ARG ALPINE_REPO_URL diff --git a/docker/mysql-setup/Dockerfile b/docker/mysql-setup/Dockerfile index 8a2d42bc233180..21b696a1b906fe 100644 --- a/docker/mysql-setup/Dockerfile +++ b/docker/mysql-setup/Dockerfile @@ -17,7 +17,7 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION -FROM alpine:3.20 
+FROM alpine:3.21 COPY --from=binary /go/bin/dockerize /usr/local/bin ARG ALPINE_REPO_URL diff --git a/docker/mysql-setup/init.sh b/docker/mysql-setup/init.sh index b5ee294ddd6559..2760da86a9a33f 100755 --- a/docker/mysql-setup/init.sh +++ b/docker/mysql-setup/init.sh @@ -1,6 +1,7 @@ #!/bin/bash : ${MYSQL_PORT:=3306} +: ${MYSQL_ARGS:=--ssl=0} sed -e "s/DATAHUB_DB_NAME/${DATAHUB_DB_NAME}/g" /init.sql | tee -a /tmp/init-final.sql -mysql -u $MYSQL_USERNAME -p"$MYSQL_PASSWORD" -h $MYSQL_HOST -P $MYSQL_PORT < /tmp/init-final.sql \ No newline at end of file +mariadb -u $MYSQL_USERNAME -p"$MYSQL_PASSWORD" -h $MYSQL_HOST -P $MYSQL_PORT $MYSQL_ARGS < /tmp/init-final.sql \ No newline at end of file diff --git a/docker/postgres-setup/Dockerfile b/docker/postgres-setup/Dockerfile index 31e9687cea15e8..5362e0d787c15d 100644 --- a/docker/postgres-setup/Dockerfile +++ b/docker/postgres-setup/Dockerfile @@ -17,7 +17,7 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION -FROM alpine:3.20 +FROM alpine:3.21 COPY --from=binary /go/bin/dockerize /usr/local/bin ARG ALPINE_REPO_URL From 0c31d9ae0d39ed652a296b8292ebcdf366c2a353 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 9 Jan 2025 15:55:56 -0800 Subject: [PATCH 052/249] feat(ingest/datahub): support dropping duplicate schema fields (#12308) --- .../src/datahub/ingestion/source/datahub/config.py | 6 ++++++ .../ingestion/source/datahub/datahub_source.py | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index 09f38913f11b19..8622e221940317 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -108,6 +108,12 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): urn_pattern: AllowDenyPattern = 
Field(default=AllowDenyPattern()) + drop_duplicate_schema_fields: bool = Field( + default=False, + description="Whether to drop duplicate schema fields in the schemaMetadata aspect. " + "Useful if the source system has duplicate field paths in the db, but we're pushing to a system with server-side duplicate checking.", + ) + @root_validator(skip_on_failure=True) def check_ingesting_data(cls, values): if ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py index 12daba298a2014..472abd0a97ec70 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py @@ -12,7 +12,10 @@ support_status, ) from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport -from datahub.ingestion.api.source_helpers import auto_workunit_reporter +from datahub.ingestion.api.source_helpers import ( + auto_fix_duplicate_schema_field_paths, + auto_workunit_reporter, +) from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.datahub.config import DataHubSourceConfig from datahub.ingestion.source.datahub.datahub_api_reader import DataHubApiReader @@ -57,7 +60,14 @@ def get_report(self) -> SourceReport: def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: # Exactly replicate data from DataHub source - return [partial(auto_workunit_reporter, self.get_report())] + return [ + ( + auto_fix_duplicate_schema_field_paths + if self.config.drop_duplicate_schema_fields + else None + ), + partial(auto_workunit_reporter, self.get_report()), + ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.report.stop_time = datetime.now(tz=timezone.utc) From 6dbabb2161bc16145eb6b170b34495a9066eae9d Mon Sep 17 00:00:00 2001 From: Chakru <161002324+chakru-r@users.noreply.github.com> Date: Fri, 10 Jan 2025 
10:44:23 +0530 Subject: [PATCH 053/249] feat(ci): add manual trigger for full build (#12307) --- .github/actions/ci-optimization/action.yml | 15 ++++++++++++--- .github/workflows/airflow-plugin.yml | 1 + .github/workflows/build-and-test.yml | 1 + .github/workflows/dagster-plugin.yml | 1 + .github/workflows/gx-plugin.yml | 1 + .github/workflows/metadata-ingestion.yml | 1 + .github/workflows/metadata-io.yml | 1 + .github/workflows/prefect-plugin.yml | 1 + 8 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/actions/ci-optimization/action.yml b/.github/actions/ci-optimization/action.yml index 0d435963382675..8a81859ae903a8 100644 --- a/.github/actions/ci-optimization/action.yml +++ b/.github/actions/ci-optimization/action.yml @@ -13,16 +13,16 @@ outputs: value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'true' }} backend-change: description: "Backend code has changed" - value: ${{ steps.filter.outputs.backend == 'true' }} + value: ${{ steps.filter.outputs.backend == 'true' || steps.trigger.outputs.trigger == 'manual' }} ingestion-change: description: "Ingestion code has changed" - value: ${{ steps.filter.outputs.ingestion == 'true' }} + value: ${{ steps.filter.outputs.ingestion == 'true' || steps.trigger.outputs.trigger == 'manual' }} ingestion-base-change: description: "Ingestion base image docker image has changed" value: ${{ steps.filter.outputs.ingestion-base == 'true' }} frontend-change: description: "Frontend code has changed" - value: ${{ steps.filter.outputs.frontend == 'true' }} + value: ${{ steps.filter.outputs.frontend == 'true' || steps.trigger.outputs.trigger == 'manual' }} docker-change: description: "Docker code has changed" value: ${{ steps.filter.outputs.docker == 'true' }} @@ -44,6 +44,15 @@ outputs: runs: using: "composite" steps: + - name: Check trigger type + id: trigger # Add an ID to reference this step + shell: bash + run: | + if [ "${{ 
github.event_name }}" == "workflow_dispatch" ]; then + echo "trigger=manual" >> $GITHUB_OUTPUT + else + echo "trigger=pr" >> $GITHUB_OUTPUT + fi - uses: dorny/paths-filter@v3 id: filter with: diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index c1eba45609fd52..e1e0fb0a85e977 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -18,6 +18,7 @@ on: - "metadata-models/**" release: types: [published] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 923abac5ef34af..624e5d5df32178 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -12,6 +12,7 @@ on: paths-ignore: - "docs/**" - "**.md" + workflow_dispatch: release: types: [published] diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index fa15a280c9d39f..a2ac59d6989a9f 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -18,6 +18,7 @@ on: - "metadata-models/**" release: types: [published] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/gx-plugin.yml b/.github/workflows/gx-plugin.yml index eb0ca9a7dbbb97..c28bdbb30eb36d 100644 --- a/.github/workflows/gx-plugin.yml +++ b/.github/workflows/gx-plugin.yml @@ -18,6 +18,7 @@ on: - "metadata-models/**" release: types: [published] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 8cfc2d396badd2..be6026098ce420 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -18,6 +18,7 @@ on: - 
"metadata-models/**" release: types: [published] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 6efcf58c700b1f..80af03e77eef82 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -20,6 +20,7 @@ on: - ".github/workflows/metadata-io.yml" release: types: [published] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index 68736f9fd10156..401efa340ae8ca 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -18,6 +18,7 @@ on: - "metadata-models/**" release: types: [published] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} From 3b827f356a9b1b8e511f1b39c7ddfe3e6a6afa01 Mon Sep 17 00:00:00 2001 From: Chakru <161002324+chakru-r@users.noreply.github.com> Date: Fri, 10 Jan 2025 12:34:59 +0530 Subject: [PATCH 054/249] fix(ci): make upload-artifact name unique (#12312) --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 624e5d5df32178..9a940ef8040d17 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -121,7 +121,7 @@ jobs: - uses: actions/upload-artifact@v4 if: always() with: - name: Test Results (build) + name: Test Results (build) - ${{ matrix.command}}-${{ matrix.timezone }} path: | **/build/reports/tests/test/** **/build/test-results/test/** From d8e7cb25e014b73a579412e11435b08f2049de6f Mon Sep 17 00:00:00 2001 From: Austin SeungJun Park <110667795+eagle-25@users.noreply.github.com> Date: Fri, 10 Jan 2025 17:41:28 +0900 Subject: [PATCH 
055/249] fix(ingestion/s3): groupby group-splitting issue (#12254) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sergio Gómez Villamor --- .../datahub/ingestion/source/aws/s3_util.py | 25 ++++++- .../src/datahub/ingestion/source/s3/source.py | 24 +++---- .../tests/unit/s3/test_s3_source.py | 65 ++++++++++++++++++- .../tests/unit/s3/test_s3_util.py | 29 +++++++++ 4 files changed, 127 insertions(+), 16 deletions(-) create mode 100644 metadata-ingestion/tests/unit/s3/test_s3_util.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py index 878b8dd1bb9a51..360f18aa448f27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/s3_util.py @@ -1,6 +1,11 @@ import logging import os -from typing import Optional +from collections import defaultdict +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional + +if TYPE_CHECKING: + from mypy_boto3_s3.service_resource import ObjectSummary + S3_PREFIXES = ["s3://", "s3n://", "s3a://"] @@ -68,3 +73,21 @@ def get_key_prefix(s3_uri: str) -> str: f"Not an S3 URI. Must start with one of the following prefixes: {str(S3_PREFIXES)}" ) return strip_s3_prefix(s3_uri).split("/", maxsplit=1)[1] + + +def group_s3_objects_by_dirname( + s3_objects: Iterable["ObjectSummary"], +) -> Dict[str, List["ObjectSummary"]]: + """ + Groups S3 objects by their directory name. + + If a s3_object in the root directory (i.e., s3://bucket/file.txt), it is grouped under '/'. 
+ """ + grouped_s3_objs = defaultdict(list) + for obj in s3_objects: + if "/" in obj.key: + dirname = obj.key.rsplit("/", 1)[0] + else: + dirname = "/" + grouped_s3_objs[dirname].append(obj) + return grouped_s3_objs diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index ceac9e96d1ddd0..989d0d734352a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -6,9 +6,8 @@ import re import time from datetime import datetime -from itertools import groupby from pathlib import PurePath -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple from urllib.parse import urlparse import smart_open.compression as so_compression @@ -41,6 +40,7 @@ get_bucket_name, get_bucket_relative_path, get_key_prefix, + group_s3_objects_by_dirname, strip_s3_prefix, ) from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator @@ -75,6 +75,9 @@ from datahub.telemetry import stats, telemetry from datahub.utilities.perf_timer import PerfTimer +if TYPE_CHECKING: + from mypy_boto3_s3.service_resource import Bucket + # hide annoying debug errors from py4j logging.getLogger("py4j").setLevel(logging.ERROR) logger: logging.Logger = logging.getLogger(__name__) @@ -842,7 +845,7 @@ def get_dir_to_process( def get_folder_info( self, path_spec: PathSpec, - bucket: Any, # Todo: proper type + bucket: "Bucket", prefix: str, ) -> List[Folder]: """ @@ -857,22 +860,15 @@ def get_folder_info( Parameters: path_spec (PathSpec): The path specification used to determine partitioning. - bucket (Any): The S3 bucket object. + bucket (Bucket): The S3 bucket object. prefix (str): The prefix path in the S3 bucket to list objects from. Returns: List[Folder]: A list of Folder objects representing the partitions found. 
""" - - prefix_to_list = prefix - files = list( - bucket.objects.filter(Prefix=f"{prefix_to_list}").page_size(PAGE_SIZE) - ) - files = sorted(files, key=lambda a: a.last_modified) - grouped_files = groupby(files, lambda x: x.key.rsplit("/", 1)[0]) - partitions: List[Folder] = [] - for key, group in grouped_files: + s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE) + for key, group in group_s3_objects_by_dirname(s3_objects).items(): file_size = 0 creation_time = None modification_time = None @@ -904,7 +900,7 @@ def get_folder_info( Folder( partition_id=id, is_partition=bool(id), - creation_time=creation_time if creation_time else None, + creation_time=creation_time if creation_time else None, # type: ignore[arg-type] modification_time=modification_time, sample_file=self.create_s3_path(max_file.bucket_name, max_file.key), size=file_size, diff --git a/metadata-ingestion/tests/unit/s3/test_s3_source.py b/metadata-ingestion/tests/unit/s3/test_s3_source.py index f826cf0179e221..902987213e122f 100644 --- a/metadata-ingestion/tests/unit/s3/test_s3_source.py +++ b/metadata-ingestion/tests/unit/s3/test_s3_source.py @@ -1,12 +1,15 @@ +from datetime import datetime from typing import List, Tuple +from unittest.mock import Mock import pytest from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator from datahub.ingestion.source.data_lake_common.path_spec import PathSpec -from datahub.ingestion.source.s3.source import partitioned_folder_comparator +from datahub.ingestion.source.s3.source import S3Source, partitioned_folder_comparator def test_partition_comparator_numeric_folder_name(): @@ -240,3 +243,63 @@ def container_properties_filter(x: MetadataWorkUnit) -> bool: "folder_abs_path": "my-bucket/my-dir/my-dir2", "platform": "s3", } + + +def 
test_get_folder_info(): + """ + Test S3Source.get_folder_info returns the latest file in each folder + """ + + def _get_s3_source(path_spec_: PathSpec) -> S3Source: + return S3Source.create( + config_dict={ + "path_spec": { + "include": path_spec_.include, + "table_name": path_spec_.table_name, + }, + }, + ctx=PipelineContext(run_id="test-s3"), + ) + + # arrange + path_spec = PathSpec( + include="s3://my-bucket/{table}/{partition0}/*.csv", + table_name="{table}", + ) + + bucket = Mock() + bucket.objects.filter().page_size = Mock( + return_value=[ + Mock( + bucket_name="my-bucket", + key="my-folder/dir1/0001.csv", + creation_time=datetime(2025, 1, 1, 1), + last_modified=datetime(2025, 1, 1, 1), + size=100, + ), + Mock( + bucket_name="my-bucket", + key="my-folder/dir2/0001.csv", + creation_time=datetime(2025, 1, 1, 2), + last_modified=datetime(2025, 1, 1, 2), + size=100, + ), + Mock( + bucket_name="my-bucket", + key="my-folder/dir1/0002.csv", + creation_time=datetime(2025, 1, 1, 2), + last_modified=datetime(2025, 1, 1, 2), + size=100, + ), + ] + ) + + # act + res = _get_s3_source(path_spec).get_folder_info( + path_spec, bucket, prefix="/my-folder" + ) + + # assert + assert len(res) == 2 + assert res[0].sample_file == "s3://my-bucket/my-folder/dir1/0002.csv" + assert res[1].sample_file == "s3://my-bucket/my-folder/dir2/0001.csv" diff --git a/metadata-ingestion/tests/unit/s3/test_s3_util.py b/metadata-ingestion/tests/unit/s3/test_s3_util.py new file mode 100644 index 00000000000000..7850d65ca8b01f --- /dev/null +++ b/metadata-ingestion/tests/unit/s3/test_s3_util.py @@ -0,0 +1,29 @@ +from unittest.mock import Mock + +from datahub.ingestion.source.aws.s3_util import group_s3_objects_by_dirname + + +def test_group_s3_objects_by_dirname(): + s3_objects = [ + Mock(key="/dir1/file1.txt"), + Mock(key="/dir2/file2.txt"), + Mock(key="/dir1/file3.txt"), + ] + + grouped_objects = group_s3_objects_by_dirname(s3_objects) + + assert len(grouped_objects) == 2 + assert 
grouped_objects["/dir1"] == [s3_objects[0], s3_objects[2]] + assert grouped_objects["/dir2"] == [s3_objects[1]] + + +def test_group_s3_objects_by_dirname_files_in_root_directory(): + s3_objects = [ + Mock(key="file1.txt"), + Mock(key="file2.txt"), + ] + + grouped_objects = group_s3_objects_by_dirname(s3_objects) + + assert len(grouped_objects) == 1 + assert grouped_objects["/"] == s3_objects From efc5d31f0388f74529abaf89458c3ce1f9163a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Fri, 10 Jan 2025 10:52:57 +0100 Subject: [PATCH 056/249] feat(graphql): adds container aspect for dataflow and datajob entities (#12236) Co-authored-by: Chris Collins --- .../datahub/graphql/GmsGraphQLEngine.java | 22 ++++++++++ .../graphql/types/dataflow/DataFlowType.java | 1 + .../dataflow/mappers/DataFlowMapper.java | 13 ++++++ .../graphql/types/datajob/DataJobType.java | 1 + .../types/datajob/mappers/DataJobMapper.java | 9 ++++ .../src/main/resources/entity.graphql | 20 +++++++++ .../dataflow/mappers/DataFlowMapperTest.java | 42 +++++++++++++++++++ .../datajob/mappers/DataJobMapperTest.java | 42 +++++++++++++++++++ .../app/entity/dataFlow/DataFlowEntity.tsx | 1 + .../app/entity/dataFlow/preview/Preview.tsx | 4 ++ .../src/app/entity/dataJob/DataJobEntity.tsx | 1 + .../app/entity/dataJob/preview/Preview.tsx | 4 ++ .../src/graphql/dataFlow.graphql | 3 ++ .../src/graphql/fragments.graphql | 3 ++ datahub-web-react/src/graphql/search.graphql | 12 ++++++ docs/how/updating-datahub.md | 1 + .../src/main/resources/entity-registry.yml | 2 + 17 files changed, 181 insertions(+) create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapperTest.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 94f0e8a055b701..59335ba605a741 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -2377,6 +2377,17 @@ private void configureDataJobResolvers(final RuntimeWiring.Builder builder) { ? dataJob.getDataPlatformInstance().getUrn() : null; })) + .dataFetcher( + "container", + new LoadableTypeResolver<>( + containerType, + (env) -> { + final DataJob dataJob = env.getSource(); + return dataJob.getContainer() != null + ? dataJob.getContainer().getUrn() + : null; + })) + .dataFetcher("parentContainers", new ParentContainersResolver(entityClient)) .dataFetcher("runs", new DataJobRunsResolver(entityClient)) .dataFetcher("privileges", new EntityPrivilegesResolver(entityClient)) .dataFetcher("exists", new EntityExistsResolver(entityService)) @@ -2454,6 +2465,17 @@ private void configureDataFlowResolvers(final RuntimeWiring.Builder builder) { ? dataFlow.getDataPlatformInstance().getUrn() : null; })) + .dataFetcher( + "container", + new LoadableTypeResolver<>( + containerType, + (env) -> { + final DataFlow dataFlow = env.getSource(); + return dataFlow.getContainer() != null + ? 
dataFlow.getContainer().getUrn() + : null; + })) + .dataFetcher("parentContainers", new ParentContainersResolver(entityClient)) .dataFetcher( "health", new EntityHealthResolver( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java index 3a697517bdecee..f2d38aadf49656 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java @@ -74,6 +74,7 @@ public class DataFlowType DOMAINS_ASPECT_NAME, DEPRECATION_ASPECT_NAME, DATA_PLATFORM_INSTANCE_ASPECT_NAME, + CONTAINER_ASPECT_NAME, DATA_PRODUCTS_ASPECT_NAME, BROWSE_PATHS_V2_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java index 44bc6a99eae4bb..0902d6f2080b8f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java @@ -16,6 +16,7 @@ import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.DataFlow; import com.linkedin.datahub.graphql.generated.DataFlowEditableProperties; import com.linkedin.datahub.graphql.generated.DataFlowInfo; @@ -106,6 +107,7 @@ public DataFlow apply( (dataset, dataMap) -> dataset.setDataPlatformInstance( DataPlatformInstanceAspectMapper.map(context, new DataPlatformInstance(dataMap)))); + 
mappingHelper.mapToResult(context, CONTAINER_ASPECT_NAME, DataFlowMapper::mapContainers); mappingHelper.mapToResult( BROWSE_PATHS_V2_ASPECT_NAME, (dataFlow, dataMap) -> @@ -206,6 +208,17 @@ private static void mapGlobalTags( dataFlow.setTags(globalTags); } + private static void mapContainers( + @Nullable final QueryContext context, @Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap) { + final com.linkedin.container.Container gmsContainer = + new com.linkedin.container.Container(dataMap); + dataFlow.setContainer( + Container.builder() + .setType(EntityType.CONTAINER) + .setUrn(gmsContainer.getContainer().toString()) + .build()); + } + private static void mapDomains( @Nullable final QueryContext context, @Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java index 8d55ca6dbf7ac9..317ee39ea565e5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java @@ -75,6 +75,7 @@ public class DataJobType DOMAINS_ASPECT_NAME, DEPRECATION_ASPECT_NAME, DATA_PLATFORM_INSTANCE_ASPECT_NAME, + CONTAINER_ASPECT_NAME, DATA_PRODUCTS_ASPECT_NAME, BROWSE_PATHS_V2_ASPECT_NAME, SUB_TYPES_ASPECT_NAME, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java index ec57c95ce151e2..3403d1f8f7b7f2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java @@ -9,6 +9,7 
@@ import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.DataFlow; import com.linkedin.datahub.graphql.generated.DataJob; import com.linkedin.datahub.graphql.generated.DataJobEditableProperties; @@ -112,6 +113,14 @@ public DataJob apply( } else if (DATA_PLATFORM_INSTANCE_ASPECT_NAME.equals(name)) { result.setDataPlatformInstance( DataPlatformInstanceAspectMapper.map(context, new DataPlatformInstance(data))); + } else if (CONTAINER_ASPECT_NAME.equals(name)) { + final com.linkedin.container.Container gmsContainer = + new com.linkedin.container.Container(data); + result.setContainer( + Container.builder() + .setType(EntityType.CONTAINER) + .setUrn(gmsContainer.getContainer().toString()) + .build()); } else if (BROWSE_PATHS_V2_ASPECT_NAME.equals(name)) { result.setBrowsePathV2(BrowsePathsV2Mapper.map(context, new BrowsePathsV2(data))); } else if (SUB_TYPES_ASPECT_NAME.equals(name)) { diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index a5cb0893a64fae..adb24d92587b58 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -6275,6 +6275,16 @@ type DataFlow implements EntityWithRelationships & Entity & BrowsableEntity { """ dataPlatformInstance: DataPlatformInstance + """ + The parent container in which the entity resides + """ + container: Container + + """ + Recursively get the lineage of containers for this entity + """ + parentContainers: ParentContainersResult + """ Granular API for querying edges extending from this entity """ @@ -6457,6 +6467,16 @@ type DataJob implements EntityWithRelationships & Entity & BrowsableEntity { """ dataPlatformInstance: DataPlatformInstance + """ + The parent container in which the 
entity resides + """ + container: Container + + """ + Recursively get the lineage of containers for this entity + """ + parentContainers: ParentContainersResult + """ Additional read write properties associated with the Data Job """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapperTest.java new file mode 100644 index 00000000000000..a49f063f94d336 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapperTest.java @@ -0,0 +1,42 @@ +package com.linkedin.datahub.graphql.types.dataflow.mappers; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.DataFlow; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.Map; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class DataFlowMapperTest { + private static final Urn TEST_DATA_FLOW_URN = + Urn.createFromTuple(Constants.DATA_FLOW_ENTITY_NAME, "dataflow1"); + private static final Urn TEST_CONTAINER_URN = + Urn.createFromTuple(Constants.CONTAINER_ENTITY_NAME, "container1"); + + @Test + public void testMapDataFlowContainer() throws URISyntaxException { + com.linkedin.container.Container input = new com.linkedin.container.Container(); + input.setContainer(TEST_CONTAINER_URN); + + final Map containerAspect = new HashMap<>(); + containerAspect.put( + Constants.CONTAINER_ASPECT_NAME, + new com.linkedin.entity.EnvelopedAspect().setValue(new Aspect(input.data()))); + final EntityResponse response = + new EntityResponse() + .setEntityName(Constants.DATA_FLOW_ENTITY_NAME) + .setUrn(TEST_DATA_FLOW_URN) 
+ .setAspects(new EnvelopedAspectMap(containerAspect)); + + final DataFlow actual = DataFlowMapper.map(null, response); + + Assert.assertEquals(actual.getUrn(), TEST_DATA_FLOW_URN.toString()); + Assert.assertEquals(actual.getContainer().getUrn(), TEST_CONTAINER_URN.toString()); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapperTest.java new file mode 100644 index 00000000000000..d7fc0f198977eb --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapperTest.java @@ -0,0 +1,42 @@ +package com.linkedin.datahub.graphql.types.datajob.mappers; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.DataJob; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.Map; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class DataJobMapperTest { + private static final Urn TEST_DATA_JOB_URN = + Urn.createFromTuple(Constants.DATA_JOB_ENTITY_NAME, "datajob1"); + private static final Urn TEST_CONTAINER_URN = + Urn.createFromTuple(Constants.CONTAINER_ENTITY_NAME, "container1"); + + @Test + public void testMapDataJobContainer() throws URISyntaxException { + com.linkedin.container.Container input = new com.linkedin.container.Container(); + input.setContainer(TEST_CONTAINER_URN); + + final Map containerAspect = new HashMap<>(); + containerAspect.put( + Constants.CONTAINER_ASPECT_NAME, + new com.linkedin.entity.EnvelopedAspect().setValue(new Aspect(input.data()))); + final EntityResponse response = + new EntityResponse() + 
.setEntityName(Constants.DATA_JOB_ENTITY_NAME) + .setUrn(TEST_DATA_JOB_URN) + .setAspects(new EnvelopedAspectMap(containerAspect)); + + final DataJob actual = DataJobMapper.map(null, response); + + Assert.assertEquals(actual.getUrn(), TEST_DATA_JOB_URN.toString()); + Assert.assertEquals(actual.getContainer().getUrn(), TEST_CONTAINER_URN.toString()); + } +} diff --git a/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx b/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx index 3c03dfb65ccbcd..9e26bbadaca070 100644 --- a/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx +++ b/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx @@ -184,6 +184,7 @@ export class DataFlowEntity implements Entity { degree={(result as any).degree} paths={(result as any).paths} health={data.health} + parentContainers={data.parentContainers} /> ); }; diff --git a/datahub-web-react/src/app/entity/dataFlow/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataFlow/preview/Preview.tsx index f210f7c985ebf7..0c86e745eba29f 100644 --- a/datahub-web-react/src/app/entity/dataFlow/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/dataFlow/preview/Preview.tsx @@ -10,6 +10,7 @@ import { GlobalTags, Health, Owner, + ParentContainersResult, SearchInsight, } from '../../../../types.generated'; import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; @@ -40,6 +41,7 @@ export const Preview = ({ degree, paths, health, + parentContainers, }: { urn: string; name: string; @@ -59,6 +61,7 @@ export const Preview = ({ degree?: number; paths?: EntityPath[]; health?: Health[] | null; + parentContainers?: ParentContainersResult | null; }): JSX.Element => { const entityRegistry = useEntityRegistry(); return ( @@ -91,6 +94,7 @@ export const Preview = ({ degree={degree} paths={paths} health={health || undefined} + parentContainers={parentContainers} /> ); }; diff --git a/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx 
b/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx index 5b1aaeaef76d5b..ff6490ebc91b0c 100644 --- a/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx +++ b/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx @@ -205,6 +205,7 @@ export class DataJobEntity implements Entity { degree={(result as any).degree} paths={(result as any).paths} health={data.health} + parentContainers={data.parentContainers} /> ); }; diff --git a/datahub-web-react/src/app/entity/dataJob/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataJob/preview/Preview.tsx index b163722b5151c7..07ff81effbbc65 100644 --- a/datahub-web-react/src/app/entity/dataJob/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/dataJob/preview/Preview.tsx @@ -12,6 +12,7 @@ import { GlobalTags, Health, Owner, + ParentContainersResult, SearchInsight, } from '../../../../types.generated'; import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; @@ -44,6 +45,7 @@ export const Preview = ({ degree, paths, health, + parentContainers, }: { urn: string; name: string; @@ -64,6 +66,7 @@ export const Preview = ({ degree?: number; paths?: EntityPath[]; health?: Health[] | null; + parentContainers?: ParentContainersResult | null; }): JSX.Element => { const entityRegistry = useEntityRegistry(); return ( @@ -98,6 +101,7 @@ export const Preview = ({ degree={degree} paths={paths} health={health || undefined} + parentContainers={parentContainers} /> ); }; diff --git a/datahub-web-react/src/graphql/dataFlow.graphql b/datahub-web-react/src/graphql/dataFlow.graphql index 2441ce600c3c55..199c47811ce08e 100644 --- a/datahub-web-react/src/graphql/dataFlow.graphql +++ b/datahub-web-react/src/graphql/dataFlow.graphql @@ -50,6 +50,9 @@ fragment dataFlowFields on DataFlow { dataPlatformInstance { ...dataPlatformInstanceFields } + parentContainers { + ...parentContainersFields + } browsePathV2 { ...browsePathV2Fields } diff --git a/datahub-web-react/src/graphql/fragments.graphql 
b/datahub-web-react/src/graphql/fragments.graphql index 788c68349b4268..68c57c5cb5db55 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -403,6 +403,9 @@ fragment dataJobFields on DataJob { dataPlatformInstance { ...dataPlatformInstanceFields } + parentContainers { + ...parentContainersFields + } privileges { canEditLineage } diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 58c9a51f3d7e90..72e7d347187828 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -128,6 +128,9 @@ fragment autoCompleteFields on Entity { dataPlatformInstance { ...dataPlatformInstanceFields } + parentContainers { + ...parentContainersFields + } } ... on DataJob { dataFlow { @@ -146,6 +149,9 @@ fragment autoCompleteFields on Entity { dataPlatformInstance { ...dataPlatformInstanceFields } + parentContainers { + ...parentContainersFields + } } ... on GlossaryTerm { name @@ -626,6 +632,9 @@ fragment searchResultsWithoutSchemaField on Entity { dataPlatformInstance { ...dataPlatformInstanceFields } + parentContainers { + ...parentContainersFields + } domain { ...entityDomain } @@ -677,6 +686,9 @@ fragment searchResultsWithoutSchemaField on Entity { dataPlatformInstance { ...dataPlatformInstanceFields } + parentContainers { + ...parentContainersFields + } subTypes { typeNames } diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 07577079d66d12..68b41c907c6ad6 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -44,6 +44,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - OpenAPI Update: PIT Keep Alive parameter added to scroll. NOTE: This parameter requires the `pointInTimeCreationEnabled` feature flag to be enabled and the `elasticSearch.implementation` configuration to be `elasticsearch`. 
This feature is not supported for OpenSearch at this time and the parameter will not be respected without both of these set. - OpenAPI Update 2: Previously there was an incorrectly marked parameter named `sort` on the generic list entities endpoint for v3. This parameter is deprecated and only supports a single string value while the documentation indicates it supports a list of strings. This documentation error has been fixed and the correct field, `sortCriteria`, is now documented which supports a list of strings. - #12223: For dbt Cloud ingestion, the "View in dbt" link will point at the "Explore" page in the dbt Cloud UI. You can revert to the old behavior of linking to the dbt Cloud IDE by setting `external_url_mode: ide". +- #12236: Data flow and data job entities may additionally produce container aspect that will require a corresponding upgrade of server. Otherwise server can reject the aspect. ### Breaking Changes diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 0193e5e2c5c6c3..1556b72e4aefb1 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -70,6 +70,7 @@ entities: - glossaryTerms - institutionalMemory - dataPlatformInstance + - container - browsePathsV2 - structuredProperties - forms @@ -93,6 +94,7 @@ entities: - glossaryTerms - institutionalMemory - dataPlatformInstance + - container - browsePathsV2 - structuredProperties - incidentsSummary From a92a10770ec06e67e5bc750d2319da06ebef3f15 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 10 Jan 2025 16:24:29 +0530 Subject: [PATCH 057/249] docs(ingest/glue): add permissions for glue (#12290) --- .../src/datahub/ingestion/source/aws/glue.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py 
index a0bed4ae9a7581..30e81643837375 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -248,6 +248,9 @@ def report_table_dropped(self, table: str) -> None: "Enabled by default when stateful ingestion is turned on.", ) @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +@capability( + SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field" +) class GlueSource(StatefulIngestionSourceBase): """ Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. @@ -284,12 +287,22 @@ class GlueSource(StatefulIngestionSourceBase): "Action": [ "glue:GetDataflowGraph", "glue:GetJobs", + "s3:GetObject", ], "Resource": "*" } ``` - plus `s3:GetObject` for the job script locations. 
+ For profiling datasets, the following additional permissions are required: + ```json + { + "Effect": "Allow", + "Action": [ + "glue:GetPartitions", + ], + "Resource": "*" + } + ``` """ From a4f5ab4443cc669f24b60ef6b2a66bbb1117394b Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 10 Jan 2025 16:24:47 +0530 Subject: [PATCH 058/249] fix(ingest/gc): add delete limit execution request (#12313) --- .../source/gc/execution_request_cleanup.py | 37 ++++++++++++++++--- .../source/gc/soft_deleted_entity_cleanup.py | 15 +++++++- .../bootstrap_mcps/ingestion-datahub-gc.yaml | 19 ++++++++-- 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py index f9a00d7f009058..c1763b16f3670f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py @@ -29,7 +29,7 @@ class DatahubExecutionRequestCleanupConfig(ConfigModel): ) keep_history_max_days: int = Field( - 30, + 90, description="Maximum number of days to keep execution requests for, per ingestion source", ) @@ -48,6 +48,10 @@ class DatahubExecutionRequestCleanupConfig(ConfigModel): description="Maximum runtime in seconds for the cleanup task", ) + limit_entities_delete: Optional[int] = Field( + 10000, description="Max number of execution requests to hard delete." 
+ ) + max_read_errors: int = Field( default=10, description="Maximum number of read errors before aborting", @@ -65,6 +69,8 @@ class DatahubExecutionRequestCleanupReport(SourceReport): ergc_delete_errors: int = 0 ergc_start_time: Optional[datetime.datetime] = None ergc_end_time: Optional[datetime.datetime] = None + ergc_delete_limit_reached: bool = False + ergc_runtime_limit_reached: bool = False class CleanupRecord(BaseModel): @@ -85,12 +91,20 @@ def __init__( self.graph = graph self.report = report self.instance_id = int(time.time()) + self.last_print_time = 0.0 if config is not None: self.config = config else: self.config = DatahubExecutionRequestCleanupConfig() + def _print_report(self) -> None: + time_taken = round(time.time() - self.last_print_time, 1) + # Print report every 2 minutes + if time_taken > 120: + self.last_print_time = time.time() + logger.info(f"\n{self.report.as_string()}") + def _to_cleanup_record(self, entry: Dict) -> CleanupRecord: input_aspect = ( entry.get("aspects", {}) @@ -175,6 +189,7 @@ def _scroll_garbage_records(self): running_guard_timeout = now_ms - 30 * 24 * 3600 * 1000 for entry in self._scroll_execution_requests(): + self._print_report() self.report.ergc_records_read += 1 key = entry.ingestion_source @@ -225,15 +240,12 @@ def _scroll_garbage_records(self): f"record timestamp: {entry.requested_at}." 
) ) - self.report.ergc_records_deleted += 1 yield entry def _delete_entry(self, entry: CleanupRecord) -> None: try: - logger.info( - f"ergc({self.instance_id}): going to delete ExecutionRequest {entry.request_id}" - ) self.graph.delete_entity(entry.urn, True) + self.report.ergc_records_deleted += 1 except Exception as e: self.report.ergc_delete_errors += 1 self.report.failure( @@ -252,10 +264,23 @@ def _reached_runtime_limit(self) -> bool: >= datetime.timedelta(seconds=self.config.runtime_limit_seconds) ) ): + self.report.ergc_runtime_limit_reached = True logger.info(f"ergc({self.instance_id}): max runtime reached.") return True return False + def _reached_delete_limit(self) -> bool: + if ( + self.config.limit_entities_delete + and self.report.ergc_records_deleted >= self.config.limit_entities_delete + ): + logger.info( + f"ergc({self.instance_id}): max delete limit reached: {self.config.limit_entities_delete}." + ) + self.report.ergc_delete_limit_reached = True + return True + return False + def run(self) -> None: if not self.config.enabled: logger.info( @@ -274,7 +299,7 @@ def run(self) -> None: ) for entry in self._scroll_garbage_records(): - if self._reached_runtime_limit(): + if self._reached_runtime_limit() or self._reached_delete_limit(): break self._delete_entry(entry) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py index 0a52b7e17bf714..471eeff0224ed1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py @@ -231,6 +231,15 @@ def _process_futures(self, futures: Dict[Future, str]) -> Dict[Future, str]: def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[str]: assert self.ctx.graph scroll_id: Optional[str] = None + + batch_size = self.config.batch_size + if entity_type == 
"DATA_PROCESS_INSTANCE": + # Due to a bug in Data process instance querying this is a temp workaround + # to avoid a giant stacktrace by having a smaller batch size in first call + # This will be remove in future version after server with fix has been + # around for a while + batch_size = 10 + while True: try: result = self.ctx.graph.execute_graphql( @@ -240,7 +249,7 @@ def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[st "types": [entity_type], "query": "*", "scrollId": scroll_id if scroll_id else None, - "count": self.config.batch_size, + "count": batch_size, "orFilters": [ { "and": [ @@ -263,6 +272,10 @@ def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[st scroll_across_entities = result.get("scrollAcrossEntities") if not scroll_across_entities or not scroll_across_entities.get("count"): break + if entity_type == "DATA_PROCESS_INSTANCE": + # Temp workaround. See note in beginning of the function + # We make the batch size = config after call has succeeded once + batch_size = self.config.batch_size scroll_id = scroll_across_entities.get("nextScrollId") self.report.num_queries_found += scroll_across_entities.get("count") for query in scroll_across_entities.get("searchResults"): diff --git a/metadata-service/configuration/src/main/resources/bootstrap_mcps/ingestion-datahub-gc.yaml b/metadata-service/configuration/src/main/resources/bootstrap_mcps/ingestion-datahub-gc.yaml index c0c5be85b16b1d..8879a2f6549945 100644 --- a/metadata-service/configuration/src/main/resources/bootstrap_mcps/ingestion-datahub-gc.yaml +++ b/metadata-service/configuration/src/main/resources/bootstrap_mcps/ingestion-datahub-gc.yaml @@ -21,19 +21,30 @@ truncate_indices: {{truncate_indices}}{{^truncate_indices}}true{{/truncate_indices}} truncate_index_older_than_days: {{truncate_indices_retention_days}}{{^truncate_indices_retention_days}}30{{/truncate_indices_retention_days}} dataprocess_cleanup: + enabled: 
{{dataprocess_cleanup.enabled}}{{^dataprocess_cleanup.enabled}}false{{/dataprocess_cleanup.enabled}} retention_days: {{dataprocess_cleanup.retention_days}}{{^dataprocess_cleanup.retention_days}}10{{/dataprocess_cleanup.retention_days}} - delete_empty_data_jobs: {{dataprocess_cleanup.delete_empty_data_jobs}}{{^dataprocess_cleanup.delete_empty_data_jobs}}true{{/dataprocess_cleanup.delete_empty_data_jobs}} - delete_empty_data_flows: {{dataprocess_cleanup.delete_empty_data_flows}}{{^dataprocess_cleanup.delete_empty_data_flows}}true{{/dataprocess_cleanup.delete_empty_data_flows}} + delete_empty_data_jobs: {{dataprocess_cleanup.delete_empty_data_jobs}}{{^dataprocess_cleanup.delete_empty_data_jobs}}false{{/dataprocess_cleanup.delete_empty_data_jobs}} + delete_empty_data_flows: {{dataprocess_cleanup.delete_empty_data_flows}}{{^dataprocess_cleanup.delete_empty_data_flows}}false{{/dataprocess_cleanup.delete_empty_data_flows}} hard_delete_entities: {{dataprocess_cleanup.hard_delete_entities}}{{^dataprocess_cleanup.hard_delete_entities}}false{{/dataprocess_cleanup.hard_delete_entities}} keep_last_n: {{dataprocess_cleanup.keep_last_n}}{{^dataprocess_cleanup.keep_last_n}}5{{/dataprocess_cleanup.keep_last_n}} + batch_size: {{dataprocess_cleanup.batch_size}}{{^dataprocess_cleanup.batch_size}}500{{/dataprocess_cleanup.batch_size}} + max_workers: {{dataprocess_cleanup.max_workers}}{{^dataprocess_cleanup.max_workers}}10{{/dataprocess_cleanup.max_workers}} soft_deleted_entities_cleanup: retention_days: {{soft_deleted_entities_cleanup.retention_days}}{{^soft_deleted_entities_cleanup.retention_days}}10{{/soft_deleted_entities_cleanup.retention_days}} + enabled: {{soft_deleted_entities_cleanup.enabled}}{{^soft_deleted_entities_cleanup.enabled}}true{{/soft_deleted_entities_cleanup.enabled}} + batch_size: {{soft_deleted_entities_cleanup.batch_size}}{{^soft_deleted_entities_cleanup.batch_size}}500{{/soft_deleted_entities_cleanup.batch_size}} + max_workers: 
{{soft_deleted_entities_cleanup.max_workers}}{{^soft_deleted_entities_cleanup.max_workers}}10{{/soft_deleted_entities_cleanup.max_workers}} + limit_entities_delete: {{soft_deleted_entities_cleanup.limit_entities_delete}}{{^soft_deleted_entities_cleanup.limit_entities_delete}}25000{{/soft_deleted_entities_cleanup.limit_entities_delete}} + runtime_limit_seconds: {{soft_deleted_entities_cleanup.runtime_limit_seconds}}{{^soft_deleted_entities_cleanup.runtime_limit_seconds}}7200{{/soft_deleted_entities_cleanup.runtime_limit_seconds}} execution_request_cleanup: keep_history_min_count: {{execution_request_cleanup.keep_history_min_count}}{{^execution_request_cleanup.keep_history_min_count}}10{{/execution_request_cleanup.keep_history_min_count}} keep_history_max_count: {{execution_request_cleanup.keep_history_max_count}}{{^execution_request_cleanup.keep_history_max_count}}1000{{/execution_request_cleanup.keep_history_max_count}} - keep_history_max_days: {{execution_request_cleanup.keep_history_max_days}}{{^execution_request_cleanup.keep_history_max_days}}30{{/execution_request_cleanup.keep_history_max_days}} + keep_history_max_days: {{execution_request_cleanup.keep_history_max_days}}{{^execution_request_cleanup.keep_history_max_days}}90{{/execution_request_cleanup.keep_history_max_days}} batch_read_size: {{execution_request_cleanup.batch_read_size}}{{^execution_request_cleanup.batch_read_size}}100{{/execution_request_cleanup.batch_read_size}} - enabled: {{execution_request_cleanup.enabled}}{{^execution_request_cleanup.enabled}}false{{/execution_request_cleanup.enabled}} + enabled: {{execution_request_cleanup.enabled}}{{^execution_request_cleanup.enabled}}true{{/execution_request_cleanup.enabled}} + runtime_limit_seconds: {{execution_request_cleanup.runtime_limit_seconds}}{{^execution_request_cleanup.runtime_limit_seconds}}3600{{/execution_request_cleanup.runtime_limit_seconds}} + limit_entities_delete: 
{{execution_request_cleanup.limit_entities_delete}}{{^execution_request_cleanup.limit_entities_delete}}10000{{/execution_request_cleanup.limit_entities_delete}} + max_read_errors: {{execution_request_cleanup.max_read_errors}}{{^execution_request_cleanup.max_read_errors}}10{{/execution_request_cleanup.max_read_errors}} extraArgs: {} debugMode: false executorId: default From c6bb65fc8d3cbd10e52e2f64ee808fa6e0265360 Mon Sep 17 00:00:00 2001 From: pankajmahato-visa <154867659+pankajmahato-visa@users.noreply.github.com> Date: Fri, 10 Jan 2025 20:46:45 +0530 Subject: [PATCH 059/249] chore(deps): Migrate CVE-2024-52046 with severity >= 9 (severity = 9.3) vulnerability of org.apache.mina:mina-core:2.2.3 (#12305) --- build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/build.gradle b/build.gradle index 5b6613d3057f3e..284092e2b14f49 100644 --- a/build.gradle +++ b/build.gradle @@ -379,6 +379,7 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) { resolutionStrategy.force externalDependency.antlr4Runtime resolutionStrategy.force externalDependency.antlr4 + resolutionStrategy.force 'org.apache.mina:mina-core:2.2.4' } } From 208447d20e74e35b6cd6a755ead64a4a11a1f6c6 Mon Sep 17 00:00:00 2001 From: Chakru <161002324+chakru-r@users.noreply.github.com> Date: Fri, 10 Jan 2025 22:27:10 +0530 Subject: [PATCH 060/249] fix(ci): fix artifact upload name (#12319) --- .github/workflows/build-and-test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 9a940ef8040d17..86545946d6afea 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -118,10 +118,12 @@ jobs: run: | echo "BACKEND_FILES=`find ./build/coverage-reports/ -type f | grep -E 
'(metadata-models|entity-registry|datahuyb-graphql-core|metadata-io|metadata-jobs|metadata-utils|metadata-service|medata-dao-impl|metadata-operation|li-utils|metadata-integration|metadata-events|metadata-auth|ingestion-scheduler|notifications|datahub-upgrade)' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV echo "FRONTEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(datahub-frontend|datahub-web-react).*\.(xml|json)$' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV + - name: Generate tz artifact name + run: echo "NAME_TZ=$(echo ${{ matrix.timezone }} | tr '/' '-')" >> $GITHUB_ENV - uses: actions/upload-artifact@v4 if: always() with: - name: Test Results (build) - ${{ matrix.command}}-${{ matrix.timezone }} + name: Test Results (build) - ${{ matrix.command}}-${{ env.NAME_TZ }} path: | **/build/reports/tests/test/** **/build/test-results/test/** From 5f63f3fba96aeab06767cde495d709ef2e5ed5a5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 10 Jan 2025 09:45:31 -0800 Subject: [PATCH 061/249] feat(sdk): support urns in other urn constructors (#12311) --- metadata-ingestion/scripts/avro_codegen.py | 38 ++++++++++++------- .../tests/unit/urns/test_urn.py | 22 ++++++++++- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index 2841985ad07808..0fe79a2c6a8e47 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -346,7 +346,7 @@ def write_urn_classes(key_aspects: List[dict], urn_dir: Path) -> None: code = """ # This file contains classes corresponding to entity URNs. 
-from typing import ClassVar, List, Optional, Type, TYPE_CHECKING +from typing import ClassVar, List, Optional, Type, TYPE_CHECKING, Union import functools from deprecated.sphinx import deprecated as _sphinx_deprecated @@ -547,10 +547,31 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str: assert fields[0]["type"] == ["null", "string"] fields[0]["type"] = "string" + field_urn_type_classes = {} + for field in fields: + # Figure out if urn types are valid for each field. + field_urn_type_class = None + if field_name(field) == "platform": + field_urn_type_class = "DataPlatformUrn" + elif field.get("Urn"): + if len(field.get("entityTypes", [])) == 1: + field_entity_type = field["entityTypes"][0] + field_urn_type_class = f"{capitalize_entity_name(field_entity_type)}Urn" + else: + field_urn_type_class = "Urn" + + field_urn_type_classes[field_name(field)] = field_urn_type_class + _init_arg_parts: List[str] = [] for field in fields: + field_urn_type_class = field_urn_type_classes[field_name(field)] + default = '"PROD"' if field_name(field) == "env" else None - _arg_part = f"{field_name(field)}: {field_type(field)}" + + type_hint = field_type(field) + if field_urn_type_class: + type_hint = f'Union["{field_urn_type_class}", str]' + _arg_part = f"{field_name(field)}: {type_hint}" if default: _arg_part += f" = {default}" _init_arg_parts.append(_arg_part) @@ -579,16 +600,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str: init_validation += f'if not {field_name(field)}:\n raise InvalidUrnError("{class_name} {field_name(field)} cannot be empty")\n' # Generalized mechanism for validating embedded urns. 
- field_urn_type_class = None - if field_name(field) == "platform": - field_urn_type_class = "DataPlatformUrn" - elif field.get("Urn"): - if len(field.get("entityTypes", [])) == 1: - field_entity_type = field["entityTypes"][0] - field_urn_type_class = f"{capitalize_entity_name(field_entity_type)}Urn" - else: - field_urn_type_class = "Urn" - + field_urn_type_class = field_urn_type_classes[field_name(field)] if field_urn_type_class: init_validation += f"{field_name(field)} = str({field_name(field)})\n" init_validation += ( @@ -608,7 +620,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str: init_coercion += " platform_name = DataPlatformUrn.from_string(platform_name).platform_name\n" if field_name(field) == "platform": - init_coercion += "platform = DataPlatformUrn(platform).urn()\n" + init_coercion += "platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()\n" elif field_urn_type_class is None: # For all non-urns, run the value through the UrnEncoder. 
init_coercion += ( diff --git a/metadata-ingestion/tests/unit/urns/test_urn.py b/metadata-ingestion/tests/unit/urns/test_urn.py index 0c362473c0cf18..bee80ec33148e9 100644 --- a/metadata-ingestion/tests/unit/urns/test_urn.py +++ b/metadata-ingestion/tests/unit/urns/test_urn.py @@ -4,7 +4,13 @@ import pytest -from datahub.metadata.urns import CorpUserUrn, DatasetUrn, Urn +from datahub.metadata.urns import ( + CorpUserUrn, + DataPlatformUrn, + DatasetUrn, + SchemaFieldUrn, + Urn, +) from datahub.utilities.urns.error import InvalidUrnError pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning") @@ -60,6 +66,20 @@ def test_urn_coercion() -> None: assert urn == Urn.from_string(urn.urn()) +def test_urns_in_init() -> None: + platform = DataPlatformUrn("abc") + assert platform.urn() == "urn:li:dataPlatform:abc" + + dataset_urn = DatasetUrn(platform, "def", "PROD") + assert dataset_urn.urn() == "urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)" + + schema_field = SchemaFieldUrn(dataset_urn, "foo") + assert ( + schema_field.urn() + == "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD),foo)" + ) + + def test_urn_type_dispatch_1() -> None: urn = Urn.from_string("urn:li:dataset:(urn:li:dataPlatform:abc,def,PROD)") assert isinstance(urn, DatasetUrn) From cf35dcca4f2e0b43c215f421aef6a17dbbf186e4 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 10 Jan 2025 09:49:23 -0800 Subject: [PATCH 062/249] fix(ingest): improve error reporting in `emit_all` (#12309) --- metadata-ingestion/src/datahub/ingestion/graph/client.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 7de6e8130a7ab6..8c5f894a072d93 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -248,9 +248,11 @@ def make_rest_sink( with 
DatahubRestSink(PipelineContext(run_id=run_id), sink_config) as sink: yield sink if sink.report.failures: + logger.error( + f"Failed to emit {len(sink.report.failures)} records\n{sink.report.as_string()}" + ) raise OperationalError( - f"Failed to emit {len(sink.report.failures)} records", - info=sink.report.as_obj(), + f"Failed to emit {len(sink.report.failures)} records" ) def emit_all( From a6cd995df62b304f69853350214da004422a0fbb Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 10 Jan 2025 10:35:13 -0800 Subject: [PATCH 063/249] docs(ingest): refactor docgen process (#12300) --- docs-website/README.md | 24 +- docs-website/generateDocsDir.ts | 37 + ...tadata-file_recipe.yml => file_recipe.yml} | 0 .../powerbi-report-server_pre.md | 16 + .../powerbi-report-server_recipe.yml | 0 .../powerbi/powerbi-report-server_pre.md | 13 - metadata-ingestion/scripts/docgen.py | 908 +++++------------- metadata-ingestion/scripts/docgen_types.py | 45 + .../scripts/docs_config_table.py | 376 ++++++++ .../powerbi_report_server/report_server.py | 2 +- 10 files changed, 748 insertions(+), 673 deletions(-) rename metadata-ingestion/docs/sources/metadata-file/{metadata-file_recipe.yml => file_recipe.yml} (100%) create mode 100644 metadata-ingestion/docs/sources/powerbi-report-server/powerbi-report-server_pre.md rename metadata-ingestion/docs/sources/{powerbi => powerbi-report-server}/powerbi-report-server_recipe.yml (100%) delete mode 100644 metadata-ingestion/docs/sources/powerbi/powerbi-report-server_pre.md create mode 100644 metadata-ingestion/scripts/docgen_types.py create mode 100644 metadata-ingestion/scripts/docs_config_table.py diff --git a/docs-website/README.md b/docs-website/README.md index 3b24cb869a444d..b40e4636422781 100644 --- a/docs-website/README.md +++ b/docs-website/README.md @@ -130,7 +130,6 @@ The purpose of this section is to provide developers & technical users with conc This section aims to provide plain-language feature overviews for both technical and 
non-technical readers alike. - ## Docs Generation Features **Includes all markdown files** @@ -145,16 +144,33 @@ You can suppress this check by adding the path to the file in a comment in `side Use an "inline" directive to include code snippets from other files. The `show_path_as_comment` option will include the path to the file as a comment at the top of the snippet. - ```python - {{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }} - ``` + ```python + {{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }} + ``` + +**Command Output** + +Use the `{{ command-output cmd }}` directive to run subprocesses and inject the outputs into the final markdown. + + {{ command-output python -c 'print("Hello world")' }} +This also works for multi-line scripts. + + {{ command-output + source metadata-ingestion/venv/bin/activate + python -m + }} + +Regardless of the location of the markdown file, the subcommands will be executed with working directory set to the repo root. + +Only the stdout of the subprocess will be outputted. The stderr, if any, will be included as a comment in the markdown. ## Docs site generation process This process is orchestrated by a combination of Gradle and Yarn tasks. The main entrypoint is via the `docs-website:yarnGenerate` task, which in turn eventually runs `yarn run generate`. Steps: + 1. Generate the GraphQL combined schema using the gradle's `docs-website:generateGraphQLSchema` task. This generates `./graphql/combined.graphql`. 2. Generate docs for ingestion sources using the `:metadata-ingestion:docGen` gradle task. 3. Generate docs for our metadata model using the `:metadata-ingestion:modelDocGen` gradle task. 
diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index ad82a85f9e5672..3a14baee073c2a 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -439,6 +439,42 @@ function markdown_process_inline_directives( contents.content = new_content; } +function markdown_process_command_output( + contents: matter.GrayMatterFile, + filepath: string +): void { + const new_content = contents.content.replace( + /^{{\s*command-output\s*([\s\S]*?)\s*}}$/gm, + (_, command: string) => { + try { + // Change to repo root directory before executing command + const repoRoot = path.resolve(__dirname, ".."); + + console.log(`Executing command: ${command}`); + + // Execute the command and capture output + const output = execSync(command, { + cwd: repoRoot, + encoding: "utf8", + stdio: ["pipe", "pipe", "pipe"], + }); + + // Return the command output + return output.trim(); + } catch (error: any) { + // If there's an error, include it as a comment + const errorMessage = error.stderr + ? error.stderr.toString() + : error.message; + return `${ + error.stdout ? error.stdout.toString().trim() : "" + }\n`; + } + } + ); + contents.content = new_content; +} + function markdown_sanitize_and_linkify(content: string): string { // MDX escaping content = content.replace(/ str: - if len(value) > DEFAULT_VALUE_MAX_LENGTH: - return value[:DEFAULT_VALUE_MAX_LENGTH] + DEFAULT_VALUE_TRUNCATION_MESSAGE - return value - - -def _format_path_component(path: str) -> str: - """ - Given a path like 'a.b.c', adds css tags to the components. - """ - path_components = path.rsplit(".", maxsplit=1) - if len(path_components) == 1: - return f'{path_components[0]}' - - return ( - f'{path_components[0]}.' 
- f'{path_components[1]}' - ) - - -def _format_type_name(type_name: str) -> str: - return f'{type_name}' - - -def _format_default_line(default_value: str, has_desc_above: bool) -> str: - default_value = _truncate_default_value(default_value) - escaped_value = ( - html.escape(default_value) - # Replace curly braces to avoid JSX issues. - .replace("{", "{") - .replace("}", "}") - # We also need to replace markdown special characters. - .replace("*", "*") - .replace("_", "_") - .replace("[", "[") - .replace("]", "]") - .replace("|", "|") - .replace("`", "`") - ) - value_elem = f'{escaped_value}' - return f'
Default: {value_elem}
' - - -class FieldRow(BaseModel): - path: str - parent: Optional[str] - type_name: str - required: bool - has_default: bool - default: str - description: str - inner_fields: List["FieldRow"] = Field(default_factory=list) - discriminated_type: Optional[str] = None - - class Component(BaseModel): - type: str - field_name: Optional[str] - - # matches any [...] style section inside a field path - _V2_FIELD_PATH_TOKEN_MATCHER = r"\[[\w.]*[=]*[\w\(\-\ \_\).]*\][\.]*" - # matches a .?[...] style section inside a field path anchored to the beginning - _V2_FIELD_PATH_TOKEN_MATCHER_PREFIX = rf"^[\.]*{_V2_FIELD_PATH_TOKEN_MATCHER}" - _V2_FIELD_PATH_FIELD_NAME_MATCHER = r"^\w+" - - @staticmethod - def map_field_path_to_components(field_path: str) -> List[Component]: - m = re.match(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER_PREFIX, field_path) - v = re.match(FieldRow._V2_FIELD_PATH_FIELD_NAME_MATCHER, field_path) - components: List[FieldRow.Component] = [] - while m or v: - token = m.group() if m else v.group() # type: ignore - if v: - if components: - if components[-1].field_name is None: - components[-1].field_name = token - else: - components.append( - FieldRow.Component(type="non_map_type", field_name=token) - ) - else: - components.append( - FieldRow.Component(type="non_map_type", field_name=token) - ) - - if m: - if token.startswith("[version="): - pass - elif "[type=" in token: - type_match = re.match(r"[\.]*\[type=(.*)\]", token) - if type_match: - type_string = type_match.group(1) - if components and components[-1].type == "map": - if components[-1].field_name is None: - pass - else: - new_component = FieldRow.Component( - type="map_key", field_name="`key`" - ) - components.append(new_component) - new_component = FieldRow.Component( - type=type_string, field_name=None - ) - components.append(new_component) - if type_string == "map": - new_component = FieldRow.Component( - type=type_string, field_name=None - ) - components.append(new_component) - - field_path = 
field_path[m.span()[1] :] if m else field_path[v.span()[1] :] # type: ignore - m = re.match(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER_PREFIX, field_path) - v = re.match(FieldRow._V2_FIELD_PATH_FIELD_NAME_MATCHER, field_path) - - return components - - @staticmethod - def field_path_to_components(field_path: str) -> List[str]: - """ - Inverts the field_path v2 format to get the canonical field path - [version=2.0].[type=x].foo.[type=string(format=uri)].bar => ["foo","bar"] - """ - if "type=map" not in field_path: - return re.sub(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER, "", field_path).split( - "." - ) - else: - # fields with maps in them need special handling to insert the `key` fragment - return [ - c.field_name - for c in FieldRow.map_field_path_to_components(field_path) - if c.field_name - ] - - @classmethod - def from_schema_field(cls, schema_field: SchemaFieldClass) -> "FieldRow": - path_components = FieldRow.field_path_to_components(schema_field.fieldPath) - - parent = path_components[-2] if len(path_components) >= 2 else None - if parent == "`key`": - # the real parent node is one index above - parent = path_components[-3] - json_props = ( - json.loads(schema_field.jsonProps) if schema_field.jsonProps else {} - ) - - required = json_props.get("required", True) - has_default = "default" in json_props - default_value = str(json_props.get("default")) - - field_path = ".".join(path_components) - - return FieldRow( - path=field_path, - parent=parent, - type_name=str(schema_field.nativeDataType), - required=required, - has_default=has_default, - default=default_value, - description=schema_field.description, - inner_fields=[], - discriminated_type=schema_field.nativeDataType, - ) - - def get_checkbox(self) -> str: - if self.required and not self.has_default: - # Using a non-breaking space to prevent the checkbox from being - # broken into a new line. 
- if not self.parent: # None and empty string both count - return ' ' - else: - return f' ' - else: - return "" - - def to_md_line(self) -> str: - if self.inner_fields: - if len(self.inner_fields) == 1: - type_name = self.inner_fields[0].type_name or self.type_name - else: - # To deal with unions that have essentially the same simple field path, - # we combine the type names into a single string. - type_name = "One of " + ", ".join( - [x.type_name for x in self.inner_fields if x.discriminated_type] - ) - else: - type_name = self.type_name - - description = self.description.strip() - description = self.description.replace( - "\n", "
" - ) # descriptions with newlines in them break markdown rendering - - md_line = ( - f'|
{_format_path_component(self.path)}' - f"{self.get_checkbox()}
" - f'
{_format_type_name(type_name)}
' - f"| {description} " - f"{_format_default_line(self.default, bool(description)) if self.has_default else ''} |\n" - ) - return md_line - - -class FieldHeader(FieldRow): - def to_md_line(self) -> str: - return "\n".join( - [ - "| Field | Description |", - "|:--- |:--- |", - "", - ] - ) - - def __init__(self): - pass - - -def get_prefixed_name(field_prefix: Optional[str], field_name: Optional[str]) -> str: - assert ( - field_prefix or field_name - ), "One of field_prefix or field_name should be present" - return ( - f"{field_prefix}.{field_name}" # type: ignore - if field_prefix and field_name - else field_name - if not field_prefix - else field_prefix - ) - - -def custom_comparator(path: str) -> str: - """ - Projects a string onto a separate space - Low_prio string will start with Z else start with A - Number of field paths will add the second set of letters: 00 - 99 - - """ - opt1 = path - prio_value = priority_value(opt1) - projection = f"{prio_value}" - projection = f"{projection}{opt1}" - return projection - - -class FieldTree: - """ - A helper class that re-constructs the tree hierarchy of schema fields - to help sort fields by importance while keeping nesting intact - """ - - def __init__(self, field: Optional[FieldRow] = None): - self.field = field - self.fields: Dict[str, "FieldTree"] = {} - - def add_field(self, row: FieldRow, path: Optional[str] = None) -> "FieldTree": - # logger.warn(f"Add field: path:{path}, row:{row}") - if self.field and self.field.path == row.path: - # we have an incoming field with the same path as us, this is probably a union variant - # attach to existing field - self.field.inner_fields.append(row) - else: - path = path if path is not None else row.path - top_level_field = path.split(".")[0] - if top_level_field in self.fields: - self.fields[top_level_field].add_field( - row, ".".join(path.split(".")[1:]) - ) - else: - self.fields[top_level_field] = FieldTree(field=row) - # logger.warn(f"{self}") - return self - - def 
sort(self): - # Required fields before optionals - required_fields = { - k: v for k, v in self.fields.items() if v.field and v.field.required - } - optional_fields = { - k: v for k, v in self.fields.items() if v.field and not v.field.required - } - - self.sorted_fields = [] - for field_map in [required_fields, optional_fields]: - # Top-level fields before fields with nesting - self.sorted_fields.extend( - sorted( - [f for f, val in field_map.items() if val.fields == {}], - key=custom_comparator, - ) - ) - self.sorted_fields.extend( - sorted( - [f for f, val in field_map.items() if val.fields != {}], - key=custom_comparator, - ) - ) - - for field_tree in self.fields.values(): - field_tree.sort() - - def get_fields(self) -> Iterable[FieldRow]: - if self.field: - yield self.field - for key in self.sorted_fields: - yield from self.fields[key].get_fields() - - def __repr__(self) -> str: - result = {} - if self.field: - result["_self"] = json.loads(json.dumps(self.field.dict())) - for f in self.fields: - result[f] = json.loads(str(self.fields[f])) - return json.dumps(result, indent=2) - - -def priority_value(path: str) -> str: - # A map of low value tokens to their relative importance - low_value_token_map = {"env": "X", "profiling": "Y", "stateful_ingestion": "Z"} - tokens = path.split(".") - for low_value_token in low_value_token_map: - if low_value_token in tokens: - return low_value_token_map[low_value_token] - - # everything else high-prio - return "A" - - -def gen_md_table_from_struct(schema_dict: Dict[str, Any]) -> List[str]: - from datahub.ingestion.extractor.json_schema_util import JsonSchemaTranslator - - # we don't want default field values to be injected into the description of the field - JsonSchemaTranslator._INJECT_DEFAULTS_INTO_DESCRIPTION = False - schema_fields = list(JsonSchemaTranslator.get_fields_from_schema(schema_dict)) - result: List[str] = [FieldHeader().to_md_line()] - - field_tree = FieldTree(field=None) - for field in schema_fields: - row: 
FieldRow = FieldRow.from_schema_field(field) - field_tree.add_field(row) - - field_tree.sort() - - for row in field_tree.get_fields(): - result.append(row.to_md_line()) - - # Wrap with a .config-table div. - result = ["\n
\n\n", *result, "\n
\n"] - - return result - def get_snippet(long_string: str, max_length: int = 100) -> str: snippet = "" @@ -424,19 +68,6 @@ def get_capability_text(src_capability: SourceCapability) -> str: ) -def create_or_update( - something: Dict[Any, Any], path: List[str], value: Any -) -> Dict[Any, Any]: - dict_under_operation = something - for p in path[:-1]: - if p not in dict_under_operation: - dict_under_operation[p] = {} - dict_under_operation = dict_under_operation[p] - - dict_under_operation[path[-1]] = value - return something - - def does_extra_exist(extra_name: str) -> bool: for key, value in metadata("acryl-datahub").items(): if key == "Provides-Extra" and value == extra_name: @@ -498,6 +129,102 @@ def new_url(original_url: str, file_path: str) -> str: return new_content +def load_plugin(plugin_name: str, out_dir: str) -> Plugin: + logger.debug(f"Loading {plugin_name}") + class_or_exception = source_registry._ensure_not_lazy(plugin_name) + if isinstance(class_or_exception, Exception): + raise class_or_exception + source_type = source_registry.get(plugin_name) + logger.debug(f"Source class is {source_type}") + + if hasattr(source_type, "get_platform_name"): + platform_name = source_type.get_platform_name() + else: + platform_name = ( + plugin_name.title() + ) # we like platform names to be human readable + + platform_id = None + if hasattr(source_type, "get_platform_id"): + platform_id = source_type.get_platform_id() + if platform_id is None: + raise ValueError(f"Platform ID not found for {plugin_name}") + + plugin = Plugin( + name=plugin_name, + platform_id=platform_id, + platform_name=platform_name, + classname=".".join([source_type.__module__, source_type.__name__]), + ) + + if hasattr(source_type, "get_platform_doc_order"): + platform_doc_order = source_type.get_platform_doc_order() + plugin.doc_order = platform_doc_order + + plugin_file_name = "src/" + "/".join(source_type.__module__.split(".")) + if os.path.exists(plugin_file_name) and 
os.path.isdir(plugin_file_name): + plugin_file_name = plugin_file_name + "/__init__.py" + else: + plugin_file_name = plugin_file_name + ".py" + if os.path.exists(plugin_file_name): + plugin.filename = plugin_file_name + else: + logger.info( + f"Failed to locate filename for {plugin_name}. Guessed {plugin_file_name}, but that doesn't exist" + ) + + if hasattr(source_type, "__doc__"): + plugin.source_docstring = textwrap.dedent(source_type.__doc__ or "") + + if hasattr(source_type, "get_support_status"): + plugin.support_status = source_type.get_support_status() + + if hasattr(source_type, "get_capabilities"): + capabilities = list(source_type.get_capabilities()) + capabilities.sort(key=lambda x: x.capability.value) + plugin.capabilities = capabilities + + try: + extra_plugin = plugin_name if does_extra_exist(plugin_name) else None + plugin.extra_deps = ( + get_additional_deps_for_extra(extra_plugin) if extra_plugin else [] + ) + except Exception as e: + logger.info( + f"Failed to load extras for {plugin_name} due to exception {e}", exc_info=e + ) + + if hasattr(source_type, "get_config_class"): + source_config_class: ConfigModel = source_type.get_config_class() + + plugin.config_json_schema = source_config_class.schema_json(indent=2) + plugin.config_md = gen_md_table_from_json_schema(source_config_class.schema()) + + # Write the config json schema to the out_dir. 
+ config_dir = pathlib.Path(out_dir) / "config_schemas" + config_dir.mkdir(parents=True, exist_ok=True) + (config_dir / f"{plugin_name}_config.json").write_text( + plugin.config_json_schema + ) + + return plugin + + +@dataclasses.dataclass +class PluginMetrics: + discovered: int = 0 + loaded: int = 0 + generated: int = 0 + failed: int = 0 + + +@dataclasses.dataclass +class PlatformMetrics: + discovered: int = 0 + generated: int = 0 + warnings: List[str] = dataclasses.field(default_factory=list) + + @click.command() @click.option("--out-dir", type=str, required=True) @click.option("--extra-docs", type=str, required=False) @@ -505,239 +232,111 @@ def new_url(original_url: str, file_path: str) -> str: def generate( out_dir: str, extra_docs: Optional[str] = None, source: Optional[str] = None ) -> None: # noqa: C901 - source_documentation: Dict[str, Any] = {} - metrics = {} - metrics["source_platforms"] = {"discovered": 0, "generated": 0, "warnings": []} - metrics["plugins"] = {"discovered": 0, "generated": 0, "failed": 0} - - if extra_docs: - for path in glob.glob(f"{extra_docs}/**/*[.md|.yaml|.yml]", recursive=True): - m = re.search("/docs/sources/(.*)/(.*).md", path) - if m: - platform_name = m.group(1).lower() - file_name = m.group(2) - destination_md: str = ( - f"../docs/generated/ingestion/sources/{platform_name}.md" - ) - - with open(path, "r") as doc_file: - file_contents = doc_file.read() - final_markdown = rewrite_markdown( - file_contents, path, destination_md - ) - - if file_name == "README": - # README goes as platform level docs - # all other docs are assumed to be plugin level - create_or_update( - source_documentation, - [platform_name, "custom_docs"], - final_markdown, - ) - else: - if "_" in file_name: - plugin_doc_parts = file_name.split("_") - if len(plugin_doc_parts) != 2 or plugin_doc_parts[ - 1 - ] not in ["pre", "post"]: - raise Exception( - f"{file_name} needs to be of the form _pre.md or _post.md" - ) - - docs_key_name = 
f"custom_docs_{plugin_doc_parts[1]}" - create_or_update( - source_documentation, - [ - platform_name, - "plugins", - plugin_doc_parts[0], - docs_key_name, - ], - final_markdown, - ) - else: - create_or_update( - source_documentation, - [ - platform_name, - "plugins", - file_name, - "custom_docs_post", - ], - final_markdown, - ) - else: - yml_match = re.search("/docs/sources/(.*)/(.*)_recipe.yml", path) - if yml_match: - platform_name = yml_match.group(1).lower() - plugin_name = yml_match.group(2) - with open(path, "r") as doc_file: - file_contents = doc_file.read() - create_or_update( - source_documentation, - [platform_name, "plugins", plugin_name, "recipe"], - file_contents, - ) + plugin_metrics = PluginMetrics() + platform_metrics = PlatformMetrics() + platforms: Dict[str, Platform] = {} for plugin_name in sorted(source_registry.mapping.keys()): if source and source != plugin_name: continue if plugin_name in { "snowflake-summary", + "snowflake-queries", + "bigquery-queries", }: logger.info(f"Skipping {plugin_name} as it is on the deny list") continue - metrics["plugins"]["discovered"] = metrics["plugins"]["discovered"] + 1 # type: ignore - # We want to attempt to load all plugins before printing a summary. 
- source_type = None + plugin_metrics.discovered += 1 try: - # output = subprocess.check_output( - # ["/bin/bash", "-c", f"pip install -e '.[{key}]'"] - # ) - class_or_exception = source_registry._ensure_not_lazy(plugin_name) - if isinstance(class_or_exception, Exception): - raise class_or_exception - logger.debug(f"Processing {plugin_name}") - source_type = source_registry.get(plugin_name) - logger.debug(f"Source class is {source_type}") - extra_plugin = plugin_name if does_extra_exist(plugin_name) else None - extra_deps = ( - get_additional_deps_for_extra(extra_plugin) if extra_plugin else [] - ) + plugin = load_plugin(plugin_name, out_dir=out_dir) except Exception as e: - logger.warning( - f"Failed to process {plugin_name} due to exception {e}", exc_info=e + logger.error( + f"Failed to load {plugin_name} due to exception {e}", exc_info=e ) - metrics["plugins"]["failed"] = metrics["plugins"].get("failed", 0) + 1 # type: ignore - - if source_type and hasattr(source_type, "get_config_class"): - try: - source_config_class: ConfigModel = source_type.get_config_class() - support_status = SupportStatus.UNKNOWN - capabilities = [] - if hasattr(source_type, "__doc__"): - source_doc = textwrap.dedent(source_type.__doc__ or "") - if hasattr(source_type, "get_platform_name"): - platform_name = source_type.get_platform_name() - else: - platform_name = ( - plugin_name.title() - ) # we like platform names to be human readable - - if hasattr(source_type, "get_platform_id"): - platform_id = source_type.get_platform_id() - - if hasattr(source_type, "get_platform_doc_order"): - platform_doc_order = source_type.get_platform_doc_order() - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "doc_order"], - platform_doc_order, - ) - - source_documentation[platform_id] = ( - source_documentation.get(platform_id) or {} - ) - - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "classname"], - ".".join([source_type.__module__, 
source_type.__name__]), - ) - plugin_file_name = "src/" + "/".join(source_type.__module__.split(".")) - if os.path.exists(plugin_file_name) and os.path.isdir(plugin_file_name): - plugin_file_name = plugin_file_name + "/__init__.py" - else: - plugin_file_name = plugin_file_name + ".py" - if os.path.exists(plugin_file_name): - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "filename"], - plugin_file_name, - ) - else: - logger.info( - f"Failed to locate filename for {plugin_name}. Guessed {plugin_file_name}" - ) - - if hasattr(source_type, "get_support_status"): - support_status = source_type.get_support_status() - - if hasattr(source_type, "get_capabilities"): - capabilities = list(source_type.get_capabilities()) - capabilities.sort(key=lambda x: x.capability.value) - - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "capabilities"], - capabilities, - ) - - create_or_update( - source_documentation, [platform_id, "name"], platform_name - ) - - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "extra_deps"], - extra_deps, - ) + plugin_metrics.failed += 1 + continue + else: + plugin_metrics.loaded += 1 - config_dir = f"{out_dir}/config_schemas" - os.makedirs(config_dir, exist_ok=True) - with open(f"{config_dir}/{plugin_name}_config.json", "w") as f: - f.write(source_config_class.schema_json(indent=2)) + # Add to the platform list if not already present. 
+ platforms.setdefault( + plugin.platform_id, + Platform( + id=plugin.platform_id, + name=plugin.platform_name, + ), + ).add_plugin(plugin_name=plugin.name, plugin=plugin) - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "config_schema"], - source_config_class.schema_json(indent=2) or "", + if extra_docs: + for path in glob.glob(f"{extra_docs}/**/*[.md|.yaml|.yml]", recursive=True): + if m := re.search("/docs/sources/(.*)/(.*).md", path): + platform_name = m.group(1).lower() # TODO: rename this to platform_id + file_name = m.group(2) + destination_md: str = ( + f"../docs/generated/ingestion/sources/{platform_name}.md" ) - table_md = gen_md_table_from_struct(source_config_class.schema()) - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "source_doc"], - source_doc or "", - ) - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "config"], - table_md, - ) - create_or_update( - source_documentation, - [platform_id, "plugins", plugin_name, "support_status"], - support_status, - ) + with open(path, "r") as doc_file: + file_contents = doc_file.read() + final_markdown = rewrite_markdown(file_contents, path, destination_md) + + if file_name == "README": + # README goes as platform level docs + # all other docs are assumed to be plugin level + platforms[platform_name].custom_docs_pre = final_markdown + + elif "_" in file_name: + plugin_doc_parts = file_name.split("_") + if len(plugin_doc_parts) != 2: + raise ValueError( + f"{file_name} needs to be of the form _pre.md or _post.md" + ) + plugin_name, suffix = plugin_doc_parts + if suffix == "pre": + platforms[platform_name].plugins[ + plugin_name + ].custom_docs_pre = final_markdown + elif suffix == "post": + platforms[platform_name].plugins[ + plugin_name + ].custom_docs_post = final_markdown + else: + raise ValueError( + f"{file_name} needs to be of the form _pre.md or _post.md" + ) - except Exception as e: - raise e + else: # 
assume this is the platform post. + # TODO: Probably need better error checking here. + platforms[platform_name].plugins[ + file_name + ].custom_docs_post = final_markdown + elif yml_match := re.search("/docs/sources/(.*)/(.*)_recipe.yml", path): + platform_name = yml_match.group(1).lower() + plugin_name = yml_match.group(2) + platforms[platform_name].plugins[ + plugin_name + ].starter_recipe = pathlib.Path(path).read_text() sources_dir = f"{out_dir}/sources" os.makedirs(sources_dir, exist_ok=True) + # Sort platforms by platform name. + platforms = dict(sorted(platforms.items(), key=lambda x: x[1].name.casefold())) + i = 0 - for platform_id, platform_docs in sorted( - source_documentation.items(), - key=lambda x: (x[1]["name"].casefold(), x[1]["name"]) - if "name" in x[1] - else (x[0].casefold(), x[0]), - ): + for platform_id, platform in platforms.items(): if source and platform_id != source: continue - metrics["source_platforms"]["discovered"] = ( - metrics["source_platforms"]["discovered"] + 1 # type: ignore - ) + platform_metrics.discovered += 1 platform_doc_file = f"{sources_dir}/{platform_id}.md" - if "name" not in platform_docs: - # We seem to have discovered written docs that corresponds to a platform, but haven't found linkage to it from the source classes - warning_msg = f"Failed to find source classes for platform {platform_id}. Did you remember to annotate your source class with @platform_name({platform_id})?" - logger.error(warning_msg) - metrics["source_platforms"]["warnings"].append(warning_msg) # type: ignore - continue + # if "name" not in platform_docs: + # # We seem to have discovered written docs that corresponds to a platform, but haven't found linkage to it from the source classes + # warning_msg = f"Failed to find source classes for platform {platform_id}. Did you remember to annotate your source class with @platform_name({platform_id})?" 
+ # logger.error(warning_msg) + # metrics["source_platforms"]["warnings"].append(warning_msg) # type: ignore + # continue with open(platform_doc_file, "w") as f: i += 1 @@ -745,12 +344,12 @@ def generate( f.write( "import Tabs from '@theme/Tabs';\nimport TabItem from '@theme/TabItem';\n\n" ) - f.write(f"# {platform_docs['name']}\n") + f.write(f"# {platform.name}\n") - if len(platform_docs["plugins"].keys()) > 1: + if len(platform.plugins) > 1: # More than one plugin used to provide integration with this platform f.write( - f"There are {len(platform_docs['plugins'].keys())} sources that provide integration with {platform_docs['name']}\n" + f"There are {len(platform.plugins)} sources that provide integration with {platform.name}\n" ) f.write("\n") f.write("\n") @@ -759,18 +358,22 @@ def generate( f.write(f"") f.write("") + # Sort plugins in the platform. + # It's a dict, so we need to recreate it. + platform.plugins = dict( + sorted( + platform.plugins.items(), + key=lambda x: str(x[1].doc_order) if x[1].doc_order else x[0], + ) + ) + # f.write("| Source Module | Documentation |\n") # f.write("| ------ | ---- |\n") - for plugin, plugin_docs in sorted( - platform_docs["plugins"].items(), - key=lambda x: str(x[1].get("doc_order")) - if x[1].get("doc_order") - else x[0], - ): + for plugin_name, plugin in platform.plugins.items(): f.write("\n") - f.write(f"\n") + f.write(f"\n") f.write( - f"\n" + f"\n" ) f.write("\n") # f.write( @@ -778,43 +381,33 @@ def generate( # ) f.write("
{col_header}
\n\n`{plugin}`\n\n\n\n`{plugin_name}`\n\n\n\n\n{platform_docs['plugins'][plugin].get('source_doc') or ''} [Read more...](#module-{plugin})\n\n\n\n\n\n{plugin.source_docstring or ''} [Read more...](#module-{plugin_name})\n\n\n
\n\n") # insert platform level custom docs before plugin section - f.write(platform_docs.get("custom_docs") or "") + f.write(platform.custom_docs_pre or "") # all_plugins = platform_docs["plugins"].keys() - for plugin, plugin_docs in sorted( - platform_docs["plugins"].items(), - key=lambda x: str(x[1].get("doc_order")) - if x[1].get("doc_order") - else x[0], - ): - if len(platform_docs["plugins"].keys()) > 1: + for plugin_name, plugin in platform.plugins.items(): + if len(platform.plugins) > 1: # We only need to show this if there are multiple modules. - f.write(f"\n\n## Module `{plugin}`\n") + f.write(f"\n\n## Module `{plugin_name}`\n") - if "support_status" in plugin_docs: - f.write( - get_support_status_badge(plugin_docs["support_status"]) + "\n\n" - ) - if "capabilities" in plugin_docs and len(plugin_docs["capabilities"]): + if plugin.support_status != SupportStatus.UNKNOWN: + f.write(get_support_status_badge(plugin.support_status) + "\n\n") + if plugin.capabilities and len(plugin.capabilities): f.write("\n### Important Capabilities\n") f.write("| Capability | Status | Notes |\n") f.write("| ---------- | ------ | ----- |\n") - plugin_capabilities: List[CapabilitySetting] = plugin_docs[ - "capabilities" - ] - for cap_setting in plugin_capabilities: + for cap_setting in plugin.capabilities: f.write( f"| {get_capability_text(cap_setting.capability)} | {get_capability_supported_badge(cap_setting.supported)} | {cap_setting.description} |\n" ) f.write("\n") - f.write(f"{plugin_docs.get('source_doc') or ''}\n") + f.write(f"{plugin.source_docstring or ''}\n") # Insert custom pre section - f.write(plugin_docs.get("custom_docs_pre", "")) + f.write(plugin.custom_docs_pre or "") f.write("\n### CLI based Ingestion\n") - if "extra_deps" in plugin_docs: + if plugin.extra_deps and len(plugin.extra_deps): f.write("\n#### Install the Plugin\n") - if plugin_docs["extra_deps"] != []: + if plugin.extra_deps != []: f.write("```shell\n") f.write(f"pip install 
'acryl-datahub[{plugin}]'\n") f.write("```\n") @@ -822,7 +415,7 @@ def generate( f.write( f"The `{plugin}` source works out of the box with `acryl-datahub`.\n" ) - if "recipe" in plugin_docs: + if plugin.starter_recipe: f.write("\n### Starter Recipe\n") f.write( "Check out the following recipe to get started with ingestion! See [below](#config-details) for full configuration options.\n\n\n" @@ -831,9 +424,10 @@ def generate( "For general pointers on writing and running a recipe, see our [main recipe guide](../../../../metadata-ingestion/README.md#recipes).\n" ) f.write("```yaml\n") - f.write(plugin_docs["recipe"]) + f.write(plugin.starter_recipe) f.write("\n```\n") - if "config" in plugin_docs: + if plugin.config_json_schema: + assert plugin.config_md is not None f.write("\n### Config Details\n") f.write( """ @@ -845,8 +439,8 @@ def generate( # f.write( # "\n
\nView All Configuration Options\n\n" # ) - for doc in plugin_docs["config"]: - f.write(doc) + f.write(plugin.config_md) + f.write("\n\n") # f.write("\n
\n\n") f.write( f"""
@@ -854,39 +448,49 @@ def generate( The [JSONSchema](https://json-schema.org/) for this configuration is inlined below.\n\n ```javascript -{plugin_docs['config_schema']} +{plugin.config_json_schema} ```\n\n
\n\n""" ) + # insert custom plugin docs after config details - f.write(plugin_docs.get("custom_docs_post", "")) - if "classname" in plugin_docs: + f.write(plugin.custom_docs_post or "") + if plugin.classname: f.write("\n### Code Coordinates\n") - f.write(f"- Class Name: `{plugin_docs['classname']}`\n") - if "filename" in plugin_docs: + f.write(f"- Class Name: `{plugin.classname}`\n") + if plugin.filename: f.write( - f"- Browse on [GitHub](../../../../metadata-ingestion/{plugin_docs['filename']})\n\n" + f"- Browse on [GitHub](../../../../metadata-ingestion/{plugin.filename})\n\n" ) - metrics["plugins"]["generated"] = metrics["plugins"]["generated"] + 1 # type: ignore + plugin_metrics.generated += 1 # Using an h2 tag to prevent this from showing up in page's TOC sidebar. f.write("\n

Questions

\n\n") f.write( - f"If you've got any questions on configuring ingestion for {platform_docs.get('name',platform_id)}, feel free to ping us on [our Slack](https://slack.datahubproject.io).\n" - ) - metrics["source_platforms"]["generated"] = ( - metrics["source_platforms"]["generated"] + 1 # type: ignore + f"If you've got any questions on configuring ingestion for {platform.name}, feel free to ping us on [our Slack](https://slack.datahubproject.io).\n" ) + platform_metrics.generated += 1 print("Ingestion Documentation Generation Complete") print("############################################") - print(json.dumps(metrics, indent=2)) + print( + json.dumps( + { + "plugin_metrics": dataclasses.asdict(plugin_metrics), + "platform_metrics": dataclasses.asdict(platform_metrics), + }, + indent=2, + ) + ) print("############################################") - if metrics["plugins"].get("failed", 0) > 0: # type: ignore + if plugin_metrics.failed > 0: sys.exit(1) - ### Create Lineage doc + # Create Lineage doc + generate_lineage_doc(platforms) + +def generate_lineage_doc(platforms: Dict[str, Platform]) -> None: source_dir = "../docs/generated/lineage" os.makedirs(source_dir, exist_ok=True) doc_file = f"{source_dir}/lineage-feature-guide.md" @@ -894,7 +498,7 @@ def generate( f.write( "import FeatureAvailability from '@site/src/components/FeatureAvailability';\n\n" ) - f.write(f"# About DataHub Lineage\n\n") + f.write("# About DataHub Lineage\n\n") f.write("\n") f.write( @@ -996,30 +600,24 @@ def generate( ) f.write("| ---------- | ------ | ----- |----- |\n") - for platform_id, platform_docs in sorted( - source_documentation.items(), - key=lambda x: (x[1]["name"].casefold(), x[1]["name"]) - if "name" in x[1] - else (x[0].casefold(), x[0]), - ): - for plugin, plugin_docs in sorted( - platform_docs["plugins"].items(), - key=lambda x: str(x[1].get("doc_order")) - if x[1].get("doc_order") - else x[0], + for platform_id, platform in platforms.items(): + for plugin in sorted( + 
platform.plugins.values(), + key=lambda x: str(x.doc_order) if x.doc_order else x.name, ): - platform_name = platform_docs["name"] - if len(platform_docs["plugins"].keys()) > 1: + if len(platform.plugins) > 1: # We only need to show this if there are multiple modules. - platform_name = f"{platform_name} `{plugin}`" + platform_plugin_name = f"{platform.name} `{plugin.name}`" + else: + platform_plugin_name = platform.name # Initialize variables table_level_supported = "❌" column_level_supported = "❌" config_names = "" - if "capabilities" in plugin_docs: - plugin_capabilities = plugin_docs["capabilities"] + if plugin.capabilities and len(plugin.capabilities): + plugin_capabilities = plugin.capabilities for cap_setting in plugin_capabilities: capability_text = get_capability_text(cap_setting.capability) @@ -1040,10 +638,10 @@ def generate( column_level_supported = "✅" if not (table_level_supported == "❌" and column_level_supported == "❌"): - if "config_schema" in plugin_docs: - config_properties = json.loads( - plugin_docs["config_schema"] - ).get("properties", {}) + if plugin.config_json_schema: + config_properties = json.loads(plugin.config_json_schema).get( + "properties", {} + ) config_names = "
".join( [ f"- {property_name}" @@ -1065,7 +663,7 @@ def generate( ] if platform_id not in lineage_not_applicable_sources: f.write( - f"| [{platform_name}](../../generated/ingestion/sources/{platform_id}.md) | {table_level_supported} | {column_level_supported} | {config_names}|\n" + f"| [{platform_plugin_name}](../../generated/ingestion/sources/{platform_id}.md) | {table_level_supported} | {column_level_supported} | {config_names}|\n" ) f.write( diff --git a/metadata-ingestion/scripts/docgen_types.py b/metadata-ingestion/scripts/docgen_types.py new file mode 100644 index 00000000000000..c96ab955e8cce2 --- /dev/null +++ b/metadata-ingestion/scripts/docgen_types.py @@ -0,0 +1,45 @@ +from dataclasses import dataclass, field +from typing import Dict, List, Optional + +from datahub.ingestion.api.decorators import CapabilitySetting, SupportStatus + + +@dataclass +class Plugin: + # Required fields + name: str + platform_id: str + platform_name: str + classname: str + + # Optional documentation fields + source_docstring: Optional[str] = None + config_json_schema: Optional[str] = None + config_md: Optional[str] = None + custom_docs_pre: Optional[str] = None + custom_docs_post: Optional[str] = None + starter_recipe: Optional[str] = None + + # Optional metadata fields + support_status: SupportStatus = SupportStatus.UNKNOWN + filename: Optional[str] = None + doc_order: Optional[int] = None + + # Lists with empty defaults + capabilities: List[CapabilitySetting] = field(default_factory=list) + extra_deps: List[str] = field(default_factory=list) + + +@dataclass +class Platform: + # Required fields + id: str + name: str + + # Optional fields + custom_docs_pre: Optional[str] = None + plugins: Dict[str, Plugin] = field(default_factory=dict) + + def add_plugin(self, plugin_name: str, plugin: Plugin) -> None: + """Helper method to add a plugin to the platform""" + self.plugins[plugin_name] = plugin diff --git a/metadata-ingestion/scripts/docs_config_table.py 
b/metadata-ingestion/scripts/docs_config_table.py new file mode 100644 index 00000000000000..3c5d9d0b0a2ba5 --- /dev/null +++ b/metadata-ingestion/scripts/docs_config_table.py @@ -0,0 +1,376 @@ +import html +import json +import re +from typing import Any, Dict, Iterable, List, Optional, Type + +from pydantic import BaseModel, Field + +from datahub.ingestion.extractor.json_schema_util import JsonSchemaTranslator +from datahub.metadata.schema_classes import SchemaFieldClass + +DEFAULT_VALUE_MAX_LENGTH = 50 +DEFAULT_VALUE_TRUNCATION_MESSAGE = "..." + + +def _truncate_default_value(value: str) -> str: + if len(value) > DEFAULT_VALUE_MAX_LENGTH: + return value[:DEFAULT_VALUE_MAX_LENGTH] + DEFAULT_VALUE_TRUNCATION_MESSAGE + return value + + +def _format_path_component(path: str) -> str: + """ + Given a path like 'a.b.c', adds css tags to the components. + """ + path_components = path.rsplit(".", maxsplit=1) + if len(path_components) == 1: + return f'{path_components[0]}' + + return ( + f'{path_components[0]}.' + f'{path_components[1]}' + ) + + +def _format_type_name(type_name: str) -> str: + return f'{type_name}' + + +def _format_default_line(default_value: str, has_desc_above: bool) -> str: + default_value = _truncate_default_value(default_value) + escaped_value = ( + html.escape(default_value) + # Replace curly braces to avoid JSX issues. + .replace("{", "{") + .replace("}", "}") + # We also need to replace markdown special characters. + .replace("*", "*") + .replace("_", "_") + .replace("[", "[") + .replace("]", "]") + .replace("|", "|") + .replace("`", "`") + ) + value_elem = f'{escaped_value}' + return f'
Default: {value_elem}
' + + +class FieldRow(BaseModel): + path: str + parent: Optional[str] + type_name: str + required: bool + has_default: bool + default: str + description: str + inner_fields: List["FieldRow"] = Field(default_factory=list) + discriminated_type: Optional[str] = None + + class Component(BaseModel): + type: str + field_name: Optional[str] + + # matches any [...] style section inside a field path + _V2_FIELD_PATH_TOKEN_MATCHER = r"\[[\w.]*[=]*[\w\(\-\ \_\).]*\][\.]*" + # matches a .?[...] style section inside a field path anchored to the beginning + _V2_FIELD_PATH_TOKEN_MATCHER_PREFIX = rf"^[\.]*{_V2_FIELD_PATH_TOKEN_MATCHER}" + _V2_FIELD_PATH_FIELD_NAME_MATCHER = r"^\w+" + + @staticmethod + def map_field_path_to_components(field_path: str) -> List[Component]: + m = re.match(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER_PREFIX, field_path) + v = re.match(FieldRow._V2_FIELD_PATH_FIELD_NAME_MATCHER, field_path) + components: List[FieldRow.Component] = [] + while m or v: + token = m.group() if m else v.group() # type: ignore + if v: + if components: + if components[-1].field_name is None: + components[-1].field_name = token + else: + components.append( + FieldRow.Component(type="non_map_type", field_name=token) + ) + else: + components.append( + FieldRow.Component(type="non_map_type", field_name=token) + ) + + if m: + if token.startswith("[version="): + pass + elif "[type=" in token: + type_match = re.match(r"[\.]*\[type=(.*)\]", token) + if type_match: + type_string = type_match.group(1) + if components and components[-1].type == "map": + if components[-1].field_name is None: + pass + else: + new_component = FieldRow.Component( + type="map_key", field_name="`key`" + ) + components.append(new_component) + new_component = FieldRow.Component( + type=type_string, field_name=None + ) + components.append(new_component) + if type_string == "map": + new_component = FieldRow.Component( + type=type_string, field_name=None + ) + components.append(new_component) + + field_path = 
field_path[m.span()[1] :] if m else field_path[v.span()[1] :] # type: ignore + m = re.match(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER_PREFIX, field_path) + v = re.match(FieldRow._V2_FIELD_PATH_FIELD_NAME_MATCHER, field_path) + + return components + + @staticmethod + def field_path_to_components(field_path: str) -> List[str]: + """ + Inverts the field_path v2 format to get the canonical field path + [version=2.0].[type=x].foo.[type=string(format=uri)].bar => ["foo","bar"] + """ + if "type=map" not in field_path: + return re.sub(FieldRow._V2_FIELD_PATH_TOKEN_MATCHER, "", field_path).split( + "." + ) + else: + # fields with maps in them need special handling to insert the `key` fragment + return [ + c.field_name + for c in FieldRow.map_field_path_to_components(field_path) + if c.field_name + ] + + @classmethod + def from_schema_field(cls, schema_field: SchemaFieldClass) -> "FieldRow": + path_components = FieldRow.field_path_to_components(schema_field.fieldPath) + + parent = path_components[-2] if len(path_components) >= 2 else None + if parent == "`key`": + # the real parent node is one index above + parent = path_components[-3] + json_props = ( + json.loads(schema_field.jsonProps) if schema_field.jsonProps else {} + ) + + required = json_props.get("required", True) + has_default = "default" in json_props + default_value = str(json_props.get("default")) + + field_path = ".".join(path_components) + + return FieldRow( + path=field_path, + parent=parent, + type_name=str(schema_field.nativeDataType), + required=required, + has_default=has_default, + default=default_value, + description=schema_field.description, + inner_fields=[], + discriminated_type=schema_field.nativeDataType, + ) + + def get_checkbox(self) -> str: + if self.required and not self.has_default: + # Using a non-breaking space to prevent the checkbox from being + # broken into a new line. 
+ if not self.parent: # None and empty string both count + return ' ' + else: + return f' ' + else: + return "" + + def to_md_line(self) -> str: + if self.inner_fields: + if len(self.inner_fields) == 1: + type_name = self.inner_fields[0].type_name or self.type_name + else: + # To deal with unions that have essentially the same simple field path, + # we combine the type names into a single string. + type_name = "One of " + ", ".join( + [x.type_name for x in self.inner_fields if x.discriminated_type] + ) + else: + type_name = self.type_name + + description = self.description.strip() + description = self.description.replace( + "\n", "
" + ) # descriptions with newlines in them break markdown rendering + + md_line = ( + f'|
{_format_path_component(self.path)}' + f"{self.get_checkbox()}
" + f'
{_format_type_name(type_name)}
' + f"| {description} " + f"{_format_default_line(self.default, bool(description)) if self.has_default else ''} |\n" + ) + return md_line + + +class FieldHeader(FieldRow): + def to_md_line(self) -> str: + return "\n".join( + [ + "| Field | Description |", + "|:--- |:--- |", + "", + ] + ) + + def __init__(self): + pass + + +def get_prefixed_name(field_prefix: Optional[str], field_name: Optional[str]) -> str: + assert ( + field_prefix or field_name + ), "One of field_prefix or field_name should be present" + return ( + f"{field_prefix}.{field_name}" # type: ignore + if field_prefix and field_name + else field_name + if not field_prefix + else field_prefix + ) + + +def custom_comparator(path: str) -> str: + """ + Projects a string onto a separate space + Low_prio string will start with Z else start with A + Number of field paths will add the second set of letters: 00 - 99 + + """ + opt1 = path + prio_value = priority_value(opt1) + projection = f"{prio_value}" + projection = f"{projection}{opt1}" + return projection + + +class FieldTree: + """ + A helper class that re-constructs the tree hierarchy of schema fields + to help sort fields by importance while keeping nesting intact + """ + + def __init__(self, field: Optional[FieldRow] = None): + self.field = field + self.fields: Dict[str, "FieldTree"] = {} + + def add_field(self, row: FieldRow, path: Optional[str] = None) -> "FieldTree": + # logger.warn(f"Add field: path:{path}, row:{row}") + if self.field and self.field.path == row.path: + # we have an incoming field with the same path as us, this is probably a union variant + # attach to existing field + self.field.inner_fields.append(row) + else: + path = path if path is not None else row.path + top_level_field = path.split(".")[0] + if top_level_field in self.fields: + self.fields[top_level_field].add_field( + row, ".".join(path.split(".")[1:]) + ) + else: + self.fields[top_level_field] = FieldTree(field=row) + # logger.warn(f"{self}") + return self + + def 
sort(self): + # Required fields before optionals + required_fields = { + k: v for k, v in self.fields.items() if v.field and v.field.required + } + optional_fields = { + k: v for k, v in self.fields.items() if v.field and not v.field.required + } + + self.sorted_fields = [] + for field_map in [required_fields, optional_fields]: + # Top-level fields before fields with nesting + self.sorted_fields.extend( + sorted( + [f for f, val in field_map.items() if val.fields == {}], + key=custom_comparator, + ) + ) + self.sorted_fields.extend( + sorted( + [f for f, val in field_map.items() if val.fields != {}], + key=custom_comparator, + ) + ) + + for field_tree in self.fields.values(): + field_tree.sort() + + def get_fields(self) -> Iterable[FieldRow]: + if self.field: + yield self.field + for key in self.sorted_fields: + yield from self.fields[key].get_fields() + + def __repr__(self) -> str: + result = {} + if self.field: + result["_self"] = json.loads(json.dumps(self.field.dict())) + for f in self.fields: + result[f] = json.loads(str(self.fields[f])) + return json.dumps(result, indent=2) + + +def priority_value(path: str) -> str: + # A map of low value tokens to their relative importance + low_value_token_map = { + "env": "X", + "classification": "Y", + "profiling": "Y", + "stateful_ingestion": "Z", + } + tokens = path.split(".") + for low_value_token in low_value_token_map: + if low_value_token in tokens: + return low_value_token_map[low_value_token] + + # everything else high-prio + return "A" + + +def gen_md_table_from_json_schema(schema_dict: Dict[str, Any]) -> str: + # we don't want default field values to be injected into the description of the field + JsonSchemaTranslator._INJECT_DEFAULTS_INTO_DESCRIPTION = False + schema_fields = list(JsonSchemaTranslator.get_fields_from_schema(schema_dict)) + result: List[str] = [FieldHeader().to_md_line()] + + field_tree = FieldTree(field=None) + for field in schema_fields: + row: FieldRow = FieldRow.from_schema_field(field) + 
field_tree.add_field(row) + + field_tree.sort() + + for row in field_tree.get_fields(): + result.append(row.to_md_line()) + + # Wrap with a .config-table div. + result = ["\n
\n\n", *result, "\n
\n"] + + return "".join(result) + + +def gen_md_table_from_pydantic(model: Type[BaseModel]) -> str: + return gen_md_table_from_json_schema(model.schema()) + + +if __name__ == "__main__": + # Simple test code. + from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config + + print("".join(gen_md_table_from_pydantic(SnowflakeV2Config))) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py index 2a247d0c63957a..4764400215e12a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py @@ -485,7 +485,7 @@ def report_dropped(self, view: str) -> None: self.filtered_reports.append(view) -@platform_name("PowerBI") +@platform_name("PowerBI Report Server") @config_class(PowerBiReportServerDashboardSourceConfig) @support_status(SupportStatus.INCUBATING) @capability(SourceCapability.OWNERSHIP, "Enabled by default") From 8a944752779e31044bf979e386b4127aa6b8f92b Mon Sep 17 00:00:00 2001 From: ryota-cloud Date: Fri, 10 Jan 2025 11:34:36 -0800 Subject: [PATCH 064/249] fix(dockerfile) Remove all references to jetty from the docker file (#12310) Co-authored-by: Ryota Egashira --- docker/datahub-upgrade/Dockerfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index 488cb46c94cf28..d63ceb83dc5295 100644 --- a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -34,16 +34,12 @@ ARG MAVEN_CENTRAL_REPO_URL RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi ENV JMX_VERSION=0.18.0 -ENV JETTY_VERSION=11.0.21 # Upgrade Alpine and base packages # PFP-260: Upgrade Sqlite to 
>=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat snappy \ && apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ - && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/${JETTY_VERSION}/jetty-runner-${JETTY_VERSION}.jar --output jetty-runner.jar \ - && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/${JETTY_VERSION}/jetty-jmx-${JETTY_VERSION}.jar --output jetty-jmx.jar \ - && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/${JETTY_VERSION}/jetty-util-${JETTY_VERSION}.jar --output jetty-util.jar \ && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks From 9897804e2ab2baa4539a11dfdb4ef3104e7f9ecc Mon Sep 17 00:00:00 2001 From: ethan-cartwright Date: Fri, 10 Jan 2025 14:36:47 -0500 Subject: [PATCH 065/249] docs(notification): docs on platform notifications and multiple channels (#10801) Co-authored-by: Jay <159848059+jayacryl@users.noreply.github.com> --- .../subscription-and-notification.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/managed-datahub/subscription-and-notification.md b/docs/managed-datahub/subscription-and-notification.md index c3c31d5fed7e61..c27754a6371265 100644 --- a/docs/managed-datahub/subscription-and-notification.md +++ b/docs/managed-datahub/subscription-and-notification.md @@ -17,9 +17,30 @@ Email will work out of box. 
For installing the DataHub Slack App, see: This feature is especially useful in helping you stay on top of any upstream changes that could impact the assets you or your stakeholders rely on. It eliminates the need for you and your team to manually check for upstream changes, or for upstream stakeholders to identify and notify impacted users. As a user, you can subscribe to and receive notifications about changes such as deprecations, schema changes, changes in ownership, assertions, or incidents. You’ll always been in the know about potential data quality issues so you can proactively manage your data resources. + +## Platform Admin Notifications + +Datahub provides three levels of notifications: + +- **Platform-level** +- **Group-level** (described in other sections) +- **User-level** (described in other sections) + +**Setting Platform-Level Notifications:** +This requires appropriate permissions. Go to `Settings` > `Notifications` (under the `Platform` section, not `My Notifications`). + +**Platform-level Notifications:** +Platform-level notifications are applied to all assets within Datahub. +Example: If "An owner is added or removed from a data asset" is ticked, the designated Slack channel or email will receive notifications for any such changes across all assets. + +**Our Recommendations:** + +Notifying on tag changes for every asset in the platform would be noisy, and so we recommend to use these platform-level notifications only where appropriate. For example, we recommend notifications for ingestion failures routed to a central Slack channel or email. This will help you proactively ensure your Datahub metadata stays fresh. + ## Prerequisites Once you have [configured Slack within your DataHub instance](slack/saas-slack-setup.md), you will be able to subscribe to any Entity in DataHub and begin recieving notifications via DM. + To begin receiving personal notifications, go to Settings > "My Notifications". 
From here, toggle on Slack Notifications and input your Slack Member ID. If you want to create and manage group-level Subscriptions for your team, you will need [the following privileges](../../docs/authorization/roles.md#role-privileges): @@ -162,6 +183,21 @@ You can unsubscribe from any asset to stop receiving notifications about it. On What if I want to be notified about different changes? To modify your subscription, use the dropdown menu next to the Subscribe button to modify the changes you want to be notified about. + +
+ +I want to configure multiple channels. How many Slack channels or emails can I configure to get notified? + +At the platform-level, you can configure one email and one Slack channel. + +At the user and group -levels, you can configure one default email and Slack channel as well as overwrite that email/channel when you +go to a specific asset to subscribe to. + +To configure multiple channels, as a prereq, ensure you have the appropriate privileges. And then: +1. Create a datahub group for each channel you want notifications for. +2. Add yourself as a member to each of the groups. +3. Now, when you visit an asset and go to subscribe, you'll see the option "Manage Group Subscriptions". +
## Reference From 8d48622c0ff79b50d29694785df23610d15570c7 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Sun, 12 Jan 2025 22:09:34 -0800 Subject: [PATCH 066/249] fix(cli/delete): prevent duplicates in delete message (#12323) --- metadata-ingestion/src/datahub/cli/delete_cli.py | 6 +++--- metadata-ingestion/src/datahub/cli/migrate.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py index 1a75459a92c5cf..8501cf71f0d544 100644 --- a/metadata-ingestion/src/datahub/cli/delete_cli.py +++ b/metadata-ingestion/src/datahub/cli/delete_cli.py @@ -1,8 +1,8 @@ import logging +import random from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from datetime import datetime -from random import choices from typing import Dict, List, Optional import click @@ -457,11 +457,11 @@ def by_filter( click.echo("Found urns of multiple entity types") for entity_type, entity_urns in urns_by_type.items(): click.echo( - f"- {len(entity_urns)} {entity_type} urn(s). Sample: {choices(entity_urns, k=min(5, len(entity_urns)))}" + f"- {len(entity_urns)} {entity_type} urn(s). Sample: {random.sample(entity_urns, k=min(5, len(entity_urns)))}" ) else: click.echo( - f"Found {len(urns)} {entity_type} urn(s). Sample: {choices(urns, k=min(5, len(urns)))}" + f"Found {len(urns)} {entity_type} urn(s). 
Sample: {random.sample(urns, k=min(5, len(urns)))}" ) if not force and not dry_run: diff --git a/metadata-ingestion/src/datahub/cli/migrate.py b/metadata-ingestion/src/datahub/cli/migrate.py index ea5375c9471283..1bf1211674f596 100644 --- a/metadata-ingestion/src/datahub/cli/migrate.py +++ b/metadata-ingestion/src/datahub/cli/migrate.py @@ -179,7 +179,7 @@ def dataplatform2instance_func( if not force and not dry_run: # get a confirmation from the operator before proceeding if this is not a dry run - sampled_urns_to_migrate = random.choices( + sampled_urns_to_migrate = random.sample( urns_to_migrate, k=min(10, len(urns_to_migrate)) ) sampled_new_urns: List[str] = [ @@ -193,7 +193,7 @@ def dataplatform2instance_func( if key ] click.echo( - f"Will migrate {len(urns_to_migrate)} urns such as {random.choices(urns_to_migrate, k=min(10, len(urns_to_migrate)))}" + f"Will migrate {len(urns_to_migrate)} urns such as {random.sample(urns_to_migrate, k=min(10, len(urns_to_migrate)))}" ) click.echo(f"New urns will look like {sampled_new_urns}") click.confirm("Ok to proceed?", abort=True) From 457f96e8c5f00cc8525bb7afe90f00f86c6b093c Mon Sep 17 00:00:00 2001 From: skrydal Date: Mon, 13 Jan 2025 14:37:09 +0100 Subject: [PATCH 067/249] feat(ingestion/iceberg): Improve iceberg connector logging (#12317) --- .../ingestion/source/iceberg/iceberg.py | 13 ++++- .../source/iceberg/iceberg_common.py | 58 ++++++++++++++++--- .../source/iceberg/iceberg_profiler.py | 4 +- 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py index 76f24bfd63d476..8101f0110509e3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py @@ -203,7 +203,9 @@ def _process_dataset(dataset_path: Identifier) -> Iterable[MetadataWorkUnit]: with PerfTimer() as timer: table = 
thread_local.local_catalog.load_table(dataset_path) time_taken = timer.elapsed_seconds() - self.report.report_table_load_time(time_taken) + self.report.report_table_load_time( + time_taken, dataset_name, table.metadata_location + ) LOGGER.debug(f"Loaded table: {table.name()}, time taken: {time_taken}") yield from self._create_iceberg_workunit(dataset_name, table) except NoSuchPropertyException as e: @@ -247,7 +249,10 @@ def _process_dataset(dataset_path: Identifier) -> Iterable[MetadataWorkUnit]: f"Iceberg Rest Catalog server error (500 status) encountered when processing table {dataset_path}, skipping it." ) except Exception as e: - self.report.report_failure("general", f"Failed to create workunit: {e}") + self.report.report_failure( + "general", + f"Failed to create workunit for dataset {dataset_name}: {e}", + ) LOGGER.exception( f"Exception while processing table {dataset_path}, skipping it.", ) @@ -312,7 +317,9 @@ def _create_iceberg_workunit( dataset_snapshot.aspects.append(schema_metadata) mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot) - self.report.report_table_processing_time(timer.elapsed_seconds()) + self.report.report_table_processing_time( + timer.elapsed_seconds(), dataset_name, table.metadata_location + ) yield MetadataWorkUnit(id=dataset_name, mce=mce) dpi_aspect = self._get_dataplatform_instance_aspect(dataset_urn=dataset_urn) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 4a7f6bf4d60c1d..83fe3d1c079f17 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -5,6 +5,7 @@ from humanfriendly import format_timespan from pydantic import Field, validator from pyiceberg.catalog import Catalog, load_catalog +from sortedcontainers import SortedList from datahub.configuration.common import AllowDenyPattern, 
ConfigModel from datahub.configuration.source_common import DatasetSourceConfigMixin @@ -146,19 +147,40 @@ def get_catalog(self) -> Catalog: return load_catalog(name=catalog_name, **catalog_config) +class TopTableTimings: + _VALUE_FIELD: str = "timing" + top_entites: SortedList + _size: int + + def __init__(self, size: int = 10): + self._size = size + self.top_entites = SortedList(key=lambda x: -x.get(self._VALUE_FIELD, 0)) + + def add(self, entity: Dict[str, Any]) -> None: + if self._VALUE_FIELD not in entity: + return + self.top_entites.add(entity) + if len(self.top_entites) > self._size: + self.top_entites.pop() + + def __str__(self) -> str: + if len(self.top_entites) == 0: + return "no timings reported" + return str(list(self.top_entites)) + + class TimingClass: - times: List[int] + times: SortedList def __init__(self): - self.times = [] + self.times = SortedList() - def add_timing(self, t): - self.times.append(t) + def add_timing(self, t: float) -> None: + self.times.add(t) - def __str__(self): + def __str__(self) -> str: if len(self.times) == 0: return "no timings reported" - self.times.sort() total = sum(self.times) avg = total / len(self.times) return str( @@ -180,6 +202,9 @@ class IcebergSourceReport(StaleEntityRemovalSourceReport): load_table_timings: TimingClass = field(default_factory=TimingClass) processing_table_timings: TimingClass = field(default_factory=TimingClass) profiling_table_timings: TimingClass = field(default_factory=TimingClass) + tables_load_timings: TopTableTimings = field(default_factory=TopTableTimings) + tables_profile_timings: TopTableTimings = field(default_factory=TopTableTimings) + tables_process_timings: TopTableTimings = field(default_factory=TopTableTimings) listed_namespaces: int = 0 total_listed_tables: int = 0 tables_listed_per_namespace: TopKDict[str, int] = field( @@ -201,11 +226,26 @@ def report_table_scanned(self, name: str) -> None: def report_dropped(self, ent_name: str) -> None: self.filtered.append(ent_name) - def 
report_table_load_time(self, t: float) -> None: + def report_table_load_time( + self, t: float, table_name: str, table_metadata_location: str + ) -> None: self.load_table_timings.add_timing(t) + self.tables_load_timings.add( + {"table": table_name, "timing": t, "metadata_file": table_metadata_location} + ) - def report_table_processing_time(self, t: float) -> None: + def report_table_processing_time( + self, t: float, table_name: str, table_metadata_location: str + ) -> None: self.processing_table_timings.add_timing(t) + self.tables_process_timings.add( + {"table": table_name, "timing": t, "metadata_file": table_metadata_location} + ) - def report_table_profiling_time(self, t: float) -> None: + def report_table_profiling_time( + self, t: float, table_name: str, table_metadata_location: str + ) -> None: self.profiling_table_timings.add_timing(t) + self.tables_profile_timings.add( + {"table": table_name, "timing": t, "metadata_file": table_metadata_location} + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_profiler.py index 9cc6dd08544e4e..7642cabbd1404c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_profiler.py @@ -204,7 +204,9 @@ def profile_table( ) dataset_profile.fieldProfiles.append(column_profile) time_taken = timer.elapsed_seconds() - self.report.report_table_profiling_time(time_taken) + self.report.report_table_profiling_time( + time_taken, dataset_name, table.metadata_location + ) LOGGER.debug( f"Finished profiling of dataset: {dataset_name} in {time_taken}" ) From fa1faf429b91d8dd1ab0376855ff732851639c39 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 13 Jan 2025 13:38:04 -0600 Subject: [PATCH 068/249] fix(header): prevent clickjack/iframing (#12328) --- 
docs-website/docusaurus.config.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 31644f459ed731..350521ea8ee643 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -13,6 +13,15 @@ module.exports = { projectName: "datahub", // Usually your repo name. staticDirectories: ["static", "genStatic"], stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"], + headTags: [ + { + tagName: 'meta', + attributes: { + httpEquiv: 'Content-Security-Policy', + content: "frame-ancestors 'self' https://*.acryl.io https://acryldata.io http://localhost:*" + } + }, + ], scripts: [ { src: "https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38", From 244f35de5c3381a33b35628967d9e78d33bfb66d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 13 Jan 2025 12:55:57 -0800 Subject: [PATCH 069/249] fix(ingest): tighten Source.create type annotations (#12325) --- metadata-ingestion/src/datahub/ingestion/api/source.py | 4 ++-- .../src/datahub/ingestion/source/delta_lake/source.py | 5 ----- .../src/datahub/ingestion/source/demo_data.py | 2 +- .../src/datahub/ingestion/source/fivetran/fivetran.py | 7 +------ .../ingestion/source/kafka_connect/kafka_connect.py | 7 +------ .../src/datahub/ingestion/source/metabase.py | 7 +------ metadata-ingestion/src/datahub/ingestion/source/mlflow.py | 5 ----- metadata-ingestion/src/datahub/ingestion/source/nifi.py | 5 ----- metadata-ingestion/src/datahub/ingestion/source/redash.py | 5 ----- .../src/datahub/ingestion/source/snowflake/snowflake_v2.py | 6 ------ .../src/datahub/ingestion/source/superset.py | 7 +------ .../src/datahub/ingestion/source/tableau/tableau.py | 6 ------ metadata-ingestion/tests/unit/api/test_pipeline.py | 3 ++- 13 files changed, 9 insertions(+), 60 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py 
b/metadata-ingestion/src/datahub/ingestion/api/source.py index 53cb1b0ecad4ee..b04ffdb3258934 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -23,7 +23,7 @@ ) from pydantic import BaseModel -from typing_extensions import LiteralString +from typing_extensions import LiteralString, Self from datahub.configuration.common import ConfigModel from datahub.configuration.source_common import PlatformInstanceConfigMixin @@ -400,7 +400,7 @@ class Source(Closeable, metaclass=ABCMeta): ctx: PipelineContext @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": + def create(cls, config_dict: dict, ctx: PipelineContext) -> Self: # Technically, this method should be abstract. However, the @config_class # decorator automatically generates a create method at runtime if one is # not defined. Python still treats the class as abstract because it thinks diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py index 98133ca69011e7..9df3905437b3b2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py @@ -122,11 +122,6 @@ def __init__(self, config: DeltaLakeSourceConfig, ctx: PipelineContext): config_report, ) - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": - config = DeltaLakeSourceConfig.parse_obj(config_dict) - return cls(config, ctx) - def _parse_datatype(self, raw_field_json_str: str) -> List[SchemaFieldClass]: raw_field_json = json.loads(raw_field_json_str) diff --git a/metadata-ingestion/src/datahub/ingestion/source/demo_data.py b/metadata-ingestion/src/datahub/ingestion/source/demo_data.py index 79831c016e2d5d..1d7aedb151864f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/demo_data.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/demo_data.py @@ -29,7 +29,7 @@ class DemoDataSource(Source): def __init__(self, ctx: PipelineContext, config: DemoDataConfig): file_config = FileSourceConfig(path=str(download_sample_data())) - self.file_source = GenericFileSource(ctx, file_config) + self.file_source: GenericFileSource = GenericFileSource(ctx, file_config) def get_workunits(self) -> Iterable[MetadataWorkUnit]: yield from self.file_source.get_workunits() diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py index adbfc48692db93..d8ebbe5b63d1ae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py @@ -16,7 +16,7 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.fivetran.config import ( KNOWN_DATA_PLATFORM_MAPPING, @@ -291,11 +291,6 @@ def _get_connector_workunits( dpi = self._generate_dpi_from_job(job, datajob) yield from self._get_dpi_workunits(job, dpi) - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = FivetranSourceConfig.parse_obj(config_dict) - return cls(config, ctx) - def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py index fa6b614c4b52a6..72be864fc30a1c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/kafka_connect.py @@ -17,7 +17,7 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source +from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.kafka_connect.common import ( CONNECTOR_CLASS, @@ -94,11 +94,6 @@ def __init__(self, config: KafkaConnectSourceConfig, ctx: PipelineContext): if not jpype.isJVMStarted(): jpype.startJVM() - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = KafkaConnectSourceConfig.parse_obj(config_dict) - return cls(config, ctx) - def get_connectors_manifest(self) -> Iterable[ConnectorManifest]: """Get Kafka Connect connectors manifest using REST API. Enrich with lineages metadata. diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 828bbd213a796f..ef16dc0a49a223 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -23,7 +23,7 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, @@ -789,11 +789,6 @@ def get_datasource_from_id( return platform, dbname, schema, platform_instance - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = MetabaseConfig.parse_obj(config_dict) - return cls(ctx, config) - def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py index 26d160acf330cf..b0b04dff20bffc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mlflow.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mlflow.py @@ -333,8 +333,3 @@ def _get_global_tags_workunit( aspect=global_tags, ) return wu - - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = MLflowConfig.parse_obj(config_dict) - return cls(ctx, config) diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index f55d7a883edefe..7f446f6d1c2718 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -484,11 +484,6 @@ def __init__(self, config: NifiSourceConfig, ctx: PipelineContext) -> None: def rest_api_base_url(self): return self.config.site_url[: -len("nifi/")] + "nifi-api/" - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": - config = NifiSourceConfig.parse_obj(config_dict) - return cls(config, ctx) - def get_report(self) -> SourceReport: return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index f11d1944029ebb..666cc8c63aa9ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -369,11 +369,6 @@ def validate_connection(self) -> None: else: raise ValueError(f"Failed to connect to {self.config.connect_uri}/api") - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = RedashConfig.parse_obj(config_dict) - return cls(ctx, config) - def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict: url = f"/api/data_sources/{data_source_id}" resp = self.client._get(url).json() 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index c0385a8d5af30a..b8afd145727400 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -23,7 +23,6 @@ from datahub.ingestion.api.source import ( CapabilityReport, MetadataWorkUnitProcessor, - Source, SourceCapability, SourceReport, TestableSource, @@ -251,11 +250,6 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): self.add_config_to_report() - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": - config = SnowflakeV2Config.parse_obj(config_dict) - return cls(ctx, config) - @staticmethod def test_connection(config_dict: dict) -> TestConnectionReport: test_report = TestConnectionReport() diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 1da233bf0b22ab..a8b328f6e17739 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -33,7 +33,7 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source +from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.sql_types import resolve_sql_type from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( @@ -265,11 +265,6 @@ def login(self) -> requests.Session: # TODO(Gabe): how should we message about this error? 
return requests_session - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = SupersetConfig.parse_obj(config_dict) - return cls(ctx, config) - def paginate_entity_api_results(self, entity_type, page_size=100): current_page = 0 total_items = page_size diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 2543cbe653ba72..ea3fb6c979a19c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -71,7 +71,6 @@ from datahub.ingestion.api.source import ( CapabilityReport, MetadataWorkUnitProcessor, - Source, StructuredLogLevel, TestableSource, TestConnectionReport, @@ -804,11 +803,6 @@ def test_connection(config_dict: dict) -> TestConnectionReport: def get_report(self) -> TableauSourceReport: return self.report - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - config = TableauConfig.parse_obj(config_dict) - return cls(config, ctx) - def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), diff --git a/metadata-ingestion/tests/unit/api/test_pipeline.py b/metadata-ingestion/tests/unit/api/test_pipeline.py index fe3d3160b729a1..324e4ed0f6e853 100644 --- a/metadata-ingestion/tests/unit/api/test_pipeline.py +++ b/metadata-ingestion/tests/unit/api/test_pipeline.py @@ -4,6 +4,7 @@ import pytest from freezegun import freeze_time +from typing_extensions import Self from datahub.configuration.common import DynamicTypedConfig from datahub.ingestion.api.committable import CommitPolicy, Committable @@ -440,7 +441,7 @@ def __init__(self, ctx: PipelineContext): ] @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": + def create(cls, config_dict: dict, ctx: PipelineContext) -> Self: assert not config_dict return 
cls(ctx) From e34b2e453c5ba0d4d13f0a6bbc993acbff2576aa Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 13 Jan 2025 12:56:25 -0800 Subject: [PATCH 070/249] fix(ci): only upload metadata model on root repo (#12324) --- .github/workflows/metadata-model.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index 632e6ac35d673e..6f62284afcc172 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -20,11 +20,9 @@ jobs: steps: - name: Check whether upload to datahub is enabled id: publish - env: - ENABLE_PUBLISH: ${{ secrets.DataHubToken }} run: | - echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}" - echo "publish=${{ env.ENABLE_PUBLISH != '' }}" >> $GITHUB_OUTPUT + echo "Enable publish: ${{ github.repository == 'datahub-project/datahub' }}" + echo "publish=${{ github.repository == 'datahub-project/datahub' }}" >> $GITHUB_OUTPUT metadata-ingestion-docgen: runs-on: ubuntu-latest needs: setup From ddd0d21bf9e2a084dea3dfc6f4e000a322c56e7b Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Tue, 14 Jan 2025 07:57:10 +0900 Subject: [PATCH 071/249] feat(models): update mlflow-related mappers (#12263) Co-authored-by: Shirshanka Das Co-authored-by: RyanHolstien --- .../datahub/graphql/GmsGraphQLEngine.java | 37 ++- .../DataPlatformInstanceAspectMapper.java | 6 + .../mappers/TimeStampToAuditStampMapper.java | 24 ++ .../common/mappers/UrnToEntityMapper.java | 6 + .../DataProcessInstanceType.java | 102 ++++++ .../mappers/DataProcessInstanceMapper.java | 109 ++++++- .../types/entitytype/EntityTypeUrnMapper.java | 3 + .../mappers/MLModelPropertiesMapper.java | 10 + .../src/main/resources/entity.graphql | 2 +- .../TimeStampToAuditStampMapperTest.java | 46 +++ .../DataPlatformInstanceAspectMapperTest.java | 75 +++++ .../DataProcessInstanceTypeTest.java | 246 +++++++++++++++ .../DataProcessInstanceMapperTest.java | 127 
++++++++ .../src/app/buildEntityRegistry.ts | 2 + .../DataProcessInstanceEntity.tsx | 264 ++++++++++++++++ .../dataProcessInstance/preview/Preview.tsx | 103 ++++++ .../src/graphql/dataProcessInstance.graphql | 181 +++++++++++ .../src/graphql/fragments.graphql | 14 + datahub-web-react/src/graphql/lineage.graphql | 19 ++ .../src/graphql/mlModelGroup.graphql | 12 + .../java/com/linkedin/metadata/Constants.java | 4 + .../request/SearchRequestHandlerTest.java | 5 + .../ml/metadata/MLModelGroupProperties.pdl | 14 +- .../ml/metadata/MLModelLineageInfo.pdl | 35 +++ .../ml/metadata/MLModelProperties.pdl | 27 +- .../graphql/featureflags/FeatureFlags.java | 1 + .../com.linkedin.entity.aspects.snapshot.json | 71 +++-- ...com.linkedin.entity.entities.snapshot.json | 88 +++--- .../com.linkedin.entity.runs.snapshot.json | 71 +++-- ...nkedin.operations.operations.snapshot.json | 71 +++-- ...m.linkedin.platform.platform.snapshot.json | 88 +++--- .../tests/data_process_instance/__init__.py | 0 .../test_data_process_instance.py | 293 ++++++++++++++++++ smoke-test/tests/ml_models/__init__.py | 0 smoke-test/tests/ml_models/test_ml_models.py | 133 ++++++++ 35 files changed, 2046 insertions(+), 243 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java create mode 100644 
datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java create mode 100644 datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx create mode 100644 datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx create mode 100644 datahub-web-react/src/graphql/dataProcessInstance.graphql create mode 100644 metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl create mode 100644 smoke-test/tests/data_process_instance/__init__.py create mode 100644 smoke-test/tests/data_process_instance/test_data_process_instance.py create mode 100644 smoke-test/tests/ml_models/__init__.py create mode 100644 smoke-test/tests/ml_models/test_ml_models.py diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 59335ba605a741..3c46c1a8dce35c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -56,6 +56,7 @@ import com.linkedin.datahub.graphql.generated.DataJobInputOutput; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.DataQualityContract; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.DatasetStatsSummary; @@ -346,6 +347,7 @@ import com.linkedin.datahub.graphql.types.datajob.DataJobType; import com.linkedin.datahub.graphql.types.dataplatform.DataPlatformType; import com.linkedin.datahub.graphql.types.dataplatforminstance.DataPlatformInstanceType; +import com.linkedin.datahub.graphql.types.dataprocessinst.DataProcessInstanceType; import 
com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceRunEventMapper; import com.linkedin.datahub.graphql.types.dataproduct.DataProductType; import com.linkedin.datahub.graphql.types.dataset.DatasetType; @@ -530,6 +532,7 @@ public class GmsGraphQLEngine { private final FormType formType; private final IncidentType incidentType; private final RestrictedType restrictedType; + private final DataProcessInstanceType dataProcessInstanceType; private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; @@ -649,6 +652,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.formType = new FormType(entityClient); this.incidentType = new IncidentType(entityClient); this.restrictedType = new RestrictedType(entityClient, restrictedService); + this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags); this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit; @@ -699,7 +703,8 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { formType, incidentType, restrictedType, - businessAttributeType)); + businessAttributeType, + dataProcessInstanceType)); this.loadableTypes = new ArrayList<>(entityTypes); // Extend loadable types with types from the plugins // This allows us to offer search and browse capabilities out of the box for @@ -1024,6 +1029,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("tag", getResolver(tagType)) .dataFetcher("dataFlow", getResolver(dataFlowType)) .dataFetcher("dataJob", getResolver(dataJobType)) + .dataFetcher("dataProcessInstance", getResolver(dataProcessInstanceType)) .dataFetcher("glossaryTerm", getResolver(glossaryTermType)) .dataFetcher("glossaryNode", getResolver(glossaryNodeType)) .dataFetcher("domain", getResolver((domainType))) @@ -3058,6 +3064,35 @@ private void configureDataProcessInstanceResolvers(final RuntimeWiring.Builder b 
"DataProcessInstance", typeWiring -> typeWiring + .dataFetcher( + "dataPlatformInstance", + new LoadableTypeResolver<>( + dataPlatformInstanceType, + (env) -> { + final DataProcessInstance dataProcessInstance = env.getSource(); + return dataProcessInstance.getDataPlatformInstance() != null + ? dataProcessInstance.getDataPlatformInstance().getUrn() + : null; + })) + .dataFetcher( + "platform", + new LoadableTypeResolver<>( + dataPlatformType, + (env) -> { + final DataProcessInstance dataProcessInstance = env.getSource(); + return dataProcessInstance.getPlatform() != null + ? dataProcessInstance.getPlatform().getUrn() + : null; + })) + .dataFetcher("parentContainers", new ParentContainersResolver(entityClient)) + .dataFetcher( + "container", + new LoadableTypeResolver<>( + containerType, + (env) -> { + final DataProcessInstance dpi = env.getSource(); + return dpi.getContainer() != null ? dpi.getContainer().getUrn() : null; + })) .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) .dataFetcher( "lineage", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java index 4345819867617b..ab3127a3ae232b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.types.common.mappers; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; @@ -28,6 
+29,11 @@ public DataPlatformInstance apply( result.setType(EntityType.DATA_PLATFORM_INSTANCE); result.setUrn(input.getInstance().toString()); } + result.setPlatform( + DataPlatform.builder() + .setUrn(input.getPlatform().toString()) + .setType(EntityType.DATA_PLATFORM) + .build()); return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java new file mode 100644 index 00000000000000..58f78b146b406c --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java @@ -0,0 +1,24 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.common.TimeStamp; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AuditStamp; +import javax.annotation.Nullable; + +public class TimeStampToAuditStampMapper { + + public static final TimeStampToAuditStampMapper INSTANCE = new TimeStampToAuditStampMapper(); + + public static AuditStamp map( + @Nullable final QueryContext context, @Nullable final TimeStamp input) { + if (input == null) { + return null; + } + final AuditStamp result = new AuditStamp(); + result.setTime(input.getTime()); + if (input.hasActor()) { + result.setActor(input.getActor().toString()); + } + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java index 1988cafc486c18..eae33e6da2e56d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java @@ 
-18,6 +18,7 @@ import com.linkedin.datahub.graphql.generated.DataJob; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.DataProduct; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.Domain; @@ -225,6 +226,11 @@ public Entity apply(@Nullable QueryContext context, Urn input) { ((BusinessAttribute) partialEntity).setUrn(input.toString()); ((BusinessAttribute) partialEntity).setType(EntityType.BUSINESS_ATTRIBUTE); } + if (input.getEntityType().equals(DATA_PROCESS_INSTANCE_ENTITY_NAME)) { + partialEntity = new DataProcessInstance(); + ((DataProcessInstance) partialEntity).setUrn(input.toString()); + ((DataProcessInstance) partialEntity).setType(EntityType.DATA_PROCESS_INSTANCE); + } return partialEntity; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java new file mode 100644 index 00000000000000..eeaaaa96f51704 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java @@ -0,0 +1,102 @@ +package com.linkedin.datahub.graphql.types.dataprocessinst; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import 
com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class DataProcessInstanceType + implements com.linkedin.datahub.graphql.types.EntityType { + + public static final Set ASPECTS_TO_FETCH = + ImmutableSet.of( + DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME, + DATA_PLATFORM_INSTANCE_ASPECT_NAME, + DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME, + DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME, + DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, + TEST_RESULTS_ASPECT_NAME, + DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, + ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, + SUB_TYPES_ASPECT_NAME, + CONTAINER_ASPECT_NAME); + + private final EntityClient _entityClient; + private final FeatureFlags _featureFlags; + + @Override + public EntityType type() { + return EntityType.DATA_PROCESS_INSTANCE; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return DataProcessInstance.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + final List dataProcessInstanceUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + try { + Map entities = new HashMap<>(); + if (_featureFlags.isDataProcessInstanceEntityEnabled()) { + entities = + _entityClient.batchGetV2( + context.getOperationContext(), + DATA_PROCESS_INSTANCE_ENTITY_NAME, + new HashSet<>(dataProcessInstanceUrns), + ASPECTS_TO_FETCH); + } + 
+ final List gmsResults = new ArrayList<>(); + for (Urn urn : dataProcessInstanceUrns) { + if (_featureFlags.isDataProcessInstanceEntityEnabled()) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + } + + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? null + : DataFetcherResult.newResult() + .data(DataProcessInstanceMapper.map(context, gmsResult)) + .build()) + .collect(Collectors.toList()); + + } catch (Exception e) { + throw new RuntimeException("Failed to load Data Process Instance entity", e); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java index 7a4d342281fe54..28c9c8936fdbfb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java @@ -2,25 +2,38 @@ import static com.linkedin.metadata.Constants.*; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.SubTypes; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; +import com.linkedin.datahub.graphql.types.common.mappers.SubTypesMapper; import 
com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLHyperParamMapper; +import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLMetricMapper; import com.linkedin.dataprocess.DataProcessInstanceProperties; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.ml.metadata.MLTrainingRunProperties; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; /** * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. * *

To be replaced by auto-generated mappers implementations */ +@Slf4j public class DataProcessInstanceMapper implements ModelMapper { public static final DataProcessInstanceMapper INSTANCE = new DataProcessInstanceMapper(); @@ -30,6 +43,19 @@ public static DataProcessInstance map( return INSTANCE.apply(context, entityResponse); } + private void mapContainers( + @Nullable final QueryContext context, + @Nonnull DataProcessInstance dataProcessInstance, + @Nonnull DataMap dataMap) { + final com.linkedin.container.Container gmsContainer = + new com.linkedin.container.Container(dataMap); + dataProcessInstance.setContainer( + com.linkedin.datahub.graphql.generated.Container.builder() + .setType(EntityType.CONTAINER) + .setUrn(gmsContainer.getContainer().toString()) + .build()); + } + @Override public DataProcessInstance apply( @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { @@ -37,24 +63,97 @@ public DataProcessInstance apply( result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DATA_PROCESS_INSTANCE); + Urn entityUrn = entityResponse.getUrn(); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult( - context, DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, this::mapDataProcessProperties); + DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> + mapDataProcessProperties(context, dataProcessInstance, dataMap, entityUrn)); + mappingHelper.mapToResult( + ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> + mapTrainingRunProperties(context, dataProcessInstance, dataMap)); + mappingHelper.mapToResult( + DATA_PLATFORM_INSTANCE_ASPECT_NAME, + (dataProcessInstance, dataMap) -> { + DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataMap); + dataProcessInstance.setDataPlatformInstance( + DataPlatformInstanceAspectMapper.map(context, dataPlatformInstance)); + 
DataPlatform dataPlatform = new DataPlatform(); + dataPlatform.setUrn(dataPlatformInstance.getPlatform().toString()); + dataPlatform.setType(EntityType.DATA_PLATFORM); + dataProcessInstance.setPlatform(dataPlatform); + }); + mappingHelper.mapToResult( + SUB_TYPES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> + dataProcessInstance.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); + mappingHelper.mapToResult( + CONTAINER_ASPECT_NAME, + (dataProcessInstance, dataMap) -> mapContainers(context, dataProcessInstance, dataMap)); return mappingHelper.getResult(); } - private void mapDataProcessProperties( + private void mapTrainingRunProperties( @Nonnull QueryContext context, @Nonnull DataProcessInstance dpi, @Nonnull DataMap dataMap) { + MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(dataMap); + + com.linkedin.datahub.graphql.generated.MLTrainingRunProperties properties = + new com.linkedin.datahub.graphql.generated.MLTrainingRunProperties(); + if (trainingProperties.hasId()) { + properties.setId(trainingProperties.getId()); + } + if (trainingProperties.hasOutputUrls()) { + properties.setOutputUrls( + trainingProperties.getOutputUrls().stream() + .map(url -> url.toString()) + .collect(Collectors.toList())); + } + if (trainingProperties.getHyperParams() != null) { + properties.setHyperParams( + trainingProperties.getHyperParams().stream() + .map(param -> MLHyperParamMapper.map(context, param)) + .collect(Collectors.toList())); + } + if (trainingProperties.getTrainingMetrics() != null) { + properties.setTrainingMetrics( + trainingProperties.getTrainingMetrics().stream() + .map(metric -> MLMetricMapper.map(context, metric)) + .collect(Collectors.toList())); + } + if (trainingProperties.hasId()) { + properties.setId(trainingProperties.getId()); + } + dpi.setMlTrainingRunProperties(properties); + } + + private void mapDataProcessProperties( + @Nonnull QueryContext context, + @Nonnull DataProcessInstance dpi, + @Nonnull DataMap 
dataMap, + @Nonnull Urn entityUrn) { DataProcessInstanceProperties dataProcessInstanceProperties = new DataProcessInstanceProperties(dataMap); + + com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties properties = + new com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties(); + dpi.setName(dataProcessInstanceProperties.getName()); - if (dataProcessInstanceProperties.hasCreated()) { - dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); - } + properties.setName(dataProcessInstanceProperties.getName()); if (dataProcessInstanceProperties.hasExternalUrl()) { dpi.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); + properties.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); + } + if (dataProcessInstanceProperties.hasCustomProperties()) { + properties.setCustomProperties( + CustomPropertiesMapper.map( + dataProcessInstanceProperties.getCustomProperties(), entityUrn)); + } + if (dataProcessInstanceProperties.hasCreated()) { + dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); } + dpi.setProperties(properties); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java index 334faf753cb8b5..5b72c2b3c11c5e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java @@ -77,6 +77,9 @@ public class EntityTypeUrnMapper { .put( Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME, "urn:li:entityType:datahub.businessAttribute") + .put( + Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, + "urn:li:entityType:datahub.dataProcessInstance") .build(); private static final Map ENTITY_TYPE_URN_TO_NAME = diff 
--git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java index 265005c2caa9ee..7b00fe88f2d683 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java @@ -7,6 +7,7 @@ import com.linkedin.datahub.graphql.generated.MLModelGroup; import com.linkedin.datahub.graphql.generated.MLModelProperties; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper; import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -31,6 +32,15 @@ public MLModelProperties apply( final MLModelProperties result = new MLModelProperties(); result.setDate(mlModelProperties.getDate()); + if (mlModelProperties.getName() != null) { + result.setName(mlModelProperties.getName()); + } else { + // backfill name from URN for backwards compatibility + result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here + } + result.setCreated(TimeStampToAuditStampMapper.map(context, mlModelProperties.getCreated())); + result.setLastModified( + TimeStampToAuditStampMapper.map(context, mlModelProperties.getLastModified())); result.setDescription(mlModelProperties.getDescription()); if (mlModelProperties.getExternalUrl() != null) { result.setExternalUrl(mlModelProperties.getExternalUrl().toString()); diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index adb24d92587b58..9dd1948e18e042 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ 
b/datahub-graphql-core/src/main/resources/entity.graphql @@ -10098,7 +10098,7 @@ type MLModelProperties { """ The display name of the model used in the UI """ - name: String! + name: String """ Detailed description of the model's purpose and characteristics diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java new file mode 100644 index 00000000000000..4e0dbd7b1733b4 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java @@ -0,0 +1,46 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import static org.testng.Assert.*; + +import com.linkedin.common.TimeStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.AuditStamp; +import org.testng.annotations.Test; + +public class TimeStampToAuditStampMapperTest { + + private static final String TEST_ACTOR_URN = "urn:li:corpuser:testUser"; + private static final long TEST_TIME = 1234567890L; + + @Test + public void testMapWithActor() throws Exception { + TimeStamp input = new TimeStamp(); + input.setTime(TEST_TIME); + input.setActor(Urn.createFromString(TEST_ACTOR_URN)); + + AuditStamp result = TimeStampToAuditStampMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getTime().longValue(), TEST_TIME); + assertEquals(result.getActor(), TEST_ACTOR_URN); + } + + @Test + public void testMapWithoutActor() { + TimeStamp input = new TimeStamp(); + input.setTime(TEST_TIME); + + AuditStamp result = TimeStampToAuditStampMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getTime().longValue(), TEST_TIME); + assertNull(result.getActor()); + } + + @Test + public void testMapNull() { + AuditStamp result = TimeStampToAuditStampMapper.map(null, null); + + 
assertNull(result); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java new file mode 100644 index 00000000000000..479d7340fef945 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java @@ -0,0 +1,75 @@ +package com.linkedin.datahub.graphql.types.dataplatforminstance.mapper; + +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; +import org.testng.annotations.Test; + +public class DataPlatformInstanceAspectMapperTest { + + private static final String TEST_PLATFORM = "hive"; + private static final String TEST_INSTANCE = "prod"; + private static final String TEST_PLATFORM_URN = "urn:li:dataPlatform:" + TEST_PLATFORM; + private static final String TEST_INSTANCE_URN = + String.format( + "urn:li:dataPlatformInstance:(urn:li:dataPlatform:%s,%s)", TEST_PLATFORM, TEST_INSTANCE); + + @Test + public void testMapWithInstance() throws Exception { + // Create test input + com.linkedin.common.DataPlatformInstance input = new com.linkedin.common.DataPlatformInstance(); + DataPlatformUrn platformUrn = new DataPlatformUrn(TEST_PLATFORM); + Urn instanceUrn = Urn.createFromString(TEST_INSTANCE_URN); + + input.setPlatform(platformUrn); + input.setInstance(instanceUrn); + + // Map and verify + DataPlatformInstance result = DataPlatformInstanceAspectMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getType(), 
EntityType.DATA_PLATFORM_INSTANCE); + assertEquals(result.getUrn(), TEST_INSTANCE_URN); + + // Verify platform mapping + assertNotNull(result.getPlatform()); + assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); + assertEquals(result.getPlatform().getUrn(), TEST_PLATFORM_URN); + } + + @Test + public void testMapWithoutInstance() throws Exception { + // Create test input with only platform + com.linkedin.common.DataPlatformInstance input = new com.linkedin.common.DataPlatformInstance(); + DataPlatformUrn platformUrn = new DataPlatformUrn(TEST_PLATFORM); + input.setPlatform(platformUrn); + + // Map and verify + DataPlatformInstance result = DataPlatformInstanceAspectMapper.map(null, input); + + assertNotNull(result); + assertNull(result.getType()); // Type should be null when no instance + assertNull(result.getUrn()); // URN should be null when no instance + + // Verify platform is still mapped correctly + assertNotNull(result.getPlatform()); + assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); + assertEquals(result.getPlatform().getUrn(), TEST_PLATFORM_URN); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testMapNull() { + DataPlatformInstanceAspectMapper.map(null, null); + } + + @Test + public void testSingleton() { + assertNotNull(DataPlatformInstanceAspectMapper.INSTANCE); + assertSame( + DataPlatformInstanceAspectMapper.INSTANCE, DataPlatformInstanceAspectMapper.INSTANCE); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java new file mode 100644 index 00000000000000..437c74ab669146 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java @@ -0,0 +1,246 @@ +package 
com.linkedin.datahub.graphql.types.dataprocessinst; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static org.mockito.ArgumentMatchers.any; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.FabricType; +import com.linkedin.common.Status; +import com.linkedin.common.SubTypes; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.container.Container; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.dataprocess.DataProcessInstanceInput; +import com.linkedin.dataprocess.DataProcessInstanceOutput; +import com.linkedin.dataprocess.DataProcessInstanceProperties; +import com.linkedin.dataprocess.DataProcessInstanceRelationships; +import com.linkedin.dataprocess.DataProcessInstanceRunEvent; +import com.linkedin.dataprocess.DataProcessRunStatus; +import com.linkedin.dataprocess.DataProcessType; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.key.DataProcessInstanceKey; +import com.linkedin.ml.metadata.MLTrainingRunProperties; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.test.TestResult; +import com.linkedin.test.TestResultArray; 
+import com.linkedin.test.TestResultType; +import com.linkedin.test.TestResults; +import graphql.execution.DataFetcherResult; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class DataProcessInstanceTypeTest { + + private static final String TEST_INSTANCE_URN = + "urn:li:dataProcessInstance:(test-workflow,test-instance-1)"; + private static final String TEST_DPI_1_URN = "urn:li:dataProcessInstance:id-1"; + private static final DatasetUrn DATASET_URN = + new DatasetUrn(new DataPlatformUrn("kafka"), "dataset1", FabricType.TEST); + private static final Urn DPI_URN_REL = UrnUtils.getUrn("urn:li:dataProcessInstance:id-2"); + private static final DataProcessInstanceKey TEST_DPI_1_KEY = + new DataProcessInstanceKey().setId("id-1"); + private static final DataProcessInstanceProperties TEST_DPI_1_PROPERTIES = + new DataProcessInstanceProperties().setName("Test DPI").setType(DataProcessType.STREAMING); + private static final DataProcessInstanceInput TEST_DPI_1_DPI_INPUT = + new DataProcessInstanceInput().setInputs(new UrnArray(ImmutableList.of(DATASET_URN))); + private static final DataProcessInstanceOutput TEST_DPI_1_DPI_OUTPUT = + new DataProcessInstanceOutput().setOutputs(new UrnArray(ImmutableList.of(DATASET_URN))); + private static final DataProcessInstanceRelationships TEST_DPI_1_DPI_RELATIONSHIPS = + new DataProcessInstanceRelationships() + .setParentInstance(DPI_URN_REL) + .setUpstreamInstances(new UrnArray(ImmutableList.of(DPI_URN_REL))) + .setParentTemplate(DPI_URN_REL); + private static final DataProcessInstanceRunEvent TEST_DPI_1_DPI_RUN_EVENT = + new DataProcessInstanceRunEvent().setStatus(DataProcessRunStatus.COMPLETE); + private static final DataPlatformInstance TEST_DPI_1_DATA_PLATFORM_INSTANCE = + new DataPlatformInstance().setPlatform(new DataPlatformUrn("kafka")); + private static final Status TEST_DPI_1_STATUS = new 
Status().setRemoved(false); + private static final TestResults TEST_DPI_1_TEST_RESULTS = + new TestResults() + .setPassing( + new TestResultArray( + ImmutableList.of( + new TestResult() + .setTest(UrnUtils.getUrn("urn:li:test:123")) + .setType(TestResultType.SUCCESS)))) + .setFailing(new TestResultArray()); + private static final SubTypes TEST_DPI_1_SUB_TYPES = + new SubTypes().setTypeNames(new StringArray("subtype1")); + private static final Container TEST_DPI_1_CONTAINER = + new Container().setContainer(UrnUtils.getUrn("urn:li:container:123")); + private static final MLTrainingRunProperties ML_TRAINING_RUN_PROPERTIES = + new MLTrainingRunProperties().setId("mytrainingrun"); + + private static final String TEST_DPI_2_URN = "urn:li:dataProcessInstance:id-2"; + + @Test + public void testBatchLoadFull() throws Exception { + EntityClient client = Mockito.mock(EntityClient.class); + + Urn dpiUrn1 = Urn.createFromString(TEST_DPI_1_URN); + Urn dpiUrn2 = Urn.createFromString(TEST_DPI_2_URN); + + Map aspectMap = new HashMap<>(); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_KEY.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_PROPERTIES.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_INPUT.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_OUTPUT.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_RELATIONSHIPS.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_RUN_EVENT.data()))); + aspectMap.put( + 
Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DATA_PLATFORM_INSTANCE.data()))); + aspectMap.put( + Constants.STATUS_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_STATUS.data()))); + aspectMap.put( + Constants.TEST_RESULTS_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_TEST_RESULTS.data()))); + aspectMap.put( + Constants.SUB_TYPES_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_SUB_TYPES.data()))); + aspectMap.put( + Constants.CONTAINER_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_CONTAINER.data()))); + aspectMap.put( + Constants.ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(ML_TRAINING_RUN_PROPERTIES.data()))); + + Mockito.when( + client.batchGetV2( + any(), + Mockito.eq(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME), + Mockito.eq(new HashSet<>(ImmutableSet.of(dpiUrn1, dpiUrn2))), + Mockito.eq(DataProcessInstanceType.ASPECTS_TO_FETCH))) + .thenReturn( + ImmutableMap.of( + dpiUrn1, + new EntityResponse() + .setEntityName(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME) + .setUrn(dpiUrn1) + .setAspects(new EnvelopedAspectMap(aspectMap)))); + + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + DataProcessInstanceType type = new DataProcessInstanceType(client, mockFeatureFlags); + + QueryContext mockContext = getMockAllowContext(); + List> result = + type.batchLoad(ImmutableList.of(TEST_DPI_1_URN, TEST_DPI_2_URN), mockContext); + + // Verify response + Mockito.verify(client, Mockito.times(1)) + .batchGetV2( + any(), + Mockito.eq(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME), + Mockito.eq(ImmutableSet.of(dpiUrn1, dpiUrn2)), + Mockito.eq(DataProcessInstanceType.ASPECTS_TO_FETCH)); + + assertEquals(result.size(), 2); + + DataProcessInstance dpi1 = result.get(0).getData(); + 
assertEquals(dpi1.getUrn(), TEST_DPI_1_URN); + assertEquals(dpi1.getName(), "Test DPI"); + assertEquals(dpi1.getType(), EntityType.DATA_PROCESS_INSTANCE); + + // Assert second element is null + assertNull(result.get(1)); + } + + @Test + public void testBatchLoad() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + List> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + + assertEquals(result.size(), 1); + } + + @Test + public void testBatchLoadFeatureFlagDisabled() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(false); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + List> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + + assertEquals(result.size(), 0); + + Mockito.verify(mockClient, Mockito.never()) + .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); + } + + @Test(expectedExceptions = RuntimeException.class) + public void testBatchLoadClientException() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + Mockito.doThrow(RemoteInvocationException.class) + .when(mockClient) + .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + 
type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + } + + @Test + public void testGetType() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.type(), EntityType.DATA_PROCESS_INSTANCE); + } + + @Test + public void testObjectClass() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.objectClass(), DataProcessInstance.class); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java new file mode 100644 index 00000000000000..dc1ce935ad5ecd --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java @@ -0,0 +1,127 @@ +package com.linkedin.datahub.graphql.types.dataprocessinst.mappers; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.url.Url; +import com.linkedin.common.urn.Urn; +import com.linkedin.container.Container; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.dataprocess.DataProcessInstanceProperties; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; 
+import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import com.linkedin.ml.metadata.MLTrainingRunProperties; +import java.util.HashMap; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DataProcessInstanceMapperTest { + + private static final String TEST_PLATFORM_URN = "urn:li:dataPlatform:kafka"; + private static final String TEST_INSTANCE_URN = + "urn:li:dataProcessInstance:(test-workflow,test-instance)"; + private static final String TEST_CONTAINER_URN = "urn:li:container:testContainer"; + private static final String TEST_EXTERNAL_URL = "https://example.com/process"; + private static final String TEST_NAME = "Test Process Instance"; + + private EntityResponse entityResponse; + private Urn urn; + + @BeforeMethod + public void setup() throws Exception { + urn = Urn.createFromString(TEST_INSTANCE_URN); + entityResponse = new EntityResponse(); + entityResponse.setUrn(urn); + entityResponse.setAspects(new EnvelopedAspectMap(new HashMap<>())); + } + + @Test + public void testMapBasicFields() throws Exception { + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance); + assertEquals(instance.getUrn(), urn.toString()); + assertEquals(instance.getType(), EntityType.DATA_PROCESS_INSTANCE); + } + + @Test + public void testMapDataProcessProperties() throws Exception { + // Create DataProcessInstanceProperties + DataProcessInstanceProperties properties = new DataProcessInstanceProperties(); + properties.setName(TEST_NAME); + properties.setExternalUrl(new Url(TEST_EXTERNAL_URL)); + + // Add properties aspect + addAspect(Constants.DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, properties); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getProperties()); + assertEquals(instance.getName(), TEST_NAME); + assertEquals(instance.getExternalUrl(), TEST_EXTERNAL_URL); + } + + 
@Test + public void testMapPlatformInstance() throws Exception { + // Create DataPlatformInstance + DataPlatformInstance platformInstance = new DataPlatformInstance(); + platformInstance.setPlatform(Urn.createFromString(TEST_PLATFORM_URN)); + + // Add platform instance aspect + addAspect(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, platformInstance); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getDataPlatformInstance()); + assertNotNull(instance.getPlatform()); + assertEquals(instance.getPlatform().getUrn(), TEST_PLATFORM_URN); + assertEquals(instance.getPlatform().getType(), EntityType.DATA_PLATFORM); + } + + @Test + public void testMapContainer() throws Exception { + // Create Container aspect + Container container = new Container(); + container.setContainer(Urn.createFromString(TEST_CONTAINER_URN)); + + // Add container aspect + addAspect(Constants.CONTAINER_ASPECT_NAME, container); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getContainer()); + assertEquals(instance.getContainer().getUrn(), TEST_CONTAINER_URN); + assertEquals(instance.getContainer().getType(), EntityType.CONTAINER); + } + + @Test + public void testMapMLTrainingProperties() throws Exception { + // Create MLTrainingRunProperties + MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(); + trainingProperties.setId("test-run-id"); + trainingProperties.setOutputUrls(new StringArray("s3://test-bucket/model")); + + // Add ML training properties aspect + addAspect(Constants.ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, trainingProperties); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance); + assertEquals(instance.getMlTrainingRunProperties().getId(), "test-run-id"); + assertEquals( + instance.getMlTrainingRunProperties().getOutputUrls().get(0), "s3://test-bucket/model"); + } + + 
private void addAspect(String aspectName, RecordTemplate aspect) { + EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setValue(new Aspect(aspect.data())); + entityResponse.getAspects().put(aspectName, envelopedAspect); + } +} diff --git a/datahub-web-react/src/app/buildEntityRegistry.ts b/datahub-web-react/src/app/buildEntityRegistry.ts index 181ec7d328a587..b7ff97b3a07469 100644 --- a/datahub-web-react/src/app/buildEntityRegistry.ts +++ b/datahub-web-react/src/app/buildEntityRegistry.ts @@ -25,6 +25,7 @@ import { RestrictedEntity } from './entity/restricted/RestrictedEntity'; import { BusinessAttributeEntity } from './entity/businessAttribute/BusinessAttributeEntity'; import { SchemaFieldPropertiesEntity } from './entity/schemaField/SchemaFieldPropertiesEntity'; import { StructuredPropertyEntity } from './entity/structuredProperty/StructuredPropertyEntity'; +import { DataProcessInstanceEntity } from './entity/dataProcessInstance/DataProcessInstanceEntity'; export default function buildEntityRegistry() { const registry = new EntityRegistry(); @@ -54,5 +55,6 @@ export default function buildEntityRegistry() { registry.register(new BusinessAttributeEntity()); registry.register(new SchemaFieldPropertiesEntity()); registry.register(new StructuredPropertyEntity()); + registry.register(new DataProcessInstanceEntity()); return registry; } diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx new file mode 100644 index 00000000000000..4834a026ad94a3 --- /dev/null +++ b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx @@ -0,0 +1,264 @@ +import React from 'react'; +import { ApiOutlined } from '@ant-design/icons'; +import { + DataProcessInstance, + Entity as GeneratedEntity, + EntityType, + OwnershipType, + SearchResult, +} from '../../../types.generated'; +import { Preview } from 
'./preview/Preview'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { useGetDataProcessInstanceQuery } from '../../../graphql/dataProcessInstance.generated'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import { GenericEntityProperties } from '../shared/types'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityDropdown'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { getDataProduct } from '../shared/utils'; +// import SummaryTab from './profile/DataProcessInstaceSummary'; + +// const getProcessPlatformName = (data?: DataProcessInstance): string => { +// return ( +// data?.dataPlatformInstance?.platform?.properties?.displayName || +// capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) || +// '' +// ); +// }; + +const getParentEntities = (data: DataProcessInstance): GeneratedEntity[] => { + const parentEntity = data?.relationships?.relationships?.find( + (rel) => rel.type === 'InstanceOf' && rel.entity?.type === EntityType.DataJob, + ); + + if (!parentEntity?.entity) return []; + + // Convert to GeneratedEntity + return [ + { + type: parentEntity.entity.type, + urn: (parentEntity.entity as 
any).urn, // Make sure urn exists + relationships: (parentEntity.entity as any).relationships, + }, + ]; +}; +/** + * Definition of the DataHub DataProcessInstance entity. + */ +export class DataProcessInstanceEntity implements Entity { + type: EntityType = EntityType.DataProcessInstance; + + icon = (fontSize: number, styleType: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'dataProcessInstance'; + + getEntityName = () => 'Process Instance'; + + getGraphName = () => 'dataProcessInstance'; + + getCollectionName = () => 'Process Instances'; + + useEntityQuery = useGetDataProcessInstanceQuery; + + renderProfile = (urn: string) => ( + { + // const activeIncidentCount = processInstance?.dataProcessInstance?.activeIncidents.total; + // return `Incidents${(activeIncidentCount && ` (${activeIncidentCount})`) || ''}`; + // }, + // }, + ]} + sidebarSections={this.getSidebarSections()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarAboutSection, + }, + { + component: SidebarOwnerSection, + properties: { + defaultOwnerType: OwnershipType.TechnicalOwner, + }, + }, + { + component: SidebarTagsSection, + properties: { + hasTags: true, + hasTerms: true, + }, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + ]; + + getOverridePropertiesFromEntity = (processInstance?: DataProcessInstance | null): GenericEntityProperties => { + const name = processInstance?.name; + const externalUrl = processInstance?.externalUrl; + return { + name, + externalUrl, + }; + }; + + renderPreview = (_: PreviewType, data: DataProcessInstance) => { + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = 
getParentEntities(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as DataProcessInstance; + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = getParentEntities(data); + return ( + + ); + }; + + getLineageVizConfig = (entity: DataProcessInstance) => { + return { + urn: entity?.urn, + name: this.displayName(entity), + type: EntityType.DataProcessInstance, + subtype: entity?.subTypes?.typeNames?.[0], + icon: entity?.platform?.properties?.logoUrl || undefined, + platform: entity?.platform, + container: entity?.container, + // health: entity?.health || undefined, + }; + }; + + displayName = (data: DataProcessInstance) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: DataProcessInstance) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + ]); + }; +} diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx new file mode 100644 index 00000000000000..3a3b0340695d96 --- /dev/null +++ b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx @@ -0,0 +1,103 @@ +import React from 'react'; +import { + DataProduct, + Deprecation, + Domain, + Entity as GeneratedEntity, + EntityPath, + EntityType, + GlobalTags, + Health, + Owner, + SearchInsight, + Container, + ParentContainersResult, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; +import { useEntityRegistry } from 
'../../../useEntityRegistry'; +import { IconStyleType } from '../../Entity'; + +export const Preview = ({ + urn, + name, + subType, + description, + platformName, + platformLogo, + platformInstanceId, + container, + owners, + domain, + dataProduct, + deprecation, + globalTags, + snippet, + insights, + externalUrl, + degree, + paths, + health, + parentEntities, + parentContainers, +}: // duration, +// status, +// startTime, +{ + urn: string; + name: string; + subType?: string | null; + description?: string | null; + platformName?: string; + platformLogo?: string | null; + platformInstanceId?: string; + container?: Container; + owners?: Array | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + deprecation?: Deprecation | null; + globalTags?: GlobalTags | null; + snippet?: React.ReactNode | null; + insights?: Array | null; + externalUrl?: string | null; + degree?: number; + paths?: EntityPath[]; + health?: Health[] | null; + parentEntities?: Array | null; + parentContainers?: ParentContainersResult | null; + // duration?: number | null; + // status?: string | null; + // startTime?: number | null; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + ); +}; diff --git a/datahub-web-react/src/graphql/dataProcessInstance.graphql b/datahub-web-react/src/graphql/dataProcessInstance.graphql new file mode 100644 index 00000000000000..8f55ca4903d527 --- /dev/null +++ b/datahub-web-react/src/graphql/dataProcessInstance.graphql @@ -0,0 +1,181 @@ +fragment processInstanceRelationshipResults on EntityRelationshipsResult { + start + count + total + relationships { + type + direction + entity { + urn + type + ... on Dataset { + name + properties { + name + description + qualifiedName + } + editableProperties { + description + } + platform { + ...platformFields + } + subTypes { + typeNames + } + status { + removed + } + } + ... 
on DataJob { + urn + type + dataFlow { + ...nonRecursiveDataFlowFields + } + jobId + properties { + name + description + externalUrl + customProperties { + key + value + } + } + deprecation { + ...deprecationFields + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + subTypes { + typeNames + } + editableProperties { + description + } + status { + removed + } + } + } + } +} + +fragment dataProcessInstanceFields on DataProcessInstance { + urn + type + platform { + ...platformFields + } + parentContainers { + ...parentContainersFields + } + container { + ...entityContainer + } + subTypes { + typeNames + } + properties { + name + createdTS: created { + time + actor + } + customProperties { + key + value + } + } + mlTrainingRunProperties { + outputUrls + trainingMetrics { + name + description + value + } + hyperParams { + name + description + value + } + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + state(startTimeMillis: null, endTimeMillis: null, limit: 1) { + status + attempt + result { + resultType + nativeResultType + } + timestampMillis + durationMillis + } + relationships(input: { types: ["InstanceOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 50 }) { + ...processInstanceRelationshipResults + } +} + +query getDataProcessInstance($urn: String!) 
{ + dataProcessInstance(urn: $urn) { + urn + type + platform { + ...platformFields + } + parentContainers { + ...parentContainersFields + } + subTypes { + typeNames + } + container { + ...entityContainer + } + name + properties { + name + created { + time + actor + } + } + mlTrainingRunProperties { + id + outputUrls + trainingMetrics { + name + description + value + } + hyperParams { + name + description + value + } + } + relationships( + input: { types: ["InstanceOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 50 } + ) { + ...processInstanceRelationshipResults + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + state(startTimeMillis: null, endTimeMillis: null, limit: 1) { + status + attempt + result { + resultType + nativeResultType + } + timestampMillis + durationMillis + } + } +} diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 68c57c5cb5db55..ecac2997489354 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -863,8 +863,17 @@ fragment nonRecursiveMLModel on MLModel { ...ownershipFields } properties { + name description date + created { + time + actor + } + lastModified { + time + actor + } externalUrl version type @@ -956,7 +965,12 @@ fragment nonRecursiveMLModelGroupFields on MLModelGroup { ...deprecationFields } properties { + name description + created { + time + actor + } } browsePathV2 { ...browsePathV2Fields diff --git a/datahub-web-react/src/graphql/lineage.graphql b/datahub-web-react/src/graphql/lineage.graphql index ee05811cbb72de..457936ed62cd2e 100644 --- a/datahub-web-react/src/graphql/lineage.graphql +++ b/datahub-web-react/src/graphql/lineage.graphql @@ -259,6 +259,9 @@ fragment lineageNodeProperties on EntityWithRelationships { name description origin + tags { + ...globalTagsFields + } platform { ...platformFields } @@ -268,6 +271,19 @@ fragment lineageNodeProperties on 
EntityWithRelationships { status { removed } + properties { + createdTS: created { + time + actor + } + customProperties { + key + value + } + } + editableProperties { + description + } structuredProperties { properties { ...structuredPropertiesFields @@ -328,6 +344,9 @@ fragment lineageNodeProperties on EntityWithRelationships { urn type } + ... on DataProcessInstance { + ...dataProcessInstanceFields + } } fragment lineageFields on EntityWithRelationships { diff --git a/datahub-web-react/src/graphql/mlModelGroup.graphql b/datahub-web-react/src/graphql/mlModelGroup.graphql index 81ab65d0b9a08d..4f11ed4984d37a 100644 --- a/datahub-web-react/src/graphql/mlModelGroup.graphql +++ b/datahub-web-react/src/graphql/mlModelGroup.graphql @@ -2,6 +2,18 @@ query getMLModelGroup($urn: String!) { mlModelGroup(urn: $urn) { urn type + properties { + name + description + created { + time + actor + } + lastModified { + time + actor + } + } ...nonRecursiveMLModelGroupFields incoming: relationships( input: { diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 42080e4e17596e..01c33a2530efb5 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -421,6 +421,10 @@ public class Constants { "dataProcessInstanceRunEvent"; public static final String DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME = "dataProcessInstanceRelationships"; + public static final String DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME = "dataProcessInstanceInput"; + public static final String DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME = "dataProcessInstanceOutput"; + public static final String DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME = "dataProcessInstanceKey"; + public static final String ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME = "mlTrainingRunProperties"; // Business Attribute public static final String BUSINESS_ATTRIBUTE_KEY_ASPECT_NAME = 
"businessAttributeKey"; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index e51511699e345a..1a91ae35c6595b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -772,6 +772,11 @@ public void testQueryByDefault() { EntityType.SCHEMA_FIELD, Stream.concat(COMMON.stream(), Stream.of("schemaFieldAliases", "parent")) .collect(Collectors.toSet())) + .put( + EntityType.DATA_PROCESS_INSTANCE, + Stream.concat( + COMMON.stream(), Stream.of("parentInstance", "parentTemplate", "status")) + .collect(Collectors.toSet())) .build(); for (EntityType entityType : SEARCHABLE_ENTITY_TYPES) { diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl index 81c5e7a240f618..b9e364bee8c65a 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl @@ -12,7 +12,7 @@ import com.linkedin.common.TimeStamp @Aspect = { "name": "mlModelGroupProperties" } -record MLModelGroupProperties includes CustomProperties { +record MLModelGroupProperties includes CustomProperties, MLModelLineageInfo { /** * Display name of the MLModelGroup @@ -50,18 +50,6 @@ record MLModelGroupProperties includes CustomProperties { */ lastModified: optional TimeStamp - /** - * List of jobs (if any) used to train the model group. Visible in Lineage. 
- */ - @Relationship = { - "/*": { - "name": "TrainedBy", - "entityTypes": [ "dataJob" ], - "isLineage": true - } - } - trainingJobs: optional array[Urn] - /** * Version of the MLModelGroup */ diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl new file mode 100644 index 00000000000000..4c17d6e6ab1a00 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl @@ -0,0 +1,35 @@ +namespace com.linkedin.ml.metadata +import com.linkedin.common.Urn + + +/** +* A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups +*/ +record MLModelLineageInfo { + + /** + * List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect. + */ + @Relationship = { + "/*": { + "name": "TrainedBy", + "entityTypes": [ "dataJob", "dataProcessInstance" ], + "isLineage": true + } + } + trainingJobs: optional array[Urn] + + /** + * List of jobs or process instances (if any) that use the model or group. 
+ */ + @Relationship = { + "/*": { + "name": "UsedBy", + "entityTypes": [ "dataJob", "dataProcessInstance" ], + "isLineage": true, + "isUpstream": false + } + } + downstreamJobs: optional array[Urn] + +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl index d89d07384bba1d..ac10e0add13a1c 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl @@ -14,7 +14,7 @@ import com.linkedin.common.TimeStamp @Aspect = { "name": "mlModelProperties" } -record MLModelProperties includes CustomProperties, ExternalReference { +record MLModelProperties includes CustomProperties, ExternalReference, MLModelLineageInfo { /** * Display name of the MLModel @@ -116,31 +116,6 @@ record MLModelProperties includes CustomProperties, ExternalReference { } deployments: optional array[Urn] - /** - * List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect. 
- */ - @Relationship = { - "/*": { - "name": "TrainedBy", - "entityTypes": [ "dataJob", "dataProcessInstance" ], - "isLineage": true - } - } - trainingJobs: optional array[Urn] - - /** - * List of jobs (if any) that use the model - */ - @Relationship = { - "/*": { - "name": "UsedBy", - "entityTypes": [ "dataJob" ], - "isLineage": true, - "isUpstream": false - } - } - downstreamJobs: optional array[Urn] - /** * Groups the model belongs to */ diff --git a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java index 28abb26be1f524..97ca0dcabea9f3 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java @@ -25,4 +25,5 @@ public class FeatureFlags { private boolean showSeparateSiblings = false; private boolean alternateMCPValidation = false; private boolean showManageStructuredProperties = false; + private boolean dataProcessInstanceEntityEnabled = true; } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 1c713fd33884b5..432c4a9ddcb73f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -3827,7 +3827,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : 
"MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4005,37 +4041,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4213,7 +4218,7 @@ }, "doc" : "The order to sort the results i.e. ASCENDING or DESCENDING" } ] - }, "com.linkedin.metadata.query.filter.SortOrder", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.mxe.GenericAspect", { + }, "com.linkedin.metadata.query.filter.SortOrder", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", 
"com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.mxe.GenericAspect", { "type" : "record", "name" : "MetadataChangeProposal", "namespace" : "com.linkedin.mxe", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 77d4644f3c121a..45e91873de10ff 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -3985,7 +3985,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : 
"com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4163,37 +4199,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -5004,7 +5009,7 @@ "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with an ML Model Group\r", - "include" : [ "com.linkedin.common.CustomProperties" ], + "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : "string", @@ -5041,21 +5046,6 @@ "type" : "com.linkedin.common.TimeStamp", "doc" : "Date when the MLModelGroup was last modified\r", "optional" : true - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model group. 
Visible in Lineage.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", @@ -6700,7 +6690,7 @@ "type" : "int", "doc" : "The total number of entities directly under searched path" } ] - }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.search.SearchSuggestion", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", 
"com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { + }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.search.SearchSuggestion", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", 
"com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { "type" : "record", "name" : "SystemMetadata", "namespace" : "com.linkedin.mxe", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 8b6def75f7a665..9061cbff188135 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -3551,7 +3551,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ 
"com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -3729,37 +3765,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4002,7 +4007,7 @@ } } } ] - }, "com.linkedin.metadata.run.UnsafeEntityInfo", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", 
"com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], + }, "com.linkedin.metadata.run.UnsafeEntityInfo", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", 
"com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], "schema" : { "name" : "runs", "namespace" : "com.linkedin.entity", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index e4cc5c42303ee2..e6be4e828c976f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -3545,7 +3545,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ 
"com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -3723,37 +3759,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -3908,7 +3913,7 @@ "name" : "version", "type" : "long" } ] - }, "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", 
"com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties", { + }, "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", 
"com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", 
"com.linkedin.tag.TagProperties", { "type" : "record", "name" : "TimeseriesIndexSizeResult", "namespace" : "com.linkedin.timeseries", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index e375ac698ab516..10f3218d469757 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -3979,7 +3979,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4157,37 +4193,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4998,7 +5003,7 @@ "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with an ML Model Group\r", - "include" : [ "com.linkedin.common.CustomProperties" ], + "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : 
"string", @@ -5035,21 +5040,6 @@ "type" : "com.linkedin.common.TimeStamp", "doc" : "Date when the MLModelGroup was last modified\r", "optional" : true - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model group. Visible in Lineage.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", @@ -5844,7 +5834,7 @@ } ] } } ] - }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", "com.linkedin.metadata.key.ChartKey", 
"com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", 
"com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { + }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", 
"com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", 
"com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { "type" : "record", "name" : 
"GenericPayload", "namespace" : "com.linkedin.mxe", diff --git a/smoke-test/tests/data_process_instance/__init__.py b/smoke-test/tests/data_process_instance/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/data_process_instance/test_data_process_instance.py b/smoke-test/tests/data_process_instance/test_data_process_instance.py new file mode 100644 index 00000000000000..a8aca6034d5be1 --- /dev/null +++ b/smoke-test/tests/data_process_instance/test_data_process_instance.py @@ -0,0 +1,293 @@ +import logging +import os +import tempfile +from random import randint + +import pytest +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext, RecordEnvelope +from datahub.ingestion.api.sink import NoopWriteCallback +from datahub.ingestion.sink.file import FileSink, FileSinkConfig +from datahub.metadata.schema_classes import ( + AuditStampClass, + ContainerClass, + ContainerPropertiesClass, + DataPlatformInstanceClass, + DataPlatformInstancePropertiesClass, + DataProcessInstanceKeyClass, + DataProcessInstancePropertiesClass, + DataProcessInstanceRunEventClass, + MLHyperParamClass, + MLMetricClass, + MLTrainingRunPropertiesClass, + SubTypesClass, + TimeWindowSizeClass, +) + +from tests.utils import ( + delete_urns_from_file, + ingest_file_via_rest, + wait_for_writes_to_sync, +) + +logger = logging.getLogger(__name__) + +# Generate unique DPI ID +dpi_id = f"test-pipeline-run-{randint(1000, 9999)}" +dpi_urn = f"urn:li:dataProcessInstance:{dpi_id}" + + +class FileEmitter: + def __init__(self, filename: str) -> None: + self.sink: FileSink = FileSink( + ctx=PipelineContext(run_id="create_test_data"), + config=FileSinkConfig(filename=filename), + ) + + def emit(self, event): + self.sink.write_record_async( + record_envelope=RecordEnvelope(record=event, metadata={}), + write_callback=NoopWriteCallback(), + ) + + def close(self): + self.sink.close() + + +def 
create_test_data(filename: str): + mcps = [ + # Key aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceKey", + aspect=DataProcessInstanceKeyClass(id=dpi_id), + ), + # Properties aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceProperties", + aspect=DataProcessInstancePropertiesClass( + name="Test Pipeline Run", + type="BATCH_SCHEDULED", + created=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:datahub" + ), + ), + ), + # Run Event aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceRunEvent", + aspect=DataProcessInstanceRunEventClass( + timestampMillis=1704067200000, + eventGranularity=TimeWindowSizeClass(unit="WEEK", multiple=1), + status="COMPLETE", + ), + ), + # Platform Instance aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataPlatformInstance", + aspect=DataPlatformInstanceClass( + platform="urn:li:dataPlatform:airflow", + instance="urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,1234567890)", + ), + ), + MetadataChangeProposalWrapper( + entityType="dataPlatformInstance", + entityUrn="urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,1234567890)", + aspectName="dataPlatformInstanceProperties", + aspect=DataPlatformInstancePropertiesClass( + name="my process instance", + ), + ), + # SubTypes aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="subTypes", + aspect=SubTypesClass(typeNames=["TEST", "BATCH_JOB"]), + ), + # Container aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="container", + aspect=ContainerClass(container="urn:li:container:testGroup1"), + ), + MetadataChangeProposalWrapper( + entityType="container", 
+ entityUrn="urn:li:container:testGroup1", + aspectName="containerProperties", + aspect=ContainerPropertiesClass(name="testGroup1"), + ), + # ML Training Run Properties aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="mlTrainingRunProperties", + aspect=MLTrainingRunPropertiesClass( + id="test-training-run-123", + trainingMetrics=[ + MLMetricClass( + name="accuracy", + description="accuracy of the model", + value="0.95", + ), + MLMetricClass( + name="loss", + description="accuracy loss of the model", + value="0.05", + ), + ], + hyperParams=[ + MLHyperParamClass( + name="learningRate", + description="rate of learning", + value="0.001", + ), + MLHyperParamClass( + name="batchSize", description="size of the batch", value="32" + ), + ], + outputUrls=["s3://my-bucket/ml/output"], + ), + ), + ] + + file_emitter = FileEmitter(filename) + for mcp in mcps: + file_emitter.emit(mcp) + file_emitter.close() + + +@pytest.fixture(scope="module", autouse=False) +def ingest_cleanup_data(auth_session, graph_client, request): + new_file, filename = tempfile.mkstemp(suffix=".json") + try: + create_test_data(filename) + print("ingesting data process instance test data") + ingest_file_via_rest(auth_session, filename) + wait_for_writes_to_sync() + yield + print("removing data process instance test data") + delete_urns_from_file(graph_client, filename) + wait_for_writes_to_sync() + finally: + os.remove(filename) + + +@pytest.mark.integration +def test_search_dpi(auth_session, ingest_cleanup_data): + """Test DPI search and validation of returned fields using GraphQL.""" + + json = { + "query": """query scrollAcrossEntities($input: ScrollAcrossEntitiesInput!) { + scrollAcrossEntities(input: $input) { + nextScrollId + count + total + searchResults { + entity { + ... 
on DataProcessInstance { + urn + properties { + name + externalUrl + } + dataPlatformInstance { + platform { + urn + name + } + } + subTypes { + typeNames + } + container { + urn + } + platform { + urn + name + properties { + type + } + } + mlTrainingRunProperties { + id + trainingMetrics { + name + value + } + hyperParams { + name + value + } + outputUrls + } + } + } + } + } + }""", + "variables": { + "input": {"types": ["DATA_PROCESS_INSTANCE"], "query": dpi_id, "count": 10} + }, + } + + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + res_data = response.json() + + # Basic response structure validation + assert res_data, "Response should not be empty" + assert "data" in res_data, "Response should contain 'data' field" + print("RESPONSE DATA:" + str(res_data)) + assert ( + "scrollAcrossEntities" in res_data["data"] + ), "Response should contain 'scrollAcrossEntities' field" + + search_results = res_data["data"]["scrollAcrossEntities"] + assert ( + "searchResults" in search_results + ), "Response should contain 'searchResults' field" + + results = search_results["searchResults"] + assert len(results) > 0, "Should find at least one result" + + # Find our test entity + test_entity = None + for result in results: + if result["entity"]["urn"] == dpi_urn: + test_entity = result["entity"] + break + + assert test_entity is not None, f"Should find test entity with URN {dpi_urn}" + + # Validate fields + props = test_entity["properties"] + assert props["name"] == "Test Pipeline Run" + + platform_instance = test_entity["dataPlatformInstance"] + assert platform_instance["platform"]["urn"] == "urn:li:dataPlatform:airflow" + + sub_types = test_entity["subTypes"] + assert set(sub_types["typeNames"]) == {"TEST", "BATCH_JOB"} + + container = test_entity["container"] + assert container["urn"] == "urn:li:container:testGroup1" + + ml_props = test_entity["mlTrainingRunProperties"] + assert ml_props["id"] == 
"test-training-run-123" + assert ml_props["trainingMetrics"][0] == {"name": "accuracy", "value": "0.95"} + assert ml_props["trainingMetrics"][1] == {"name": "loss", "value": "0.05"} + assert ml_props["hyperParams"][0] == {"name": "learningRate", "value": "0.001"} + assert ml_props["hyperParams"][1] == {"name": "batchSize", "value": "32"} + assert ml_props["outputUrls"][0] == "s3://my-bucket/ml/output" diff --git a/smoke-test/tests/ml_models/__init__.py b/smoke-test/tests/ml_models/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/ml_models/test_ml_models.py b/smoke-test/tests/ml_models/test_ml_models.py new file mode 100644 index 00000000000000..59821ab3e3cc41 --- /dev/null +++ b/smoke-test/tests/ml_models/test_ml_models.py @@ -0,0 +1,133 @@ +import logging +import os +import tempfile +from random import randint + +import pytest +from datahub.emitter.mce_builder import make_ml_model_group_urn, make_ml_model_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext, RecordEnvelope +from datahub.ingestion.api.sink import NoopWriteCallback +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.sink.file import FileSink, FileSinkConfig +from datahub.metadata.schema_classes import ( + MLModelGroupPropertiesClass, + MLModelPropertiesClass, +) + +from tests.utils import ( + delete_urns_from_file, + get_sleep_info, + ingest_file_via_rest, + wait_for_writes_to_sync, +) + +logger = logging.getLogger(__name__) + +# Generate unique model names for testing +start_index = randint(10, 10000) +model_names = [f"test_model_{i}" for i in range(start_index, start_index + 3)] +model_group_urn = make_ml_model_group_urn("workbench", "test_group", "DEV") +model_urns = [make_ml_model_urn("workbench", name, "DEV") for name in model_names] + + +class FileEmitter: + def __init__(self, filename: str) -> None: + self.sink: FileSink = FileSink( + 
ctx=PipelineContext(run_id="create_test_data"), + config=FileSinkConfig(filename=filename), + ) + + def emit(self, event): + self.sink.write_record_async( + record_envelope=RecordEnvelope(record=event, metadata={}), + write_callback=NoopWriteCallback(), + ) + + def close(self): + self.sink.close() + + +def create_test_data(filename: str): + # Create model group + model_group_mcp = MetadataChangeProposalWrapper( + entityUrn=str(model_group_urn), + aspect=MLModelGroupPropertiesClass( + description="Test model group for integration testing", + trainingJobs=["urn:li:dataProcessInstance:test_job"], + ), + ) + + # Create models that belong to the group + model_mcps = [ + MetadataChangeProposalWrapper( + entityUrn=model_urn, + aspect=MLModelPropertiesClass( + name=f"Test Model ({model_urn})", + description=f"Test model {model_urn}", + groups=[str(model_group_urn)], + trainingJobs=["urn:li:dataProcessInstance:test_job"], + ), + ) + for model_urn in model_urns + ] + + file_emitter = FileEmitter(filename) + for mcps in [model_group_mcp] + model_mcps: + file_emitter.emit(mcps) + + file_emitter.close() + + +sleep_sec, sleep_times = get_sleep_info() + + +@pytest.fixture(scope="module", autouse=False) +def ingest_cleanup_data(auth_session, graph_client, request): + new_file, filename = tempfile.mkstemp(suffix=".json") + try: + create_test_data(filename) + print("ingesting ml model test data") + ingest_file_via_rest(auth_session, filename) + wait_for_writes_to_sync() + yield + print("removing ml model test data") + delete_urns_from_file(graph_client, filename) + wait_for_writes_to_sync() + finally: + os.remove(filename) + + +@pytest.mark.integration +def test_create_ml_models(graph_client: DataHubGraph, ingest_cleanup_data): + """Test creation and validation of ML models and model groups.""" + + # Validate model group properties + fetched_group_props = graph_client.get_aspect( + str(model_group_urn), MLModelGroupPropertiesClass + ) + assert fetched_group_props is not None + 
assert fetched_group_props.description == "Test model group for integration testing" + assert fetched_group_props.trainingJobs == ["urn:li:dataProcessInstance:test_job"] + + # Validate individual models + for model_urn in model_urns: + fetched_model_props = graph_client.get_aspect(model_urn, MLModelPropertiesClass) + assert fetched_model_props is not None + assert fetched_model_props.name == f"Test Model ({model_urn})" + assert fetched_model_props.description == f"Test model {model_urn}" + assert str(model_group_urn) in (fetched_model_props.groups or []) + assert fetched_model_props.trainingJobs == [ + "urn:li:dataProcessInstance:test_job" + ] + + # Validate relationships between models and group + related_models = set() + for e in graph_client.get_related_entities( + str(model_group_urn), + relationship_types=["MemberOf"], + direction=DataHubGraph.RelationshipDirection.INCOMING, + ): + related_models.add(e.urn) + + assert set(model_urns) == related_models From e1532a7f516c80906b74c53a89d3fd895c7dcf1d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 13 Jan 2025 21:41:51 -0800 Subject: [PATCH 072/249] fix(ingest): support async_flag properly in ingestProposalBatch (#12332) --- metadata-ingestion/src/datahub/emitter/rest_emitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index 74b8ade7da445b..7271f784bf881e 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -374,7 +374,7 @@ def emit_mcps( # the size when chunking, and again for the actual request. 
payload_dict: dict = {"proposals": mcp_obj_chunk} if async_flag is not None: - payload_dict["async"] = True if async_flag else False + payload_dict["async"] = "true" if async_flag else "false" payload = json.dumps(payload_dict) self._emit_generic(url, payload) From ebbadf65ad836e550af52566d7036a791ceafbfa Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 14 Jan 2025 01:50:14 -0800 Subject: [PATCH 073/249] feat(ingest/snowflake): Support ingesting snowflake tags as structured properties (#12285) --- .../create_structured_property.py | 6 +- .../update_structured_property.py | 2 +- .../src/datahub/emitter/mcp_builder.py | 27 + .../source/snowflake/snowflake_config.py | 13 + .../source/snowflake/snowflake_schema.py | 7 +- .../source/snowflake/snowflake_schema_gen.py | 132 +- .../source/snowflake/snowflake_tag.py | 18 +- .../datahub/ingestion/source/sql/sql_utils.py | 5 + ...nowflake_structured_properties_golden.json | 4418 +++++++++++++++++ .../integration/snowflake/test_snowflake.py | 62 + .../snowflake/test_snowflake_tag.py | 48 + 11 files changed, 4708 insertions(+), 30 deletions(-) create mode 100644 metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json diff --git a/metadata-ingestion/examples/structured_properties/create_structured_property.py b/metadata-ingestion/examples/structured_properties/create_structured_property.py index e66ac3aec41221..64bc0a67812775 100644 --- a/metadata-ingestion/examples/structured_properties/create_structured_property.py +++ b/metadata-ingestion/examples/structured_properties/create_structured_property.py @@ -17,7 +17,7 @@ rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080") # first, let's make an open ended structured property that allows one text value -text_property_urn = StructuredPropertyUrn("openTextProperty") +text_property_urn = StructuredPropertyUrn("io.acryl.openTextProperty") text_property_definition = StructuredPropertyDefinitionClass( 
qualifiedName="io.acryl.openTextProperty", displayName="Open Text Property", @@ -39,7 +39,7 @@ # next, let's make a property that allows for multiple datahub entity urns as values # This example property could be used to reference other users or groups in datahub -urn_property_urn = StructuredPropertyUrn("dataSteward") +urn_property_urn = StructuredPropertyUrn("io.acryl.dataManagement.dataSteward") urn_property_definition = StructuredPropertyDefinitionClass( qualifiedName="io.acryl.dataManagement.dataSteward", displayName="Data Steward", @@ -63,7 +63,7 @@ rest_emitter.emit(event_prop_2) # finally, let's make a single select number property with a few allowed options -number_property_urn = StructuredPropertyUrn("replicationSLA") +number_property_urn = StructuredPropertyUrn("io.acryl.dataManagement.replicationSLA") number_property_definition = StructuredPropertyDefinitionClass( qualifiedName="io.acryl.dataManagement.replicationSLA", displayName="Retention Time", diff --git a/metadata-ingestion/examples/structured_properties/update_structured_property.py b/metadata-ingestion/examples/structured_properties/update_structured_property.py index 9b80ebc236d8b6..6f4b8b3be20d15 100644 --- a/metadata-ingestion/examples/structured_properties/update_structured_property.py +++ b/metadata-ingestion/examples/structured_properties/update_structured_property.py @@ -30,7 +30,7 @@ def get_emitter() -> Union[DataHubRestEmitter, DatahubKafkaEmitter]: # input your unique structured property ID -property_urn = StructuredPropertyUrn("dataSteward") +property_urn = StructuredPropertyUrn("io.acryl.dataManagement.dataSteward") with get_emitter() as emitter: for patch_mcp in ( diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index c8eb62a2e1de23..581f903d0eef0d 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -31,9 +31,12 @@ OwnershipClass, 
OwnershipTypeClass, StatusClass, + StructuredPropertiesClass, + StructuredPropertyValueAssignmentClass, SubTypesClass, TagAssociationClass, ) +from datahub.metadata.urns import StructuredPropertyUrn # In https://github.com/datahub-project/datahub/pull/11214, we added a # new env field to container properties. However, populating this field @@ -187,12 +190,31 @@ def add_tags_to_entity_wu( ).as_workunit() +def add_structured_properties_to_entity_wu( + entity_urn: str, structured_properties: Dict[StructuredPropertyUrn, str] +) -> Iterable[MetadataWorkUnit]: + aspect = StructuredPropertiesClass( + properties=[ + StructuredPropertyValueAssignmentClass( + propertyUrn=urn.urn(), + values=[value], + ) + for urn, value in structured_properties.items() + ] + ) + yield MetadataChangeProposalWrapper( + entityUrn=entity_urn, + aspect=aspect, + ).as_workunit() + + def gen_containers( container_key: KeyType, name: str, sub_types: List[str], parent_container_key: Optional[ContainerKey] = None, extra_properties: Optional[Dict[str, str]] = None, + structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None, domain_urn: Optional[str] = None, description: Optional[str] = None, owner_urn: Optional[str] = None, @@ -282,6 +304,11 @@ def gen_containers( tags=sorted(tags), ) + if structured_properties: + yield from add_structured_properties_to_entity_wu( + entity_urn=container_urn, structured_properties=structured_properties + ) + def add_dataset_to_container( container_key: KeyType, dataset_urn: str diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 2d61ce59857778..b14e51a982082c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -244,6 +244,11 @@ class SnowflakeV2Config( description="""Optional. 
Allowed values are `without_lineage`, `with_lineage`, and `skip` (default). `without_lineage` only extracts tags that have been applied directly to the given entity. `with_lineage` extracts both directly applied and propagated tags, but will be significantly slower. See the [Snowflake documentation](https://docs.snowflake.com/en/user-guide/object-tagging.html#tag-lineage) for information about tag lineage/propagation. """, ) + extract_tags_as_structured_properties: bool = Field( + default=False, + description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.", + ) + include_external_url: bool = Field( default=True, description="Whether to populate Snowsight url for Snowflake Objects", @@ -263,6 +268,14 @@ class SnowflakeV2Config( description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.", ) + structured_property_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description=( + "List of regex patterns for structured properties to include in ingestion." + " Only used if `extract_tags` and `extract_tags_as_structured_properties` are enabled." + ), + ) + # This is required since access_history table does not capture whether the table was temporary table. 
temporary_tables_pattern: List[str] = Field( default=DEFAULT_TEMP_TABLES_PATTERNS, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 780effc82b0163..d165be3f3cc656 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -45,15 +45,18 @@ class SnowflakeTag: name: str value: str - def display_name(self) -> str: + def tag_display_name(self) -> str: return f"{self.name}: {self.value}" - def identifier(self) -> str: + def tag_identifier(self) -> str: return f"{self._id_prefix_as_str()}:{self.value}" def _id_prefix_as_str(self) -> str: return f"{self.database}.{self.schema}.{self.name}" + def structured_property_identifier(self) -> str: + return f"snowflake.{self.database}.{self.schema}.{self.name}" + @dataclass class SnowflakeColumn(BaseColumn): diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 6f09c26b08da2d..393e4d3c96d51f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -4,12 +4,14 @@ from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mce_builder import ( + get_sys_time, make_data_platform_urn, make_dataset_urn_with_platform_instance, make_schema_field_urn, make_tag_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.mcp_builder import add_structured_properties_to_entity_wu from datahub.ingestion.api.source import SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.glossary.classification_mixin import ( @@ -72,6 +74,7 @@ PROFILING, ) from 
datahub.metadata.com.linkedin.pegasus2avro.common import ( + AuditStamp, GlobalTags, Status, SubTypes, @@ -98,7 +101,18 @@ StringType, TimeType, ) +from datahub.metadata.com.linkedin.pegasus2avro.structured import ( + StructuredPropertyDefinition, +) from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties +from datahub.metadata.urns import ( + ContainerUrn, + DatasetUrn, + DataTypeUrn, + EntityTypeUrn, + SchemaFieldUrn, + StructuredPropertyUrn, +) from datahub.sql_parsing.sql_parsing_aggregator import ( KnownLineageMapping, SqlParsingAggregator, @@ -673,14 +687,31 @@ def _process_view( yield from self.gen_dataset_workunits(view, schema_name, db_name) def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: - tag_identifier = tag.identifier() + use_sp = self.config.extract_tags_as_structured_properties + identifier = ( + self.snowflake_identifier(tag.structured_property_identifier()) + if use_sp + else tag.tag_identifier() + ) - if self.report.is_tag_processed(tag_identifier): + if self.report.is_tag_processed(identifier): return - self.report.report_tag_processed(tag_identifier) - - yield from self.gen_tag_workunits(tag) + self.report.report_tag_processed(identifier) + if use_sp: + yield from self.gen_tag_as_structured_property_workunits(tag) + else: + yield from self.gen_tag_workunits(tag) + + def _format_tags_as_structured_properties( + self, tags: List[SnowflakeTag] + ) -> Dict[StructuredPropertyUrn, str]: + return { + StructuredPropertyUrn( + self.snowflake_identifier(tag.structured_property_identifier()) + ): tag.value + for tag in tags + } def gen_dataset_workunits( self, @@ -725,6 +756,9 @@ def gen_dataset_workunits( env=self.config.env, ) + if self.config.extract_tags_as_structured_properties: + yield from self.gen_column_tags_as_structured_properties(dataset_urn, table) + yield from add_table_to_schema_container( dataset_urn=dataset_urn, parent_container_key=schema_container_key, @@ -758,16 +792,24 @@ def 
gen_dataset_workunits( ) if table.tags: - tag_associations = [ - TagAssociation( - tag=make_tag_urn(self.snowflake_identifier(tag.identifier())) + if self.config.extract_tags_as_structured_properties: + yield from add_structured_properties_to_entity_wu( + dataset_urn, + self._format_tags_as_structured_properties(table.tags), ) - for tag in table.tags - ] - global_tags = GlobalTags(tag_associations) - yield MetadataChangeProposalWrapper( - entityUrn=dataset_urn, aspect=global_tags - ).as_workunit() + else: + tag_associations = [ + TagAssociation( + tag=make_tag_urn( + self.snowflake_identifier(tag.tag_identifier()) + ) + ) + for tag in table.tags + ] + global_tags = GlobalTags(tag_associations) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, aspect=global_tags + ).as_workunit() if isinstance(table, SnowflakeView) and table.view_definition is not None: view_properties_aspect = ViewProperties( @@ -840,10 +882,10 @@ def get_dataset_properties( ) def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: - tag_urn = make_tag_urn(self.snowflake_identifier(tag.identifier())) + tag_urn = make_tag_urn(self.snowflake_identifier(tag.tag_identifier())) tag_properties_aspect = TagProperties( - name=tag.display_name(), + name=tag.tag_display_name(), description=f"Represents the Snowflake tag `{tag._id_prefix_as_str()}` with value `{tag.value}`.", ) @@ -851,6 +893,41 @@ def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: entityUrn=tag_urn, aspect=tag_properties_aspect ).as_workunit() + def gen_tag_as_structured_property_workunits( + self, tag: SnowflakeTag + ) -> Iterable[MetadataWorkUnit]: + identifier = self.snowflake_identifier(tag.structured_property_identifier()) + urn = StructuredPropertyUrn(identifier).urn() + aspect = StructuredPropertyDefinition( + qualifiedName=identifier, + displayName=tag.name, + valueType=DataTypeUrn("datahub.string").urn(), + entityTypes=[ + 
EntityTypeUrn(f"datahub.{ContainerUrn.ENTITY_TYPE}").urn(), + EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn(), + EntityTypeUrn(f"datahub.{SchemaFieldUrn.ENTITY_TYPE}").urn(), + ], + lastModified=AuditStamp( + time=get_sys_time(), actor="urn:li:corpuser:datahub" + ), + ) + yield MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=aspect, + ).as_workunit() + + def gen_column_tags_as_structured_properties( + self, dataset_urn: str, table: Union[SnowflakeTable, SnowflakeView] + ) -> Iterable[MetadataWorkUnit]: + for column_name in table.column_tags: + schema_field_urn = SchemaFieldUrn(dataset_urn, column_name).urn() + yield from add_structured_properties_to_entity_wu( + schema_field_urn, + self._format_tags_as_structured_properties( + table.column_tags[column_name] + ), + ) + def gen_schema_metadata( self, table: Union[SnowflakeTable, SnowflakeView], @@ -892,13 +969,14 @@ def gen_schema_metadata( [ TagAssociation( make_tag_urn( - self.snowflake_identifier(tag.identifier()) + self.snowflake_identifier(tag.tag_identifier()) ) ) for tag in table.column_tags[col.name] ] ) if col.name in table.column_tags + and not self.config.extract_tags_as_structured_properties else None ), ) @@ -985,8 +1063,17 @@ def gen_database_containers( ) ), tags=( - [self.snowflake_identifier(tag.identifier()) for tag in database.tags] + [ + self.snowflake_identifier(tag.tag_identifier()) + for tag in database.tags + ] if database.tags + and not self.config.extract_tags_as_structured_properties + else None + ), + structured_properties=( + self._format_tags_as_structured_properties(database.tags) + if database.tags and self.config.extract_tags_as_structured_properties else None ), ) @@ -1038,8 +1125,13 @@ def gen_schema_containers( else None ), tags=( - [self.snowflake_identifier(tag.identifier()) for tag in schema.tags] - if schema.tags + [self.snowflake_identifier(tag.tag_identifier()) for tag in schema.tags] + if schema.tags and not self.config.extract_tags_as_structured_properties 
+ else None + ), + structured_properties=( + self._format_tags_as_structured_properties(schema.tags) + if schema.tags and self.config.extract_tags_as_structured_properties else None ), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py index be449e963d270b..75567cc3da8830 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_tag.py @@ -165,10 +165,20 @@ def _filter_tags( allowed_tags = [] for tag in tags: - tag_identifier = tag.identifier() - self.report.report_entity_scanned(tag_identifier, "tag") - if not self.config.tag_pattern.allowed(tag_identifier): - self.report.report_dropped(tag_identifier) + identifier = ( + tag._id_prefix_as_str() + if self.config.extract_tags_as_structured_properties + else tag.tag_identifier() + ) + self.report.report_entity_scanned(identifier, "tag") + + pattern = ( + self.config.structured_property_pattern + if self.config.extract_tags_as_structured_properties + else self.config.tag_pattern + ) + if not pattern.allowed(identifier): + self.report.report_dropped(identifier) else: allowed_tags.append(tag) return allowed_tags diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py index f45147223b8881..1545de0fff796f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py @@ -20,6 +20,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField from datahub.metadata.schema_classes import DataPlatformInstanceClass +from datahub.metadata.urns import StructuredPropertyUrn from datahub.utilities.registries.domain_registry import DomainRegistry from 
datahub.utilities.urns.dataset_urn import DatasetUrn @@ -75,6 +76,7 @@ def gen_schema_container( created: Optional[int] = None, last_modified: Optional[int] = None, extra_properties: Optional[Dict[str, str]] = None, + structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None, ) -> Iterable[MetadataWorkUnit]: domain_urn: Optional[str] = None if domain_registry: @@ -99,6 +101,7 @@ def gen_schema_container( owner_urn=owner_urn, qualified_name=qualified_name, extra_properties=extra_properties, + structured_properties=structured_properties, ) @@ -133,6 +136,7 @@ def gen_database_container( created: Optional[int] = None, last_modified: Optional[int] = None, extra_properties: Optional[Dict[str, str]] = None, + structured_properties: Optional[Dict[StructuredPropertyUrn, str]] = None, ) -> Iterable[MetadataWorkUnit]: domain_urn: Optional[str] = None if domain_registry: @@ -154,6 +158,7 @@ def gen_database_container( owner_urn=owner_urn, qualified_name=qualified_name, extra_properties=extra_properties, + structured_properties=structured_properties, ) diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json new file mode 100644 index 00000000000000..3adedd59070396 --- /dev/null +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_structured_properties_golden.json @@ -0,0 +1,4418 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "snowflake", + "env": "PROD", + "database": "test_db" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/", + "name": "TEST_DB", + "description": "Comment for TEST_DB", + "env": "PROD", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 
1623135600000 + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "values": [ + { + "string": "other" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + 
"json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.other_db.other_schema.my_other_tag", + "displayName": "my_other_tag", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "snowflake", + "env": "PROD", + "database": "test_db", + "schema": "test_schema" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/", + "name": "TEST_SCHEMA", + "description": "comment for TEST_DB.TEST_SCHEMA", + "env": "PROD", + "created": { + "time": 1623135600000 + }, + 
"lastModified": { + "time": 1623135600000 + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "values": [ + { + "string": "other" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": 
"browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_1", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", + "name": "TABLE_1", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + 
"urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_2", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": 
"Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": 
"datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", + "name": "TABLE_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_3", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + 
"externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", + "name": "TABLE_3", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_4", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + 
"description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", 
+ "name": "TABLE_4", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": 
"UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_5", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/", + "name": "TABLE_5", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_5", + "description": "Comment for Table", + "created": { + 
"time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_6", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + 
"description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", + "name": "TABLE_6", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { 
+ "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_7", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": 
"VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", + "name": "TABLE_7", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, 
+{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + 
"changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_8", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": 
"Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", + "name": "TABLE_8", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + 
"changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_9", + 
"platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + 
"recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", + "name": "TABLE_9", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_10", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + 
"actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": 
false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "CLUSTERING_KEY": "LINEAR(COL_1)" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", + "name": "TABLE_10", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", + "description": "Comment for Table", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.security", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.security", + "displayName": "security", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.view_1", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for 
column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "IS_SECURE": "true" + }, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/", + "name": "VIEW_1", + "qualifiedName": 
"TEST_DB.TEST_SCHEMA.VIEW_1", + "description": "Comment for View", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "schemaField", + "entityUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),COL_1)", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.security", + "values": [ + { + "string": "pii" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view 
view_1 as select * from table_1", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_0", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.my_tag_0", + "displayName": "my_tag_0", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_1", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.my_tag_1", + "displayName": "my_tag_1", + 
"valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_2", + "changeType": "UPSERT", + "aspectName": "propertyDefinition", + "aspect": { + "json": { + "qualifiedName": "snowflake.test_db.test_schema.my_tag_2", + "displayName": "my_tag_2", + "valueType": "urn:li:dataType:datahub.string", + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.container", + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.schemaField" + ], + "immutable": false, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:datahub" + }, + "filterStatus": "DISABLED" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.view_2", + "platform": 
"urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": 
false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.snowflake.com/ap-south-1.aws/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/", + "name": "VIEW_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_2", + "description": "Comment for View", + "created": { + "time": 1623135600000 + }, + "lastModified": { + "time": 1623135600000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "structuredProperties", + "aspect": { + "json": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_0", + "values": [ + { + "string": "my_value_0" + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_1", + "values": [ + { + "string": "my_value_1" + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_2", + "values": [ + { + "string": "my_value_2" + } + ] + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_2 as select * from table_2", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + 
"aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + }, + { + "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_1)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_2)" + ], + 
"confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_3)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_4)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_5)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_6)" + ], + "confidenceScore": 0.9, + 
"query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_7)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_8)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_9)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_10)" + ], + "confidenceScore": 0.9, + "query": 
"urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create view view_1 as select * from table_1", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1736285939366, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)" + }, + 
{ + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_4)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_6)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_9)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_10)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 1615443388097, + "actor": "urn:li:corpuser:_ingestion" + }, + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "type": "VIEW", + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_1)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_2)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_3)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_4)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_5)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_6)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_7)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_8)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_9)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_10)" + ], + "confidenceScore": 0.9, + "query": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "queryProperties", + "aspect": { + "json": { + "statement": { + "value": "create view view_2 as select * from table_2", + "language": "SQL" + }, + "source": "SYSTEM", + "created": { + "time": 0, + "actor": "urn:li:corpuser:_ingestion" + }, + "lastModified": { + "time": 1736285939376, + "actor": "urn:li:corpuser:_ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "querySubjects", + "aspect": { + "json": { + "subjects": [ + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)" + }, + { + "entity": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_1)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_2)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_3)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_4)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_5)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_6)" + }, + { + "entity": 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_7)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_8)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_9)" + }, + { + "entity": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_10)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_1%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "query", + "entityUrn": "urn:li:query:view_urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Ctest_db.test_schema.view_2%2CPROD%29", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "schemaField", + 
"entityUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),COL_1)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.other_db.other_schema.my_other_tag", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_0", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": "urn:li:structuredProperty:snowflake.test_db.test_schema.my_tag_2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "structuredProperty", + "entityUrn": 
"urn:li:structuredProperty:snowflake.test_db.test_schema.security", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2025_01_07-13_38_56-3fo398", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index ef4918a20e640c..d2e20e784282ee 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -185,6 +185,68 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): assert cache_info["get_fk_constraints_for_schema"]["misses"] == 1 +def test_snowflake_tags_as_structured_properties( + pytestconfig, tmp_path, mock_time, mock_datahub_graph +): + test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake" + + # Run the metadata ingestion pipeline. 
+ output_file = tmp_path / "snowflake_structured_properties_test_events.json" + golden_file = test_resources_dir / "snowflake_structured_properties_golden.json" + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + sf_cursor.execute.side_effect = default_query_results + + pipeline = Pipeline( + config=PipelineConfig( + source=SourceConfig( + type="snowflake", + config=SnowflakeV2Config( + extract_tags_as_structured_properties=True, + extract_tags=TagOption.without_lineage, + account_id="ABC12345.ap-south-1.aws", + username="TST_USR", + password="TST_PWD", + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), + include_technical_schema=True, + include_table_lineage=False, + include_column_lineage=False, + include_usage_stats=False, + include_operational_stats=False, + ), + ), + sink=DynamicTypedConfig( + type="file", config={"filename": str(output_file)} + ), + ) + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + assert not pipeline.source.get_report().warnings + + # Verify the output. 
+ + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_file, + golden_path=golden_file, + ignore_paths=[ + r"root\[\d+\]\['aspect'\]\['json'\]\['timestampMillis'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['created'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['lastModified'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['fields'\]\[\d+\]\['glossaryTerms'\]\['auditStamp'\]\['time'\]", + r"root\[\d+\]\['systemMetadata'\]", + ], + ) + + @freeze_time(FROZEN_TIME) def test_snowflake_private_link_and_incremental_mcps( pytestconfig, tmp_path, mock_time, mock_datahub_graph diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py index 9bb598cb0c1c7f..d4f6e92c93c1e0 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_tag.py @@ -98,3 +98,51 @@ def test_snowflake_tag_pattern_deny(): "TEST_DB.TEST_SCHEMA.my_tag_1:my_value_1", "TEST_DB.TEST_SCHEMA.security:pii", } + + +def test_snowflake_structured_property_pattern_deny(): + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + sf_cursor.execute.side_effect = default_query_results + + tag_config = SnowflakeV2Config( + account_id="ABC12345.ap-south-1.aws", + username="TST_USR", + password="TST_PWD", + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), + extract_tags_as_structured_properties=True, + tag_pattern=AllowDenyPattern( + deny=["TEST_DB.TEST_SCHEMA.my_tag_2:my_value_2"] + ), + structured_property_pattern=AllowDenyPattern( + deny=["TEST_DB.TEST_SCHEMA.my_tag_[0-9]"] + ), + include_technical_schema=True, + include_table_lineage=False, + include_column_lineage=False, + include_usage_stats=False, + 
include_operational_stats=False, + extract_tags=TagOption.without_lineage, + ) + + pipeline = Pipeline( + config=PipelineConfig( + source=SourceConfig(type="snowflake", config=tag_config), + sink=DynamicTypedConfig(type="blackhole", config={}), + ) + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + + source_report = pipeline.source.get_report() + assert isinstance(source_report, SnowflakeV2Report) + assert source_report.tags_scanned == 5 + assert source_report._processed_tags == { + "snowflake.other_db.other_schema.my_other_tag", + "snowflake.test_db.test_schema.security", + } From 4633fbfeca963aa7f2fdb49c879446170882c865 Mon Sep 17 00:00:00 2001 From: ryota-cloud Date: Tue, 14 Jan 2025 11:09:08 -0800 Subject: [PATCH 074/249] fix(ingestion) fix snappy inconsistent version in ingestion (#12321) --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 284092e2b14f49..e4fd70a99e6434 100644 --- a/build.gradle +++ b/build.gradle @@ -195,7 +195,7 @@ project.ext.externalDependency = [ 'kafkaAvroSerde': "io.confluent:kafka-streams-avro-serde:$kafkaVersion", 'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4', 'kafkaClients': "org.apache.kafka:kafka-clients:$kafkaVersion-ccs", - 'snappy': 'org.xerial.snappy:snappy-java:1.1.10.4', + 'snappy': 'org.xerial.snappy:snappy-java:1.1.10.5', 'logbackClassic': "ch.qos.logback:logback-classic:$logbackClassic", 'logbackClassicJava8' : "ch.qos.logback:logback-classic:$logbackClassicJava8", 'slf4jApi': "org.slf4j:slf4j-api:$slf4jVersion", From e1d57e3f213bef8ba863426c0fbd2de098f6c6f6 Mon Sep 17 00:00:00 2001 From: kevinkarchacryl Date: Tue, 14 Jan 2025 15:53:51 -0500 Subject: [PATCH 075/249] Super type dbt redshift (#12337) --- .../ingestion/source/redshift/redshift.py | 1 + .../datahub/ingestion/source/sql/sql_types.py | 2 +- .../tests/unit/test_dbt_source.py | 34 ++++++++++++++++++- 3 files changed, 35 insertions(+), 2 
deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 5371017a2a3212..9bfca941ce48fb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -276,6 +276,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): "HLLSKETCH": NullType, "TIMETZ": TimeType, "VARBYTE": StringType, + "SUPER": NullType, } def get_platform_instance_id(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index 9ec73a9af96dc5..1acf962d7c4750 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -93,7 +93,7 @@ "regtype": None, "regrole": None, "regnamespace": None, - "super": None, + "super": NullType, "uuid": StringType, "pg_lsn": None, "tsvector": None, # text search vector diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 0a869297837014..ff22ffedc6228f 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -9,7 +9,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.dbt import dbt_cloud from datahub.ingestion.source.dbt.dbt_cloud import DBTCloudConfig -from datahub.ingestion.source.dbt.dbt_common import DBTNode +from datahub.ingestion.source.dbt.dbt_common import ( + DBTNode, + DBTSourceReport, + NullTypeClass, + get_column_type, +) from datahub.ingestion.source.dbt.dbt_core import ( DBTCoreConfig, DBTCoreSource, @@ -461,3 +466,30 @@ def test_dbt_time_parsing() -> None: assert timestamp.tzinfo is not None and timestamp.tzinfo.utcoffset( timestamp ) == timedelta(0) + + +def 
test_get_column_type_redshift(): + report = DBTSourceReport() + dataset_name = "test_dataset" + + # Test 'super' type which should not show any warnings/errors + result_super = get_column_type(report, dataset_name, "super", "redshift") + assert isinstance(result_super.type, NullTypeClass) + assert ( + len(report.infos) == 0 + ), "No warnings should be generated for known SUPER type" + + # Test unknown type, which generates a warning but resolves to NullTypeClass + unknown_type = "unknown_type" + result_unknown = get_column_type(report, dataset_name, unknown_type, "redshift") + assert isinstance(result_unknown.type, NullTypeClass) + + # exact warning message for an unknown type + expected_context = f"{dataset_name} - {unknown_type}" + messages = [info for info in report.infos if expected_context in str(info.context)] + assert len(messages) == 1 + assert messages[0].title == "Unable to map column types to DataHub types" + assert ( + messages[0].message + == "Got an unexpected column type. The column's parsed field type will not be populated." 
+ ) From 90fe5b6cb71a953042ebba33af6d6431e6ae0046 Mon Sep 17 00:00:00 2001 From: ryota-cloud Date: Tue, 14 Jan 2025 14:26:59 -0800 Subject: [PATCH 076/249] fix(docker) add new gradle profile for consumer debug purpose (#12338) --- docker/build.gradle | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker/build.gradle b/docker/build.gradle index 7b36c0d9acdcf0..576e47a53e6ef5 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,6 +42,15 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], + + 'quickstartDebugConsumers': [ + profile: 'debug-consumers', + modules: python_services_modules + backend_profile_modules + [':datahub-frontend', + ':metadata-jobs:mce-consumer-job', + ':metadata-jobs:mae-consumer-job'], + isDebug: true + ], + 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ From 94b9da0bd8d9c04a5566a3c731f2f5418fc3eb0a Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 14 Jan 2025 17:28:34 -0600 Subject: [PATCH 077/249] feat(entityVersioning): initial implementation (#12166) --- .../datahub/graphql/GmsGraphQLEngine.java | 15 + .../datahub/graphql/GmsGraphQLEngineArgs.java | 2 + .../versioning/LinkAssetVersionResolver.java | 88 +++ .../UnlinkAssetVersionResolver.java | 67 ++ .../src/main/resources/entity.graphql | 60 ++ .../LinkAssetVersionResolverTest.java | 102 +++ .../UnlinkAssetVersionResolverTest.java | 123 ++++ docker/profiles/docker-compose.gms.yml | 8 + .../metadata/aspect/AspectRetriever.java | 16 +- .../aspect/CachingAspectRetriever.java | 6 + .../linkedin/metadata/aspect/ReadItem.java | 1 + .../patch/template/AspectTemplateEngine.java | 20 +- .../common/VersionPropertiesTemplate.java | 44 ++ .../metadata/entity/SearchRetriever.java | 52 +- .../registry/SnapshotEntityRegistry.java | 2 + .../metadata/aspect/MockAspectRetriever.java | 19 +- .../java/com/linkedin/metadata/Constants.java | 13 + 
metadata-io/build.gradle | 1 + .../client/EntityClientAspectRetriever.java | 19 + .../entity/EntityServiceAspectRetriever.java | 12 + .../AlphanumericSortIdGenerator.java | 70 ++ .../EntityVersioningServiceImpl.java | 356 +++++++++++ .../sideeffects/VersionSetSideEffect.java | 137 ++++ .../VersionPropertiesValidator.java | 158 +++++ .../VersionSetPropertiesValidator.java | 80 +++ .../search/SearchServiceSearchRetriever.java | 31 +- .../SearchDocumentTransformer.java | 2 +- .../metadata/search/utils/ESUtils.java | 36 ++ .../service/UpdateIndicesService.java | 3 +- .../AlphanumericSortIdGeneratorTest.java | 62 ++ .../EntityVersioningServiceTest.java | 603 ++++++++++++++++++ .../sideeffects/VersionSetSideEffectTest.java | 229 +++++++ .../VersionPropertiesValidatorTest.java | 165 +++++ .../VersionSetPropertiesValidatorTest.java | 139 ++++ .../AutocompleteRequestHandlerTest.java | 156 +++++ .../request/SearchRequestHandlerTest.java | 244 ++++++- .../SearchDocumentTransformerTest.java | 19 + .../com/linkedin/common/VersionProperties.pdl | 77 +++ .../com/linkedin/common/VersionTag.pdl | 1 + .../linkedin/metadata/key/VersionSetKey.pdl | 20 + .../linkedin/metadata/query/SearchFlags.pdl | 5 + .../versionset/VersionSetProperties.pdl | 24 + .../src/main/resources/entity-registry.yml | 7 + .../graphql/featureflags/FeatureFlags.java | 1 + .../src/main/resources/application.yaml | 1 + .../EntityVersioningServiceFactory.java | 21 + .../factory/graphql/GraphQLEngineFactory.java | 5 +- .../SpringStandardPluginConfiguration.java | 67 ++ .../delegates/DatahubUsageEventsImplTest.java | 4 + .../delegates/EntityApiDelegateImplTest.java | 3 + .../GlobalControllerExceptionHandler.java | 24 + .../openapi/config/SpringWebConfig.java | 7 +- .../openapi/v3/OpenAPIV3Generator.java | 243 +++++-- .../v3/controller/EntityController.java | 124 +++- .../openapi/v3/OpenAPIV3GeneratorTest.java | 6 +- .../v3/controller/EntityControllerTest.java | 218 ++++++- 
.../com.linkedin.entity.aspects.snapshot.json | 4 + ...com.linkedin.entity.entities.snapshot.json | 10 + .../com.linkedin.entity.runs.snapshot.json | 4 + ...nkedin.operations.operations.snapshot.json | 4 + ...m.linkedin.platform.platform.snapshot.json | 4 + .../versioning/EntityVersioningService.java | 36 ++ .../versioning/VersionPropertiesInput.java | 20 + .../metadata/search/utils/QueryUtils.java | 8 + .../authorization/PoliciesConfig.java | 11 +- .../tests/entity_versioning/__init__.py | 0 .../entity_versioning/test_versioning.py | 64 ++ test-models/build.gradle | 1 + 68 files changed, 4063 insertions(+), 121 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGenerator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffect.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidator.java create mode 100644 
metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl rename {li-utils => metadata-models}/src/main/pegasus/com/linkedin/common/VersionTag.pdl (78%) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java create mode 100644 smoke-test/tests/entity_versioning/__init__.py create mode 100644 smoke-test/tests/entity_versioning/test_versioning.py diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 3c46c1a8dce35c..b15db80a8487ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -174,6 +174,8 @@ import 
com.linkedin.datahub.graphql.resolvers.embed.UpdateEmbedResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityExistsResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityPrivilegesResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.LinkAssetVersionResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.UnlinkAssetVersionResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchAssignFormResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchRemoveFormResolver; import com.linkedin.datahub.graphql.resolvers.form.CreateDynamicFormAssignmentResolver; @@ -391,6 +393,7 @@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -476,6 +479,7 @@ public class GmsGraphQLEngine { private final RestrictedService restrictedService; private ConnectionService connectionService; private AssertionService assertionService; + private final EntityVersioningService entityVersioningService; private final BusinessAttributeService businessAttributeService; private final FeatureFlags featureFlags; @@ -599,6 +603,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.restrictedService = args.restrictedService; this.connectionService = args.connectionService; this.assertionService = args.assertionService; + this.entityVersioningService = args.entityVersioningService; this.businessAttributeService = args.businessAttributeService; this.ingestionConfiguration = Objects.requireNonNull(args.ingestionConfiguration); @@ -1392,6 +1397,16 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { 
"removeBusinessAttribute", new RemoveBusinessAttributeResolver(this.entityService)); } + if (featureFlags.isEntityVersioning()) { + typeWiring + .dataFetcher( + "linkAssetVersion", + new LinkAssetVersionResolver(this.entityVersioningService, this.featureFlags)) + .dataFetcher( + "unlinkAssetVersion", + new UnlinkAssetVersionResolver( + this.entityVersioningService, this.featureFlags)); + } return typeWiring; }); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index f6ab3a603dbb7b..131f4e87637807 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -88,6 +89,7 @@ public class GmsGraphQLEngineArgs { BusinessAttributeService businessAttributeService; ConnectionService connectionService; AssertionService assertionService; + EntityVersioningService entityVersioningService; // any fork specific args should go below this line } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java new file mode 100644 index 00000000000000..69e049af1e87b7 --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java @@ -0,0 +1,88 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authorization.AuthUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import org.apache.commons.lang.StringUtils; + +/** + * Currently only supports linking the latest version, but may be modified later to support inserts + */ +public class LinkAssetVersionResolver implements DataFetcher> { + + private final EntityVersioningService entityVersioningService; + private final FeatureFlags featureFlags; + + public LinkAssetVersionResolver( + EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { + this.entityVersioningService = entityVersioningService; + this.featureFlags = featureFlags; + } + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + 
final QueryContext context = environment.getContext(); + final LinkVersionInput input = + bindArgument(environment.getArgument("input"), LinkVersionInput.class); + if (!featureFlags.isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet()); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet())); + } + Urn entityUrn = UrnUtils.getUrn(input.getLinkedEntity()); + OperationContext opContext = context.getOperationContext(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new AuthorizationException( + String.format( + "%s is unauthorized to %s entities %s and %s", + opContext.getAuthentication().getActor().toUrnStr(), + UPDATE, + input.getVersionSet(), + input.getLinkedEntity())); + } + VersionPropertiesInput versionPropertiesInput = + new VersionPropertiesInput( + input.getComment(), + input.getVersion(), + input.getSourceTimestamp(), + input.getSourceCreator()); + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + List linkResults = + entityVersioningService.linkLatestVersion( + opContext, versionSetUrn, entityUrn, versionPropertiesInput); + + return linkResults.stream() + .filter( + ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString())) + .map(ingestResult -> ingestResult.getUrn().toString()) + .findAny() + .orElse(StringUtils.EMPTY); + }, + this.getClass().getSimpleName(), + "get"); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java new file mode 100644 
index 00000000000000..3d5027a0d668ac --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java @@ -0,0 +1,67 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authorization.AuthUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.util.concurrent.CompletableFuture; + +public class UnlinkAssetVersionResolver implements DataFetcher> { + + private final EntityVersioningService entityVersioningService; + private final FeatureFlags featureFlags; + + public UnlinkAssetVersionResolver( + EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { + this.entityVersioningService = entityVersioningService; + this.featureFlags = featureFlags; + } + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + if (!featureFlags.isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + final QueryContext context = environment.getContext(); + final 
UnlinkVersionInput input = + bindArgument(environment.getArgument("input"), UnlinkVersionInput.class); + Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet()); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet())); + } + Urn entityUrn = UrnUtils.getUrn(input.getUnlinkedEntity()); + OperationContext opContext = context.getOperationContext(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new AuthorizationException( + String.format( + "%s is unauthorized to %s entities %s and %s", + opContext.getAuthentication().getActor(), + UPDATE, + input.getVersionSet(), + input.getUnlinkedEntity())); + } + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + return true; + }, + this.getClass().getSimpleName(), + "get"); + } +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 9dd1948e18e042..b47be7ae32b2c4 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,6 +956,16 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean + + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): String + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -12911,6 +12921,56 @@ input ListBusinessAttributesInput { query: String } +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! 
+ + """ + Version Tag label for the version, should be unique within a Version Set + """ + version: String! + + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + """ The result obtained when listing Business Attribute """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..690856263fccc5 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -0,0 +1,102 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; +import graphql.schema.DataFetchingEnvironment; +import 
org.mockito.Mockito; +import org.testng.annotations.Test; + +public class LinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + IngestResult mockResult = + IngestResult.builder().urn(Urn.createFromString(TEST_ENTITY_URN)).build(); + + Mockito.when( + mockService.linkLatestVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + any(VersionPropertiesInput.class))) + .thenReturn(ImmutableList.of(mockResult)); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + input.setComment("Test comment"); + input.setVersion("v1"); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + String result = resolver.get(mockEnv).get(); + assertEquals(result, TEST_ENTITY_URN); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute 
resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..0000ad24a04537 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -0,0 +1,123 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import 
com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class UnlinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.when( + mockService.unlinkVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)))) + .thenReturn(Collections.emptyList()); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService) + .unlinkVersion( + any(), eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), eq(UrnUtils.getUrn(TEST_ENTITY_URN))); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + 
EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetServiceException() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.doThrow(new RuntimeException("Service error")) + .when(mockService) + .unlinkVersion(any(), any(), any()); + + UnlinkAssetVersionResolver resolver = new 
UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 64163ef970080a..ada7df51e20bef 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -67,6 +67,7 @@ x-datahub-system-update-service: &datahub-system-update-service SCHEMA_REGISTRY_SYSTEM_UPDATE: ${SCHEMA_REGISTRY_SYSTEM_UPDATE:-true} SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS: ${SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS:-true} SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION: ${SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins @@ -80,6 +81,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev SKIP_ELASTICSEARCH_CHECK: false REPROCESS_DEFAULT_BROWSE_PATHS_V2: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false} JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5003' + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ../../datahub-upgrade/build/libs/:/datahub/datahub-upgrade/bin/ - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources @@ -101,6 +103,7 @@ x-datahub-gms-service: &datahub-gms-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] 
ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s @@ -131,6 +134,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev SEARCH_SERVICE_ENABLE_CACHE: false LINEAGE_SEARCH_CACHE_ENABLED: false SHOW_BROWSE_V2: true + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml @@ -155,12 +159,14 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service - ${DATAHUB_LOCAL_MAE_ENV:-empty2.env} environment: &datahub-mae-consumer-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh - ../../metadata-models/src/main/resources/:/datahub/datahub-mae-consumer/resources @@ -183,12 +189,14 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev 
<<: *datahub-mce-consumer-service image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh - ../../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java index e34df7db481189..87939e14bfde68 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java @@ -5,11 +5,9 @@ import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.util.Pair; import java.util.Collections; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -50,19 +48,7 @@ default SystemAspect getLatestSystemAspect( Map> getLatestSystemAspects(Map> urnAspectNames); @Nonnull - default Map entityExists(Set urns) { - Set keyAspectNames = - urns.stream() - .map(Urn::getEntityType) - .distinct() - .map(entityType -> getEntityRegistry().getEntitySpec(entityType).getKeyAspectName()) - .collect(Collectors.toSet()); - - Map> latest = getLatestAspectObjects(urns, keyAspectNames); - return urns.stream() - .map(urn -> Pair.of(urn, latest.containsKey(urn))) - .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); - } + Map entityExists(Set urns); @Nonnull EntityRegistry getEntityRegistry(); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java 
b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java index 375dd8cf8911e1..7b3233921d039e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java @@ -29,6 +29,12 @@ public Map> getLatestSystemAspects( return Collections.emptyMap(); } + @Nonnull + @Override + public Map entityExists(Set urns) { + return Collections.emptyMap(); + } + @Nonnull @Override public EntityRegistry getEntityRegistry() { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java index 106596bf80ccf0..341dec4d4741c7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java @@ -36,6 +36,7 @@ default String getAspectName() { @Nullable RecordTemplate getRecordTemplate(); + @Nullable default T getAspect(Class clazz) { return getAspect(clazz, getRecordTemplate()); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java index ce36b7e77a2b16..821dad13aa0c3c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java @@ -1,20 +1,6 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.linkedin.metadata.Constants.CHART_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DASHBOARD_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATASET_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_FLOW_INFO_ASPECT_NAME; -import static 
com.linkedin.metadata.Constants.DATA_JOB_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_PRODUCT_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.EDITABLE_SCHEMA_METADATA_ASPECT_NAME; -import static com.linkedin.metadata.Constants.FORM_INFO_ASPECT_NAME; -import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.GLOSSARY_TERMS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.OWNERSHIP_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.UPSTREAM_LINEAGE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.linkedin.data.template.RecordTemplate; @@ -50,7 +36,9 @@ public class AspectTemplateEngine { DASHBOARD_INFO_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, - FORM_INFO_ASPECT_NAME) + FORM_INFO_ASPECT_NAME, + UPSTREAM_LINEAGE_ASPECT_NAME, + VERSION_PROPERTIES_ASPECT_NAME) .collect(Collectors.toSet()); private final Map> _aspectTemplateMap; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java new file mode 100644 index 00000000000000..2f7d24e2cdb4b7 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/VersionPropertiesTemplate.java @@ -0,0 +1,44 @@ +package com.linkedin.metadata.aspect.patch.template.common; + +import com.fasterxml.jackson.databind.JsonNode; +import com.linkedin.common.VersionProperties; +import 
com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.patch.template.Template; +import javax.annotation.Nonnull; + +public class VersionPropertiesTemplate implements Template { + + public static final String IS_LATEST_FIELD = "isLatest"; + + @Override + public VersionProperties getSubtype(RecordTemplate recordTemplate) throws ClassCastException { + if (recordTemplate instanceof VersionProperties) { + return (VersionProperties) recordTemplate; + } + throw new ClassCastException("Unable to cast RecordTemplate to VersionProperties"); + } + + @Override + public Class getTemplateType() { + return VersionProperties.class; + } + + @Nonnull + @Override + public VersionProperties getDefault() { + throw new UnsupportedOperationException( + "Unable to generate default version properties, no sensible default for " + "version set."); + } + + @Nonnull + @Override + public JsonNode transformFields(JsonNode baseNode) { + return baseNode; + } + + @Nonnull + @Override + public JsonNode rebaseFields(JsonNode patched) { + return patched; + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java index d4894c97015f8f..19dc89d26cb1af 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -1,6 +1,10 @@ package com.linkedin.metadata.entity; +import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntityArray; import java.util.List; @@ -8,6 +12,40 @@ import javax.annotation.Nullable; public interface SearchRetriever { + + 
SearchFlags RETRIEVER_SEARCH_FLAGS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(false) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false); + + SearchFlags RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(true) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false) + .setFilterNonLatestVersions(false); + + /** + * Allows for configuring the sort, should only be used when sort specified is unique. More often + * the default is desirable to just use the urnSort + */ + ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags); + /** * Returns search results for the given entities, filtered and sorted. * @@ -17,11 +55,17 @@ public interface SearchRetriever { * @param count size of a page * @return result of the search */ - ScrollResult scroll( + default ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count); + int count) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + return scroll( + entities, filters, scrollId, count, ImmutableList.of(urnSort), RETRIEVER_SEARCH_FLAGS); + } SearchRetriever EMPTY = new EmptySearchRetriever(); @@ -32,7 +76,9 @@ public ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count) { + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags) { ScrollResult empty = new ScrollResult(); empty.setEntities(new SearchEntityArray()); empty.setNumEntities(0); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java 
b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java index 16df2d452a619e..f4d6799bb476f5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java @@ -12,6 +12,7 @@ import com.linkedin.metadata.aspect.patch.template.common.GlossaryTermsTemplate; import com.linkedin.metadata.aspect.patch.template.common.OwnershipTemplate; import com.linkedin.metadata.aspect.patch.template.common.StructuredPropertiesTemplate; +import com.linkedin.metadata.aspect.patch.template.common.VersionPropertiesTemplate; import com.linkedin.metadata.aspect.patch.template.dashboard.DashboardInfoTemplate; import com.linkedin.metadata.aspect.patch.template.dataflow.DataFlowInfoTemplate; import com.linkedin.metadata.aspect.patch.template.datajob.DataJobInfoTemplate; @@ -113,6 +114,7 @@ private AspectTemplateEngine populateTemplateEngine(Map aspe aspectSpecTemplateMap.put( STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, new StructuredPropertyDefinitionTemplate()); aspectSpecTemplateMap.put(FORM_INFO_ASPECT_NAME, new FormInfoTemplate()); + aspectSpecTemplateMap.put(VERSION_PROPERTIES_ASPECT_NAME, new VersionPropertiesTemplate()); return new AspectTemplateEngine(aspectSpecTemplateMap); } diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 98a6d59004a92a..15f168f74a32df 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -20,11 +20,14 @@ import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; import 
org.mockito.Mockito; public class MockAspectRetriever implements CachingAspectRetriever { private final Map> data; private final Map> systemData = new HashMap<>(); + @Getter @Setter private EntityRegistry entityRegistry; public MockAspectRetriever(@Nonnull Map> data) { this.data = @@ -60,6 +63,7 @@ public MockAspectRetriever(@Nonnull Map> data) { .build()); } } + this.entityRegistry = Mockito.mock(EntityRegistry.class); } public MockAspectRetriever( @@ -71,6 +75,15 @@ public MockAspectRetriever(Urn propertyUrn, StructuredPropertyDefinition definit this(Map.of(propertyUrn, List.of(definition))); } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream().collect(Collectors.toMap(urn -> urn, data::containsKey)); + } + } + @Nonnull @Override public Map> getLatestAspectObjects( @@ -90,10 +103,4 @@ public Map> getLatestSystemAspects( .map(urn -> Pair.of(urn, systemData.get(urn))) .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } - - @Nonnull - @Override - public EntityRegistry getEntityRegistry() { - return Mockito.mock(EntityRegistry.class); - } } diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 01c33a2530efb5..463376edcdf259 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -128,6 +128,7 @@ public class Constants { public static final String INCIDENTS_SUMMARY_ASPECT_NAME = "incidentsSummary"; public static final String DOCUMENTATION_ASPECT_NAME = "documentation"; public static final String DATA_TRANSFORM_LOGIC_ASPECT_NAME = "dataTransformLogic"; + public static final String VERSION_PROPERTIES_ASPECT_NAME = "versionProperties"; // User public static final String CORP_USER_KEY_ASPECT_NAME = "corpUserKey"; @@ -464,6 +465,18 @@ public class Constants { // Incidents public static final String ENTITY_REF = 
"entities"; + // Version Set + public static final String VERSION_SET_ENTITY_NAME = "versionSet"; + public static final String VERSION_SET_KEY_ASPECT_NAME = "versionSetKey"; + public static final String VERSION_SET_PROPERTIES_ASPECT_NAME = "versionSetProperties"; + + // Versioning related + public static final String INITIAL_VERSION_SORT_ID = "AAAAAAAA"; + public static final String VERSION_SORT_ID_FIELD_NAME = "versionSortId"; + public static final String IS_LATEST_FIELD_NAME = "isLatest"; + + public static final String DISPLAY_PROPERTIES_ASPECT_NAME = "displayProperties"; + // Config public static final String ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH = "opensearch"; public static final String ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH = "elasticsearch"; diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 88bbfa2e10c4c1..aab29101b30f71 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -71,6 +71,7 @@ dependencies { testImplementation project(':datahub-graphql-core') testImplementation project(path: ':metadata-integration:java:datahub-client', configuration: 'shadow') testImplementation project(':metadata-service:auth-impl') + testImplementation project(':li-utils') testImplementation externalDependency.testng testImplementation externalDependency.h2 testImplementation externalDependency.mysqlConnector diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java index 669ec751f87c69..bb9a5ad68c959b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java @@ -56,6 +56,25 @@ public Map> getLatestAspectObjects( } } + @Nonnull + public Map entityExists(Set urns) { + if (urns.isEmpty()) { + return Map.of(); + } else { + return urns.stream() + .collect( + 
/**
 * Generates monotonically increasing, fixed-length sort ids made of uppercase letters
 * ("AAAAAAAA", "AAAAAAAB", ..., "ZZZZZZZZ"). Used to order entity versions within a Version Set.
 *
 * <p>Note: despite the class name, ids are purely alphabetic (A-Z), not alphanumeric.
 */
public class AlphanumericSortIdGenerator {

  private AlphanumericSortIdGenerator() {}

  /** Fixed length of every sort id. */
  private static final int STRING_LENGTH = 8;

  /** Allowed characters in ascending sort order. */
  private static final String ALLOWED_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  /**
   * The smallest id ("AAAAAAAA"); must stay in sync with Constants.INITIAL_VERSION_SORT_ID.
   * Also the value returned when the maximum id wraps around.
   */
  private static final String MIN_SORT_ID =
      String.valueOf(ALLOWED_CHARS.charAt(0)).repeat(STRING_LENGTH);

  /**
   * Increments an 8-character sort id. For example: "AAAAAAAA" -> "AAAAAAAB" and
   * "AAAAAAAZ" -> "AAAAAABA".
   *
   * <p>On overflow (input "ZZZZZZZZ") the id silently wraps around to "AAAAAAAA"; callers that
   * rely on strict monotonicity must not exceed 26^8 versions per set.
   *
   * @param currentId the current 8-character id
   * @return the next id in sequence
   * @throws IllegalArgumentException if the input is null, not 8 characters, or contains a
   *     character outside A-Z
   */
  public static String increment(String currentId) {
    if (currentId == null || currentId.length() != STRING_LENGTH) {
      throw new IllegalArgumentException("Input string must be exactly 8 characters long");
    }

    char[] currentIdChars = currentId.toCharArray();

    // Reject any character outside the allowed alphabet before mutating anything.
    for (char c : currentIdChars) {
      if (getCharIndex(c) == -1) {
        throw new IllegalArgumentException("Invalid character in input string: " + c);
      }
    }

    // Ripple-carry increment starting from the rightmost position.
    for (int i = STRING_LENGTH - 1; i >= 0; i--) {
      int currentCharIndex = getCharIndex(currentIdChars[i]);

      // Not at the last allowed character: bump this position and stop carrying.
      if (currentCharIndex < ALLOWED_CHARS.length() - 1) {
        currentIdChars[i] = ALLOWED_CHARS.charAt(currentCharIndex + 1);
        return new String(currentIdChars);
      }

      // At 'Z': reset this position and carry into the next position to the left.
      currentIdChars[i] = ALLOWED_CHARS.charAt(0);
    }

    // Every position carried over: the maximum id was reached; wrap to the minimum.
    return MIN_SORT_ID;
  }

  /** Returns the sort index of {@code c} within the allowed alphabet, or -1 if not allowed. */
  private static int getCharIndex(char c) {
    return ALLOWED_CHARS.indexOf(c);
  }
}
com.linkedin.metadata.entity.RollbackResult; +import com.linkedin.metadata.entity.RollbackRunResult; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.key.VersionSetKey; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EntityVersioningServiceImpl implements EntityVersioningService { + + private final EntityService entityService; + + public EntityVersioningServiceImpl(EntityService entityService) { + this.entityService = entityService; + } + + /** + * Generates a new set of VersionProperties for the latest version and links it to the specified + * version set. If the specified version set does not yet exist, will create it. Order of + * operations here is important: 1. Create initial Version Set if necessary, do not generate + * Version Set Properties 2. Create Version Properties for specified entity. If this aspect + * already exists will fail. 3. 
Generate version properties with the properly set latest version + * Will eventually want to add in the scheme here as a parameter + * + * @return ingestResult -> the results of the ingested linked version + */ + @Override + public List linkLatestVersion( + OperationContext opContext, + Urn versionSet, + Urn newLatestVersion, + VersionPropertiesInput inputProperties) { + List proposals = new ArrayList<>(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); + String sortId; + Long versionSetConstraint; + Long versionPropertiesConstraint; + VersionSetKey versionSetKey = + (VersionSetKey) + EntityKeyUtils.convertUrnToEntityKey( + versionSet, opContext.getEntityRegistryContext().getKeyAspectSpec(versionSet)); + if (!versionSetKey.getEntityType().equals(newLatestVersion.getEntityType())) { + throw new IllegalArgumentException( + "Entity type must match Version Set's specified type: " + + versionSetKey.getEntityType() + + " invalid type: " + + newLatestVersion.getEntityType()); + } + if (!aspectRetriever.entityExists(ImmutableSet.of(versionSet)).get(versionSet)) { + MetadataChangeProposal versionSetKeyProposal = new MetadataChangeProposal(); + versionSetKeyProposal.setEntityUrn(versionSet); + versionSetKeyProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetKeyProposal.setAspectName(VERSION_SET_KEY_ASPECT_NAME); + versionSetKeyProposal.setAspect(GenericRecordUtils.serializeAspect(versionSetKey)); + versionSetKeyProposal.setChangeType(ChangeType.CREATE_ENTITY); + entityService.ingestProposal( + opContext, versionSetKeyProposal, opContext.getAuditStamp(), false); + + sortId = INITIAL_VERSION_SORT_ID; + versionSetConstraint = -1L; + versionPropertiesConstraint = -1L; + } else { + SystemAspect versionSetPropertiesAspect = + aspectRetriever.getLatestSystemAspect(versionSet, VERSION_SET_PROPERTIES_ASPECT_NAME); + VersionSetProperties versionSetProperties = + RecordUtils.toRecordTemplate( + VersionSetProperties.class, 
versionSetPropertiesAspect.getRecordTemplate().data()); + versionSetConstraint = + versionSetPropertiesAspect + .getSystemMetadataVersion() + .orElse(versionSetPropertiesAspect.getVersion()); + SystemAspect latestVersion = + aspectRetriever.getLatestSystemAspect( + versionSetProperties.getLatest(), VERSION_PROPERTIES_ASPECT_NAME); + VersionProperties latestVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, latestVersion.getRecordTemplate().data()); + versionPropertiesConstraint = + latestVersion.getSystemMetadataVersion().orElse(latestVersion.getVersion()); + // When more impls for versioning scheme are set up, this will need to be resolved to the + // correct scheme generation strategy + sortId = AlphanumericSortIdGenerator.increment(latestVersionProperties.getSortId()); + } + + SystemAspect currentVersionPropertiesAspect = + aspectRetriever.getLatestSystemAspect(newLatestVersion, VERSION_PROPERTIES_ASPECT_NAME); + if (currentVersionPropertiesAspect != null) { + VersionProperties currentVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, currentVersionPropertiesAspect.getRecordTemplate().data()); + if (currentVersionProperties.getVersionSet().equals(versionSet)) { + return new ArrayList<>(); + } else { + throw new IllegalStateException( + String.format( + "Version already exists for specified entity: %s for a different Version Set: %s", + newLatestVersion, currentVersionProperties.getVersionSet())); + } + } + + VersionTag versionTag = new VersionTag(); + versionTag.setVersionTag(inputProperties.getVersion()); + MetadataAttribution metadataAttribution = new MetadataAttribution(); + metadataAttribution.setActor(opContext.getActorContext().getActorUrn()); + metadataAttribution.setTime(System.currentTimeMillis()); + versionTag.setMetadataAttribution(metadataAttribution); + VersionProperties versionProperties = + new VersionProperties() + .setVersionSet(versionSet) + .setComment(inputProperties.getComment(), 
SetMode.IGNORE_NULL) + .setVersion(versionTag) + .setMetadataCreatedTimestamp(opContext.getAuditStamp()) + .setSortId(sortId); + if (inputProperties.getSourceCreationTimestamp() != null) { + + AuditStamp sourceCreatedAuditStamp = + new AuditStamp().setTime(inputProperties.getSourceCreationTimestamp()); + Urn actor = null; + if (inputProperties.getSourceCreator() != null) { + actor = new CorpuserUrn(inputProperties.getSourceCreator()); + } + sourceCreatedAuditStamp.setActor(UrnUtils.getActorOrDefault(actor)); + + versionProperties.setSourceCreatedTimestamp(sourceCreatedAuditStamp); + } + MetadataChangeProposal versionPropertiesProposal = new MetadataChangeProposal(); + versionPropertiesProposal.setEntityUrn(newLatestVersion); + versionPropertiesProposal.setEntityType(newLatestVersion.getEntityType()); + versionPropertiesProposal.setAspectName(VERSION_PROPERTIES_ASPECT_NAME); + versionPropertiesProposal.setAspect(GenericRecordUtils.serializeAspect(versionProperties)); + versionPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap headerMap = new StringMap(); + headerMap.put(HTTP_HEADER_IF_VERSION_MATCH, versionPropertiesConstraint.toString()); + versionPropertiesProposal.setChangeType(ChangeType.UPSERT); + proposals.add(versionPropertiesProposal); + + // Might want to refactor this to a Patch w/ Create if not exists logic if more properties get + // added + // to Version Set Properties + VersionSetProperties versionSetProperties = + new VersionSetProperties() + .setVersioningScheme( + VersioningScheme + .ALPHANUMERIC_GENERATED_BY_DATAHUB) // Only one available, will need to add to + // input properties once more are added. 
+ .setLatest(newLatestVersion); + MetadataChangeProposal versionSetPropertiesProposal = new MetadataChangeProposal(); + versionSetPropertiesProposal.setEntityUrn(versionSet); + versionSetPropertiesProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetPropertiesProposal.setAspectName(VERSION_SET_PROPERTIES_ASPECT_NAME); + versionSetPropertiesProposal.setAspect( + GenericRecordUtils.serializeAspect(versionSetProperties)); + versionSetPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap versionSetHeaderMap = new StringMap(); + versionSetHeaderMap.put(HTTP_HEADER_IF_VERSION_MATCH, versionSetConstraint.toString()); + versionSetPropertiesProposal.setHeaders(versionSetHeaderMap); + proposals.add(versionSetPropertiesProposal); + + return entityService.ingestProposal( + opContext, + AspectsBatchImpl.builder() + .mcps(proposals, opContext.getAuditStamp(), opContext.getRetrieverContext()) + .build(), + false); + } + + /** + * Unlinks a version from a version set. Will attempt to set up the previous version as the new + * latest. This fully removes the version properties and unversions the specified entity. 
+ * + * @param opContext operational context containing various information about the current execution + * @param linkedVersion the currently linked latest versioned entity urn + * @return the deletion result + */ + @Override + public List unlinkVersion( + OperationContext opContext, Urn versionSet, Urn linkedVersion) { + List deletedAspects = new ArrayList<>(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); + SystemAspect linkedVersionPropertiesAspect = + aspectRetriever.getLatestSystemAspect(linkedVersion, VERSION_PROPERTIES_ASPECT_NAME); + // Not currently versioned, do nothing + if (linkedVersionPropertiesAspect == null) { + return deletedAspects; + } + VersionProperties linkedVersionProperties = + RecordUtils.toRecordTemplate( + VersionProperties.class, linkedVersionPropertiesAspect.getRecordTemplate().data()); + Urn versionSetUrn = linkedVersionProperties.getVersionSet(); + if (!versionSet.equals(versionSetUrn)) { + throw new IllegalArgumentException( + String.format( + "Version is not linked to specified version set: %s but is linked to: %s", + versionSet, versionSetUrn)); + } + // Delete latest version properties + entityService + .deleteAspect( + opContext, + linkedVersion.toString(), + VERSION_PROPERTIES_ASPECT_NAME, + Collections.emptyMap(), + true) + .ifPresent(deletedAspects::add); + + // Get Version Set details + VersionSetKey versionSetKey = + (VersionSetKey) + EntityKeyUtils.convertUrnToEntityKey( + versionSetUrn, + opContext.getEntityRegistryContext().getKeyAspectSpec(versionSetUrn)); + SearchRetriever searchRetriever = opContext.getRetrieverContext().getSearchRetriever(); + + // Find current latest version and previous + ScrollResult linkedVersions = + searchRetriever.scroll( + ImmutableList.of(versionSetKey.getEntityType()), + QueryUtils.newConjunctiveFilter( + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, versionSetUrn.toString())), + null, + 2, + ImmutableList.of( + new SortCriterion() + 
.setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(SortOrder.DESCENDING)), + SearchRetriever.RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS); + String updatedLatestVersionUrn = null; + + SearchEntityArray linkedEntities = linkedVersions.getEntities(); + SystemAspect versionSetPropertiesAspect = + aspectRetriever.getLatestSystemAspect(versionSetUrn, VERSION_SET_PROPERTIES_ASPECT_NAME); + if (versionSetPropertiesAspect == null) { + throw new IllegalStateException( + String.format( + "Version Set Properties must exist if entity version exists: %s", versionSetUrn)); + } + VersionSetProperties versionSetProperties = + RecordUtils.toRecordTemplate( + VersionSetProperties.class, versionSetPropertiesAspect.getRecordTemplate().data()); + long versionConstraint = + versionSetPropertiesAspect + .getSystemMetadataVersion() + .orElse(versionSetPropertiesAspect.getVersion()); + boolean isLatest = linkedVersion.equals(versionSetProperties.getLatest()); + + if (linkedEntities.size() == 2 && isLatest) { + // If the version to unlink is the same as the last search result and is currently the latest + // based on SQL, set to one immediately before. + // Otherwise set to most current one in search results assuming we have not gotten the index + // update for a recent update to latest. + // Does assume that there are not multiple index updates waiting in the queue so rapid fire + // updates intermixed with deletes should be avoided. + SearchEntity maybeLatestVersion = linkedEntities.get(0); + if (maybeLatestVersion.getEntity().equals(linkedVersion)) { + SearchEntity priorLatestVersion = linkedEntities.get(1); + updatedLatestVersionUrn = priorLatestVersion.getEntity().toString(); + } else { + updatedLatestVersionUrn = maybeLatestVersion.getEntity().toString(); + } + + } else if (linkedEntities.size() == 1 && isLatest) { + // Missing a version, if that version is not the one being unlinked then set as latest + // version. 
Same reasoning as above + SearchEntity maybePriorLatestVersion = linkedEntities.get(0); + if (!linkedVersion.equals(maybePriorLatestVersion.getEntity())) { + updatedLatestVersionUrn = maybePriorLatestVersion.getEntity().toString(); + } else { + // Delete Version Set if we are removing the last version + // TODO: Conditional deletes impl + only do the delete if version match + RollbackRunResult deleteResult = entityService.deleteUrn(opContext, versionSetUrn); + deletedAspects.addAll(deleteResult.getRollbackResults()); + } + } + + if (updatedLatestVersionUrn != null) { + + // Might want to refactor this to a Patch w/ Create if not exists logic if more properties + // get added + // to Version Set Properties + VersionSetProperties newVersionSetProperties = + new VersionSetProperties() + .setVersioningScheme( + VersioningScheme + .ALPHANUMERIC_GENERATED_BY_DATAHUB) // Only one available, will need to add + // to input properties once more are + // added. + .setLatest(UrnUtils.getUrn(updatedLatestVersionUrn)); + MetadataChangeProposal versionSetPropertiesProposal = new MetadataChangeProposal(); + versionSetPropertiesProposal.setEntityUrn(versionSetUrn); + versionSetPropertiesProposal.setEntityType(VERSION_SET_ENTITY_NAME); + versionSetPropertiesProposal.setAspectName(VERSION_SET_PROPERTIES_ASPECT_NAME); + versionSetPropertiesProposal.setAspect( + GenericRecordUtils.serializeAspect(newVersionSetProperties)); + versionSetPropertiesProposal.setChangeType(ChangeType.UPSERT); + StringMap headerMap = new StringMap(); + headerMap.put(HTTP_HEADER_IF_VERSION_MATCH, Long.toString(versionConstraint)); + versionSetPropertiesProposal.setHeaders(headerMap); + entityService.ingestProposal( + opContext, + AspectsBatchImpl.builder() + .mcps( + ImmutableList.of(versionSetPropertiesProposal), + opContext.getAuditStamp(), + opContext.getRetrieverContext()) + .build(), + false); + } + + return deletedAspects; + } +} diff --git 
/**
 * Side effect that updates the isLatest property for the referenced versioned entity's Version
 * Properties aspect.
 *
 * <p>Runs after a VersionSetProperties commit: it emits JSON patches flipping isLatest to false on
 * the previous latest version (if any) and to true on the new latest version.
 */
@Slf4j
@Getter
@Setter
@Accessors(chain = true)
public class VersionSetSideEffect extends MCPSideEffect {
  @Nonnull private AspectPluginConfig config;

  // No pre-commit side effects; all work happens post-commit in postMCPSideEffect.
  @Override
  protected Stream applyMCPSideEffect(
      Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) {
    return Stream.of();
  }

  // For each committed MCL item, emit isLatest patches (only VersionSetProperties items produce
  // output — see updateLatest).
  @Override
  protected Stream postMCPSideEffect(
      Collection mclItems, @Nonnull RetrieverContext retrieverContext) {
    return mclItems.stream().flatMap(item -> updateLatest(item, retrieverContext));
  }

  // Builds the isLatest patch items for one MCL entry; returns an empty stream for aspects other
  // than VersionSetProperties or when the aspect payload cannot be read.
  private static Stream updateLatest(
      MCLItem mclItem, @Nonnull RetrieverContext retrieverContext) {

    if (VERSION_SET_PROPERTIES_ASPECT_NAME.equals(mclItem.getAspectName())) {
      List mcpItems = new ArrayList<>();
      VersionSetProperties versionSetProperties = mclItem.getAspect(VersionSetProperties.class);
      if (versionSetProperties == null) {
        log.error("Unable to process version set properties for urn: {}", mclItem.getUrn());
        return Stream.empty();
      }
      // Set old latest isLatest to false, set new latest isLatest to true
      // This side effect assumes the entity is already versioned, if it is not yet versioned it
      // will fail due
      // to not having set default values for the aspect. This creates an implicit ordering of when
      // aspects should be
      // updated. Version Properties first, then Version Set Properties.
      Urn newLatest = versionSetProperties.getLatest();

      VersionSetProperties previousVersionSetProperties =
          mclItem.getPreviousAspect(VersionSetProperties.class);
      if (previousVersionSetProperties != null) {
        Urn previousLatest = previousVersionSetProperties.getLatest();
        // Only patch the old latest when it actually changed and the entity still exists
        // (it may have been deleted as part of an unlink).
        if (!newLatest.equals(previousLatest)
            && retrieverContext
                .getAspectRetriever()
                .entityExists(Collections.singleton(previousLatest))
                .getOrDefault(previousLatest, false)) {
          EntitySpec entitySpec =
              retrieverContext
                  .getAspectRetriever()
                  .getEntityRegistry()
                  .getEntitySpec(previousLatest.getEntityType());
          GenericJsonPatch.PatchOp previousPatch = new GenericJsonPatch.PatchOp();
          previousPatch.setOp(PatchOperationType.ADD.getValue());
          previousPatch.setPath("/isLatest");
          previousPatch.setValue(false);
          mcpItems.add(
              PatchItemImpl.builder()
                  .urn(previousLatest)
                  .entitySpec(entitySpec)
                  .aspectName(VERSION_PROPERTIES_ASPECT_NAME)
                  .aspectSpec(entitySpec.getAspectSpec(VERSION_PROPERTIES_ASPECT_NAME))
                  .patch(
                      GenericJsonPatch.builder()
                          .patch(List.of(previousPatch))
                          .build()
                          .getJsonPatch())
                  .auditStamp(mclItem.getAuditStamp())
                  .systemMetadata(mclItem.getSystemMetadata())
                  .build(retrieverContext.getAspectRetriever().getEntityRegistry()));
        }
      }

      // Explicitly error here to avoid downstream patch error with less context
      if (retrieverContext
              .getAspectRetriever()
              .getLatestAspectObject(newLatest, VERSION_PROPERTIES_ASPECT_NAME)
          == null) {
        throw new UnsupportedOperationException(
            "Cannot set latest version to unversioned entity: " + newLatest);
      }

      // Mark the new latest version: patch isLatest=true on its VersionProperties aspect.
      EntitySpec entitySpec =
          retrieverContext
              .getAspectRetriever()
              .getEntityRegistry()
              .getEntitySpec(newLatest.getEntityType());
      GenericJsonPatch.PatchOp currentPatch = new GenericJsonPatch.PatchOp();
      currentPatch.setOp(PatchOperationType.ADD.getValue());
      currentPatch.setPath("/isLatest");
      currentPatch.setValue(true);
      mcpItems.add(
          PatchItemImpl.builder()
              .urn(newLatest)
              .entitySpec(entitySpec)
              .aspectName(VERSION_PROPERTIES_ASPECT_NAME)
              .aspectSpec(entitySpec.getAspectSpec(VERSION_PROPERTIES_ASPECT_NAME))
              .patch(GenericJsonPatch.builder().patch(List.of(currentPatch)).build().getJsonPatch())
              .auditStamp(mclItem.getAuditStamp())
              .systemMetadata(mclItem.getSystemMetadata())
              .build(retrieverContext.getAspectRetriever().getEntityRegistry()))
          ;
      return mcpItems.stream();
    }
    return Stream.empty();
  }
}
/**
 * Validates VersionProperties aspect writes: the referenced Version Set must exist with a matching
 * entity type, the sort id must conform to the set's versioning scheme, and clients may not set
 * the computed isLatest field.
 */
@Setter
@Getter
@Slf4j
@Accessors(chain = true)
public class VersionPropertiesValidator extends AspectPayloadValidator {

  @Nonnull private AspectPluginConfig config;

  // Change types whose proposals are checked for client-supplied isLatest.
  private static final Set SHOULD_VALIDATE_PROPOSED =
      ImmutableSet.of(ChangeType.UPDATE, ChangeType.UPSERT, ChangeType.CREATE);

  // Proposal-time check: only VersionProperties items with the change types above.
  @Override
  protected Stream validateProposedAspects(
      @Nonnull Collection mcpItems,
      @Nonnull RetrieverContext retrieverContext) {
    return validatePropertiesProposals(
        mcpItems.stream()
            .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName()))
            .filter(mcpItem -> SHOULD_VALIDATE_PROPOSED.contains(mcpItem.getChangeType()))
            .collect(Collectors.toList()));
  }

  // Pre-commit check: validate version-set existence/type and sort-id scheme for all
  // VersionProperties items.
  @Override
  protected Stream validatePreCommitAspects(
      @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) {
    return validatePropertiesUpserts(
        changeMCPs.stream()
            .filter(changeMCP -> VERSION_PROPERTIES_ASPECT_NAME.equals(changeMCP.getAspectName()))
            .collect(Collectors.toList()),
        retrieverContext);
  }

  /**
   * Checks each item's referenced Version Set: it must exist (key aspect present), its entity type
   * must match the versioned entity, and — when the set already has properties — the item's sort
   * id must conform to the set's versioning scheme.
   */
  @VisibleForTesting
  public static Stream validatePropertiesUpserts(
      @Nonnull Collection mcpItems,
      @Nonnull RetrieverContext retrieverContext) {
    ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection();
    for (BatchItem mcpItem : mcpItems) {
      VersionProperties versionProperties = mcpItem.getAspect(VersionProperties.class);
      // Validate Version Set entity type
      Urn versionSetUrn = versionProperties.getVersionSet();
      // Fetch the set's key and properties aspects in one call.
      Map aspects =
          retrieverContext
              .getAspectRetriever()
              .getLatestAspectObjects(
                  Collections.singleton(versionSetUrn),
                  ImmutableSet.of(VERSION_SET_KEY_ASPECT_NAME, VERSION_SET_PROPERTIES_ASPECT_NAME))
              .get(versionSetUrn);
      if (aspects == null || aspects.isEmpty()) {
        exceptions.addException(mcpItem, "Version Set specified does not exist: " + versionSetUrn);
        continue;
      }
      Optional keyAspect = Optional.ofNullable(aspects.get(VERSION_SET_KEY_ASPECT_NAME));
      if (keyAspect.isPresent()) {
        VersionSetKey versionSetKey =
            RecordUtils.toRecordTemplate(VersionSetKey.class, keyAspect.get().data());
        if (!mcpItem.getEntitySpec().getName().equals(versionSetKey.getEntityType())) {
          exceptions.addException(
              mcpItem,
              "Version Set specified entity type does not match, expected type: "
                  + versionSetKey.getEntityType());
        }

        // Validate sort ID scheme
        String sortId = versionProperties.getSortId();
        Optional versionSetPropertiesAspect =
            Optional.ofNullable(aspects.get(VERSION_SET_PROPERTIES_ASPECT_NAME));
        // Validate sort id matches scheme if version set properties exist
        if (versionSetPropertiesAspect.isPresent()) {
          VersionSetProperties versionSetProperties =
              RecordUtils.toRecordTemplate(
                  VersionSetProperties.class, versionSetPropertiesAspect.get().data());
          VersioningScheme versioningScheme = versionSetProperties.getVersioningScheme();
          switch (versioningScheme) {
            case ALPHANUMERIC_GENERATED_BY_DATAHUB:
              validateDataHubGeneratedScheme(sortId, exceptions, mcpItem);
              break;
            default:
              exceptions.addException(mcpItem, "Unsupported scheme type: " + versioningScheme);
          }
        }
      } else {
        // Key aspect missing: the set urn does not resolve to a real Version Set entity.
        exceptions.addException(mcpItem, "Version Set specified does not exist: " + versionSetUrn);
      }
    }
    return exceptions.streamAllExceptions();
  }

  // DataHub-generated scheme: sort id must be exactly 8 uppercase ASCII letters
  // (matches AlphanumericSortIdGenerator's output).
  private static void validateDataHubGeneratedScheme(
      String sortId, ValidationExceptionCollection exceptions, BatchItem mcpItem) {
    if (!(sortId.length() == 8
        && StringUtils.isAllUpperCase(sortId)
        && StringUtils.isAlpha(sortId))) {
      exceptions.addException(
          mcpItem,
          "Invalid sortID for Versioning Scheme. ID: "
              + sortId
              + " Scheme: "
              + VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB);
    }
  }

  /**
   * Rejects proposals that try to set the computed isLatest field. A PatchItemImpl reaching this
   * method indicates an inconsistent change type upstream (patches are excluded by the change-type
   * filter), hence the IllegalStateException.
   */
  @VisibleForTesting
  public static Stream validatePropertiesProposals(
      @Nonnull Collection mcpItems) {
    ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection();
    for (BatchItem mcpItem : mcpItems) {
      if (mcpItem instanceof PatchItemImpl) {
        throw new IllegalStateException("Patch item must have change type of PATCH.");
      }
      VersionProperties versionProperties = mcpItem.getAspect(VersionProperties.class);
      // Validate isLatest not set
      if (versionProperties.hasIsLatest()) {
        exceptions.addException(
            mcpItem, "IsLatest should not be specified, this is a computed field.");
      }
    }
    return exceptions.streamAllExceptions();
  }
}
+import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.versionset.VersionSetProperties; +import java.util.Collection; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class VersionSetPropertiesValidator extends AspectPayloadValidator { + + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return validatePropertiesUpserts( + mcpItems.stream() + .filter(i -> VERSION_SET_PROPERTIES_ASPECT_NAME.equals(i.getAspectName())) + .collect(Collectors.toList()), + retrieverContext); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } + + @VisibleForTesting + public static Stream validatePropertiesUpserts( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + for (BatchItem mcpItem : mcpItems) { + VersionSetProperties versionSetProperties = mcpItem.getAspect(VersionSetProperties.class); + Optional aspect = + Optional.ofNullable( + retrieverContext + .getAspectRetriever() + .getLatestAspectObject(mcpItem.getUrn(), VERSION_SET_PROPERTIES_ASPECT_NAME)); + if (aspect.isPresent()) { + VersionSetProperties previousVersionSetProperties = + RecordUtils.toRecordTemplate(VersionSetProperties.class, aspect.get().data()); + if 
(!previousVersionSetProperties + .getVersioningScheme() + .equals(versionSetProperties.getVersioningScheme())) { + exceptions.addException( + mcpItem, + "Versioning Scheme cannot change. Expected Scheme: " + + previousVersionSetProperties.getVersioningScheme() + + " Provided Scheme: " + + versionSetProperties.getVersioningScheme()); + } + } + } + return exceptions.streamAllExceptions(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java index 8d7548e0ba90a1..dae119beec4a7e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java @@ -6,7 +6,9 @@ import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; import java.util.List; +import java.util.Optional; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -16,15 +18,6 @@ @Getter @Builder public class SearchServiceSearchRetriever implements SearchRetriever { - private static final SearchFlags RETRIEVER_SEARCH_FLAGS = - new SearchFlags() - .setFulltext(false) - .setMaxAggValues(20) - .setSkipCache(false) - .setSkipAggregates(true) - .setSkipHighlighting(true) - .setIncludeSoftDeleted(false) - .setIncludeRestricted(false); @Setter private OperationContext systemOperationContext; private final SearchService searchService; @@ -34,16 +27,24 @@ public ScrollResult scroll( @Nonnull List entities, @Nullable Filter filters, @Nullable String scrollId, - int count) { - SortCriterion urnSort = new SortCriterion(); - urnSort.setField("urn"); - urnSort.setOrder(SortOrder.ASCENDING); + int count, + List sortCriteria, + @Nullable SearchFlags searchFlags) { + List 
finalCriteria = new ArrayList<>(sortCriteria); + if (sortCriteria.stream().noneMatch(sortCriterion -> "urn".equals(sortCriterion.getField()))) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + finalCriteria.add(urnSort); + } + final SearchFlags finalSearchFlags = + Optional.ofNullable(searchFlags).orElse(RETRIEVER_SEARCH_FLAGS); return searchService.scrollAcrossEntities( - systemOperationContext.withSearchFlags(flags -> RETRIEVER_SEARCH_FLAGS), + systemOperationContext.withSearchFlags(flags -> finalSearchFlags), entities, "*", filters, - List.of(urnSort), + finalCriteria, scrollId, null, count); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index b4ad847cb7afc2..7a60b89d0127cc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -147,7 +147,7 @@ private static Set objectFieldsFilter(Iterator fieldNames) { public Optional transformAspect( @Nonnull OperationContext opContext, final @Nonnull Urn urn, - final @Nonnull RecordTemplate aspect, + final @Nullable RecordTemplate aspect, final @Nonnull AspectSpec aspectSpec, final Boolean forDelete) throws RemoteInvocationException, URISyntaxException { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 17bbbaf059dec4..95fff81d13957c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -20,10 +20,12 @@ import com.linkedin.metadata.query.filter.Condition; import 
com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.utils.CriterionUtils; import io.datahubproject.metadata.context.OperationContext; import java.util.Collections; import java.util.HashMap; @@ -188,6 +190,13 @@ public static BoolQueryBuilder buildFilterQuery( }); finalQueryBuilder.should(andQueryBuilder); } + if (Boolean.TRUE.equals( + opContext.getSearchContext().getSearchFlags().isFilterNonLatestVersions())) { + BoolQueryBuilder filterNonLatestVersions = + ESUtils.buildFilterNonLatestEntities( + opContext, queryFilterRewriteChain, searchableFieldTypes); + finalQueryBuilder.must(filterNonLatestVersions); + } if (!finalQueryBuilder.should().isEmpty()) { finalQueryBuilder.minimumShouldMatch(1); } @@ -869,4 +878,31 @@ private static void filterSoftDeletedByDefault( } } } + + public static BoolQueryBuilder buildFilterNonLatestEntities( + OperationContext opContext, + QueryFilterRewriteChain queryFilterRewriteChain, + Map> searchableFieldTypes) { + ConjunctiveCriterion isLatestCriterion = new ConjunctiveCriterion(); + CriterionArray isLatestCriterionArray = new CriterionArray(); + isLatestCriterionArray.add( + CriterionUtils.buildCriterion(IS_LATEST_FIELD_NAME, Condition.EQUAL, "true")); + isLatestCriterion.setAnd(isLatestCriterionArray); + BoolQueryBuilder isLatest = + ESUtils.buildConjunctiveFilterQuery( + isLatestCriterion, false, searchableFieldTypes, opContext, queryFilterRewriteChain); + ConjunctiveCriterion isNotVersionedCriterion = new ConjunctiveCriterion(); + CriterionArray isNotVersionedCriterionArray = new CriterionArray(); + 
isNotVersionedCriterionArray.add( + CriterionUtils.buildCriterion(IS_LATEST_FIELD_NAME, Condition.EXISTS, true)); + isNotVersionedCriterion.setAnd(isNotVersionedCriterionArray); + BoolQueryBuilder isNotVersioned = + ESUtils.buildConjunctiveFilterQuery( + isNotVersionedCriterion, + false, + searchableFieldTypes, + opContext, + queryFilterRewriteChain); + return QueryBuilders.boolQuery().should(isLatest).should(isNotVersioned).minimumShouldMatch(1); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index c5fc9ebdac9fa6..635d4472305c93 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -42,6 +42,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -399,7 +400,7 @@ private void deleteSearchData( Urn urn, String entityName, AspectSpec aspectSpec, - RecordTemplate aspect, + @Nullable RecordTemplate aspect, Boolean isKeyAspect) { String docId; try { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java new file mode 100644 index 00000000000000..8021507231d3b9 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/AlphanumericSortIdGeneratorTest.java @@ -0,0 +1,62 @@ +package com.linkedin.metadata.entity.versioning; + +import static com.linkedin.metadata.Constants.INITIAL_VERSION_SORT_ID; +import static org.testng.Assert.*; + +import org.testng.annotations.Test; + +public class AlphanumericSortIdGeneratorTest { + + @Test + public void testBasicIncrement() { + 
assertEquals(AlphanumericSortIdGenerator.increment(INITIAL_VERSION_SORT_ID), "AAAAAAAB"); + assertEquals(AlphanumericSortIdGenerator.increment("AAAAAAAB"), "AAAAAAAC"); + } + + @Test + public void testCarryOver() { + assertEquals(AlphanumericSortIdGenerator.increment("AAAAAAAZ"), "AAAAAABA"); + assertEquals(AlphanumericSortIdGenerator.increment("AAAAAZZZ"), "AAAABAAA"); + } + + @Test + public void testWrapAround() { + assertEquals(AlphanumericSortIdGenerator.increment("ZZZZZZZZ"), INITIAL_VERSION_SORT_ID); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testInvalidLength() { + AlphanumericSortIdGenerator.increment("AAA"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testInvalidCharacters() { + AlphanumericSortIdGenerator.increment("AAAA$AAA"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNullInput() { + AlphanumericSortIdGenerator.increment(null); + } + + @Test + public void testSequence() { + String id = "AAAAAAAA"; + id = AlphanumericSortIdGenerator.increment(id); + assertEquals(id, "AAAAAAAB"); + id = AlphanumericSortIdGenerator.increment(id); + assertEquals(id, "AAAAAAAC"); + id = AlphanumericSortIdGenerator.increment(id); + assertEquals(id, "AAAAAAAD"); + } + + @Test + public void testLowerBoundary() { + assertEquals(AlphanumericSortIdGenerator.increment(INITIAL_VERSION_SORT_ID), "AAAAAAAB"); + } + + @Test + public void testUpperBoundary() { + assertEquals(AlphanumericSortIdGenerator.increment("ZZZZZZZZ"), "AAAAAAAA"); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java new file mode 100644 index 00000000000000..8c4d81af129428 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java @@ -0,0 +1,603 @@ +package 
com.linkedin.metadata.entity.versioning; + +import static com.linkedin.metadata.Constants.INITIAL_VERSION_SORT_ID; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.FabricType; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.VersionTag; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceAspectRetriever; +import com.linkedin.metadata.entity.RollbackResult; +import com.linkedin.metadata.entity.RollbackRunResult; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.TestEntityRegistry; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistryException; +import com.linkedin.metadata.models.registry.MergedEntityRegistry; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.metadata.snapshot.Snapshot; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import 
io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RetrieverContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.mockito.ArgumentCaptor; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class EntityVersioningServiceTest { + + private EntityVersioningServiceImpl versioningService; + private EntityService mockEntityService; + private OperationContext mockOpContext; + private AspectRetriever mockAspectRetriever; + private CachingAspectRetriever mockCachingAspectRetriever; + private SearchRetriever mockSearchRetriever; + private static Urn TEST_VERSION_SET_URN = UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + private static Urn TEST_DATASET_URN = + new DatasetUrn(new DataPlatformUrn("kafka"), "myDataset", FabricType.PROD); + private static Urn TEST_DATASET_URN_2 = + new DatasetUrn(new DataPlatformUrn("hive"), "myHiveDataset", FabricType.PROD); + private static Urn TEST_DATASET_URN_3 = + new DatasetUrn(new DataPlatformUrn("hive"), "myHiveDataset2", FabricType.PROD); + + @BeforeMethod + public void setup() throws EntityRegistryException { + mockEntityService = mock(EntityService.class); + final EntityRegistry snapshotEntityRegistry = new TestEntityRegistry(); + final EntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + Snapshot.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + final EntityRegistry testEntityRegistry = + new MergedEntityRegistry(snapshotEntityRegistry).apply(configEntityRegistry); + mockAspectRetriever = mock(EntityServiceAspectRetriever.class); + mockCachingAspectRetriever = mock(CachingAspectRetriever.class); + mockSearchRetriever = mock(SearchRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(testEntityRegistry); + 
mockOpContext = + TestOperationContexts.systemContext( + null, + null, + null, + () -> testEntityRegistry, + () -> + RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .graphRetriever(GraphRetriever.EMPTY) + .searchRetriever(mockSearchRetriever) + .cachingAspectRetriever(mockCachingAspectRetriever) + .build(), + null, + opContext -> + ((EntityServiceAspectRetriever) opContext.getAspectRetriever()) + .setSystemOperationContext(opContext), + null); + versioningService = new EntityVersioningServiceImpl(mockEntityService); + } + + @Test + public void testLinkLatestVersionNewVersionSet() throws Exception { + + VersionPropertiesInput input = + new VersionPropertiesInput("Test comment", "Test label", 123456789L, "testCreator"); + // Mock version set doesn't exist + when(mockAspectRetriever.entityExists(anySet())) + .thenReturn(Map.of(TEST_VERSION_SET_URN, false)); + + // Capture the proposals + ArgumentCaptor aspectsCaptor = ArgumentCaptor.forClass(AspectsBatch.class); + when(mockEntityService.ingestProposal(eq(mockOpContext), aspectsCaptor.capture(), eq(false))) + .thenReturn(List.of()); + + // Execute + versioningService.linkLatestVersion( + mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN, input); + + // Verify + List capturedAspects = aspectsCaptor.getAllValues(); + List versionPropertiesAspect = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionProperties.class)) + .collect(Collectors.toList()); + + // Verify VersionProperties has initial sort ID + VersionProperties versionProps = + (VersionProperties) + versionPropertiesAspect.stream() + .filter(a -> a instanceof VersionProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("VersionProperties not found")); + + assertEquals(versionProps.getSortId(), INITIAL_VERSION_SORT_ID); + assertEquals(versionProps.getComment(), "Test comment"); + 
assertEquals(versionProps.getVersionSet(), TEST_VERSION_SET_URN); + + List versionSetPropertiesAspect = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_SET_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionSetProperties.class)) + .collect(Collectors.toList()); + VersionSetProperties versionSetProperties = + (VersionSetProperties) + versionSetPropertiesAspect.stream() + .filter(aspect -> aspect instanceof VersionSetProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("Version Set Properties not found")); + assertEquals(versionSetProperties.getLatest(), TEST_DATASET_URN); + assertEquals( + versionSetProperties.getVersioningScheme(), + VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + } + + @Test + public void testLinkLatestVersionExistingVersionSet() throws Exception { + + VersionPropertiesInput input = + new VersionPropertiesInput("Test comment", "Label2", 123456789L, "testCreator"); + + // Mock version set exists + when(mockAspectRetriever.entityExists(anySet())).thenReturn(Map.of(TEST_VERSION_SET_URN, true)); + + // Mock existing version set properties + VersionSetProperties existingVersionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropertiesAspect = mock(SystemAspect.class); + when(mockVersionSetPropertiesAspect.getRecordTemplate()).thenReturn(existingVersionSetProps); + when(mockVersionSetPropertiesAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect(eq(TEST_VERSION_SET_URN), anyString())) + .thenReturn(mockVersionSetPropertiesAspect); + + // Mock existing version properties with a sort ID + VersionProperties existingVersionProps = + new VersionProperties() + .setSortId("AAAAAAAA") + .setVersion(new VersionTag().setVersionTag("Label1")) + .setVersionSet(TEST_VERSION_SET_URN); + 
SystemAspect mockVersionPropertiesAspect = mock(SystemAspect.class); + when(mockVersionPropertiesAspect.getRecordTemplate()).thenReturn(existingVersionProps); + when(mockVersionPropertiesAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect(eq(TEST_DATASET_URN), anyString())) + .thenReturn(mockVersionPropertiesAspect); + + // Capture the proposals + ArgumentCaptor aspectsCaptor = ArgumentCaptor.forClass(AspectsBatch.class); + when(mockEntityService.ingestProposal(eq(mockOpContext), aspectsCaptor.capture(), eq(false))) + .thenReturn(List.of()); + + // Execute + versioningService.linkLatestVersion( + mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2, input); + + // Verify + List capturedAspects = aspectsCaptor.getAllValues(); + List aspects = + capturedAspects.get(0).getMCPItems().stream() + .filter(mcpItem -> VERSION_PROPERTIES_ASPECT_NAME.equals(mcpItem.getAspectName())) + .map(mcpItem -> mcpItem.getAspect(VersionProperties.class)) + .collect(Collectors.toList()); + + // Verify VersionProperties has incremented sort ID + VersionProperties versionProps = + (VersionProperties) + aspects.stream() + .filter(a -> a instanceof VersionProperties) + .findFirst() + .orElseThrow(() -> new AssertionError("VersionProperties not found")); + + assertEquals(versionProps.getSortId(), "AAAAAAAB"); + assertEquals(versionProps.getComment(), "Test comment"); + assertEquals(versionProps.getVersionSet(), TEST_VERSION_SET_URN); + } + + @Test + public void testUnlinkInitialVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId(INITIAL_VERSION_SORT_ID); + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + 
when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock delete aspect responses + RollbackResult versionSetDeleteResult = + new RollbackResult( + TEST_VERSION_SET_URN, + "versionSet", + VERSION_SET_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + RollbackRunResult rollbackRunResult = + new RollbackRunResult(new ArrayList<>(), 1, List.of(versionSetDeleteResult)); + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + + when(mockEntityService.deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN))) + .thenReturn(rollbackRunResult); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Execute + List results 
= + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertEquals(results.size(), 2); + verify(mockEntityService).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any(), any()); + } + + @Test + public void testUnlinkLatestVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult 
= + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNotLatestVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_2), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN); + SystemAspect mockVersionSetPropsAspect = 
mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_2, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNotReturnedSingleVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + 
SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_2), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN_2); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_2, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_2); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + 
eq(mockOpContext), + eq(TEST_DATASET_URN_2.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNotReturnedDoubleVersionWithPriorVersion() throws Exception { + + // Mock version properties aspect + VersionProperties versionProps = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setSortId("AAAAAAAB"); // Not initial version + SystemAspect mockVersionPropsAspect = mock(SystemAspect.class); + when(mockVersionPropsAspect.getRecordTemplate()).thenReturn(versionProps); + when(mockVersionPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN_3), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionPropsAspect); + + VersionSetProperties versionSetProps = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB) + .setLatest(TEST_DATASET_URN_3); + SystemAspect mockVersionSetPropsAspect = mock(SystemAspect.class); + when(mockVersionSetPropsAspect.getRecordTemplate()).thenReturn(versionSetProps); + when(mockVersionSetPropsAspect.getSystemMetadataVersion()).thenReturn(Optional.of(1L)); + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_VERSION_SET_URN), eq(VERSION_SET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockVersionSetPropsAspect); + + // Mock graph retriever response + SearchEntityArray relatedEntities = new SearchEntityArray(); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN)); + relatedEntities.add(new SearchEntity().setEntity(TEST_DATASET_URN_2)); + + ScrollResult scrollResult = + new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + 
.thenReturn(scrollResult); + + // Mock delete aspect response + RollbackResult versionPropsDeleteResult = + new RollbackResult( + TEST_DATASET_URN_3, + "dataset", + VERSION_PROPERTIES_ASPECT_NAME, + null, + null, + null, + null, + null, + false, + 0); + when(mockEntityService.deleteAspect( + eq(mockOpContext), anyString(), eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true))) + .thenReturn(Optional.of(versionPropsDeleteResult)); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN_3); + + // Verify + assertEquals(results.size(), 1); + verify(mockEntityService) + .deleteAspect( + eq(mockOpContext), + eq(TEST_DATASET_URN_3.toString()), + eq(VERSION_PROPERTIES_ASPECT_NAME), + anyMap(), + eq(true)); + verify(mockEntityService).ingestProposal(eq(mockOpContext), any(), eq(false)); + verify(mockEntityService, never()).deleteUrn(eq(mockOpContext), eq(TEST_VERSION_SET_URN)); + } + + @Test + public void testUnlinkNonVersionedEntity() throws Exception { + + // Mock no version properties aspect + when(mockAspectRetriever.getLatestSystemAspect( + eq(TEST_DATASET_URN), eq(VERSION_PROPERTIES_ASPECT_NAME))) + .thenReturn(null); + + // Execute + List results = + versioningService.unlinkVersion(mockOpContext, TEST_VERSION_SET_URN, TEST_DATASET_URN); + + // Verify + assertTrue(results.isEmpty()); + verify(mockEntityService, never()).deleteAspect(any(), any(), any(), any(), anyBoolean()); + verify(mockEntityService, never()).deleteUrn(any(), any()); + verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any(), any()); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java new file mode 100644 index 00000000000000..35445efaedc607 --- /dev/null +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/sideeffects/VersionSetSideEffectTest.java @@ -0,0 +1,229 @@ +package com.linkedin.metadata.entity.versioning.sideeffects; + +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; +import static org.mockito.Mockito.mock; +import static org.testng.Assert.assertEquals; + +import com.linkedin.common.GlobalTags; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.VersionProperties; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import io.datahubproject.metadata.context.RetrieverContext; +import jakarta.json.JsonObject; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class VersionSetSideEffectTest { + private static final TestEntityRegistry 
TEST_REGISTRY = new TestEntityRegistry(); + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + private static final Urn PREVIOUS_LATEST_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); + private static final Urn NEW_LATEST_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDatasetV2,PROD)"); + + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(VersionSetSideEffect.class.getName()) + .enabled(true) + .supportedOperations( + List.of("CREATE", "PATCH", "CREATE_ENTITY", "UPSERT", "DELETE", "RESTATE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entityName(VERSION_SET_ENTITY_NAME) + .build())) + .build(); + + private MockAspectRetriever mockAspectRetriever; + private RetrieverContext retrieverContext; + private VersionSetSideEffect sideEffect; + + @BeforeMethod + public void setup() { + GraphRetriever graphRetriever = mock(GraphRetriever.class); + VersionProperties existingProperties = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setIsLatest(false) + .setSortId("AAAAAAAA"); + VersionProperties previousLatestProperties = + new VersionProperties() + .setVersionSet(TEST_VERSION_SET_URN) + .setIsLatest(true) + .setSortId("AAAAAAAB"); + Map> data = new HashMap<>(); + data.put(NEW_LATEST_URN, Collections.singletonList(existingProperties)); + data.put(PREVIOUS_LATEST_URN, Collections.singletonList(previousLatestProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + mockAspectRetriever.setEntityRegistry(TEST_REGISTRY); + + retrieverContext = + RetrieverContext.builder() + .searchRetriever(mock(SearchRetriever.class)) + .aspectRetriever(mockAspectRetriever) + .graphRetriever(graphRetriever) + .build(); + + sideEffect = new VersionSetSideEffect(); + 
sideEffect.setConfig(TEST_PLUGIN_CONFIG); + } + + @Test + public void testUpdateLatestVersion() { + // Create previous version set properties with different latest + VersionSetProperties previousProperties = new VersionSetProperties(); + previousProperties.setLatest(PREVIOUS_LATEST_URN); + previousProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Create new version set properties + VersionSetProperties newProperties = new VersionSetProperties(); + newProperties.setLatest(NEW_LATEST_URN); + newProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(VERSION_SET_ENTITY_NAME); + + // Create change item + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(TEST_VERSION_SET_URN) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(VERSION_SET_PROPERTIES_ASPECT_NAME)) + .recordTemplate(newProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + // Create MCL item with previous aspect + MCLItemImpl mclItem = + MCLItemImpl.builder() + .previousRecordTemplate(previousProperties) + .build(changeItem, previousProperties, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify results + assertEquals(sideEffectResults.size(), 2, "Expected two patch operations"); + + // Verify patch for previous latest version + MCPItem previousPatch = sideEffectResults.get(0); + assertEquals(previousPatch.getUrn(), PREVIOUS_LATEST_URN); + JsonObject previousPatchOp = + ((PatchItemImpl) previousPatch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(previousPatchOp.getString("op"), "add"); + assertEquals(previousPatchOp.getString("path"), "/isLatest"); + 
assertEquals(previousPatchOp.getBoolean("value"), false); + + // Verify patch for new latest version + MCPItem newPatch = sideEffectResults.get(1); + assertEquals(newPatch.getUrn(), NEW_LATEST_URN); + JsonObject newPatchOp = ((PatchItemImpl) newPatch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(newPatchOp.getString("op"), "add"); + assertEquals(newPatchOp.getString("path"), "/isLatest"); + assertEquals(newPatchOp.getBoolean("value"), true); + } + + @Test + public void testNoChangesWhenLatestRemainsSame() { + // Create version set properties with same latest + VersionSetProperties previousProperties = new VersionSetProperties(); + previousProperties.setLatest(NEW_LATEST_URN); + previousProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + VersionSetProperties newProperties = new VersionSetProperties(); + newProperties.setLatest(NEW_LATEST_URN); + newProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(VERSION_SET_ENTITY_NAME); + + // Create change item + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(TEST_VERSION_SET_URN) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(VERSION_SET_PROPERTIES_ASPECT_NAME)) + .recordTemplate(newProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + // Create MCL item with previous aspect + MCLItemImpl mclItem = + MCLItemImpl.builder() + .previousRecordTemplate(previousProperties) + .build(changeItem, null, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify results - should still get one patch to set isLatest=true on current latest + assertEquals(sideEffectResults.size(), 1, "Expected one patch 
operation"); + + // Verify patch operation + MCPItem patch = sideEffectResults.get(0); + assertEquals(patch.getUrn(), NEW_LATEST_URN); + JsonObject patchOp = ((PatchItemImpl) patch).getPatch().toJsonArray().getJsonObject(0); + assertEquals(patchOp.getString("op"), "add"); + assertEquals(patchOp.getString("path"), "/isLatest"); + assertEquals(patchOp.getBoolean("value"), true); + } + + @Test + public void testNoChangesForNonVersionSetProperties() { + // Create some other type of aspect change + EntitySpec entitySpec = TEST_REGISTRY.getEntitySpec(DATASET_ENTITY_NAME); + ChangeItemImpl changeItem = + ChangeItemImpl.builder() + .urn(PREVIOUS_LATEST_URN) + .aspectName(GLOBAL_TAGS_ASPECT_NAME) + .entitySpec(entitySpec) + .aspectSpec(entitySpec.getAspectSpec(GLOBAL_TAGS_ASPECT_NAME)) + .recordTemplate(new GlobalTags().setTags(new TagAssociationArray())) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + MCLItemImpl mclItem = + MCLItemImpl.builder().build(changeItem, null, null, retrieverContext.getAspectRetriever()); + + // Run side effect + List sideEffectResults = + sideEffect + .postMCPSideEffect(Collections.singletonList(mclItem), retrieverContext) + .collect(Collectors.toList()); + + // Verify no changes for non-version set properties aspects + assertEquals( + sideEffectResults.size(), 0, "Expected no changes for non-version set properties aspect"); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java new file mode 100644 index 00000000000000..4afd05c98a9312 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionPropertiesValidatorTest.java @@ -0,0 +1,165 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import static com.linkedin.metadata.Constants.CHART_ENTITY_NAME; + 
+import com.linkedin.common.VersionProperties; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.key.VersionSetKey; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class VersionPropertiesValidatorTest { + + private static final String ENTITY_TYPE = "dataset"; + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(12356,dataset)"); + private static final Urn TEST_ENTITY_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); + + private SearchRetriever mockSearchRetriever; + private MockAspectRetriever mockAspectRetriever; + private GraphRetriever mockGraphRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockSearchRetriever = Mockito.mock(SearchRetriever.class); + mockGraphRetriever = Mockito.mock(GraphRetriever.class); + + // Create version set key and properties + VersionSetKey versionSetKey = new VersionSetKey(); + versionSetKey.setEntityType(ENTITY_TYPE); + + VersionSetProperties versionSetProperties = new VersionSetProperties(); + 
versionSetProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Initialize mock aspect retriever with version set data + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Arrays.asList(versionSetKey, versionSetProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + } + + @Test + public void testValidVersionProperties() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); // Valid 8-char uppercase alpha + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testInvalidSortId() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("123"); // Invalid - not 8 chars, not alpha + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Invalid sortID for Versioning Scheme")); + } + + @Test + public void testNonexistentVersionSet() { + Urn nonexistentUrn = UrnUtils.getUrn("urn:li:versionSet:(nonexistent,dataset)"); + + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(nonexistentUrn); + properties.setSortId("ABCDEFGH"); + + Stream validationResult = + 
VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Version Set specified does not exist")); + } + + @Test + public void testEntityTypeMismatch() { + // Create version set with different entity type + VersionSetKey wrongTypeKey = new VersionSetKey(); + wrongTypeKey.setEntityType(CHART_ENTITY_NAME); + + VersionSetProperties versionSetProperties = new VersionSetProperties(); + versionSetProperties.setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Arrays.asList(wrongTypeKey, versionSetProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); + + Stream validationResult = + VersionPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry()), + retrieverContext); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue( + exception.getMessage().contains("Version Set specified entity type does not match")); + } + + @Test + public void testIsLatestFieldSpecified() { + VersionProperties properties = new VersionProperties(); + properties.setVersionSet(TEST_VERSION_SET_URN); + properties.setSortId("ABCDEFGH"); + properties.setIsLatest(true); // Should not be specified + + Stream validationResult = + 
VersionPropertiesValidator.validatePropertiesProposals( + TestMCP.ofOneUpsertItem(TEST_ENTITY_URN, properties, new TestEntityRegistry())); + + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("IsLatest should not be specified")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java new file mode 100644 index 00000000000000..c91495271f6149 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/validation/VersionSetPropertiesValidatorTest.java @@ -0,0 +1,139 @@ +package com.linkedin.metadata.entity.versioning.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.versionset.VersionSetProperties; +import com.linkedin.versionset.VersioningScheme; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class VersionSetPropertiesValidatorTest { + + private static final Urn TEST_VERSION_SET_URN = + UrnUtils.getUrn("urn:li:versionSet:(123456,dataset)"); + + private SearchRetriever 
mockSearchRetriever; + private MockAspectRetriever mockAspectRetriever; + private GraphRetriever mockGraphRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockSearchRetriever = Mockito.mock(SearchRetriever.class); + mockGraphRetriever = Mockito.mock(GraphRetriever.class); + + Map> emptyData = new HashMap<>(); + mockAspectRetriever = new MockAspectRetriever(emptyData); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + } + + @Test + public void testValidUpsertWithNoExistingProperties() { + // Create version set properties + VersionSetProperties properties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Test validation with no existing properties + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, properties, new TestEntityRegistry()), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testValidUpsertWithSameVersioningScheme() { + // Create existing properties with semantic versioning + VersionSetProperties existingProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Set up mock retriever with existing properties + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Collections.singletonList(existingProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + // Create new properties with same versioning 
scheme + VersionSetProperties newProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Test validation + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, newProperties, new TestEntityRegistry()), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testInvalidUpsertWithDifferentVersioningScheme() { + // Create existing properties with semantic versioning + VersionSetProperties existingProperties = + new VersionSetProperties() + .setVersioningScheme(VersioningScheme.ALPHANUMERIC_GENERATED_BY_DATAHUB); + + // Set up mock retriever with existing properties + Map> data = new HashMap<>(); + data.put(TEST_VERSION_SET_URN, Collections.singletonList(existingProperties)); + mockAspectRetriever = new MockAspectRetriever(data); + + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + + // Create new properties with different versioning scheme + VersionSetProperties newProperties = + new VersionSetProperties().setVersioningScheme(VersioningScheme.$UNKNOWN); + + // Test validation + Stream validationResult = + VersionSetPropertiesValidator.validatePropertiesUpserts( + TestMCP.ofOneUpsertItem(TEST_VERSION_SET_URN, newProperties, new TestEntityRegistry()), + retrieverContext); + + // Assert validation exception exists + AspectValidationException exception = validationResult.findAny().get(); + Assert.assertNotNull(exception); + Assert.assertTrue(exception.getMessage().contains("Versioning Scheme cannot change")); + Assert.assertTrue( + exception.getMessage().contains("Expected Scheme: ALPHANUMERIC_GENERATED_BY_DATAHUB")); + 
Assert.assertTrue(exception.getMessage().contains("Provided Scheme: $UNKNOWN")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index c5205906e9d373..23d493b7287f78 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -1,9 +1,12 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.common.collect.ImmutableList; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; @@ -13,22 +16,35 @@ import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import 
com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.opensearch.action.search.SearchRequest; import org.opensearch.common.lucene.search.function.FieldValueFactorFunction; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.MultiMatchQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; import org.opensearch.search.builder.SearchSourceBuilder; @@ -40,6 +56,8 @@ public class AutocompleteRequestHandlerTest { private static AutocompleteRequestHandler handler; private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + private OperationContext nonMockOpContext = + TestOperationContexts.systemContextNoSearchAuthorization(); static { testQueryConfig = new SearchConfiguration(); @@ -465,10 +483,148 @@ public void testCustomConfigWithFunctionScores() { assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.size() == 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + // bool -> filter -> [bool] -> must -> [bool] + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private static QueryBuilder extractNestedQuery(BoolQueryBuilder nested) { assertEquals(nested.should().size(), 1); BoolQueryBuilder firstLevel = (BoolQueryBuilder) nested.should().get(0); assertEquals(firstLevel.should().size(), 1); return firstLevel.should().get(0); } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + AutocompleteRequestHandler requestHandler = + AutocompleteRequestHandler.getBuilder( + entitySpec, + CustomSearchConfiguration.builder().build(), + 
QueryFilterRewriteChain.EMPTY, + testQueryConfig); + + return (BoolQueryBuilder) + ((FunctionScoreQueryBuilder) + requestHandler + .getSearchRequest( + mockOpContext.withSearchFlags( + flags -> + flags + .setFulltext(false) + .setFilterNonLatestVersions(filterNonLatest)), + "", + "platform", + filter, + 3) + .source() + .query()) + .query(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1a91ae35c6595b..1fea4476d75abb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; @@ -56,6 +57,8 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; @@ -805,7 +808,214 @@ public void testQueryByDefault() { } } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + 
filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + 
operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + + @Test + public void testAggregationFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof 
TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testAggregationNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + 
.filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private BoolQueryBuilder getQuery(final Criterion filterCriterion) { + return getQuery(filterCriterion, TestEntitySpecBuilder.getSpec(), true); + } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { final Filter filter = new Filter() .setOr( @@ -816,7 +1026,7 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { final SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder( operationContext.getEntityRegistry(), - TestEntitySpecBuilder.getSpec(), + entitySpec, testQueryConfig, null, QueryFilterRewriteChain.EMPTY); @@ -824,7 +1034,8 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { return (BoolQueryBuilder) requestHandler .getSearchRequest( - operationContext.withSearchFlags(flags -> flags.setFulltext(false)), + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), "", filter, null, @@ -834,4 +1045,33 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .source() .query(); } + + private BoolQueryBuilder getAggregationQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + final SearchRequestHandler requestHandler = + SearchRequestHandler.getBuilder( + operationContext.getEntityRegistry(), + entitySpec, + testQueryConfig, + null, + 
QueryFilterRewriteChain.EMPTY); + + return (BoolQueryBuilder) + requestHandler + .getAggregationRequest( + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), + "platform", + filter, + 10) + .source() + .query(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 65b73b7425b743..5a4fb39bd50e96 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -430,4 +430,23 @@ public void testEmptyDescription() throws RemoteInvocationException, URISyntaxEx assertTrue(transformed.get().get("description").isNull()); assertFalse(transformed.get().get("hasDescription").asBoolean()); } + + @Test + public void testHandleRemoveFieldsWithStructuredProperties() throws IOException { + ObjectNode previousDoc = JsonNodeFactory.instance.objectNode(); + previousDoc.put("structuredProperties.prop1", "value1"); + previousDoc.put("structuredProperties.prop2", "value2"); + previousDoc.put("otherField", "value3"); + + ObjectNode newDoc = JsonNodeFactory.instance.objectNode(); + newDoc.put("structuredProperties.prop1", "updatedValue1"); + newDoc.put("otherField", "updatedValue3"); + + ObjectNode result = SearchDocumentTransformer.handleRemoveFields(newDoc, previousDoc); + + assertEquals(result.get("structuredProperties.prop1").asText(), "updatedValue1"); + assertTrue(result.has("structuredProperties.prop2")); + assertTrue(result.get("structuredProperties.prop2").isNull()); + assertEquals(result.get("otherField").asText(), "updatedValue3"); + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl 
b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl new file mode 100644 index 00000000000000..af4d48debe0217 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl @@ -0,0 +1,77 @@ +namespace com.linkedin.common + +/** + * Properties about a versioned asset i.e. dataset, ML Model, etc. + */ +@Aspect = { + "name": "versionProperties" +} +record VersionProperties { + /** + * The linked Version Set entity that ties multiple versioned assets together + */ + @Searchable = { + "queryByDefault": false + } + @Relationship = { + "name": "VersionOf", + "entityTypes": [ "versionSet" ] + } + versionSet: Urn + + /** + * Label for this versioned asset, is unique within a version set + */ + @Searchable = { + "/versionTag": { + "fieldName": "version", + "queryByDefault": false + } + } + version: VersionTag + + /** + * Associated aliases for this versioned asset + */ + @Searchable = { + "/*/versionTag": { + "fieldName": "aliases", + "queryByDefault": false + } + } + aliases: array[VersionTag] = [] + + /** + * Comment documenting what this version was created for, changes, or represents + */ + comment: optional string + + /** + * Sort identifier that determines where a version lives in the order of the Version Set. + * What this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8 character string representation. + */ + @Searchable = { + "queryByDefault": false, + "fieldName": "versionSortId" + } + sortId: string + + /** + * Timestamp reflecting when this asset version was created in the source system. + */ + sourceCreatedTimestamp: optional AuditStamp + + /** + * Timestamp reflecting when the metadata for this version was created in DataHub + */ + metadataCreatedTimestamp: optional AuditStamp + + /** + * Marks whether this version is currently the latest. Set by a side effect and should not be modified by API. 
+ */ + @Searchable = { + "queryByDefault": false, + "fieldType": "BOOLEAN" + } + isLatest: optional boolean +} \ No newline at end of file diff --git a/li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl similarity index 78% rename from li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl rename to metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl index f26a1b0140b793..82f2193747c5d3 100644 --- a/li-utils/src/main/pegasus/com/linkedin/common/VersionTag.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/VersionTag.pdl @@ -5,4 +5,5 @@ namespace com.linkedin.common */ record VersionTag { versionTag: optional string + metadataAttribution: optional MetadataAttribution } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl new file mode 100644 index 00000000000000..edbddc29e2023f --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/VersionSetKey.pdl @@ -0,0 +1,20 @@ +namespace com.linkedin.metadata.key + + +/** + * Key for a Version Set entity + */ +@Aspect = { + "name": "versionSetKey" +} +record VersionSetKey { + /** + * ID of the Version Set, generated from platform + asset id / name + */ + id: string + + /** + * Type of entities included in version set, limits to a single entity type between linked versioned entities + */ + entityType: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index a3a7a8cda58a8d..ab5873452641ed 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -64,4 +64,9 @@ record SearchFlags { * 
By default we include these, but custom aggregation requests don't need them. */ includeDefaultFacets: optional boolean = true + + /** + * Include only latest versions in version sets, default true + */ + filterNonLatestVersions: optional boolean = true } diff --git a/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl new file mode 100644 index 00000000000000..0e50c33aa2b7d4 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/versionset/VersionSetProperties.pdl @@ -0,0 +1,24 @@ +namespace com.linkedin.versionset + +import com.linkedin.common.CustomProperties +import com.linkedin.common.Urn + +@Aspect = { + "name": "versionSetProperties" +} +record VersionSetProperties includes CustomProperties { + /** + * The latest versioned entity linked to in this version set + */ + @Searchable = { + "queryByDefault": "false" + } + latest: Urn + + /** + * What versioning scheme is being utilized for the versioned entities sort criterion. Static once set + */ + versioningScheme: enum VersioningScheme { + ALPHANUMERIC_GENERATED_BY_DATAHUB + } +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 1556b72e4aefb1..32f9d1b98db5df 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -46,6 +46,7 @@ entities: - structuredProperties - forms - partitionsSummary + - versionProperties - name: dataHubPolicy doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc. 
category: internal @@ -365,6 +366,7 @@ entities: - structuredProperties - forms - testResults + - versionProperties - name: mlModelGroup category: core keyAspect: mlModelGroupKey @@ -494,6 +496,11 @@ entities: keyAspect: globalSettingsKey aspects: - globalSettingsInfo + - name: versionSet + category: core + keyAspect: versionSetKey + aspects: + - versionSetProperties - name: incident doc: An incident for an asset. category: core diff --git a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java index 97ca0dcabea9f3..eeb90d09204bb9 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java @@ -26,4 +26,5 @@ public class FeatureFlags { private boolean alternateMCPValidation = false; private boolean showManageStructuredProperties = false; private boolean dataProcessInstanceEntityEnabled = true; + private boolean entityVersioning = false; } diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index c029cb4648d012..69b86962442b91 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -466,6 +466,7 @@ featureFlags: showSeparateSiblings: ${SHOW_SEPARATE_SIBLINGS:false} # If turned on, all siblings will be separated with no way to get to a "combined" sibling view editableDatasetNameEnabled: ${EDITABLE_DATASET_NAME_ENABLED:false} # Enables the ability to edit the dataset name in the UI showManageStructuredProperties: ${SHOW_MANAGE_STRUCTURED_PROPERTIES:true} # If turned on, show the manage structured properties button on the govern dropdown + 
entityVersioning: ${ENTITY_VERSIONING_ENABLED:false} # Enables entity versioning APIs, validators, and side effects entityChangeEvents: enabled: ${ENABLE_ENTITY_CHANGE_EVENTS_HOOK:true} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java new file mode 100644 index 00000000000000..4d03860cccb5ca --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/versioning/EntityVersioningServiceFactory.java @@ -0,0 +1,21 @@ +package com.linkedin.gms.factory.entity.versioning; + +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.EntityVersioningServiceImpl; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Slf4j +@Configuration +public class EntityVersioningServiceFactory { + + @Bean(name = "entityVersioningService") + @Nonnull + protected EntityVersioningService createInstance(EntityService entityService) { + + return new EntityVersioningServiceImpl(entityService); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 3229f12f9021d0..8f389eccc4cf8f 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.config.GraphQLConcurrencyConfiguration; import com.linkedin.metadata.connection.ConnectionService; import 
com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.SiblingGraphService; @@ -205,7 +206,8 @@ public class GraphQLEngineFactory { @Nonnull protected GraphQLEngine graphQLEngine( @Qualifier("entityClient") final EntityClient entityClient, - @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient) { + @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient, + final EntityVersioningService entityVersioningService) { GmsGraphQLEngineArgs args = new GmsGraphQLEngineArgs(); args.setEntityClient(entityClient); args.setSystemEntityClient(systemEntityClient); @@ -255,6 +257,7 @@ protected GraphQLEngine graphQLEngine( configProvider.getGraphQL().getQuery().isIntrospectionEnabled()); args.setGraphQLQueryDepthLimit(configProvider.getGraphQL().getQuery().getDepthLimit()); args.setBusinessAttributeService(businessAttributeService); + args.setEntityVersioningService(entityVersioningService); args.setConnectionService(_connectionService); args.setAssertionService(assertionService); return new GmsGraphQLEngine(args).builder().build(); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 2349dbd169f1d9..7d0937663fecb0 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -6,6 +6,9 @@ import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.hooks.IgnoreUnknownMutator; @@ -16,6 +19,9 @@ import com.linkedin.metadata.aspect.validation.ExecutionRequestResultValidator; import com.linkedin.metadata.aspect.validation.FieldPathValidator; import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; +import com.linkedin.metadata.entity.versioning.sideeffects.VersionSetSideEffect; +import com.linkedin.metadata.entity.versioning.validation.VersionPropertiesValidator; +import com.linkedin.metadata.entity.versioning.validation.VersionSetPropertiesValidator; import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect; import com.linkedin.metadata.structuredproperties.validation.HidePropertyValidator; import com.linkedin.metadata.structuredproperties.validation.ShowPropertyAsBadgeValidator; @@ -32,6 +38,13 @@ @Slf4j public class SpringStandardPluginConfiguration { private static final String ALL = "*"; + private static final String UPSERT = "UPSERT"; + private static final String UPDATE = "UPDATE"; + private static final String CREATE = "CREATE"; + private static final String CREATE_ENTITY = "CREATE_ENTITY"; + private static final String PATCH = "PATCH"; + private static final String DELETE = "DELETE"; + private static final String RESTATE = "RESTATE"; @Value("${metadataChangeProposal.validation.ignoreUnknown}") private boolean ignoreUnknownEnabled; @@ -189,4 +202,58 @@ public AspectPayloadValidator showPropertyAsAssetBadgeValidator() { .build())) .build()); } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public AspectPayloadValidator 
versionPropertiesValidator() { + return new VersionPropertiesValidator() + .setConfig( + AspectPluginConfig.builder() + .className(VersionPropertiesValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(ALL) + .aspectName(VERSION_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public AspectPayloadValidator versionSetPropertiesValidator() { + return new VersionSetPropertiesValidator() + .setConfig( + AspectPluginConfig.builder() + .className(VersionSetPropertiesValidator.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(VERSION_SET_ENTITY_NAME) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + @ConditionalOnProperty(name = "featureFlags.entityVersioning", havingValue = "true") + public MCPSideEffect versionSetSideEffect() { + return new VersionSetSideEffect() + .setConfig( + AspectPluginConfig.builder() + .className(VersionSetSideEffect.class.getName()) + .enabled(true) + .supportedOperations(List.of(UPSERT, UPDATE, PATCH, CREATE, CREATE_ENTITY)) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(VERSION_SET_ENTITY_NAME) + .aspectName(VERSION_SET_PROPERTIES_ASPECT_NAME) + .build())) + .build()); + } } diff --git a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java index 68b13bd5fb4ee8..07557ece381a0a 100644 --- 
a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java +++ b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java @@ -3,11 +3,13 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; +import com.linkedin.gms.factory.config.ConfigurationProvider; import io.datahubproject.openapi.config.OpenAPIAnalyticsTestConfiguration; import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.openapi.v2.generated.controller.DatahubUsageEventsApiController; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Import; import org.springframework.http.HttpStatus; @@ -22,6 +24,8 @@ public class DatahubUsageEventsImplTest extends AbstractTestNGSpringContextTests @Autowired private DatahubUsageEventsApiController analyticsController; + @MockBean private ConfigurationProvider configurationProvider; + @Test public void initTest() { assertNotNull(analyticsController); diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java index 2beb210e5bc4ff..31b35b65ea1a8c 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java @@ -4,6 +4,7 @@ import static org.testng.Assert.*; import 
com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration; import io.datahubproject.openapi.config.SpringWebConfig; @@ -38,6 +39,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Import; import org.springframework.http.HttpStatus; @@ -68,6 +70,7 @@ public void disableAssert() { @Autowired private DatasetApiController datasetApiController; @Autowired private EntityRegistry entityRegistry; @Autowired private MockMvc mockMvc; + @MockBean private ConfigurationProvider configurationProvider; @Test public void initTest() { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java index c756827cad56ba..01493d71643481 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java @@ -4,6 +4,9 @@ import io.datahubproject.metadata.exception.ActorAccessException; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.IOException; import java.util.Map; import 
javax.annotation.PostConstruct; import lombok.extern.slf4j.Slf4j; @@ -64,4 +67,25 @@ public static ResponseEntity> handleUnauthorizedException( public static ResponseEntity> actorAccessException(ActorAccessException e) { return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); } + + @Override + protected void logException(Exception ex, HttpServletRequest request) { + log.error("Error while resolving request: " + request.getRequestURI(), ex); + } + + @Override + protected void sendServerError( + Exception ex, HttpServletRequest request, HttpServletResponse response) throws IOException { + log.error("Error while resolving request: " + request.getRequestURI(), ex); + request.setAttribute("jakarta.servlet.error.exception", ex); + response.sendError(500); + } + + @ExceptionHandler(Exception.class) + public ResponseEntity> handleGenericException( + Exception e, HttpServletRequest request) { + log.error("Unhandled exception occurred for request: " + request.getRequestURI(), e); + return new ResponseEntity<>( + Map.of("error", "Internal server error occurred"), HttpStatus.INTERNAL_SERVER_ERROR); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java index 622cf20af9ff57..c4b4431e77c4ef 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java @@ -7,6 +7,7 @@ import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.openapi.converter.StringToChangeCategoryConverter; 
import io.datahubproject.openapi.v3.OpenAPIV3Generator; @@ -81,13 +82,15 @@ public void addFormatters(FormatterRegistry registry) { } @Bean - public GroupedOpenApi v3OpenApiGroup(final EntityRegistry entityRegistry) { + public GroupedOpenApi v3OpenApiGroup( + final EntityRegistry entityRegistry, final ConfigurationProvider configurationProvider) { return GroupedOpenApi.builder() .group("10-openapi-v3") .displayName("DataHub v3 (OpenAPI)") .addOpenApiCustomizer( openApi -> { - OpenAPI v3OpenApi = OpenAPIV3Generator.generateOpenApiSpec(entityRegistry); + OpenAPI v3OpenApi = + OpenAPIV3Generator.generateOpenApiSpec(entityRegistry, configurationProvider); openApi.setInfo(v3OpenApi.getInfo()); openApi.setTags(Collections.emptyList()); openApi.getPaths().putAll(v3OpenApi.getPaths()); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index c6b8d579d879e0..f7764f2ddb39a1 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -1,11 +1,14 @@ package io.datahubproject.openapi.v3; +import static com.linkedin.metadata.Constants.VERSION_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; import static io.datahubproject.openapi.util.ReflectionCache.toUpperFirst; import com.fasterxml.jackson.databind.JsonNode; import com.github.fge.processing.ProcessingUtil; import com.google.common.collect.ImmutableMap; import com.linkedin.data.avro.SchemaTranslator; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -64,7 +67,8 @@ public class 
OpenAPIV3Generator { private static final String ASPECTS = "Aspects"; private static final String ENTITIES = "Entities"; - public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { + public static OpenAPI generateOpenApiSpec( + EntityRegistry entityRegistry, ConfigurationProvider configurationProvider) { final Set aspectNames = entityRegistry.getAspectSpecs().keySet(); final Set entityNames = entityRegistry.getEntitySpecs().values().stream() @@ -125,22 +129,25 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { buildAspectRefResponseSchema(upperAspectName)); }); + List definedEntitySpecs = + entityRegistry.getEntitySpecs().values().stream() + .filter(entitySpec -> definitionNames.contains(entitySpec.getName())) + .sorted(Comparator.comparing(EntitySpec::getName)) + .collect(Collectors.toList()); // --> Entity components - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> aspectNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String entityName = toUpperFirst(e.getName()); - components.addSchemas( - entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); - components.addSchemas( - entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); - components.addSchemas( - "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); - components.addSchemas( - "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, - buildEntityBatchGetRequestSchema(e, aspectNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String entityName = toUpperFirst(e.getName()); + components.addSchemas( + entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); + components.addSchemas( + entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); + components.addSchemas( + "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); + components.addSchemas( + "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, 
+ buildEntityBatchGetRequestSchema(e, aspectNames)); + }); components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING"))); // TODO: Correct handling of SystemMetadata and AuditStamp @@ -151,14 +158,12 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { // Parameters // --> Entity Parameters - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String parameterName = toUpperFirst(e.getName()) + ASPECTS; - components.addParameters( - parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String parameterName = toUpperFirst(e.getName()) + ASPECTS; + components.addParameters( + parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); + }); addExtraParameters(components); @@ -169,39 +174,56 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { paths.addPathItem("/v3/entity/scroll", buildGenericListEntitiesPath()); // --> Entity Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - paths.addPathItem( - String.format("/v3/entity/%s", e.getName().toLowerCase()), - buildListEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), - buildBatchGetEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), - buildSingleEntityPath(e)); - }); + definedEntitySpecs.forEach( + e -> { + paths.addPathItem( + String.format("/v3/entity/%s", e.getName().toLowerCase()), buildListEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), + buildBatchGetEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), + 
buildSingleEntityPath(e)); + }); // --> Aspect Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - e.getAspectSpecs().stream() - .filter(a -> definitionNames.contains(a.getName())) - .sorted(Comparator.comparing(AspectSpec::getName)) - .forEach( - a -> - paths.addPathItem( - String.format( - "/v3/entity/%s/{urn}/%s", - e.getName().toLowerCase(), a.getName().toLowerCase()), - buildSingleEntityAspectPath(e, a))); - }); + definedEntitySpecs.forEach( + e -> + e.getAspectSpecs().stream() + .filter(a -> definitionNames.contains(a.getName())) + .sorted(Comparator.comparing(AspectSpec::getName)) + .forEach( + a -> + paths.addPathItem( + String.format( + "/v3/entity/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath(e, a)))); + definedEntitySpecs.forEach( + e -> + e.getAspectSpecs().stream() + .filter(a -> definitionNames.contains(a.getName())) + .sorted(Comparator.comparing(AspectSpec::getName)) + .forEach( + a -> + paths.addPathItem( + String.format( + "/v3/entity/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath(e, a)))); + + // --> Link & Unlink APIs + if (configurationProvider.getFeatureFlags().isEntityVersioning()) { + definedEntitySpecs.stream() + .filter(entitySpec -> VERSION_SET_ENTITY_NAME.equals(entitySpec.getName())) + .forEach( + entitySpec -> { + paths.addPathItem( + "/v3/entity/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + buildVersioningRelationshipPath()); + }); + } + return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components); } @@ -1198,4 +1220,115 @@ private static PathItem buildSingleEntityAspectPath( .post(postOperation) .patch(patchOperation); } + + private static Schema buildVersionPropertiesRequestSchema() { + return new Schema<>() + .type(TYPE_OBJECT) + 
.description("Properties for creating a version relationship") + .properties( + Map.of( + "comment", + new Schema<>() + .type(TYPE_STRING) + .description("Comment about the version") + .nullable(true), + "label", + new Schema<>() + .type(TYPE_STRING) + .description("Label for the version") + .nullable(true), + "sourceCreationTimestamp", + new Schema<>() + .type(TYPE_INTEGER) + .description("Timestamp when version was created in source system") + .nullable(true), + "sourceCreator", + new Schema<>() + .type(TYPE_STRING) + .description("Creator of version in source system") + .nullable(true))); + } + + private static PathItem buildVersioningRelationshipPath() { + final PathItem result = new PathItem(); + + // Common parameters for path + final List parameters = + List.of( + new Parameter() + .in(NAME_PATH) + .name("versionSetUrn") + .description("The Version Set URN to unlink from") + .required(true) + .schema(new Schema().type(TYPE_STRING)), + new Parameter() + .in(NAME_PATH) + .name("entityUrn") + .description("The Entity URN to be unlinked") + .required(true) + .schema(new Schema().type(TYPE_STRING))); + + // Success response for DELETE + final ApiResponse successDeleteResponse = + new ApiResponse() + .description("Successfully unlinked entity from version set") + .content(new Content().addMediaType("application/json", new MediaType())); + + // DELETE operation + final Operation deleteOperation = + new Operation() + .summary("Unlink an entity from a version set") + .description("Removes the version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .responses( + new ApiResponses() + .addApiResponse("200", successDeleteResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + // Success response for POST + final ApiResponse successPostResponse = + new ApiResponse() + .description("Successfully linked entity to version set") + .content( + new 
Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/%s%s", + toUpperFirst(VERSION_PROPERTIES_ASPECT_NAME), + ASPECT_RESPONSE_SUFFIX))))); + + // Request body for POST + final RequestBody requestBody = + new RequestBody() + .description("Version properties for the link operation") + .required(true) + .content( + new Content() + .addMediaType( + "application/json", + new MediaType().schema(buildVersionPropertiesRequestSchema()))); + + // POST operation + final Operation postOperation = + new Operation() + .summary("Link an entity to a version set") + .description("Creates a version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .requestBody(requestBody) + .responses( + new ApiResponses() + .addApiResponse("201", successPostResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + return result.delete(deleteOperation).post(postOperation); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index af13cd3aab0510..a4583082d57c7f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -1,7 +1,9 @@ package io.datahubproject.openapi.v3.controller; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; import 
com.datahub.authentication.Actor; import com.datahub.authentication.Authentication; @@ -11,22 +13,28 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringMap; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.ProposedItem; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.filter.SortCriterion; @@ -71,9 +79,12 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.util.CollectionUtils; +import org.springframework.web.bind.annotation.DeleteMapping; +import 
org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; @@ -89,6 +100,9 @@ public class EntityController extends GenericEntitiesController< GenericAspectV3, GenericEntityV3, GenericEntityScrollResultV3> { + @Autowired private final EntityVersioningService entityVersioningService; + @Autowired private final ConfigurationProvider configurationProvider; + @Tag(name = "Generic Entities") @PostMapping(value = "/{entityName}/batchGet", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Get a batch of entities") @@ -222,6 +236,111 @@ public ResponseEntity scrollEntities( entityAspectsBody.getAspects() != null)); } + @Tag(name = "EntityVersioning") + @PostMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Link an Entity to a Version Set as the latest version") + public ResponseEntity> linkLatestVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestBody @Nonnull VersionPropertiesInput versionPropertiesInput) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type 
Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "linkLatestVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + + return ResponseEntity.ok( + buildEntityList( + opContext, + entityVersioningService.linkLatestVersion( + opContext, versionSetUrn, entityUrn, versionPropertiesInput), + false)); + } + + @Tag(name = "EntityVersioning") + @DeleteMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Unlink the latest linked version of an entity") + public ResponseEntity> unlinkVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured, please enable before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set 
urn %s must be of type Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "unlinkVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + List rollbackResults = + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + + return ResponseEntity.ok( + rollbackResults.stream() + .map(rollbackResult -> rollbackResult.getUrn().toString()) + .collect(Collectors.toList())); + } + @Override public GenericEntityScrollResultV3 buildScrollResult( @Nonnull OperationContext opContext, @@ -361,7 +480,10 @@ protected List buildEntityList( .auditStamp( withSystemMetadata ? 
ingest.getRequest().getAuditStamp() : null) .build())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + // Map merge strategy, just take latest one + .collect( + Collectors.toMap( + Map.Entry::getKey, Map.Entry::getValue, (value1, value2) -> value2)); responseList.add( GenericEntityV3.builder().build(objectMapper, urnAspects.getKey(), aspectsMap)); } diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java index e1568017156d9b..d8f04b60455abb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java @@ -6,6 +6,8 @@ import static org.testng.Assert.assertTrue; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import io.swagger.v3.core.util.Yaml; import io.swagger.v3.oas.models.OpenAPI; @@ -36,8 +38,10 @@ public void testOpenApiSpecBuilder() throws Exception { OpenAPIV3GeneratorTest.class .getClassLoader() .getResourceAsStream("entity-registry.yml")); + ConfigurationProvider configurationProvider = new ConfigurationProvider(); + configurationProvider.setFeatureFlags(new FeatureFlags()); - OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er); + OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er, configurationProvider); String openapiYaml = Yaml.pretty(openAPI); Files.write( Path.of(getClass().getResource("/").getPath(), "open-api.yaml"), diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java 
b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 952dc31c5ba386..e82ab50a0defeb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -33,9 +33,12 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.dataset.DatasetProfile; import com.linkedin.entity.Aspect; import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.gms.factory.entity.versioning.EntityVersioningServiceFactory; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; @@ -57,6 +60,7 @@ import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.test.metadata.context.TestOperationContexts; +import jakarta.servlet.ServletException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -81,7 +85,11 @@ @SpringBootTest(classes = {SpringWebConfig.class}) @ComponentScan(basePackages = {"io.datahubproject.openapi.v3.controller"}) -@Import({SpringWebConfig.class, EntityControllerTest.EntityControllerTestConfig.class}) +@Import({ + SpringWebConfig.class, + EntityControllerTest.EntityControllerTestConfig.class, + EntityVersioningServiceFactory.class +}) @AutoConfigureWebMvc @AutoConfigureMockMvc public class EntityControllerTest extends AbstractTestNGSpringContextTests { @@ -92,6 +100,7 @@ public class EntityControllerTest extends AbstractTestNGSpringContextTests { @Autowired private TimeseriesAspectService 
mockTimeseriesAspectService; @Autowired private EntityRegistry entityRegistry; @Autowired private OperationContext opContext; + @MockBean private ConfigurationProvider configurationProvider; @Test public void initTest() { @@ -431,4 +440,211 @@ public TimeseriesAspectService timeseriesAspectService() { return timeseriesAspectService; } } + + @Test + public void testGetEntityBatchWithMultipleEntities() throws Exception { + List TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + // Mock entity aspect response + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))), + TEST_URNS.get(1), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + String requestBody = + String.format( + "[{\"urn\": \"%s\"}, {\"urn\": \"%s\"}]", + TEST_URNS.get(0).toString(), TEST_URNS.get(1).toString()); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].urn").value(TEST_URNS.get(0).toString())) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].urn").value(TEST_URNS.get(1).toString())); + } + + @Test(expectedExceptions = ServletException.class) + public void testGetEntityBatchWithInvalidUrn() throws Exception { + String requestBody = "[{\"urn\": \"invalid:urn\"}]"; + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + 
.andExpect(status().is4xxClientError()); + } + + @Test + public void testScrollEntitiesWithMultipleSortFields() throws Exception { + List TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray( + List.of( + new SearchEntity().setEntity(TEST_URNS.get(0)), + new SearchEntity().setEntity(TEST_URNS.get(1))))); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + nullable(String.class), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + .content("{\"entities\":[\"dataset\"]}") + .param("sortCriteria", "name", "urn") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect( + MockMvcResultMatchers.jsonPath("$.entities[0].urn").value(TEST_URNS.get(0).toString())); + } + + @Test + public void testScrollEntitiesWithPitKeepAlive() throws Exception { + List TEST_URNS = + List.of(UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray(List.of(new SearchEntity().setEntity(TEST_URNS.get(0))))) + .setScrollId("test-scroll-id"); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + 
eq("10m"), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + .content("{\"entities\":[\"dataset\"]}") + .param("pitKeepAlive", "10m") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$.scrollId").value("test-scroll-id")); + } + + @Test(expectedExceptions = ServletException.class) + public void testEntityVersioningFeatureFlagDisabled() throws Exception { + Urn TEST_URN = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + Urn VERSION_SET_URN = UrnUtils.getUrn("urn:li:versionSet:test-version-set"); + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(false); + + // Test linking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test unlinking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } + + @Test(expectedExceptions = ServletException.class) + public void testInvalidVersionSetUrn() throws Exception { + Urn TEST_URN = 
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + String INVALID_VERSION_SET_URN = "urn:li:dataset:invalid-version-set"; + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(true); + + // Test linking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test unlinking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 432c4a9ddcb73f..af11532ccf4ece 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -1382,6 +1382,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 45e91873de10ff..f58d83dd1e5cb7 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", @@ -6139,6 +6143,12 @@ "doc" : "Include default facets when getting facets to aggregate on in search requests.\nBy default we include these, but custom aggregation requests don't need them.", "default" : true, "optional" : true + }, { + "name" : "filterNonLatestVersions", + "type" : "boolean", + "doc" : "Include only latest versions in version sets, default true", + "default" : true, + "optional" : true } ] }, { "type" : "enum", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 9061cbff188135..61c31f93987b88 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index e6be4e828c976f..75793be7331da4 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, 
{ + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 10f3218d469757..58ba2ad05dfe74 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java new file mode 100644 index 00000000000000..9e82efa913a98d --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java @@ -0,0 +1,36 @@ +package com.linkedin.metadata.entity.versioning; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; + +public interface EntityVersioningService { + + /** + * Generates a new set of VersionProperties for the latest version and links it to the specified + * version set. If the specified version set does not yet exist, will create it. Order of + * operations here is important: 1. Create initial Version Set if necessary, do not generate + * Version Set Properties 2. 
Create Version Properties for specified entity. 3. Generate version + * properties with the properly set latest version Will eventually want to add in the scheme here + * as a parameter + * + * @return ingestResult -> the results of the ingested linked version + */ + List linkLatestVersion( + OperationContext opContext, + Urn versionSet, + Urn newLatestVersion, + VersionPropertiesInput inputProperties); + + /** + * Unlinks the latest version from a version set. Will attempt to set up the previous version as + * the new latest. This fully removes the version properties and unversions the specified entity. + * + * @param opContext operational context containing various information about the current execution + * @param currentLatest the currently linked latest versioned entity urn + * @return the deletion result + */ + List unlinkVersion(OperationContext opContext, Urn versionSet, Urn currentLatest); +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java new file mode 100644 index 00000000000000..28c320ec717201 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java @@ -0,0 +1,20 @@ +package com.linkedin.metadata.entity.versioning; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@NoArgsConstructor(force = true, access = AccessLevel.PRIVATE) +@AllArgsConstructor +public class VersionPropertiesInput { + private String comment; + private String version; + private Long sourceCreationTimestamp; + private String sourceCreator; +} diff --git 
a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 7e9d1701bf79a9..4cd9ec6c75b786 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -87,6 +87,14 @@ public static Filter newDisjunctiveFilter(@Nonnull Criterion... orCriterion) { .collect(Collectors.toCollection(ConjunctiveCriterionArray::new))); } + @Nonnull + public static Filter newConjunctiveFilter(@Nonnull Criterion... andCriterion) { + ConjunctiveCriterionArray orCriteria = new ConjunctiveCriterionArray(); + orCriteria.add( + new ConjunctiveCriterion().setAnd(new CriterionArray(Arrays.asList(andCriterion)))); + return new Filter().setOr(orCriteria); + } + @Nonnull public static ConjunctiveCriterion add( @Nonnull ConjunctiveCriterion conjunctiveCriterion, @Nonnull Criterion element) { diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 80a11ab98bbf4a..3c623f8df7c1bf 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -749,6 +749,14 @@ public class PoliciesConfig { EDIT_ENTITY_TAGS_PRIVILEGE, EDIT_ENTITY_GLOSSARY_TERMS_PRIVILEGE)); + // Version Set privileges + public static final ResourcePrivileges VERSION_SET_PRIVILEGES = + ResourcePrivileges.of( + "versionSet", + "Version Set", + "A logical collection of versioned entities.", + COMMON_ENTITY_PRIVILEGES); + public static final List ENTITY_RESOURCE_PRIVILEGES = ImmutableList.of( DATASET_PRIVILEGES, @@ -767,7 +775,8 @@ public class PoliciesConfig { DATA_PRODUCT_PRIVILEGES, 
ER_MODEL_RELATIONSHIP_PRIVILEGES, BUSINESS_ATTRIBUTE_PRIVILEGES, - STRUCTURED_PROPERTIES_PRIVILEGES); + STRUCTURED_PROPERTIES_PRIVILEGES, + VERSION_SET_PRIVILEGES); // Merge all entity specific resource privileges to create a superset of all resource privileges public static final ResourcePrivileges ALL_RESOURCE_PRIVILEGES = diff --git a/smoke-test/tests/entity_versioning/__init__.py b/smoke-test/tests/entity_versioning/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/entity_versioning/test_versioning.py b/smoke-test/tests/entity_versioning/test_versioning.py new file mode 100644 index 00000000000000..c331cc5305a336 --- /dev/null +++ b/smoke-test/tests/entity_versioning/test_versioning.py @@ -0,0 +1,64 @@ +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def test_link_unlink_version(auth_session): + """Fixture to execute setup before and tear down after all tests are run""" + res_data = link_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["linkAssetVersion"] + assert ( + res_data["data"]["linkAssetVersion"] + == "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)" + ) + + res_data = unlink_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["unlinkAssetVersion"] + + +def link_version(auth_session): + json = { + "mutation": """mutation linkAssetVersion($input: LinkVersionInput!) 
{\n + linkAssetVersion(input: $input) + }\n + }""", + "variables": { + "input": { + "version": "1233456", + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "linkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() + + +def unlink_version(auth_session): + json = { + "mutation": """mutation unlinkAssetVersion($input: UnlinkVersionInput!) {\n + unlinkAssetVersion(input: $input) + }\n + }""", + "variables": { + "input": { + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "unlinkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() diff --git a/test-models/build.gradle b/test-models/build.gradle index e8733f0525870b..89bf4ec445440d 100644 --- a/test-models/build.gradle +++ b/test-models/build.gradle @@ -18,3 +18,4 @@ idea { } sourceSets.mainGeneratedDataTemplate.java.srcDirs('src/main/javaPegasus/') +spotlessJava.dependsOn generateTestDataTemplate \ No newline at end of file From b252f782c56327175a1a0bddc95b5c417db285a1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 14 Jan 2025 17:04:49 -0800 Subject: [PATCH 078/249] feat(build): use remote gradle cache (#12344) --- settings.gradle | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/settings.gradle b/settings.gradle index 77d0706549a439..437a353f210ac4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -79,6 +79,20 @@ include ':metadata-service:openapi-servlet:models' include ':metadata-integration:java:datahub-schematron:lib' include ':metadata-integration:java:datahub-schematron:cli' +buildCache { + def depotSecret = System.getenv('DEPOT_TOKEN'); + + remote(HttpBuildCache) 
{ + url = 'https://cache.depot.dev' + enabled = depotSecret != null + push = true + credentials { + username = '' + password = depotSecret + } + } +} + def installPreCommitHooks() { def preCommitInstalled = false try { @@ -116,7 +130,7 @@ def installPreCommitHooks() { def stderr = new StringBuilder() installHooksProcess.waitForProcessOutput(stdout, stderr) if (installHooksProcess.exitValue() != 0) { - println "Failed to install hooks: ${stderr}" + println "Failed to install hooks: ${stdout}" return } println "Hooks output: ${stdout}" From a0575329848d65eafb455a3f400e8f47bc7e9bb7 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:35:36 -0600 Subject: [PATCH 079/249] feat(docker-profiles): version mixing & docs (#12342) --- docker/build.gradle | 6 +----- docker/profiles/README.md | 28 +++++++++++++++++++++++++- docker/profiles/docker-compose.gms.yml | 16 +++++++-------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/docker/build.gradle b/docker/build.gradle index 576e47a53e6ef5..0070d814286cf0 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,7 +42,6 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], - 'quickstartDebugConsumers': [ profile: 'debug-consumers', modules: python_services_modules + backend_profile_modules + [':datahub-frontend', @@ -50,7 +49,6 @@ ext { ':metadata-jobs:mae-consumer-job'], isDebug: true ], - 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ @@ -108,9 +106,7 @@ dockerCompose { } // Common environment variables - environment.put 'DATAHUB_VERSION', config.isDebug ? 
- System.getenv("DATAHUB_VERSION") ?: "v${version}" : - "v${version}" + environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}" environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' environment.put "METADATA_TESTS_ENABLED", "true" environment.put "DATAHUB_REPO", "${docker_registry}" diff --git a/docker/profiles/README.md b/docker/profiles/README.md index fb3c9e3c84a7a2..192fde3130a895 100644 --- a/docker/profiles/README.md +++ b/docker/profiles/README.md @@ -101,4 +101,30 @@ Runs everything except for the GMS. Useful for running just a local (non-docker) | debug-cassandra | | | X | | X | X | X | X | | | X | X | | | debug-consumers | X | | | | X | X | X | X | X | X | X | X | | | debug-neo4j | X | | | X | X | X | X | X | | | X | X | | -| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | \ No newline at end of file +| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | + +## Advanced Setups + +### Version Mixing + +In some cases, it might be useful to debug upgrade scenarios where there are intentional version miss-matches. It is possible +to override individual component versions. + +Note: This only works for `non-debug` profiles because of the file mounts when in `debug` which would run older containers +but still pickup the latest application jars. + +In this example we are interested in upgrading two components (the `mae-consumer` and the `mce-consumer`) to a fresh build `v0.15.1-SNAPSHOT` +while maintaining older components on `v0.14.1` (especially the `system-update` container). + +This configuration reproduces the situation where the consumers were upgraded prior to running the latest version of `system-update`. In this +scenario we expect the consumers to block their startup waiting for the successful completion of a newer `system-update`. 
+ +`DATAHUB_VERSION` - specifies the default component version of `v0.14.1` +`DATAHUB_MAE_VERSION` - specifies an override of just the `mae-consumer` to version `v0.15.1-SNAPSHOT`[1] +`DATAHUB_MCE_VERSION` - specifies an override of just the `mce-consumer` to version `v0.15.1-SNAPSHOT`[1] + +```shell + DATAHUB_MAE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_MCE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_VERSION="v0.14.1" ./gradlew quickstart +``` + +[1] Image versions were `v0.15.1-SNAPSHOT` built locally prior to running the command. diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index ada7df51e20bef..2147d6b5a0247f 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -54,7 +54,7 @@ x-datahub-dev-telemetry-env: &datahub-dev-telemetry-env ################################# x-datahub-system-update-service: &datahub-system-update-service hostname: datahub-system-update - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-head}} command: - -u - SystemUpdate @@ -73,7 +73,7 @@ x-datahub-system-update-service: &datahub-system-update-service x-datahub-system-update-service-dev: &datahub-system-update-service-dev <<: *datahub-system-update-service - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 environment: &datahub-system-update-dev-env @@ -92,7 +92,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: 
${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-head}} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: @@ -118,7 +118,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -150,7 +150,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9091:9091 env_file: @@ -163,7 +163,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} @@ -178,7 +178,7 @@ x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: 
${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9090:9090 env_file: @@ -193,7 +193,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} From 3905c8ee4146c93a06653dbcd690775ae36bef0f Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 14 Jan 2025 19:36:02 -0600 Subject: [PATCH 080/249] docs(async-api): addition to known issues (#12339) --- docs/how/updating-datahub.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 68b41c907c6ad6..eb5a792216d981 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,3 +1,8 @@ +# Known Issues + +- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. + + # Updating DataHub

Watch Metadata & AI Summit sessions on-demand.

Watch Now', + '

Learn about DataHub 1.0 launching at our 5th birthday party!

Register
', backgroundColor: "#111", textColor: "#ffffff", isCloseable: false, diff --git a/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js b/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js index abede0f11735d4..ad7278a438cf81 100644 --- a/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js +++ b/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js @@ -24,7 +24,7 @@ const solutionsDropdownContent = { title: "DataHub Core", description: "Get started with the Open Source platform.", iconImage: "/img/solutions/icon-dropdown-core.png", - href: "/", + href: "/docs/quickstart", }, { title: "Cloud vs Core", From 3084147df9fe21cbab3d80eb029a14c45376925e Mon Sep 17 00:00:00 2001 From: rharisi Date: Fri, 17 Jan 2025 06:28:06 +0530 Subject: [PATCH 100/249] fix(pdl): Add Dataplatform Instance urn pdl file (#11754) Co-authored-by: John Joyce --- .../common/urn/DataPlatformInstanceUrn.java | 79 +++++++++++++++++++ .../common/DataPlatformInstanceUrn.pdl | 27 +++++++ 2 files changed, 106 insertions(+) create mode 100644 li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java create mode 100644 li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl diff --git a/li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java b/li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java new file mode 100644 index 00000000000000..dfce6dc1e51085 --- /dev/null +++ b/li-utils/src/main/javaPegasus/com/linkedin/common/urn/DataPlatformInstanceUrn.java @@ -0,0 +1,79 @@ +package com.linkedin.common.urn; + +import com.linkedin.data.template.Custom; +import com.linkedin.data.template.DirectCoercer; +import com.linkedin.data.template.TemplateOutputCastException; +import java.net.URISyntaxException; + +public final class 
DataPlatformInstanceUrn extends Urn { + + public static final String ENTITY_TYPE = "dataPlatformInstance"; + + private final DataPlatformUrn _platform; + private final String _instanceId; + + public DataPlatformInstanceUrn(DataPlatformUrn platform, String instanceId) { + super(ENTITY_TYPE, TupleKey.create(platform, instanceId)); + this._platform = platform; + this._instanceId = instanceId; + } + + public DataPlatformUrn getPlatformEntity() { + return _platform; + } + + public String getInstance() { + return _instanceId; + } + + public static DataPlatformInstanceUrn createFromString(String rawUrn) throws URISyntaxException { + return createFromUrn(Urn.createFromString(rawUrn)); + } + + public static DataPlatformInstanceUrn createFromUrn(Urn urn) throws URISyntaxException { + if (!"li".equals(urn.getNamespace())) { + throw new URISyntaxException(urn.toString(), "Urn namespace type should be 'li'."); + } else if (!ENTITY_TYPE.equals(urn.getEntityType())) { + throw new URISyntaxException( + urn.toString(), "Urn entity type should be 'dataPlatformInstance'."); + } else { + TupleKey key = urn.getEntityKey(); + if (key.size() != 2) { + throw new URISyntaxException(urn.toString(), "Invalid number of keys."); + } else { + try { + return new DataPlatformInstanceUrn( + (DataPlatformUrn) key.getAs(0, DataPlatformUrn.class), + (String) key.getAs(1, String.class)); + } catch (Exception e) { + throw new URISyntaxException(urn.toString(), "Invalid URN Parameter: '" + e.getMessage()); + } + } + } + } + + public static DataPlatformInstanceUrn deserialize(String rawUrn) throws URISyntaxException { + return createFromString(rawUrn); + } + + static { + Custom.initializeCustomClass(DataPlatformUrn.class); + Custom.initializeCustomClass(DataPlatformInstanceUrn.class); + Custom.registerCoercer( + new DirectCoercer() { + public Object coerceInput(DataPlatformInstanceUrn object) throws ClassCastException { + return object.toString(); + } + + public DataPlatformInstanceUrn 
coerceOutput(Object object) + throws TemplateOutputCastException { + try { + return DataPlatformInstanceUrn.createFromString((String) object); + } catch (URISyntaxException e) { + throw new TemplateOutputCastException("Invalid URN syntax: " + e.getMessage(), e); + } + } + }, + DataPlatformInstanceUrn.class); + } +} diff --git a/li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl b/li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl new file mode 100644 index 00000000000000..168e0ee7611d31 --- /dev/null +++ b/li-utils/src/main/pegasus/com/linkedin/common/DataPlatformInstanceUrn.pdl @@ -0,0 +1,27 @@ +namespace com.linkedin.common + +/** + * Standardized dataset identifier. + */ +@java.class = "com.linkedin.common.urn.DataPlatformInstanceUrn" +@validate.`com.linkedin.common.validator.TypedUrnValidator` = { + "accessible" : true, + "owningTeam" : "urn:li:internalTeam:datahub", + "entityType" : "dataPlatformInstance", + "constructable" : true, + "namespace" : "li", + "name" : "DataPlatformInstance", + "doc" : "Standardized data platform instance identifier.", + "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], + "fields" : [ { + "type" : "com.linkedin.common.urn.DataPlatformUrn", + "name" : "platform", + "doc" : "Standardized platform urn." + }, { + "name" : "instance", + "doc" : "Instance of the data platform (e.g. 
db instance)", + "type" : "string", + } ], + "maxLength" : 100 +} +typeref DataPlatformInstanceUrn = string From 4a1fff56c695400f1ed57caf8f1d4a4c7ea2809c Mon Sep 17 00:00:00 2001 From: Meenakshi Kamalaseshan Radha <62914384+mkamalas@users.noreply.github.com> Date: Fri, 17 Jan 2025 06:28:43 +0530 Subject: [PATCH 101/249] feat(ui-plugin) - Allow custom userContext states to be added (#12057) --- datahub-web-react/src/app/context/CustomUserContext.tsx | 7 +++++++ datahub-web-react/src/app/context/userContext.tsx | 3 +++ 2 files changed, 10 insertions(+) create mode 100644 datahub-web-react/src/app/context/CustomUserContext.tsx diff --git a/datahub-web-react/src/app/context/CustomUserContext.tsx b/datahub-web-react/src/app/context/CustomUserContext.tsx new file mode 100644 index 00000000000000..016bbe29684ea5 --- /dev/null +++ b/datahub-web-react/src/app/context/CustomUserContext.tsx @@ -0,0 +1,7 @@ +/** + * Custom User Context State - This is a custom user context state and can be overriden in specific fork of DataHub. + * The below type can be customized with specific object properties as well if needed. + */ +export type CustomUserContextState = Record; + +export const DEFAULT_CUSTOM_STATE: CustomUserContextState = {}; diff --git a/datahub-web-react/src/app/context/userContext.tsx b/datahub-web-react/src/app/context/userContext.tsx index c9b8adafd9722f..a728e01ddc29ae 100644 --- a/datahub-web-react/src/app/context/userContext.tsx +++ b/datahub-web-react/src/app/context/userContext.tsx @@ -1,5 +1,6 @@ import React from 'react'; import { CorpUser, PlatformPrivileges } from '../../types.generated'; +import { CustomUserContextState, DEFAULT_CUSTOM_STATE } from './CustomUserContext'; /** * Local State is persisted to local storage. 
@@ -22,6 +23,7 @@ export type State = { loadedPersonalDefaultViewUrn: boolean; hasSetDefaultView: boolean; }; + customState?: CustomUserContextState; }; /** @@ -51,6 +53,7 @@ export const DEFAULT_STATE: State = { loadedPersonalDefaultViewUrn: false, hasSetDefaultView: false, }, + customState: DEFAULT_CUSTOM_STATE, }; export const DEFAULT_CONTEXT = { From fb08919f0457f45c8654c5882a6e5079bed9dac7 Mon Sep 17 00:00:00 2001 From: Deepali Jain <70557997+Deepalijain13@users.noreply.github.com> Date: Fri, 17 Jan 2025 06:30:59 +0530 Subject: [PATCH 102/249] feat(ui): Enhancements to the user pic list selection within entities (#11803) --- .../sidebar/Ownership/EditOwnersModal.tsx | 21 +++++++++++++++++-- .../src/app/shared/OwnerLabel.tsx | 12 +++++++++-- datahub-web-react/src/graphql/search.graphql | 4 ++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx index 62b967e8f7b30d..e57666471df1a6 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx @@ -78,10 +78,26 @@ export const EditOwnersModal = ({ const renderSearchResult = (entity: Entity) => { const avatarUrl = (entity.type === EntityType.CorpUser && (entity as CorpUser).editableProperties?.pictureLink) || undefined; + const corpUserDepartmentName = + (entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.departmentName) || ''; + const corpUserId = (entity.type === EntityType.CorpUser && (entity as CorpUser).username) || ''; + const corpUserTitle = (entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.title) || ''; const displayName = entityRegistry.getDisplayName(entity.type, entity); + return ( - - + } + > + ); }; @@ 
-381,6 +397,7 @@ export const EditOwnersModal = ({ value: owner.value.ownerUrn, label: owner.label, }))} + optionLabelProp="label" > {ownerSearchOptions} diff --git a/datahub-web-react/src/app/shared/OwnerLabel.tsx b/datahub-web-react/src/app/shared/OwnerLabel.tsx index de3c03dea2ba4a..fb670aa56d7881 100644 --- a/datahub-web-react/src/app/shared/OwnerLabel.tsx +++ b/datahub-web-react/src/app/shared/OwnerLabel.tsx @@ -20,14 +20,22 @@ type Props = { name: string; avatarUrl: string | undefined; type: EntityType; + corpUserId?: string; + corpUserTitle?: string; + corpUserDepartmentName?: string; }; -export const OwnerLabel = ({ name, avatarUrl, type }: Props) => { +export const OwnerLabel = ({ name, avatarUrl, type, corpUserId, corpUserTitle, corpUserDepartmentName }: Props) => { + const subHeader = [corpUserId, corpUserTitle, corpUserDepartmentName].filter(Boolean).join(' - '); + return ( -
{name}
+
+
{name}
+ {subHeader &&
{subHeader}
} +
); diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 72e7d347187828..de7d1befd39b08 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -433,6 +433,8 @@ fragment searchResultsWithoutSchemaField on Entity { lastName fullName email + departmentName + title } info { active @@ -442,6 +444,8 @@ fragment searchResultsWithoutSchemaField on Entity { lastName fullName email + departmentName + title } editableProperties { displayName From 825309ef5d172e38f69c382b712b5122c8d97656 Mon Sep 17 00:00:00 2001 From: BHADHRINATH U <91546378+Bhadhri03@users.noreply.github.com> Date: Fri, 17 Jan 2025 06:38:55 +0530 Subject: [PATCH 103/249] Fix(UI): Move setUpdatedName call inside updateName promise in Dataset name edit (#12232) Co-authored-by: Raj Tekal --- .../app/entity/shared/containers/profile/header/EntityName.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx index 8976629d9ef0b1..549724bd1945d9 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx @@ -48,9 +48,9 @@ function EntityName(props: Props) { setIsEditing(false); return; } - setUpdatedName(name); updateName({ variables: { input: { name, urn } } }) .then(() => { + setUpdatedName(name); setIsEditing(false); message.success({ content: 'Name Updated', duration: 2 }); refetch(); From 99ce309b3780ca2c767c9a1e5e498b58d4e9c89e Mon Sep 17 00:00:00 2001 From: Dmitry Bryazgin <58312247+bda618@users.noreply.github.com> Date: Thu, 16 Jan 2025 20:10:03 -0500 Subject: [PATCH 104/249] feat(datahub) Remove serialVersionUID from constructor (#12150) --- .../datahub/graphql/authorization/AuthorizationUtils.java | 4 
++++ 1 file changed, 4 insertions(+) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index c25d6af75fe76d..29d1c02dacb416 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -232,6 +232,10 @@ public static T restrictEntity(@Nonnull Object entity, Class clazz) { try { Object[] args = allFields.stream() + // New versions of graphql.codegen generate serialVersionUID + // We need to filter serialVersionUID out because serialVersionUID is + // never part of the entity type constructor + .filter(field -> !field.getName().contains("serialVersionUID")) .map( field -> { // properties are often not required but only because From 05ed277f5e3abd72d21dd836808c762b5c852ebe Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Thu, 16 Jan 2025 22:56:36 -0800 Subject: [PATCH 105/249] feat(graphql/versioning): Add versioning support to graphql; mutations return version set (#12358) Co-authored-by: RyanHolstien --- .../linkedin/datahub/graphql/Constants.java | 1 + .../datahub/graphql/GmsGraphQLEngine.java | 53 +++- .../resolvers/config/AppConfigResolver.java | 1 + .../versioning/LinkAssetVersionResolver.java | 38 ++- .../UnlinkAssetVersionResolver.java | 19 +- .../graphql/resolvers/search/SearchUtils.java | 107 +++++++ .../versioning/VersionsSearchResolver.java | 87 ++++++ .../mappers/SearchFlagsInputMapper.java | 3 + .../common/mappers/UrnToEntityMapper.java | 6 + .../graphql/types/dataset/DatasetType.java | 3 +- .../types/dataset/mappers/DatasetMapper.java | 7 + .../types/mlmodel/mappers/MLModelMapper.java | 7 + .../versioning/VersionPropertiesMapper.java | 53 ++++ .../types/versioning/VersionSetMapper.java | 47 +++ 
.../types/versioning/VersionSetType.java | 79 +++++ .../src/main/resources/app.graphql | 7 +- .../src/main/resources/entity.graphql | 65 +--- .../src/main/resources/search.graphql | 7 +- .../src/main/resources/versioning.graphql | 148 +++++++++ .../LinkAssetVersionResolverTest.java | 3 +- .../UnlinkAssetVersionResolverTest.java | 2 +- .../VersionsSearchResolverTest.java | 294 ++++++++++++++++++ datahub-web-react/src/Mocks.tsx | 1 + datahub-web-react/src/appConfigContext.tsx | 1 + datahub-web-react/src/graphql/app.graphql | 1 + datahub-web-react/src/graphql/dataset.graphql | 1 + datahub-web-react/src/graphql/mlModel.graphql | 1 + datahub-web-react/src/graphql/preview.graphql | 5 + datahub-web-react/src/graphql/search.graphql | 5 + .../src/graphql/versioning.graphql | 89 ++++++ 30 files changed, 1055 insertions(+), 86 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java create mode 100644 datahub-graphql-core/src/main/resources/versioning.graphql create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java create mode 100644 datahub-web-react/src/graphql/versioning.graphql diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index 69306862a46ef7..aec5352dec1a64 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -28,6 +28,7 @@ private Constants() {} public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql"; public static final String CONTRACTS_SCHEMA_FILE = "contract.graphql"; public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql"; + public static final String VERSION_SCHEMA_FILE = "versioning.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; public static final String BROWSE_PATH_V2_DELIMITER = "␟"; public static final String VERSION_STAMP_FIELD_NAME = "versionStamp"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index b15db80a8487ae..403e80a71380be 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -124,6 +124,8 @@ import com.linkedin.datahub.graphql.generated.TestResult; import com.linkedin.datahub.graphql.generated.TypeQualifier; import com.linkedin.datahub.graphql.generated.UserUsageCounts; +import com.linkedin.datahub.graphql.generated.VersionProperties; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.datahub.graphql.resolvers.MeResolver; import com.linkedin.datahub.graphql.resolvers.assertion.AssertionRunEventResolver; import com.linkedin.datahub.graphql.resolvers.assertion.DeleteAssertionResolver; @@ -324,6 +326,7 @@ import com.linkedin.datahub.graphql.resolvers.user.ListUsersResolver; import com.linkedin.datahub.graphql.resolvers.user.RemoveUserResolver; import com.linkedin.datahub.graphql.resolvers.user.UpdateUserStatusResolver; +import com.linkedin.datahub.graphql.resolvers.versioning.VersionsSearchResolver; import com.linkedin.datahub.graphql.resolvers.view.CreateViewResolver; import 
com.linkedin.datahub.graphql.resolvers.view.DeleteViewResolver; import com.linkedin.datahub.graphql.resolvers.view.ListGlobalViewsResolver; @@ -381,6 +384,7 @@ import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertyType; import com.linkedin.datahub.graphql.types.tag.TagType; import com.linkedin.datahub.graphql.types.test.TestType; +import com.linkedin.datahub.graphql.types.versioning.VersionSetType; import com.linkedin.datahub.graphql.types.view.DataHubViewType; import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.SystemEntityClient; @@ -537,6 +541,7 @@ public class GmsGraphQLEngine { private final IncidentType incidentType; private final RestrictedType restrictedType; private final DataProcessInstanceType dataProcessInstanceType; + private final VersionSetType versionSetType; private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; @@ -658,6 +663,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.incidentType = new IncidentType(entityClient); this.restrictedType = new RestrictedType(entityClient, restrictedService); this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags); + this.versionSetType = new VersionSetType(entityClient); this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit; @@ -707,6 +713,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { entityTypeType, formType, incidentType, + versionSetType, restrictedType, businessAttributeType, dataProcessInstanceType)); @@ -809,6 +816,8 @@ public void configureRuntimeWiring(final RuntimeWiring.Builder builder) { configureConnectionResolvers(builder); configureDeprecationResolvers(builder); configureMetadataAttributionResolver(builder); + configureVersionPropertiesResolvers(builder); + configureVersionSetResolvers(builder); } private void configureOrganisationRoleResolvers(RuntimeWiring.Builder 
builder) { @@ -863,7 +872,8 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(ASSERTIONS_SCHEMA_FILE)) .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)) .addSchema(fileBasedSchema(CONTRACTS_SCHEMA_FILE)) - .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE)); + .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE)) + .addSchema(fileBasedSchema(VERSION_SCHEMA_FILE)); for (GmsGraphQLPlugin plugin : this.graphQLPlugins) { List pluginSchemaFiles = plugin.getSchemaFiles(); @@ -1050,6 +1060,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("form", getResolver(formType)) .dataFetcher("view", getResolver(dataHubViewType)) .dataFetcher("structuredProperty", getResolver(structuredPropertyType)) + .dataFetcher("versionSet", getResolver(versionSetType)) .dataFetcher("listPolicies", new ListPoliciesResolver(this.entityClient)) .dataFetcher("getGrantedPrivileges", new GetGrantedPrivilegesResolver()) .dataFetcher("listUsers", new ListUsersResolver(this.entityClient)) @@ -2295,7 +2306,15 @@ private void configureTypeResolvers(final RuntimeWiring.Builder builder) { .type( "TimeSeriesAspect", typeWiring -> typeWiring.typeResolver(new TimeSeriesAspectInterfaceTypeResolver())) - .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver())); + .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver())) + .type( + "SupportsVersions", + typeWiring -> + typeWiring.typeResolver( + new EntityInterfaceTypeResolver( + loadableTypes.stream() + .map(graphType -> (EntityType) graphType) + .collect(Collectors.toList())))); } /** Configures custom type extensions leveraged within our GraphQL schema. 
*/ @@ -3322,4 +3341,34 @@ private void configureMetadataAttributionResolver(final RuntimeWiring.Builder bu entityTypes, (env) -> ((MetadataAttribution) env.getSource()).getSource()))); } + + private void configureVersionPropertiesResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "VersionProperties", + typeWiring -> + typeWiring.dataFetcher( + "versionSet", + new LoadableTypeResolver<>( + versionSetType, + (env) -> { + final VersionProperties versionProperties = env.getSource(); + return versionProperties != null + ? versionProperties.getVersionSet().getUrn() + : null; + }))); + } + + private void configureVersionSetResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "VersionSet", + typeWiring -> + typeWiring + .dataFetcher( + "latestVersion", + new EntityTypeResolver( + entityTypes, (env) -> ((VersionSet) env.getSource()).getLatestVersion())) + .dataFetcher( + "versionsSearch", + new VersionsSearchResolver(this.entityClient, this.viewService))); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java index 3647eb55b2583a..8cdc13a14be87c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java @@ -189,6 +189,7 @@ public CompletableFuture get(final DataFetchingEnvironment environmen .setEditableDatasetNameEnabled(_featureFlags.isEditableDatasetNameEnabled()) .setShowSeparateSiblings(_featureFlags.isShowSeparateSiblings()) .setShowManageStructuredProperties(_featureFlags.isShowManageStructuredProperties()) + .setEntityVersioningEnabled(_featureFlags.isEntityVersioning()) .build(); appConfig.setFeatureFlags(featureFlagsConfig); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java index 69e049af1e87b7..f32fd03a384005 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java @@ -12,7 +12,9 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; @@ -21,24 +23,22 @@ import io.datahubproject.metadata.context.OperationContext; import java.util.List; import java.util.concurrent.CompletableFuture; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang.StringUtils; /** * Currently only supports linking the latest version, but may be modified later to support inserts */ -public class LinkAssetVersionResolver implements DataFetcher> { +@Slf4j +@RequiredArgsConstructor +public class LinkAssetVersionResolver implements DataFetcher> { private final EntityVersioningService entityVersioningService; private final FeatureFlags featureFlags; - public LinkAssetVersionResolver( - EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { - this.entityVersioningService = entityVersioningService; - this.featureFlags = featureFlags; 
- } - @Override - public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); final LinkVersionInput input = bindArgument(environment.getArgument("input"), LinkVersionInput.class); @@ -75,12 +75,22 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws entityVersioningService.linkLatestVersion( opContext, versionSetUrn, entityUrn, versionPropertiesInput); - return linkResults.stream() - .filter( - ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString())) - .map(ingestResult -> ingestResult.getUrn().toString()) - .findAny() - .orElse(StringUtils.EMPTY); + String successVersionSetUrn = + linkResults.stream() + .filter( + ingestResult -> + input.getLinkedEntity().equals(ingestResult.getUrn().toString())) + .map(ingestResult -> ingestResult.getUrn().toString()) + .findAny() + .orElse(StringUtils.EMPTY); + + if (StringUtils.isEmpty(successVersionSetUrn)) { + return null; + } + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(versionSetUrn.toString()); + versionSet.setType(EntityType.VERSION_SET); + return versionSet; }, this.getClass().getSimpleName(), "get"); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java index 3d5027a0d668ac..33ab83a59c6771 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java @@ -12,14 +12,18 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import 
com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.versioning.EntityVersioningService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; +import java.util.List; import java.util.concurrent.CompletableFuture; -public class UnlinkAssetVersionResolver implements DataFetcher> { +public class UnlinkAssetVersionResolver implements DataFetcher> { private final EntityVersioningService entityVersioningService; private final FeatureFlags featureFlags; @@ -31,7 +35,7 @@ public UnlinkAssetVersionResolver( } @Override - public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { if (!featureFlags.isEntityVersioning()) { throw new IllegalAccessError( "Entity Versioning is not configured, please enable before attempting to use this feature."); @@ -58,8 +62,15 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } return GraphQLConcurrencyUtils.supplyAsync( () -> { - entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); - return true; + List results = + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + if (results.isEmpty() || results.stream().allMatch(RollbackResult::isNoOp)) { + return null; + } + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(versionSetUrn.toString()); + versionSet.setType(EntityType.VERSION_SET); + return versionSet; }, this.getClass().getSimpleName(), "get"); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java index a01b3aaec9c982..f105a72a1273ee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java @@ -18,13 +18,18 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.generated.SearchSortInput; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.common.mappers.SearchFlagsInputMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -33,24 +38,32 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.service.ViewService; import com.linkedin.view.DataHubViewInfo; import io.datahubproject.metadata.context.OperationContext; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import 
java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import org.codehaus.plexus.util.CollectionUtils; @Slf4j public class SearchUtils { private SearchUtils() {} + private static final int DEFAULT_SEARCH_COUNT = 10; + private static final int DEFAULT_SCROLL_COUNT = 10; + private static final String DEFAULT_SCROLL_KEEP_ALIVE = "5m"; + /** Entities that are searched by default in Search Across Entities */ public static final List SEARCHABLE_ENTITY_TYPES = ImmutableList.of( @@ -348,4 +361,98 @@ public static List getSortCriteria(@Nullable final SearchSortInpu return sortCriteria; } + + public static CompletableFuture searchAcrossEntities( + QueryContext inputContext, + final EntityClient _entityClient, + final ViewService _viewService, + List inputEntityTypes, + String inputQuery, + Filter baseFilter, + String viewUrn, + List sortCriteria, + com.linkedin.datahub.graphql.generated.SearchFlags inputSearchFlags, + Integer inputCount, + Integer inputStart, + String className) { + + final List entityTypes = + (inputEntityTypes == null || inputEntityTypes.isEmpty()) + ? SEARCHABLE_ENTITY_TYPES + : inputEntityTypes; + final List entityNames = + entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList()); + + // escape forward slash since it is a reserved character in Elasticsearch, default to * if + // blank/empty + final String query = + StringUtils.isNotBlank(inputQuery) ? 
ResolverUtils.escapeForwardSlash(inputQuery) : "*"; + + final Optional searchFlags = + Optional.ofNullable(inputSearchFlags) + .map((flags) -> SearchFlagsInputMapper.map(inputContext, flags)); + final OperationContext context = + inputContext.getOperationContext().withSearchFlags(searchFlags::orElse); + + final int count = Optional.ofNullable(inputCount).orElse(DEFAULT_SEARCH_COUNT); + final int start = Optional.ofNullable(inputStart).orElse(0); + + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + final OperationContext baseContext = inputContext.getOperationContext(); + final Optional maybeResolvedView = + Optional.ofNullable(viewUrn) + .map((urn) -> resolveView(baseContext, _viewService, UrnUtils.getUrn(urn))); + + final List finalEntityNames = + maybeResolvedView + .map( + (view) -> + intersectEntityTypes(entityNames, view.getDefinition().getEntityTypes())) + .orElse(entityNames); + + final Filter finalFilters = + maybeResolvedView + .map((view) -> combineFilters(baseFilter, view.getDefinition().getFilter())) + .orElse(baseFilter); + + log.debug( + "Executing search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", + finalEntityNames, + query, + finalFilters, + start, + count); + + try { + final SearchResult searchResult = + _entityClient.searchAcrossEntities( + context, + finalEntityNames, + query, + finalFilters, + start, + count, + sortCriteria, + null); + return UrnSearchResultsMapper.map(inputContext, searchResult); + } catch (Exception e) { + log.warn( + "Failed to execute search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", + finalEntityNames, + query, + finalFilters, + start, + count); + throw new RuntimeException( + "Failed to execute search: " + + String.format( + "entity types %s, query %s, filters: %s, start: %s, count: %s", + finalEntityNames, query, finalFilters, start, count), + e); + } + }, + className, + "searchAcrossEntities"); + } } diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java new file mode 100644 index 00000000000000..915e1cf00ebc6b --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java @@ -0,0 +1,87 @@ +package com.linkedin.datahub.graphql.resolvers.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; +import com.linkedin.datahub.graphql.resolvers.search.SearchUtils; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Stream; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** Resolver that 
executes a searchAcrossEntities only on a version set's versioned entities */ +@Slf4j +@RequiredArgsConstructor +public class VersionsSearchResolver implements DataFetcher> { + + private static final String VERSION_SET_FIELD_NAME = "versionSet"; + + private final EntityClient _entityClient; + private final ViewService _viewService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) { + final Entity entity = environment.getSource(); + final QueryContext context = environment.getContext(); + final SearchAcrossEntitiesInput input = + bindArgument(environment.getArgument("input"), SearchAcrossEntitiesInput.class); + + final Criterion versionSetFilter = + CriterionUtils.buildCriterion(VERSION_SET_FIELD_NAME, Condition.EQUAL, entity.getUrn()); + final Filter baseFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd(new CriterionArray(versionSetFilter)))); + final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + + final List initialSortCriteria = + SearchUtils.getSortCriteria(input.getSortInput()); + final List sortCriteria = + Stream.concat( + initialSortCriteria.stream(), + Stream.of( + new SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(SortOrder.DESCENDING))) + .toList(); + + SearchFlags searchFlags = Optional.ofNullable(input.getSearchFlags()).orElse(new SearchFlags()); + searchFlags.setFilterNonLatestVersions(false); + + return SearchUtils.searchAcrossEntities( + context, + _entityClient, + _viewService, + input.getTypes(), + input.getQuery(), + SearchUtils.combineFilters(inputFilter, baseFilter), + input.getViewUrn(), + sortCriteria, + searchFlags, + input.getCount(), + input.getStart(), + this.getClass().getSimpleName()); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java index 9f5025ccf303a2..0b3a445175c4c1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java @@ -69,6 +69,9 @@ public com.linkedin.metadata.query.SearchFlags apply( result.setCustomHighlightingFields( new StringArray(searchFlags.getCustomHighlightingFields())); } + if (searchFlags.getFilterNonLatestVersions() != null) { + result.setFilterNonLatestVersions(searchFlags.getFilterNonLatestVersions()); + } return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java index eae33e6da2e56d..b815c1b1c1dd9f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java @@ -41,6 +41,7 @@ import com.linkedin.datahub.graphql.generated.StructuredPropertyEntity; import com.linkedin.datahub.graphql.generated.Tag; import com.linkedin.datahub.graphql.generated.Test; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -231,6 +232,11 @@ public Entity apply(@Nullable QueryContext context, Urn input) { ((DataProcessInstance) partialEntity).setUrn(input.toString()); ((DataProcessInstance) partialEntity).setType(EntityType.DATA_PROCESS_INSTANCE); } + if (input.getEntityType().equals(VERSION_SET_ENTITY_NAME)) { + partialEntity = new VersionSet(); + ((VersionSet) 
partialEntity).setUrn(input.toString()); + ((VersionSet) partialEntity).setType(EntityType.VERSION_SET); + } return partialEntity; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index 6a3f9cb9b21f38..74ef4cf125cd24 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -89,7 +89,8 @@ public class DatasetType ACCESS_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, FORMS_ASPECT_NAME, - SUB_TYPES_ASPECT_NAME); + SUB_TYPES_ASPECT_NAME, + VERSION_PROPERTIES_ASPECT_NAME); private static final Set FACET_FIELDS = ImmutableSet.of("origin", "platform"); private static final String ENTITY_NAME = "dataset"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index e411014c23c89b..aa7033b180e80e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -17,6 +17,7 @@ import com.linkedin.common.Status; import com.linkedin.common.SubTypes; import com.linkedin.common.TimeStamp; +import com.linkedin.common.VersionProperties; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; @@ -48,6 +49,7 @@ import com.linkedin.datahub.graphql.types.rolemetadata.mappers.AccessMapper; import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; +import 
com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper; import com.linkedin.dataset.DatasetDeprecation; import com.linkedin.dataset.DatasetProperties; import com.linkedin.dataset.EditableDatasetProperties; @@ -183,6 +185,11 @@ public Dataset apply( SUB_TYPES_ASPECT_NAME, (dashboard, dataMap) -> dashboard.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); + mappingHelper.mapToResult( + VERSION_PROPERTIES_ASPECT_NAME, + (entity, dataMap) -> + entity.setVersionProperties( + VersionPropertiesMapper.map(context, new VersionProperties(dataMap)))); if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), Dataset.class); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java index 7102fd4aed9743..11e6b5180f8c1c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java @@ -13,6 +13,7 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.VersionProperties; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; @@ -38,6 +39,7 @@ import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; +import com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper; import com.linkedin.domain.Domains; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; @@ -180,6 
+182,11 @@ public MLModel apply( FORMS_ASPECT_NAME, ((entity, dataMap) -> entity.setForms(FormsMapper.map(new Forms(dataMap), entityUrn.toString())))); + mappingHelper.mapToResult( + VERSION_PROPERTIES_ASPECT_NAME, + (entity, dataMap) -> + entity.setVersionProperties( + VersionPropertiesMapper.map(context, new VersionProperties(dataMap)))); if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), MLModel.class); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java new file mode 100644 index 00000000000000..f89ebdc9f2b043 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java @@ -0,0 +1,53 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionProperties; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.datahub.graphql.types.mappers.MapperUtils; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.mlmodel.mappers.VersionTagMapper; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class VersionPropertiesMapper + implements ModelMapper { + public static final VersionPropertiesMapper INSTANCE = new VersionPropertiesMapper(); + + public static VersionProperties map( + @Nullable QueryContext context, + @Nonnull final com.linkedin.common.VersionProperties versionProperties) { + return INSTANCE.apply(context, versionProperties); + } + + @Override + public VersionProperties apply( + @Nullable QueryContext context, 
@Nonnull com.linkedin.common.VersionProperties input) { + final VersionProperties result = new VersionProperties(); + + result.setVersionSet( + VersionSet.builder() + .setUrn(input.getVersionSet().toString()) + .setType(EntityType.VERSION_SET) + .build()); + + result.setVersion(VersionTagMapper.map(context, input.getVersion())); + result.setAliases( + input.getAliases().stream() + .map(alias -> VersionTagMapper.map(context, alias)) + .collect(Collectors.toList())); + result.setComment(input.getComment()); + result.setIsLatest(Boolean.TRUE.equals(input.isIsLatest())); + + if (input.getMetadataCreatedTimestamp() != null) { + result.setCreated(MapperUtils.createResolvedAuditStamp(input.getMetadataCreatedTimestamp())); + } + if (input.getSourceCreatedTimestamp() != null) { + result.setCreatedInSource( + MapperUtils.createResolvedAuditStamp(input.getSourceCreatedTimestamp())); + } + + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java new file mode 100644 index 00000000000000..3a07115ece5f6e --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java @@ -0,0 +1,47 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; + +import com.linkedin.data.DataMap; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; +import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspectMap; +import 
javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class VersionSetMapper implements ModelMapper { + + public static final VersionSetMapper INSTANCE = new VersionSetMapper(); + + public static VersionSet map( + @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { + return INSTANCE.apply(context, entityResponse); + } + + @Override + public VersionSet apply(@Nullable QueryContext context, @Nonnull EntityResponse entityResponse) { + final VersionSet result = new VersionSet(); + result.setUrn(entityResponse.getUrn().toString()); + result.setType(EntityType.VERSION_SET); + + EnvelopedAspectMap aspectMap = entityResponse.getAspects(); + MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); + mappingHelper.mapToResult( + VERSION_SET_PROPERTIES_ASPECT_NAME, + (versionSet, dataMap) -> mapVersionSetProperties(context, versionSet, dataMap)); + + return result; + } + + private void mapVersionSetProperties( + @Nullable QueryContext context, @Nonnull VersionSet versionSet, @Nonnull DataMap dataMap) { + com.linkedin.versionset.VersionSetProperties versionProperties = + new com.linkedin.versionset.VersionSetProperties(dataMap); + versionSet.setLatestVersion(UrnToEntityMapper.map(context, versionProperties.getLatest())); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java new file mode 100644 index 00000000000000..ed2beff4530949 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java @@ -0,0 +1,79 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; 
+import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class VersionSetType + implements com.linkedin.datahub.graphql.types.EntityType { + + public static final Set ASPECTS_TO_FETCH = + ImmutableSet.of(VERSION_SET_PROPERTIES_ASPECT_NAME); + private final EntityClient _entityClient; + + @Override + public EntityType type() { + return EntityType.VERSION_SET; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return VersionSet.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + final List versionSetUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + try { + final Map entities = + _entityClient.batchGetV2( + context.getOperationContext(), + VERSION_SET_ENTITY_NAME, + new HashSet<>(versionSetUrns), + ASPECTS_TO_FETCH); + + final List gmsResults = new ArrayList<>(); + for (Urn urn : versionSetUrns) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? 
null + : DataFetcherResult.newResult() + .data(VersionSetMapper.map(context, gmsResult)) + .build()) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load Queries", e); + } + } +} diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql index 28688903687235..ca7f89415f6b87 100644 --- a/datahub-graphql-core/src/main/resources/app.graphql +++ b/datahub-graphql-core/src/main/resources/app.graphql @@ -531,6 +531,11 @@ type FeatureFlagsConfig { If turned on, show the manage structured properties tab in the govern dropdown """ showManageStructuredProperties: Boolean! + + """ + If turned on, exposes the versioning feature by allowing users to link entities in the UI. + """ + entityVersioningEnabled: Boolean! } """ @@ -573,4 +578,4 @@ type DocPropagationSettings { The default doc propagation setting for the platform. """ docColumnPropagation: Boolean -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index b47be7ae32b2c4..51909ae72c56b0 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,16 +956,6 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean - - """ - Link the latest versioned entity to a Version Set - """ - linkAssetVersion(input: LinkVersionInput!): String - - """ - Unlink a versioned entity from a Version Set - """ - unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -1231,6 +1221,11 @@ enum EntityType { A Business Attribute """ BUSINESS_ATTRIBUTE + + """ + A set of versioned entities, representing a single source / logical entity over time + """ + VERSION_SET } """ @@ -12921,56 +12916,6 @@ input ListBusinessAttributesInput { query: String } -""" -Input for linking a versioned 
entity to a Version Set -""" -input LinkVersionInput { - """ - The target version set - """ - versionSet: String! - - """ - The target versioned entity to link - """ - linkedEntity: String! - - """ - Version Tag label for the version, should be unique within a Version Set - """ - version: String! - - """ - Optional timestamp from the source system - """ - sourceTimestamp: Long - - """ - Optional creator from the source system, will be converted to an Urn - """ - sourceCreator: String - - """ - Optional comment about the version - """ - comment: String -} - -""" -Input for unlinking a versioned entity from a Version Set -""" -input UnlinkVersionInput { - """ - The target version set - """ - versionSet: String - - """ - The target versioned entity to unlink - """ - unlinkedEntity: String -} - """ The result obtained when listing Business Attribute """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 82bfb9ee26fc42..d8f17faa3d11c2 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -172,6 +172,11 @@ input SearchFlags { Whether or not to fetch and request for structured property facets when doing a search """ includeStructuredPropertyFacets: Boolean + + """ + Determines whether to filter out any non-latest entity version if entity is part of a Version Set, default true + """ + filterNonLatestVersions: Boolean } """ @@ -1497,4 +1502,4 @@ input GroupingCriterion { """ groupingEntityType: EntityType! 
-} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/versioning.graphql b/datahub-graphql-core/src/main/resources/versioning.graphql new file mode 100644 index 00000000000000..4a63463509c84d --- /dev/null +++ b/datahub-graphql-core/src/main/resources/versioning.graphql @@ -0,0 +1,148 @@ +type VersionSet implements Entity { + """ + The primary key of the VersionSet + """ + urn: String! + + """ + The standard Entity Type + """ + type: EntityType! + + """ + Granular API for querying edges extending from this entity + """ + relationships(input: RelationshipsInput!): EntityRelationshipsResult + + """ + The latest versioned entity linked to in this version set + """ + latestVersion: Entity + + """ + Executes a search on all versioned entities linked to this version set + By default sorts by sortId in descending order + """ + versionsSearch(input: SearchAcrossEntitiesInput!): SearchResults +} + +type VersionProperties { + """ + The linked Version Set entity that ties multiple versioned assets together + """ + versionSet: VersionSet! + + """ + Label for this versioned asset, should be unique within a version set (not enforced) + """ + version: VersionTag! + + """ + Additional version identifiers for this versioned asset. + """ + aliases: [VersionTag!]! + + """ + Comment documenting what this version was created for, changes, or represents + """ + comment: String + + """ + Whether this version is currently the latest in its verison set + """ + isLatest: Boolean! + + """ + Timestamp reflecting when the metadata for this version was created in DataHub + """ + created: ResolvedAuditStamp + + """ + Timestamp reflecting when the metadata for this version was created in DataHub + """ + createdInSource: ResolvedAuditStamp +} + +interface SupportsVersions { + """ + Indicates that this entity is versioned and provides information about the version. 
+ """ + versionProperties: VersionProperties +} + +extend type Dataset implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type MLModel implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type Query { + """ + Fetch a Version Set by its URN + """ + versionSet(urn: String!): VersionSet +} + +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! + + """ + Version Tag label for the version, should be unique within a version set (not enforced) + """ + version: String! + + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + +extend type Mutation { + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): VersionSet + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): VersionSet +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java index 690856263fccc5..c2eb92f4d1cd4c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -56,8 +56,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - String result = resolver.get(mockEnv).get(); - assertEquals(result, TEST_ENTITY_URN); + assertEquals(resolver.get(mockEnv).get().getUrn(), TEST_VERSION_SET_URN); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java index 0000ad24a04537..e162ce96e627c6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -48,7 +48,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - assertTrue(resolver.get(mockEnv).get()); + assertEquals(resolver.get(mockEnv).get(), null); Mockito.verify(mockService) .unlinkVersion( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java new file mode 100644 index 00000000000000..3554df074df698 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java @@ -0,0 +1,294 @@ +package com.linkedin.datahub.graphql.resolvers.versioning; + +import static 
com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.*; +import static org.mockito.ArgumentMatchers.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AndFilterInput; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.generated.SearchSortInput; +import com.linkedin.datahub.graphql.generated.SortCriterion; +import com.linkedin.datahub.graphql.generated.SortOrder; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.view.DataHubViewDefinition; +import com.linkedin.view.DataHubViewInfo; +import 
com.linkedin.view.DataHubViewType; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class VersionsSearchResolverTest { + + private static final String VERSION_SET_URN = "urn:li:versionSet:(my_version_set,dataset)"; + private static final Urn TEST_VIEW_URN = UrnUtils.getUrn("urn:li:dataHubView:test"); + private static final Urn TEST_USER_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + + private static final SearchAcrossEntitiesInput BASIC_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.DATASET), "", 0, 10, null, null, null, null, null); + + private static final SearchAcrossEntitiesInput COMPLEX_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.CHART, EntityType.DATASET), + "query", + 2, + 5, + null, + List.of( + AndFilterInput.builder() + .setAnd( + List.of( + FacetFilterInput.builder() + .setField("field1") + .setValues(List.of("1", "2")) + .build(), + FacetFilterInput.builder() + .setField("field2") + .setValues(List.of("a")) + .build())) + .build(), + AndFilterInput.builder() + .setAnd( + List.of( + FacetFilterInput.builder() + .setField("field3") + .setValues(List.of("3", "4")) + .build(), + FacetFilterInput.builder() + .setField("field4") + .setValues(List.of("b")) + .build())) + .build()), + TEST_VIEW_URN.toString(), + SearchFlags.builder().setSkipCache(true).build(), + SearchSortInput.builder() + .setSortCriteria( + List.of( + SortCriterion.builder() + .setField("sortField1") + .setSortOrder(SortOrder.DESCENDING) + .build(), + SortCriterion.builder() + .setField("sortField2") + .setSortOrder(SortOrder.ASCENDING) + .build())) + .build()); + + @Test + public void testGetSuccessBasic() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + VersionsSearchResolver resolver = new 
VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("*"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN)))))), + Mockito.eq(0), + Mockito.eq(10), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testGetSuccessComplex() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + + Filter viewFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + List.of(buildCriterion("viewField", Condition.EQUAL, "test")))))); + DataHubViewInfo viewInfo = + new DataHubViewInfo() + .setName("test") + .setType(DataHubViewType.GLOBAL) + .setCreated(new AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setLastModified(new 
AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setDefinition( + new DataHubViewDefinition() + .setEntityTypes( + new StringArray( + List.of( + Constants.DATASET_ENTITY_NAME, Constants.DASHBOARD_ENTITY_NAME))) + .setFilter(viewFilter)); + ViewService mockViewService = Mockito.mock(ViewService.class); + Mockito.when(mockViewService.getViewInfo(any(), Mockito.eq(TEST_VIEW_URN))) + .thenReturn(viewInfo); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(COMPLEX_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions() + && context.getSearchContext().getSearchFlags().isSkipCache()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("query"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "field1", Condition.EQUAL, "1", "2"), + CriterionUtils.buildCriterion( + "field2", Condition.EQUAL, "a"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test"))), + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + 
CriterionUtils.buildCriterion( + "field3", Condition.EQUAL, "3", "4"), + CriterionUtils.buildCriterion( + "field4", Condition.EQUAL, "b"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test")))))), + Mockito.eq(2), + Mockito.eq(5), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField1") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField2") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.ASCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testThrowsError() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + + Mockito.when( + mockEntityClient.searchAcrossEntities( + any(), any(), any(), any(), Mockito.anyInt(), Mockito.anyInt(), any(), any())) + .thenThrow(new RemoteInvocationException()); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } + + private EntityClient initMockEntityClient() throws Exception { + EntityClient client = Mockito.mock(EntityClient.class); + + Mockito.when( + 
client.searchAcrossEntities( + any(), + any(), + Mockito.anyString(), + any(), + Mockito.anyInt(), + Mockito.anyInt(), + any(), + Mockito.eq(null))) + .thenReturn( + new SearchResult() + .setEntities(new SearchEntityArray()) + .setNumEntities(0) + .setFrom(0) + .setPageSize(0) + .setMetadata(new SearchResultMetadata())); + + return client; + } +} diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 2da9e733eb4072..063b784920e234 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -645,6 +645,7 @@ export const dataset3 = { structuredProperties: null, forms: null, activeIncidents: null, + versionProperties: null, } as Dataset; export const dataset3WithSchema = { diff --git a/datahub-web-react/src/appConfigContext.tsx b/datahub-web-react/src/appConfigContext.tsx index d7fef85db4b625..8ac18d0142b4e9 100644 --- a/datahub-web-react/src/appConfigContext.tsx +++ b/datahub-web-react/src/appConfigContext.tsx @@ -57,6 +57,7 @@ export const DEFAULT_APP_CONFIG = { editableDatasetNameEnabled: false, showSeparateSiblings: false, showManageStructuredProperties: false, + entityVersioningEnabled: false, }, }; diff --git a/datahub-web-react/src/graphql/app.graphql b/datahub-web-react/src/graphql/app.graphql index 0d1999f82f77cd..c1fe50d7620a3c 100644 --- a/datahub-web-react/src/graphql/app.graphql +++ b/datahub-web-react/src/graphql/app.graphql @@ -72,6 +72,7 @@ query appConfig { editableDatasetNameEnabled showSeparateSiblings showManageStructuredProperties + entityVersioningEnabled } } } diff --git a/datahub-web-react/src/graphql/dataset.graphql b/datahub-web-react/src/graphql/dataset.graphql index fcca919f614235..8bbeb304aae2cc 100644 --- a/datahub-web-react/src/graphql/dataset.graphql +++ b/datahub-web-react/src/graphql/dataset.graphql @@ -172,6 +172,7 @@ fragment nonSiblingDatasetFields on Dataset { forms { ...formsFields } + ...entityProfileVersionProperties } query getRecentQueries($urn: String!) 
{ diff --git a/datahub-web-react/src/graphql/mlModel.graphql b/datahub-web-react/src/graphql/mlModel.graphql index 2192888caef701..ad97c7c6f530a1 100644 --- a/datahub-web-react/src/graphql/mlModel.graphql +++ b/datahub-web-react/src/graphql/mlModel.graphql @@ -34,5 +34,6 @@ query getMLModel($urn: String!) { forms { ...formsFields } + ...entityProfileVersionProperties } } diff --git a/datahub-web-react/src/graphql/preview.graphql b/datahub-web-react/src/graphql/preview.graphql index 1bee614dd7adbe..8000f59f2bf258 100644 --- a/datahub-web-react/src/graphql/preview.graphql +++ b/datahub-web-react/src/graphql/preview.graphql @@ -346,4 +346,9 @@ fragment entityPreview on Entity { ... on Container { ...entityContainer } + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } } diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index de7d1befd39b08..9edd6754022866 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -949,6 +949,11 @@ fragment searchResultsWithoutSchemaField on Entity { ... on StructuredPropertyEntity { ...structuredPropertyFields } + ... 
on SupportsVersions { + versionProperties { + ...versionProperties + } + } } fragment searchResultFields on Entity { diff --git a/datahub-web-react/src/graphql/versioning.graphql b/datahub-web-react/src/graphql/versioning.graphql new file mode 100644 index 00000000000000..e9b6b82494b6ed --- /dev/null +++ b/datahub-web-react/src/graphql/versioning.graphql @@ -0,0 +1,89 @@ +fragment versionProperties on VersionProperties { + versionSet { + urn + type + } + isLatest + version { + versionTag + } + aliases { + versionTag + } + comment + created { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } + createdInSource { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } +} + +fragment versionsSearchResults on SearchResults { + count + total + searchResults { + entity { + urn + type + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } + } + } +} + +fragment entityProfileVersionProperties on SupportsVersions { + versionProperties { + ...versionProperties + versionSet { + urn + type + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } + } +} + +query searchAcrossVersions($versionSetUrn: String!, $input: SearchAcrossEntitiesInput!) { + versionSet(urn: $versionSetUrn) { + versionsSearch(input: $input) { + ...versionsSearchResults + } + } +} + +mutation linkAssetVersion($input: LinkVersionInput!) { + linkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} + +mutation unlinkAssetVersion($input: UnlinkVersionInput!) 
{ + unlinkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} From 4de7f61d0924dd66e86c8a31686fdf4e84a474da Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 17 Jan 2025 21:38:16 +0530 Subject: [PATCH 106/249] fix(ingest): log exception properly (#12372) --- metadata-ingestion/src/datahub/ingestion/run/pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index ee1c1608cd48c6..ef59ba7a3b58b4 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -76,8 +76,9 @@ def on_failure( failure_metadata: dict, ) -> None: logger.error( - f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}" - f" with {failure_exception} and info {failure_metadata}" + f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}", + extra={"failure_metadata": failure_metadata}, + exc_info=failure_exception, ) From 76e46b89dbcb0dc12e3524bbbfdf177d5db93473 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 17 Jan 2025 21:38:29 +0530 Subject: [PATCH 107/249] dev(ingest): move modules from isort,flake8 to ruff (#12373) --- .../airflow-plugin/build.gradle | 6 +-- .../airflow-plugin/pyproject.toml | 53 ++++++++++++++++--- .../airflow-plugin/setup.cfg | 21 -------- .../airflow-plugin/setup.py | 4 +- .../src/datahub_airflow_plugin/_config.py | 5 +- .../_datahub_ol_adapter.py | 3 +- .../src/datahub_airflow_plugin/_extractors.py | 23 ++++---- .../client/airflow_generator.py | 2 +- .../datahub_listener.py | 10 ++-- .../datahub_airflow_plugin/datahub_plugin.py | 6 +-- .../datahub_plugin_v22.py | 2 +- .../src/datahub_airflow_plugin/entities.py | 1 + .../example_dags/generic_recipe_sample_dag.py | 1 + 
.../example_dags/graph_usage_sample_dag.py | 2 +- .../example_dags/lineage_emission_dag.py | 2 +- .../datahub_airflow_plugin/hooks/datahub.py | 2 + .../lineage/_lineage_core.py | 1 - .../operators/datahub.py | 2 +- .../operators/datahub_assertion_operator.py | 2 +- .../operators/datahub_assertion_sensor.py | 2 +- .../operators/datahub_operation_operator.py | 2 +- .../operators/datahub_operation_sensor.py | 2 +- ...hub_emitter_operator_jinja_template_dag.py | 2 +- .../tests/integration/test_plugin.py | 2 +- .../airflow-plugin/tests/unit/test_airflow.py | 2 +- .../tests/unit/test_packaging.py | 2 +- .../dagster-plugin/build.gradle | 6 +-- .../dagster-plugin/pyproject.toml | 52 +++++++++++++++--- .../dagster-plugin/setup.cfg | 21 -------- .../dagster-plugin/setup.py | 5 +- .../client/dagster_generator.py | 2 + .../sensors/datahub_sensors.py | 6 ++- .../dagster-plugin/tests/unit/test_dagster.py | 4 +- .../gx-plugin/pyproject.toml | 3 -- .../prefect-plugin/pyproject.toml | 3 -- metadata-ingestion/pyproject.toml | 12 +---- .../api/entities/dataproduct/dataproduct.py | 2 +- .../datahub/ingestion/source/abs/source.py | 2 +- .../ingestion/source/dremio/dremio_api.py | 2 +- .../ingestion/source/neo4j/neo4j_source.py | 2 +- .../src/datahub/ingestion/source/s3/source.py | 2 +- .../ingestion/source/schema/json_schema.py | 2 +- .../ingestion/source/sql/clickhouse.py | 2 +- .../ingestion/source/tableau/tableau.py | 2 +- .../src/datahub/testing/mcp_diff.py | 2 +- .../src/datahub/utilities/sqllineage_patch.py | 2 +- .../integration/powerbi/test_m_parser.py | 4 +- .../tests/integration/powerbi/test_powerbi.py | 4 +- .../tests/performance/data_generation.py | 4 +- smoke-test/pyproject.toml | 2 - 50 files changed, 166 insertions(+), 144 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index 68a35c0dfc417b..1bcb58e6b7c543 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ 
b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -74,16 +74,14 @@ task lint(type: Exec, dependsOn: installDev) { "find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " + "source ${venv_name}/bin/activate && set -x && " + "black --check --diff src/ tests/ && " + - "isort --check --diff src/ tests/ && " + - "flake8 --count --statistics src/ tests/ && " + + "ruff check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black src/ tests/ && " + - "isort src/ tests/ && " + - "flake8 src/ tests/ && " + + "ruff check --fix src/ tests/" "mypy src/ tests/ " } diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml index 648040c1951db8..7d03c2a14bf078 100644 --- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml +++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml @@ -10,11 +10,50 @@ extend-exclude = ''' ''' include = '\.pyi?$' -[tool.isort] -indent = ' ' -known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat'] -profile = 'black' -sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["datahub"] +extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] +force-sort-within-sections = false +force-wrap-aliases = false +split-on-trailing-comma = false +order-by-type = true +relative-imports-order = "closest-to-furthest" +force-single-line = false +single-line-exclusions = ["typing"] +length-sort = false +from-first = false +required-imports = [] +classes = 
["typing"] -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file +[tool.ruff.lint] +select = [ + "B", + "C90", + "E", + "F", + "I", # For isort + "TID", +] +ignore = [ + # Ignore line length violations (handled by Black) + "E501", + # Ignore whitespace before ':' (matches Black) + "E203", + "E203", + # Allow usages of functools.lru_cache + "B019", + # Allow function call in argument defaults + "B008", +] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.lint.flake8-tidy-imports] +# Disallow all relative imports. +ban-relative-imports = "all" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/setup.cfg b/metadata-ingestion-modules/airflow-plugin/setup.cfg index c25256c5751b8d..abb9040ab3535a 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.cfg +++ b/metadata-ingestion-modules/airflow-plugin/setup.cfg @@ -1,24 +1,3 @@ -[flake8] -max-complexity = 15 -ignore = - # Ignore: line length issues, since black's formatter will take care of them. - E501, - # Ignore: 1 blank line required before class docstring. - D203, - # See https://stackoverflow.com/a/57074416. - W503, - # See https://github.com/psf/black/issues/315. - E203 -exclude = - .git, - venv, - .tox, - __pycache__ -per-file-ignores = - # imported but unused - __init__.py: F401 -ban-relative-imports = true - [mypy] plugins = sqlmypy, diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index d07063dbffc5c4..2fd74b37e89c05 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -73,9 +73,7 @@ def get_long_description(): *mypy_stubs, "black==22.12.0", "coverage>=5.1", - "flake8>=3.8.3", - "flake8-tidy-imports>=4.3.0", - "isort>=5.7.0", + "ruff==0.9.1", "mypy==1.10.1", # pydantic 1.8.2 is incompatible with mypy 0.910. 
# See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py index c4964712cf9f7d..6d6ba601556788 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -1,11 +1,12 @@ from enum import Enum from typing import TYPE_CHECKING, Optional -import datahub.emitter.mce_builder as builder from airflow.configuration import conf -from datahub.configuration.common import AllowDenyPattern, ConfigModel from pydantic.fields import Field +import datahub.emitter.mce_builder as builder +from datahub.configuration.common import AllowDenyPattern, ConfigModel + if TYPE_CHECKING: from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py index 69de61aced0a59..72cdcd8813252a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py @@ -1,8 +1,9 @@ import logging -import datahub.emitter.mce_builder as builder from openlineage.client.run import Dataset as OpenLineageDataset +import datahub.emitter.mce_builder as builder + logger = logging.getLogger(__name__) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py index 28d5775f61f542..fd01ac10f98de9 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py +++ 
b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -3,17 +3,11 @@ import unittest.mock from typing import TYPE_CHECKING, Optional -import datahub.emitter.mce_builder as builder -from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( - get_platform_from_sqlalchemy_uri, -) -from datahub.sql_parsing.sqlglot_lineage import ( - SqlParsingResult, - create_lineage_sql_parsed_result, +from openlineage.airflow.extractors import ( + BaseExtractor, + ExtractorManager as OLExtractorManager, + TaskMetadata, ) -from openlineage.airflow.extractors import BaseExtractor -from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager -from openlineage.airflow.extractors import TaskMetadata from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor from openlineage.airflow.extractors.sql_extractor import SqlExtractor from openlineage.airflow.utils import get_operator_class, try_import_from_string @@ -23,11 +17,20 @@ SqlJobFacet, ) +import datahub.emitter.mce_builder as builder +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) +from datahub.sql_parsing.sqlglot_lineage import ( + SqlParsingResult, + create_lineage_sql_parsed_result, +) from datahub_airflow_plugin._airflow_shims import Operator from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS if TYPE_CHECKING: from airflow.models import DagRun, TaskInstance + from datahub.ingestion.graph.client import DataHubGraph logger = logging.getLogger(__name__) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index e9f93c0c1eab0a..c1ccdaeb0a1fbd 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ 
b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast from airflow.configuration import conf + from datahub.api.entities.datajob import DataFlow, DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( DataProcessInstance, @@ -11,7 +12,6 @@ from datahub.metadata.schema_classes import DataProcessTypeClass from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn - from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED from datahub_airflow_plugin._config import DatahubLineageConfig, DatajobUrl diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index 640991a90a1d28..9de44811f60a48 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -8,9 +8,13 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast import airflow -import datahub.emitter.mce_builder as builder from airflow.models import Variable from airflow.models.serialized_dag import SerializedDagModel +from openlineage.airflow.listener import TaskHolder +from openlineage.airflow.utils import redact_with_exclusions +from openlineage.client.serde import Serde + +import datahub.emitter.mce_builder as builder from datahub.api.entities.datajob import DataJob from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -30,10 +34,6 @@ ) from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult from datahub.telemetry import telemetry -from openlineage.airflow.listener import TaskHolder 
-from openlineage.airflow.utils import redact_with_exclusions -from openlineage.client.serde import Serde - from datahub_airflow_plugin._airflow_shims import ( HAS_AIRFLOW_DAG_LISTENER_API, HAS_AIRFLOW_DATASET_LISTENER_API, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index 137cf97f69280a..7638720db023ac 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -15,9 +15,9 @@ logger = logging.getLogger(__name__) -_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv( +_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and os.getenv( "DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false" -).lower() in ("true", "1") +).lower() not in ("true", "1") if _USE_AIRFLOW_LISTENER_INTERFACE: try: @@ -32,7 +32,7 @@ with contextlib.suppress(Exception): - if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in ( + if os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() not in ( "true", "1", ): diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index db47f37bed562e..4bf050d41473e4 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -7,9 +7,9 @@ from airflow.lineage import PIPELINE_OUTLETS from airflow.models.baseoperator import BaseOperator from airflow.utils.module_loading import import_string + from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.telemetry import telemetry - from 
datahub_airflow_plugin._airflow_shims import ( MappedOperator, get_task_inlets, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py index 30b35ac6d6198b..f3fd17259c9f63 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py @@ -2,6 +2,7 @@ from typing import List, Optional import attr + import datahub.emitter.mce_builder as builder from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.dataset_urn import DatasetUrn diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py index ff8dba457066fd..ac620852c6f288 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py @@ -9,6 +9,7 @@ from airflow import DAG from airflow.operators.python import PythonOperator from airflow.utils.dates import days_ago + from datahub.configuration.config_loader import load_config_file from datahub.ingestion.run.pipeline import Pipeline diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py index d72ba67c23cd72..7951d6f7fd21ef 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py @@ -4,8 +4,8 @@ import 
pendulum from airflow.decorators import dag, task -from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter +from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py index 0d7cdb6b6e90a5..4351f40fe7e3ad 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py @@ -4,11 +4,11 @@ """ from datetime import timedelta -import datahub.emitter.mce_builder as builder from airflow import DAG from airflow.operators.bash import BashOperator from airflow.utils.dates import days_ago +import datahub.emitter.mce_builder as builder from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator default_args = { diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py index 5f4d787fb893d3..26c5026c075bd7 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py @@ -2,6 +2,7 @@ from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook + from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( @@ -11,6 +12,7 @@ if TYPE_CHECKING: from airflow.models.connection import Connection + from datahub.emitter.kafka_emitter import DatahubKafkaEmitter from datahub.emitter.rest_emitter 
import DataHubRestEmitter from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py index 638458b0efd6ab..db50c48dfaf08a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING, Dict, List from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult - from datahub_airflow_plugin._config import DatahubLineageConfig from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator from datahub_airflow_plugin.entities import ( diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py index 41d855512aa066..817db6b7480c4b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py @@ -3,9 +3,9 @@ from airflow.models import BaseOperator from airflow.utils.decorators import apply_defaults from avrogen.dict_wrapper import DictWrapper + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent - from datahub_airflow_plugin.hooks.datahub import ( DatahubGenericHook, DatahubKafkaHook, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py index 6f93c09a9e2872..3a440b0ec14e07 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.models import BaseOperator + from datahub.api.circuit_breaker import ( AssertionCircuitBreaker, AssertionCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py index 16e5d1cbe8b1f4..6a446ba1f3b55e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( AssertionCircuitBreaker, AssertionCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py index 94e105309537b6..eb5fe8168bccf8 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( 
OperationCircuitBreaker, OperationCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py index 434c60754064d0..89e20e46a0074a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( OperationCircuitBreaker, OperationCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py index c1b4aa4d7b94f4..04845e601d674d 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta from airflow import DAG + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.schema_classes import ( @@ -9,7 +10,6 @@ DatasetPropertiesClass, DatasetSnapshotClass, ) - from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator default_args = { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py 
b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 3b2c9140e4632f..d2c9821295419c 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -17,9 +17,9 @@ import requests import tenacity from airflow.models.connection import Connection + from datahub.ingestion.sink.file import write_metadata_file from datahub.testing.compare_metadata_json import assert_metadata_files_equal - from datahub_airflow_plugin._airflow_shims import ( AIRFLOW_VERSION, HAS_AIRFLOW_DAG_LISTENER_API, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py index 79620f81a437b0..1dc8e14a425dfc 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py @@ -8,12 +8,12 @@ import airflow.configuration import airflow.version -import datahub.emitter.mce_builder as builder import packaging.version import pytest from airflow.lineage import apply_lineage, prepare_lineage from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance +import datahub.emitter.mce_builder as builder from datahub_airflow_plugin import get_provider_info from datahub_airflow_plugin._airflow_shims import ( AIRFLOW_PATCHED, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py index a9c8b7ec65fa3c..a822527582c2cd 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py @@ -1,6 +1,6 @@ import setuptools -from datahub.testing.check_imports import ensure_no_indirect_model_imports +from datahub.testing.check_imports import ensure_no_indirect_model_imports from tests.utils import 
PytestConfig diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle index 0d57bb5bfdff70..503b3556a41bfe 100644 --- a/metadata-ingestion-modules/dagster-plugin/build.gradle +++ b/metadata-ingestion-modules/dagster-plugin/build.gradle @@ -55,16 +55,14 @@ task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black --check --diff src/ tests/ examples/ && " + - "isort --check --diff src/ tests/ examples/ && " + - "flake8 --count --statistics src/ tests/ examples/ && " + + "ruff check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/ examples/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && " + "black src/ tests/ examples/ && " + - "isort src/ tests/ examples/ && " + - "flake8 src/ tests/ examples/ && " + + "ruff check --fix src/ tests/" "mypy src/ tests/ examples/" } diff --git a/metadata-ingestion-modules/dagster-plugin/pyproject.toml b/metadata-ingestion-modules/dagster-plugin/pyproject.toml index fba81486b9f677..7d03c2a14bf078 100644 --- a/metadata-ingestion-modules/dagster-plugin/pyproject.toml +++ b/metadata-ingestion-modules/dagster-plugin/pyproject.toml @@ -10,10 +10,50 @@ extend-exclude = ''' ''' include = '\.pyi?$' -[tool.isort] -indent = ' ' -profile = 'black' -sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["datahub"] +extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] +force-sort-within-sections = false +force-wrap-aliases = false +split-on-trailing-comma = false +order-by-type = true +relative-imports-order = "closest-to-furthest" +force-single-line = false +single-line-exclusions 
= ["typing"] +length-sort = false +from-first = false +required-imports = [] +classes = ["typing"] -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file +[tool.ruff.lint] +select = [ + "B", + "C90", + "E", + "F", + "I", # For isort + "TID", +] +ignore = [ + # Ignore line length violations (handled by Black) + "E501", + # Ignore whitespace before ':' (matches Black) + "E203", + "E203", + # Allow usages of functools.lru_cache + "B019", + # Allow function call in argument defaults + "B008", +] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.lint.flake8-tidy-imports] +# Disallow all relative imports. +ban-relative-imports = "all" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] \ No newline at end of file diff --git a/metadata-ingestion-modules/dagster-plugin/setup.cfg b/metadata-ingestion-modules/dagster-plugin/setup.cfg index 20a903914332aa..89b28ae45f9648 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.cfg +++ b/metadata-ingestion-modules/dagster-plugin/setup.cfg @@ -1,24 +1,3 @@ -[flake8] -max-complexity = 15 -ignore = - # Ignore: line length issues, since black's formatter will take care of them. - E501, - # Ignore: 1 blank line required before class docstring. - D203, - # See https://stackoverflow.com/a/57074416. - W503, - # See https://github.com/psf/black/issues/315. 
- E203 -exclude = - .git, - venv, - .tox, - __pycache__ -per-file-ignores = - # imported but unused - __init__.py: F401 -ban-relative-imports = true - [mypy] plugins = pydantic.mypy diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index 22c15497bd8070..f2e90c14833f78 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -53,10 +53,7 @@ def get_long_description(): "dagster-snowflake-pandas >= 0.11.0", "black==22.12.0", "coverage>=5.1", - "flake8>=6.0.0", - "flake8-tidy-imports>=4.3.0", - "flake8-bugbear==23.3.12", - "isort>=5.7.0", + "ruff==0.9.1", "mypy>=1.4.0", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py index a87f490f2d947e..9a0a9a1b3a75ed 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py @@ -13,6 +13,7 @@ TableSchemaMetadataValue, ) from dagster._core.execution.stats import RunStepKeyStatsSnapshot, StepEventStatus + from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint try: @@ -23,6 +24,7 @@ from dagster._core.snap.node import OpDefSnap from dagster._core.storage.dagster_run import DagsterRun, DagsterRunStatsSnapshot + from datahub.api.entities.datajob import DataFlow, DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( DataProcessInstance, diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py 
b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py index bccdb4ac7922a5..b91a9cfa56d398 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py @@ -35,7 +35,9 @@ try: from dagster._core.definitions.sensor_definition import SensorReturnTypesUnion except ImportError: - from dagster._core.definitions.sensor_definition import RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion # type: ignore + from dagster._core.definitions.sensor_definition import ( # type: ignore + RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion, + ) from dagster._core.definitions.target import ExecutableDefinition from dagster._core.definitions.unresolved_asset_job_definition import ( @@ -43,6 +45,7 @@ ) from dagster._core.events import DagsterEventType, HandledOutputData, LoadedInputData from dagster._core.execution.stats import RunStepKeyStatsSnapshot + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.metadata.schema_classes import SubTypesClass @@ -52,7 +55,6 @@ ) from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.error import InvalidUrnError - from datahub_dagster_plugin.client.dagster_generator import ( DATAHUB_ASSET_GROUP_NAME_CACHE, Constant, diff --git a/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py b/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py index c951b959f85d43..9a69822984bb80 100644 --- a/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py +++ b/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py @@ -22,11 +22,11 @@ RepositoryDefinition, ) from dagster._core.definitions.resource_definition import ResourceDefinition -from datahub.emitter.mcp import 
MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DatahubClientConfig from freezegun import freeze_time from utils.utils import PytestConfig, check_golden_file +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DatahubClientConfig from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig from datahub_dagster_plugin.sensors.datahub_sensors import ( DatahubSensors, diff --git a/metadata-ingestion-modules/gx-plugin/pyproject.toml b/metadata-ingestion-modules/gx-plugin/pyproject.toml index fba81486b9f677..bc951452175268 100644 --- a/metadata-ingestion-modules/gx-plugin/pyproject.toml +++ b/metadata-ingestion-modules/gx-plugin/pyproject.toml @@ -14,6 +14,3 @@ include = '\.pyi?$' indent = ' ' profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' - -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-plugin/pyproject.toml b/metadata-ingestion-modules/prefect-plugin/pyproject.toml index fba81486b9f677..bc951452175268 100644 --- a/metadata-ingestion-modules/prefect-plugin/pyproject.toml +++ b/metadata-ingestion-modules/prefect-plugin/pyproject.toml @@ -14,6 +14,3 @@ include = '\.pyi?$' indent = ' ' profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' - -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index f3a51e135082ee..745547f88bcb93 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -11,6 +11,7 @@ extend-exclude = ''' include = '\.pyi?$' target-version = ['py38', 'py39', 'py310', 'py311'] + [tool.ruff.lint.isort] combine-as-imports = true known-first-party = ["datahub"] @@ -28,16 +29,6 @@ from-first = false required-imports = [] classes = ["typing"] -[tool.pyright] -extraPaths = ['tests'] - -[tool.vulture] 
-exclude = ["src/datahub/metadata/"] -ignore_decorators = ["@click.*", "@validator", "@root_validator", "@pydantic.validator", "@pydantic.root_validator", "@pytest.fixture"] -ignore_names = ["*Source", "*Sink", "*Report"] -paths = ["src"] -sort_by_size = true - [tool.ruff] # Same as Black. line-length = 88 @@ -70,7 +61,6 @@ ignore = [ "B008", # TODO: Enable these later "B006", # Mutable args - "B007", # Unused loop control variable "B017", # Do not assert blind exception "B904", # Checks for raise statements in exception handlers that lack a from clause ] diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 2097922c151366..39de4d7f80558e 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -438,7 +438,7 @@ def _patch_ownership( for replace_index, replace_value in patches_replace.items(): list_to_manipulate[replace_index] = replace_value - for drop_index, drop_value in patches_drop.items(): + for drop_value in patches_drop.values(): list_to_manipulate.remove(drop_value) for add_value in patches_add: diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py index ad2bc36cf558b5..e4f9cd0ee7e018 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py @@ -613,7 +613,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: table_data.table_path ].timestamp = table_data.timestamp - for guid, table_data in table_dict.items(): + for _, table_data in table_dict.items(): yield from self.ingest_table(table_data, path_spec) def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py 
b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index d913b7e42065d2..072995c10ebcef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -181,7 +181,7 @@ def authenticate(self, connection_args: "DremioSourceConfig") -> None: return # On-prem Dremio authentication (PAT or Basic Auth) - for retry in range(1, self._retry_count + 1): + for _ in range(1, self._retry_count + 1): try: if connection_args.authentication_method == "PAT": self.session.headers.update( diff --git a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py index 2c9107b967e4f8..8cdd4b17733e01 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py @@ -286,7 +286,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: df = self.get_neo4j_metadata( "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;" ) - for index, row in df.iterrows(): + for _, row in df.iterrows(): try: yield MetadataWorkUnit( id=row["key"], diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 989d0d734352a2..3173423f86a2ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -1124,7 +1124,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: table_data.table_path ].timestamp = table_data.timestamp - for guid, table_data in table_dict.items(): + for _, table_data in table_dict.items(): yield from self.ingest_table(table_data, path_spec) if not self.source_config.is_profiling_enabled(): diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py 
b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py index 635e894d18c7e5..a50e99393fdc27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py @@ -354,7 +354,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}" if os.path.isdir(self.config.path): - for root, dirs, files in os.walk(self.config.path, topdown=False): + for root, _, files in os.walk(self.config.path, topdown=False): for file_name in [f for f in files if f.endswith(".json")]: try: yield from self._load_one_file( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index aeb21e88d04437..2899bcc2de37b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -268,7 +268,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw): info_cache = kw.get("info_cache") all_relations = self._get_all_relation_info(connection, info_cache=info_cache) relation_names = [] - for key, relation in all_relations.items(): + for _, relation in all_relations.items(): if relation.database == schema and relation.relkind == relkind: relation_names.append(relation.relname) return relation_names diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index ee841a2a201863..8187fff559208e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -3605,7 +3605,7 @@ def emit_project_in_topological_order( parent_container_key=parent_project_key, ) - for id_, project in 
self.tableau_project_registry.items(): + for project in self.tableau_project_registry.values(): logger.debug( f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}" ) diff --git a/metadata-ingestion/src/datahub/testing/mcp_diff.py b/metadata-ingestion/src/datahub/testing/mcp_diff.py index 5e669a718e9ad3..b58afc10148edc 100644 --- a/metadata-ingestion/src/datahub/testing/mcp_diff.py +++ b/metadata-ingestion/src/datahub/testing/mcp_diff.py @@ -246,7 +246,7 @@ def pretty(self, verbose: bool = False) -> str: for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed: aspect_map = self.aspect_changes[urn] s.append(f"Urn changed, {urn}:") - for aspect_name, aspect_diffs in aspect_map.items(): + for aspect_diffs in aspect_map.values(): for i, ga in aspect_diffs.aspects_added.items(): s.append(self.report_aspect(ga, i, "added")) if verbose: diff --git a/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py b/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py index afcd02478ae687..4c237d02727f72 100644 --- a/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py +++ b/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py @@ -8,7 +8,7 @@ # Patch based on sqllineage v1.3.3 def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None: # type: ignore - for i, tbl in enumerate(self.tables): + for tbl in self.tables: holder.add_read(tbl) self.union_barriers.append((len(self.columns), len(self.tables))) diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 832d00d9c54702..6f7a9c7833ba1a 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1070,7 +1070,7 @@ def test_unsupported_data_platform(): ) # type :ignore is_entry_present: bool = False - for key, entry in info_entries.items(): + for entry in 
info_entries.values(): if entry.title == "Non-Data Platform Expression": is_entry_present = True break @@ -1163,7 +1163,7 @@ def test_m_query_timeout(mock_get_lark_parser): ) # type :ignore is_entry_present: bool = False - for key, entry in warn_entries.items(): + for entry in warn_entries.values(): if entry.title == "M-Query Parsing Timeout": is_entry_present = True break diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 739be7cc8408dd..911d8a9f35139f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1438,7 +1438,7 @@ def test_powerbi_cross_workspace_reference_info_message( is_entry_present: bool = False # Printing INFO entries - for key, entry in info_entries.items(): + for entry in info_entries.values(): if entry.title == "Missing Lineage For Tile": is_entry_present = True break @@ -1563,7 +1563,7 @@ def test_powerbi_app_ingest_info_message( is_entry_present: bool = False # Printing INFO entries - for key, entry in info_entries.items(): + for entry in info_entries.values(): if entry.title == "App Ingestion Is Disabled": is_entry_present = True break diff --git a/metadata-ingestion/tests/performance/data_generation.py b/metadata-ingestion/tests/performance/data_generation.py index fcff13edf59363..266c0d9af03224 100644 --- a/metadata-ingestion/tests/performance/data_generation.py +++ b/metadata-ingestion/tests/performance/data_generation.py @@ -198,7 +198,7 @@ def generate_queries( all_tables = seed_metadata.tables + seed_metadata.views users = [f"user_{i}@xyz.com" for i in range(num_users)] - for i in range(num_selects): # Pure SELECT statements + for _ in range(num_selects): # Pure SELECT statements tables = _sample_list(all_tables, tables_per_select) all_columns = [ FieldAccess(column, table) for table in tables for column in table.columns @@ -213,7 +213,7 @@ def 
generate_queries( fields_accessed=_sample_list(all_columns, columns_per_select), ) - for i in range(num_operations): + for _ in range(num_operations): modified_table = random.choice(seed_metadata.tables) n_col = len(modified_table.columns) num_columns_modified = NormalDistribution(n_col / 2, n_col / 2) diff --git a/smoke-test/pyproject.toml b/smoke-test/pyproject.toml index c7745d0e9a3640..aeb3c03b6466dd 100644 --- a/smoke-test/pyproject.toml +++ b/smoke-test/pyproject.toml @@ -42,5 +42,3 @@ warn_unused_configs = true disallow_incomplete_defs = false disallow_untyped_defs = false -[tool.pyright] -extraPaths = ['tests'] From 0c597d35af83e09b3ca4f310bbe2cbab0c44eda3 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Fri, 17 Jan 2025 18:15:56 +0000 Subject: [PATCH 108/249] feat(docs): Add release docs for 0.15.0 (#12374) --- .../ingest/source/builder/NameSourceStep.tsx | 2 +- docs/api/datahub-apis.md | 4 ++ docs/how/updating-datahub.md | 60 ++++++++++--------- gradle/versioning/versioning.gradle | 2 +- 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 898fbd6a6d9268..68e6c8d3436fb9 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -200,7 +200,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) setVersion(event.target.value)} onBlur={(event) => handleBlur(event, setVersion)} diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index c46aacde3a0cb5..62136406e6ff66 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -12,6 +12,10 @@ DataHub has several APIs to manipulate metadata on the platform. Here's the list In general, **Python and Java SDKs** are our most recommended tools for extending and customizing the behavior of your DataHub instance. 
We don't recommend using the **OpenAPI** directly, as it's more complex and less user-friendly than the other APIs. +:::warning +About async usage of APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. +::: + ## Python and Java SDK We offer an SDK for both Python and Java that provide full functionality when it comes to CRUD operations and any complex functionality you may want to build into DataHub. We recommend using the SDKs for most use cases. Here are the examples of how to use the SDKs: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index eb5a792216d981..b887ca999c4046 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,8 +1,3 @@ -# Known Issues - -- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. 
- - # Updating DataHub - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docker/datahub-gms/jetty.xml b/docker/datahub-gms/jetty.xml deleted file mode 100644 index 3f04635d9498ca..00000000000000 --- a/docker/datahub-gms/jetty.xml +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docker/datahub-gms/start.sh b/docker/datahub-gms/start.sh index c91580eed83cb9..cac36920022749 100755 --- a/docker/datahub-gms/start.sh +++ b/docker/datahub-gms/start.sh @@ -62,13 +62,8 @@ COMMON=" java $JAVA_OPTS $JMX_OPTS \ $OTEL_AGENT \ $PROMETHEUS_AGENT \ - -jar /jetty-runner.jar \ - --stats unsecure \ - --jar jetty-util.jar \ - --jar jetty-jmx.jar \ - --config /datahub/datahub-gms/scripts/jetty.xml \ - --config /datahub/datahub-gms/scripts/jetty-jmx.xml \ - /datahub/datahub-gms/bin/war.war" + -Dstats=unsecure \ + -jar /datahub/datahub-gms/bin/war.war" if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then exec dockerize \ diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index c68a4c1f5a8fcf..0cbb2aee903c84 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -49,8 +49,6 @@ services: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml - - ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../metadata-service/war/build/libs/:/datahub/datahub-gms/bin diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index c6b15a7016670d..b278cd41b0b231 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ 
b/docker/profiles/docker-compose.frontend.yml @@ -26,6 +26,7 @@ x-datahub-frontend-service-dev: &datahub-frontend-service-dev DATAHUB_ANALYTICS_ENABLED: ${DATAHUB_ANALYTICS_ENABLED:-true} volumes: - ../../datahub-frontend/build/stage/main:/datahub-frontend + - ./monitoring/client-prometheus-config.yaml:/datahub-frontend/client-prometheus-config.yaml services: frontend-quickstart: diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 2147d6b5a0247f..d4ea7dde9f8481 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -130,15 +130,13 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev <<: [*datahub-dev-telemetry-env, *datahub-gms-env] SKIP_ELASTICSEARCH_CHECK: false JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001' - BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false + BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:-false} SEARCH_SERVICE_ENABLE_CACHE: false LINEAGE_SEARCH_CACHE_ENABLED: false SHOW_BROWSE_V2: true ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml - - ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../../metadata-service/war/build/libs/:/datahub/datahub-gms/bin diff --git a/metadata-integration/java/acryl-spark-lineage/build.gradle b/metadata-integration/java/acryl-spark-lineage/build.gradle index 8816264fbe50f7..c8b78f25d70992 100644 --- a/metadata-integration/java/acryl-spark-lineage/build.gradle +++ b/metadata-integration/java/acryl-spark-lineage/build.gradle @@ -57,7 +57,13 @@ dependencies { //implementation 
"io.acryl:datahub-client:0.10.2" implementation "io.openlineage:openlineage-spark_2.12:$openLineageVersion" compileOnly "org.apache.iceberg:iceberg-spark3-runtime:0.12.1" - compileOnly "org.apache.spark:spark-sql_2.12:3.1.3" + compileOnly("org.apache.spark:spark-sql_2.12:3.1.3") { + exclude group: 'org.eclipse.jetty', module: 'jetty-servlet' + exclude group: 'org.eclipse.jetty', module: 'jetty-server' + exclude group: 'org.eclipse.jetty', module: 'jetty-util' + exclude group: 'org.eclipse.jetty', module: 'jetty-webapp' + exclude group: 'org.eclipse.jetty', module: 'jetty-security' + } compileOnly "io.github.spark-redshift-community:spark-redshift_2.12:6.2.0-spark_3.5" testCompileOnly externalDependency.lombok diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index aab29101b30f71..b33f19bef95986 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -37,6 +37,7 @@ dependencies { exclude group: 'com.google.guava', module: 'guava' exclude group: 'io.grpc', module: 'grpc-protobuf' } + implementation externalDependency.dgraphNetty implementation externalDependency.slf4jApi runtimeOnly externalDependency.logbackClassic compileOnly externalDependency.lombok diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java index fa04de340e12f7..b34730f481c63b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jTestServerBuilder.java @@ -18,11 +18,11 @@ private Neo4jTestServerBuilder(Neo4jBuilder builder) { } public Neo4jTestServerBuilder() { - this(new InProcessNeo4jBuilder().withProcedure(PathExplorer.class)); + this(new InProcessNeo4jBuilder().withProcedure(PathExplorer.class).withDisabledServer()); } public Neo4jTestServerBuilder(File workingDirectory) { - this(new 
InProcessNeo4jBuilder(workingDirectory.toPath())); + this(new InProcessNeo4jBuilder(workingDirectory.toPath()).withDisabledServer()); } public Neo4j newServer() { diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java index ce7376f1f8d662..d699f0bff68019 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java @@ -49,7 +49,8 @@ public DataHubUsageEventsProcessor( @KafkaListener( id = "${DATAHUB_USAGE_EVENT_KAFKA_CONSUMER_GROUP_ID:datahub-usage-event-consumer-job-client}", topics = "${DATAHUB_USAGE_EVENT_NAME:" + Topics.DATAHUB_USAGE_EVENT + "}", - containerFactory = "simpleKafkaConsumer") + containerFactory = "simpleKafkaConsumer", + autoStartup = "false") public void consume(final ConsumerRecord consumerRecord) { try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java index c909b0034a9125..20c044b42741e8 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListenerRegistrar.java @@ -73,7 +73,7 @@ public void afterPropertiesSet() { buildConsumerGroupName(key), List.of(mclVersionedTopicName, mclTimeseriesTopicName), hooks); - registerMCLKafkaListener(kafkaListenerEndpoint, true); + registerMCLKafkaListener(kafkaListenerEndpoint, false); }); } @@ -97,7 +97,7 @@ private KafkaListenerEndpoint 
createListenerEndpoint( new MethodKafkaListenerEndpoint<>(); kafkaListenerEndpoint.setId(consumerGroupId); kafkaListenerEndpoint.setGroupId(consumerGroupId); - kafkaListenerEndpoint.setAutoStartup(true); + kafkaListenerEndpoint.setAutoStartup(false); kafkaListenerEndpoint.setTopics(topics.toArray(new String[topics.size()])); kafkaListenerEndpoint.setMessageHandlerMethodFactory(new DefaultMessageHandlerMethodFactory()); kafkaListenerEndpoint.setBean( diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java index 5d2f6452e69197..2152ed15cf0e93 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java @@ -62,7 +62,8 @@ public class MetadataChangeEventsProcessor { "${METADATA_CHANGE_EVENT_NAME:${KAFKA_MCE_TOPIC_NAME:" + Topics.METADATA_CHANGE_EVENT + "}}", - containerFactory = DEFAULT_EVENT_CONSUMER_NAME) + containerFactory = DEFAULT_EVENT_CONSUMER_NAME, + autoStartup = "false") @Deprecated public void consume(final ConsumerRecord consumerRecord) { try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java index 4e356f5fb3670a..d854a5517793ff 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java @@ -80,7 +80,8 @@ public void registerConsumerThrottle() { @KafkaListener( id = CONSUMER_GROUP_ID_VALUE, topics = 
"${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}", - containerFactory = MCP_EVENT_CONSUMER_NAME) + containerFactory = MCP_EVENT_CONSUMER_NAME, + autoStartup = "false") public void consume(final ConsumerRecord consumerRecord) { try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "consume").time()) { kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java index fed93628fe4d79..5ee9cd6ba94d2f 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java @@ -73,7 +73,8 @@ public void registerConsumerThrottle() { id = CONSUMER_GROUP_ID_VALUE, topics = "${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}", containerFactory = "kafkaEventConsumer", - batch = "true") + batch = "true", + autoStartup = "false") public void consume(final List> consumerRecords) { try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "consume").time()) { List metadataChangeProposals = diff --git a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java index 5d11697bed93d2..2befeccb951a38 100644 --- a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java +++ b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java @@ -58,7 +58,8 @@ public PlatformEventProcessor( @KafkaListener( id = "${PLATFORM_EVENT_KAFKA_CONSUMER_GROUP_ID:generic-platform-event-job-client}", topics = 
{"${PLATFORM_EVENT_TOPIC_NAME:" + Topics.PLATFORM_EVENT + "}"}, - containerFactory = PE_EVENT_CONSUMER_NAME) + containerFactory = PE_EVENT_CONSUMER_NAME, + autoStartup = "false") public void consume(final ConsumerRecord consumerRecord) { try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { diff --git a/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java b/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java index 81cc5e60552a77..d258fcfeb65750 100644 --- a/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java +++ b/metadata-service/auth-config/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java @@ -12,6 +12,9 @@ public class AuthenticationConfiguration { /** Whether user existence is enforced */ private boolean enforceExistenceEnabled; + /** Paths to be excluded from filtering * */ + private String excludedPaths; + /** * List of configurations for {@link com.datahub.plugins.auth.authentication.Authenticator}s to be * registered diff --git a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java index 30f98180f80180..492e165c0781a0 100644 --- a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java +++ b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java @@ -25,38 +25,43 @@ import com.datahub.plugins.factory.PluginConfigFactory; import com.datahub.plugins.loader.IsolatedClassLoader; import com.datahub.plugins.loader.PluginPermissionManagerImpl; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import 
com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.entity.EntityService; import jakarta.inject.Named; import jakarta.servlet.Filter; import jakarta.servlet.FilterChain; -import jakarta.servlet.FilterConfig; import jakarta.servlet.ServletException; -import jakarta.servlet.ServletRequest; -import jakarta.servlet.ServletResponse; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; +import javax.annotation.PostConstruct; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; -import org.springframework.web.context.support.SpringBeanAutowiringSupport; +import org.springframework.stereotype.Component; +import org.springframework.util.StringUtils; +import org.springframework.web.filter.OncePerRequestFilter; /** * A servlet {@link Filter} for authenticating requests inbound to the Metadata Service. This filter * is applied to the GraphQL Servlet, the Rest.li Servlet, and the Auth (token) Servlet. 
*/ +@Component @Slf4j -public class AuthenticationFilter implements Filter { +public class AuthenticationFilter extends OncePerRequestFilter { @Autowired private ConfigurationProvider configurationProvider; @@ -72,18 +77,32 @@ public class AuthenticationFilter implements Filter { private boolean _logAuthenticatorExceptions; private AuthenticatorChain authenticatorChain; + private Set excludedPathPatterns; - @Override - public void init(FilterConfig filterConfig) throws ServletException { - SpringBeanAutowiringSupport.processInjectionBasedOnCurrentContext(this); + @PostConstruct + public void init() { buildAuthenticatorChain(); + initializeExcludedPaths(); log.info("AuthenticationFilter initialized."); } + private void initializeExcludedPaths() { + excludedPathPatterns = new HashSet<>(); + String excludedPaths = configurationProvider.getAuthentication().getExcludedPaths(); + if (StringUtils.hasText(excludedPaths)) { + excludedPathPatterns.addAll( + Arrays.stream(excludedPaths.split(",")) + .map(String::trim) + .filter(path -> !path.isBlank()) + .toList()); + } + } + @Override - public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) - throws IOException, ServletException { - AuthenticationRequest context = buildAuthContext((HttpServletRequest) request); + protected void doFilterInternal( + HttpServletRequest request, HttpServletResponse response, FilterChain chain) + throws ServletException, IOException { + AuthenticationRequest context = buildAuthContext(request); Authentication authentication = null; try { authentication = this.authenticatorChain.authenticate(context, _logAuthenticatorExceptions); @@ -119,6 +138,34 @@ public void doFilter(ServletRequest request, ServletResponse response, FilterCha AuthenticationContext.remove(); } + @VisibleForTesting + @Override + public boolean shouldNotFilter(HttpServletRequest request) { + String path = request.getServletPath(); + if (path == null) { + return false; + } + + // Check if the 
path matches any of the excluded patterns + boolean shouldExclude = + excludedPathPatterns.stream() + .anyMatch( + pattern -> { + if (pattern.endsWith("/*")) { + // Handle wildcard patterns + String basePattern = pattern.substring(0, pattern.length() - 2); + return path.startsWith(basePattern); + } + return path.equals(pattern); + }); + + if (shouldExclude) { + log.debug("Skipping authentication for excluded path: {}", path); + } + + return shouldExclude; + } + @Override public void destroy() { // Nothing diff --git a/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthenticationFilterTest.java b/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthenticationFilterTest.java index 382e881542b0b8..0ffabb9db305dd 100644 --- a/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthenticationFilterTest.java +++ b/metadata-service/auth-filter/src/test/java/com/datahub/auth/authentication/AuthenticationFilterTest.java @@ -2,6 +2,8 @@ import static com.datahub.authentication.AuthenticationConstants.*; import static org.mockito.Mockito.*; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; import com.datahub.auth.authentication.filter.AuthenticationFilter; import com.datahub.authentication.Actor; @@ -9,27 +11,34 @@ import com.datahub.authentication.token.StatefulTokenService; import com.datahub.authentication.token.TokenException; import jakarta.servlet.FilterChain; +import jakarta.servlet.FilterConfig; import jakarta.servlet.ServletException; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.test.context.ContextConfiguration; import 
org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.springframework.test.util.ReflectionTestUtils; import org.testng.annotations.Test; @ContextConfiguration(classes = {AuthTestConfiguration.class}) public class AuthenticationFilterTest extends AbstractTestNGSpringContextTests { - @Autowired AuthenticationFilter _authenticationFilter; + @Autowired AuthenticationFilter authenticationFilter; - @Autowired StatefulTokenService _statefulTokenService; + @Autowired StatefulTokenService statefulTokenService; @Test public void testExpiredToken() throws ServletException, IOException, TokenException { - _authenticationFilter.init(null); + FilterConfig mockFilterConfig = mock(FilterConfig.class); + when(mockFilterConfig.getInitParameterNames()).thenReturn(Collections.emptyEnumeration()); + + authenticationFilter.init(mockFilterConfig); HttpServletRequest servletRequest = mock(HttpServletRequest.class); HttpServletResponse servletResponse = mock(HttpServletResponse.class); FilterChain filterChain = mock(FilterChain.class); @@ -46,8 +55,47 @@ public void testExpiredToken() throws ServletException, IOException, TokenExcept .thenReturn(Collections.enumeration(List.of(AUTHORIZATION_HEADER_NAME))); when(servletRequest.getHeader(AUTHORIZATION_HEADER_NAME)).thenReturn("Bearer " + token); - _authenticationFilter.doFilter(servletRequest, servletResponse, filterChain); + authenticationFilter.doFilter(servletRequest, servletResponse, filterChain); verify(servletResponse, times(1)) .sendError(eq(HttpServletResponse.SC_UNAUTHORIZED), anyString()); } + + @Test + public void testExcludedPaths() throws ServletException { + // Mock configuration setup + FilterConfig mockFilterConfig = mock(FilterConfig.class); + when(mockFilterConfig.getInitParameterNames()).thenReturn(Collections.emptyEnumeration()); + authenticationFilter.init(mockFilterConfig); + + // Test cases for different path patterns + HttpServletRequest exactPathRequest = 
mock(HttpServletRequest.class); + when(exactPathRequest.getServletPath()).thenReturn("/health"); + + HttpServletRequest wildcardPathRequest = mock(HttpServletRequest.class); + when(wildcardPathRequest.getServletPath()).thenReturn("/schema-registry/api/config"); + + HttpServletRequest nonExcludedRequest = mock(HttpServletRequest.class); + when(nonExcludedRequest.getServletPath()).thenReturn("/protected/resource"); + + // Set excluded paths in the filter + ReflectionTestUtils.setField( + authenticationFilter, + "excludedPathPatterns", + new HashSet<>(Arrays.asList("/health", "/schema-registry/*"))); + + // Verify exact path match + assertTrue( + authenticationFilter.shouldNotFilter(exactPathRequest), + "Exact path match should be excluded from filtering"); + + // Verify wildcard path match + assertTrue( + authenticationFilter.shouldNotFilter(wildcardPathRequest), + "Path matching wildcard pattern should be excluded from filtering"); + + // Verify non-excluded path + assertFalse( + authenticationFilter.shouldNotFilter(nonExcludedRequest), + "Non-excluded path should not be excluded from filtering"); + } } diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index 5d4542cf0826e8..1f86c56049190d 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -35,11 +35,13 @@ import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.client.HttpClientErrorException; @Slf4j 
@RestController +@RequestMapping("/auth") public class AuthServiceController { private static final String USER_ID_FIELD_NAME = "userId"; diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 69b86962442b91..04f7409d5c39a3 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -5,6 +5,7 @@ baseUrl: ${DATAHUB_BASE_URL:http://localhost:9002} authentication: # Enable if you want all requests to the Metadata Service to be authenticated. enabled: ${METADATA_SERVICE_AUTH_ENABLED:true} + excludedPaths: /schema-registry/*,/health,/config,/config/search/export # Disable if you want to skip validation of deleted user's tokens enforceExistenceEnabled: ${METADATA_SERVICE_AUTH_ENFORCE_EXISTENCE_ENABLED:true} @@ -330,16 +331,34 @@ neo4j: connectionLivenessCheckTimeout: ${NEO4J_CONNECTION_LIVENESS_CHECK_TIMEOUT_IN_SECONDS:-1} spring: + error: + include-message: never + include-stacktrace: never + include-exception: false + whitelabel: + enabled: false + jmx: + enabled: true + web: + resources: + add-mappings: false # do not serve static files mvc: servlet: - path: /openapi + path: / + throw-exception-if-no-handler-found: true # throw exception on 404 to be handled kafka: security: protocol: ${KAFKA_PROPERTIES_SECURITY_PROTOCOL:PLAINTEXT} springdoc: cache: - disabled: true + disabled: false + swagger-ui: + path: /openapi/swagger-ui/index.html + api-docs: + path: /openapi/v3/api-docs + groups: + enabled: true metadataTests: enabled: ${METADATA_TESTS_ENABLED:false} @@ -563,8 +582,6 @@ graphQL: depthLimit: ${GRAPHQL_QUERY_DEPTH_LIMIT:50} introspectionEnabled: ${GRAPHQL_QUERY_INTROSPECTION_ENABLED:true} -springdoc.api-docs.groups.enabled: true - forms: hook: enabled: ${FORMS_HOOK_ENABLED:true} diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java index a1ee4df360b7ec..625623d008e127 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java @@ -172,6 +172,7 @@ private KafkaListenerContainerFactory buildDefaultKafkaListenerContainerFacto factory.setConsumerFactory(factoryWithOverrides); factory.setContainerCustomizer(new ThreadPoolContainerCustomizer()); factory.setConcurrency(kafkaEventConsumerConcurrency); + factory.setAutoStartup(false); /* Sets up a delegating error handler for Deserialization errors, if disabled will use DefaultErrorHandler (does back-off retry and then logs) rather than stopping the container. Stopping the container @@ -202,6 +203,7 @@ protected KafkaListenerContainerFactory duheKafkaEventConsumer( factory.setConsumerFactory(kafkaConsumerFactory); factory.setContainerCustomizer(new ThreadPoolContainerCustomizer()); factory.setConcurrency(1); + factory.setAutoStartup(false); log.info( "Event-based DUHE KafkaListenerContainerFactory built successfully. 
Consumer concurrency = 1"); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java index 0193ded97f81b5..acab78dcfd5f52 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java @@ -53,6 +53,7 @@ protected KafkaListenerContainerFactory createInstance( new ConcurrentKafkaListenerContainerFactory<>(); factory.setContainerCustomizer(new ThreadPoolContainerCustomizer()); factory.setConsumerFactory(new DefaultKafkaConsumerFactory<>(customizedProperties)); + factory.setAutoStartup(false); log.info("Simple KafkaListenerContainerFactory built successfully"); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/common/KafkaInitializationManager.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/common/KafkaInitializationManager.java new file mode 100644 index 00000000000000..dcc818d82ee560 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/common/KafkaInitializationManager.java @@ -0,0 +1,52 @@ +package com.linkedin.gms.factory.kafka.common; + +import java.util.concurrent.atomic.AtomicBoolean; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.availability.AvailabilityChangeEvent; +import org.springframework.boot.availability.ReadinessState; +import org.springframework.context.event.EventListener; +import org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.stereotype.Component; + +@Slf4j +@Component +public class KafkaInitializationManager { + private final AtomicBoolean isInitialized = new AtomicBoolean(false); + private 
final KafkaListenerEndpointRegistry registry; + + @Autowired + public KafkaInitializationManager(KafkaListenerEndpointRegistry registry) { + this.registry = registry; + log.info( + "Created KafkaInitializationManager. Waiting for application to be ready to enable kafka consumers."); + } + + @EventListener + public void onStateChange(AvailabilityChangeEvent event) { + if (event.getState() == ReadinessState.ACCEPTING_TRAFFIC) { + initialize(this.getClass().getSimpleName()); + } + } + + public void initialize(String initializerName) { + if (isInitialized.compareAndSet(false, true)) { + int containerCount = registry.getAllListenerContainers().size(); + log.info("Starting {} kafka consumers. Initialized by {}", containerCount, initializerName); + registry + .getAllListenerContainers() + .forEach( + container -> { + if (!container.isRunning()) { + container.start(); + log.info("Started container: {}", container.getListenerId()); + } + }); + log.info("All {} kafka containers started.", containerCount); + } + } + + public boolean isInitialized() { + return isInitialized.get(); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java index 921246fa98f7a2..2d599f340f7588 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/OnBootApplicationListener.java @@ -1,22 +1,11 @@ package com.linkedin.metadata.boot; -import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; +import com.linkedin.gms.factory.kafka.common.KafkaInitializationManager; import io.datahubproject.metadata.context.OperationContext; -import java.io.IOException; -import java.util.Set; -import java.util.concurrent.ExecutorService; 
-import java.util.concurrent.Executors; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; -import org.apache.http.HttpStatus; -import org.apache.http.StatusLine; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.beans.factory.annotation.Value; import org.springframework.context.event.ContextRefreshedEvent; import org.springframework.context.event.EventListener; import org.springframework.stereotype.Component; @@ -27,98 +16,39 @@ @Component public class OnBootApplicationListener { - public static final String SCHEMA_REGISTRY_SERVLET_NAME = "dispatcher-schema-registry"; - - private static final Set ACCEPTED_HTTP_CODES = - Set.of( - HttpStatus.SC_OK, - HttpStatus.SC_MOVED_PERMANENTLY, - HttpStatus.SC_MOVED_TEMPORARILY, - HttpStatus.SC_FORBIDDEN, - HttpStatus.SC_UNAUTHORIZED); - - private static final String ROOT_WEB_APPLICATION_CONTEXT_ID = - String.format("%s:", WebApplicationContext.class.getName()); - - private final CloseableHttpClient httpClient = HttpClients.createDefault(); - - private final ExecutorService executorService = Executors.newSingleThreadExecutor(); - @Autowired @Qualifier("bootstrapManager") private BootstrapManager _bootstrapManager; - @Autowired - @Qualifier("configurationProvider") - private ConfigurationProvider provider; - - @Value("${bootstrap.servlets.waitTimeout}") - private int _servletsWaitTimeout; - @Autowired @Qualifier("systemOperationContext") private OperationContext systemOperationContext; + @Autowired private KafkaInitializationManager kafkaInitializationManager; + @EventListener(ContextRefreshedEvent.class) public void onApplicationEvent(@Nonnull ContextRefreshedEvent event) { + String contextId = event.getApplicationContext().getId(); + log.info("Context 
refreshed for ID: {}", contextId); - if (SCHEMA_REGISTRY_SERVLET_NAME.equals(event.getApplicationContext().getId())) { - log.info("Loading servlet {} without interruption.", SCHEMA_REGISTRY_SERVLET_NAME); - return; - } - - log.warn( - "OnBootApplicationListener context refreshed! {} event: {}", - ROOT_WEB_APPLICATION_CONTEXT_ID.equals(event.getApplicationContext().getId()), - event); - String schemaRegistryType = provider.getKafka().getSchemaRegistry().getType(); - if (ROOT_WEB_APPLICATION_CONTEXT_ID.equals(event.getApplicationContext().getId())) { + // For the root application context + if (event.getApplicationContext() instanceof WebApplicationContext) { + log.info("Root WebApplicationContext initialized, starting bootstrap process"); - // Handle race condition, if ebean code is executed while waiting/bootstrapping (i.e. - // AuthenticationFilter) + // Initialize Ebean first try { Class.forName("io.ebean.XServiceProvider"); } catch (ClassNotFoundException e) { - log.error( - "Failure to initialize required class `io.ebean.XServiceProvider` during initialization."); + log.error("Failed to initialize io.ebean.XServiceProvider", e); throw new RuntimeException(e); } - if (InternalSchemaRegistryFactory.TYPE.equals(schemaRegistryType)) { - executorService.submit(isSchemaRegistryAPIServletReady()); - } else { - _bootstrapManager.start(systemOperationContext); - } - } - } + // Initialize consumers + kafkaInitializationManager.initialize(this.getClass().getSimpleName()); - public Runnable isSchemaRegistryAPIServletReady() { - return () -> { - final HttpGet request = new HttpGet(provider.getKafka().getSchemaRegistry().getUrl()); - int timeouts = _servletsWaitTimeout; - boolean openAPIServletReady = false; - while (!openAPIServletReady && timeouts > 0) { - try { - log.info("Sleeping for 1 second"); - Thread.sleep(1000); - StatusLine statusLine = httpClient.execute(request).getStatusLine(); - if (ACCEPTED_HTTP_CODES.contains(statusLine.getStatusCode())) { - 
log.info("Connected! Authentication not tested."); - openAPIServletReady = true; - } - } catch (IOException | InterruptedException e) { - log.info("Failed to connect to open servlet: {}", e.getMessage()); - } - timeouts--; - } - if (!openAPIServletReady) { - log.error( - "Failed to bootstrap DataHub, OpenAPI servlet was not ready after {} seconds", - timeouts); - System.exit(1); - } else { - _bootstrapManager.start(systemOperationContext); - } - }; + _bootstrapManager.start(systemOperationContext); + } else { + log.debug("Ignoring non-web application context refresh"); + } } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java index 56a857ee45feb8..388f48650a20f0 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/IngestRetentionPoliciesStepFactory.java @@ -12,6 +12,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Scope; +import org.springframework.core.io.support.PathMatchingResourcePatternResolver; @Configuration @Import({RetentionServiceFactory.class}) @@ -43,6 +44,7 @@ protected IngestRetentionPoliciesStep createInstance() { _entityService, _enableRetention, _applyOnBootstrap, - _pluginRegistryPath); + _pluginRegistryPath, + new PathMatchingResourcePatternResolver()); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java index e69ab342740e43..50be0149ce2d4e 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java @@ -97,7 +97,8 @@ public void onPartitionsAssigned( id = CONSUMER_GROUP, topics = {TOPIC_NAME}, containerFactory = "duheKafkaEventConsumer", - concurrency = "1") + concurrency = "1", + autoStartup = "false") public void checkSystemVersion(final ConsumerRecord consumerRecord) { try (Timer.Context i = MetricUtils.timer(this.getClass(), "checkSystemVersion").time()) { final GenericRecord record = consumerRecord.value(); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDefaultGlobalSettingsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDefaultGlobalSettingsStep.java index c63d71475c2fc8..a41553379b1faa 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDefaultGlobalSettingsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDefaultGlobalSettingsStep.java @@ -78,7 +78,7 @@ public void execute(@Nonnull OperationContext systemOperationContext) // 1. Read from the file into JSON. 
JsonNode defaultSettingsObj; try { - defaultSettingsObj = mapper.readTree(new ClassPathResource(_resourcePath).getFile()); + defaultSettingsObj = mapper.readTree(new ClassPathResource(_resourcePath).getInputStream()); } catch (Exception e) { throw new RuntimeException( String.format( diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index dac2879487469c..04d73895802a8d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -74,7 +74,7 @@ public void execute(@Nonnull OperationContext systemOperationContext) log.info("Ingesting default access policies from: {}...", _policiesResource); // 1. Read from the file into JSON. - final JsonNode policiesObj = mapper.readTree(_policiesResource.getFile()); + final JsonNode policiesObj = mapper.readTree(_policiesResource.getInputStream()); if (!policiesObj.isArray()) { throw new RuntimeException( diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java index b5ca0dee142df3..01e785d06cc7d8 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStep.java @@ -15,7 +15,6 @@ import com.linkedin.retention.DataHubRetentionConfig; import io.datahubproject.metadata.context.OperationContext; import jakarta.annotation.Nonnull; -import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.util.Collections; @@ -23,7 +22,8 @@ import java.util.Map; import 
lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; +import org.springframework.core.io.support.ResourcePatternResolver; @Slf4j @RequiredArgsConstructor @@ -34,6 +34,7 @@ public class IngestRetentionPoliciesStep implements BootstrapStep { private final boolean _enableRetention; private final boolean _applyOnBootstrap; private final String pluginPath; + private final ResourcePatternResolver resolver; private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory()); @@ -77,12 +78,19 @@ public void execute(@Nonnull OperationContext systemOperationContext) return; } - // 1. Read default retention config - final Map retentionPolicyMap = - parseFileOrDir(new ClassPathResource("./boot/retention.yaml").getFile()); - - // 2. Read plugin retention config files from input path and overlay - retentionPolicyMap.putAll(parseFileOrDir(new File(pluginPath))); + // 1. Read default retention config from classpath + Resource defaultResource = resolver.getResource("classpath:boot/retention.yaml"); + Map retentionPolicyMap = + parseYamlRetentionConfig(defaultResource); + + // 2. Read plugin retention config files from filesystem path + if (!pluginPath.isEmpty()) { + String pattern = "file:" + pluginPath + "/**/*.{yaml,yml}"; + Resource[] resources = resolver.getResources(pattern); + for (Resource resource : resources) { + retentionPolicyMap.putAll(parseYamlRetentionConfig(resource)); + } + } // 4. 
Set the specified retention policies log.info("Setting {} policies", retentionPolicyMap.size()); @@ -106,39 +114,6 @@ public void execute(@Nonnull OperationContext systemOperationContext) BootstrapStep.setUpgradeResult(systemOperationContext, UPGRADE_ID_URN, _entityService); } - // Parse input yaml file or yaml files in the input directory to generate a retention policy map - private Map parseFileOrDir(File retentionFileOrDir) - throws IOException { - // If path does not exist return empty - if (!retentionFileOrDir.exists()) { - return Collections.emptyMap(); - } - - // If directory, parse the yaml files under the directory - if (retentionFileOrDir.isDirectory()) { - Map result = new HashMap<>(); - - for (File retentionFile : retentionFileOrDir.listFiles()) { - if (!retentionFile.isFile()) { - log.info( - "Element {} in plugin directory {} is not a file. Skipping", - retentionFile.getPath(), - retentionFileOrDir.getPath()); - continue; - } - result.putAll(parseFileOrDir(retentionFile)); - } - return result; - } - // If file, parse the yaml file and return result; - if (!retentionFileOrDir.getPath().endsWith(".yaml") - && retentionFileOrDir.getPath().endsWith(".yml")) { - log.info("File {} is not a YAML file. Skipping", retentionFileOrDir.getPath()); - return Collections.emptyMap(); - } - return parseYamlRetentionConfig(retentionFileOrDir); - } - /** * Parse yaml retention config * @@ -147,8 +122,11 @@ private Map parseFileOrDir(File ret * converted into the {@link com.linkedin.retention.DataHubRetentionConfig} class. 
*/ private Map parseYamlRetentionConfig( - File retentionConfigFile) throws IOException { - final JsonNode retentionPolicies = YAML_MAPPER.readTree(retentionConfigFile); + Resource resource) throws IOException { + if (!resource.exists()) { + return Collections.emptyMap(); + } + final JsonNode retentionPolicies = YAML_MAPPER.readTree(resource.getInputStream()); if (!retentionPolicies.isArray()) { throw new IllegalArgumentException( "Retention config file must contain an array of retention policies"); diff --git a/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/AbstractJakartaR2Servlet.java b/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/AbstractJakartaR2Servlet.java index 8d589c4ab2408b..9c4117202ed570 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/AbstractJakartaR2Servlet.java +++ b/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/AbstractJakartaR2Servlet.java @@ -1,35 +1,9 @@ -/* - Copyright (c) 2012 LinkedIn Corp. - Copyright (c) 2023 Acryl Data, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - MODIFICATIONS: - Changed javax packages to jakarta for Spring Boot 3 support -*/ - -/* $Id$ */ package com.linkedin.r2.transport.http.server; import com.linkedin.data.ByteString; import com.linkedin.r2.message.RequestContext; -import com.linkedin.r2.message.rest.RestException; -import com.linkedin.r2.message.rest.RestRequest; -import com.linkedin.r2.message.rest.RestRequestBuilder; -import com.linkedin.r2.message.rest.RestResponse; -import com.linkedin.r2.message.rest.RestStatus; +import com.linkedin.r2.message.rest.*; import com.linkedin.r2.transport.common.WireAttributeHelper; -import com.linkedin.r2.transport.common.bridge.common.TransportCallback; import com.linkedin.r2.transport.common.bridge.common.TransportResponse; import com.linkedin.r2.transport.common.bridge.common.TransportResponseImpl; import com.linkedin.r2.transport.http.common.HttpConstants; @@ -40,210 +14,177 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; -import java.util.Enumeration; -import java.util.Map; -import java.util.concurrent.CountDownLatch; +import java.time.Duration; +import java.util.Collections; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * @author Steven Ihde - * @author Chris Pettitt - * @author Fatih Emekci - * @version $Revision$ - */ +import java.util.concurrent.TimeoutException; +import lombok.extern.slf4j.Slf4j; + +@Slf4j public abstract class AbstractJakartaR2Servlet extends HttpServlet { - private static final Logger _log = LoggerFactory.getLogger(AbstractJakartaR2Servlet.class); - private static final long serialVersionUID = 0L; + private static final long serialVersionUID = 1L; - // servlet timeout in ms. 
- protected final long _timeout; + private final Duration timeout; protected abstract HttpDispatcher getDispatcher(); - public AbstractJakartaR2Servlet(long timeout) { - _timeout = timeout; + protected AbstractJakartaR2Servlet(Duration timeout) { + this.timeout = timeout; } @Override - protected void service(final HttpServletRequest req, final HttpServletResponse resp) + protected void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { - RequestContext requestContext = JakartaServletHelper.readRequestContext(req); - - RestRequest restRequest; - try { - restRequest = readFromServletRequest(req); - } catch (URISyntaxException e) { - writeToServletError(resp, RestStatus.BAD_REQUEST, e.toString()); - return; - } + RequestContext requestContext = JakartaServletHelper.readRequestContext(req); + RestRequest restRequest = createRestRequest(req); - final AtomicReference> result = new AtomicReference<>(); - final CountDownLatch latch = new CountDownLatch(1); + CompletableFuture> future = new CompletableFuture<>(); - TransportCallback callback = - new TransportCallback() { - @Override - public void onResponse(TransportResponse response) { - result.set(response); - latch.countDown(); - } - }; + getDispatcher().handleRequest(restRequest, requestContext, future::complete); - getDispatcher().handleRequest(restRequest, requestContext, callback); + TransportResponse result = + future + .orTimeout(timeout.toMillis(), TimeUnit.MILLISECONDS) + .exceptionally(this::handleException) + .join(); - try { - if (latch.await(_timeout, TimeUnit.MILLISECONDS)) { - writeToServletResponse(result.get(), resp); - } else { - writeToServletError( - resp, RestStatus.INTERNAL_SERVER_ERROR, "Server Timeout after " + _timeout + "ms."); - } - } catch (InterruptedException e) { - throw new ServletException("Interrupted!", e); - } - } + writeResponse(result, resp); - protected void writeToServletResponse( - TransportResponse response, HttpServletResponse resp) 
throws IOException { - Map wireAttrs = response.getWireAttributes(); - for (Map.Entry e : WireAttributeHelper.toWireAttributes(wireAttrs).entrySet()) { - resp.setHeader(e.getKey(), e.getValue()); - } - - RestResponse restResponse = null; - if (response.hasError()) { - Throwable e = response.getError(); - if (e instanceof RestException) { - restResponse = ((RestException) e).getResponse(); - } - if (restResponse == null) { - restResponse = RestStatus.responseForError(RestStatus.INTERNAL_SERVER_ERROR, e); - } - } else { - restResponse = response.getResponse(); - } - - resp.setStatus(restResponse.getStatus()); - Map headers = restResponse.getHeaders(); - for (Map.Entry e : headers.entrySet()) { - // TODO multi-valued headers - resp.setHeader(e.getKey(), e.getValue()); + } catch (URISyntaxException e) { + writeError(resp, RestStatus.BAD_REQUEST, "Invalid URI: " + e.getMessage()); + } catch (Exception e) { + log.error("Unexpected error processing request", e); + writeError(resp, RestStatus.INTERNAL_SERVER_ERROR, "Internal server error"); } + } - for (String cookie : restResponse.getCookies()) { - resp.addHeader(HttpConstants.RESPONSE_COOKIE_HEADER_NAME, cookie); + private TransportResponse handleException(Throwable ex) { + if (ex instanceof TimeoutException) { + RestResponse errorResponse = + RestStatus.responseForError( + RestStatus.INTERNAL_SERVER_ERROR, + new RuntimeException("Server timeout after " + timeout.toSeconds() + " seconds")); + return TransportResponseImpl.error(new RestException(errorResponse)); } + return TransportResponseImpl.error(ex); + } - final ByteString entity = restResponse.getEntity(); - entity.write(resp.getOutputStream()); - - resp.getOutputStream().close(); + private RestRequest createRestRequest(HttpServletRequest req) + throws IOException, ServletException, URISyntaxException { + String pathInfo = extractPathInfo(req); + String queryString = Optional.ofNullable(req.getQueryString()).map(q -> "?" 
+ q).orElse(""); + + URI uri = new URI(pathInfo + queryString); + + RestRequestBuilder builder = new RestRequestBuilder(uri).setMethod(req.getMethod()); + + // Handle headers + Collections.list(req.getHeaderNames()) + .forEach( + headerName -> { + if (headerName.equalsIgnoreCase(HttpConstants.REQUEST_COOKIE_HEADER_NAME)) { + Collections.list(req.getHeaders(headerName)).forEach(builder::addCookie); + } else { + Collections.list(req.getHeaders(headerName)) + .forEach(value -> builder.addHeaderValue(headerName, value)); + } + }); + + // Handle request body + int contentLength = req.getContentLength(); + ByteString entity = + (contentLength > 0) + ? ByteString.read(req.getInputStream(), contentLength) + : ByteString.read(req.getInputStream()); + + builder.setEntity(entity); + + return builder.build(); } - protected void writeToServletError(HttpServletResponse resp, int statusCode, String message) + private void writeResponse(TransportResponse response, HttpServletResponse resp) throws IOException { - RestResponse restResponse = RestStatus.responseForStatus(statusCode, message); - writeToServletResponse(TransportResponseImpl.success(restResponse), resp); - } + // Write wire attributes + WireAttributeHelper.toWireAttributes(response.getWireAttributes()).forEach(resp::setHeader); + + // Get response or create error response + RestResponse restResponse = + Optional.of(response) + .filter(TransportResponse::hasError) + .map( + r -> { + Throwable error = r.getError(); + if (error instanceof RestException) { + return ((RestException) error).getResponse(); + } + return RestStatus.responseForError(RestStatus.INTERNAL_SERVER_ERROR, error); + }) + .orElseGet(response::getResponse); + + // Write status and headers + resp.setStatus(restResponse.getStatus()); + restResponse.getHeaders().forEach(resp::setHeader); - protected RestRequest readFromServletRequest(HttpServletRequest req) - throws IOException, ServletException, URISyntaxException { - StringBuilder sb = new StringBuilder(); 
- sb.append(extractPathInfo(req)); - String query = req.getQueryString(); - if (query != null) { - sb.append('?'); - sb.append(query); - } + // Write cookies + restResponse + .getCookies() + .forEach(cookie -> resp.addHeader(HttpConstants.RESPONSE_COOKIE_HEADER_NAME, cookie)); - URI uri = new URI(sb.toString()); - - RestRequestBuilder rb = new RestRequestBuilder(uri); - rb.setMethod(req.getMethod()); - - for (Enumeration headerNames = req.getHeaderNames(); headerNames.hasMoreElements(); ) { - String headerName = headerNames.nextElement(); - if (headerName.equalsIgnoreCase(HttpConstants.REQUEST_COOKIE_HEADER_NAME)) { - for (Enumeration cookies = req.getHeaders(headerName); - cookies.hasMoreElements(); ) { - rb.addCookie(cookies.nextElement()); - } - } else { - for (Enumeration headerValues = req.getHeaders(headerName); - headerValues.hasMoreElements(); ) { - rb.addHeaderValue(headerName, headerValues.nextElement()); - } - } + // Write response body + try (var outputStream = resp.getOutputStream()) { + restResponse.getEntity().write(outputStream); } + } - int length = req.getContentLength(); - if (length > 0) { - rb.setEntity(ByteString.read(req.getInputStream(), length)); - } else { - // Known cases for not sending a content-length header in a request - // 1. Chunked transfer encoding - // 2. HTTP/2 - rb.setEntity(ByteString.read(req.getInputStream())); - } - return rb.build(); + private void writeError(HttpServletResponse resp, int statusCode, String message) + throws IOException { + RestResponse errorResponse = RestStatus.responseForStatus(statusCode, message); + writeResponse(TransportResponseImpl.success(errorResponse), resp); } - /** - * Attempts to return a "non decoded" pathInfo by stripping off the contextPath and servletPath - * parts of the requestURI. As a defensive measure, this method will return the "decoded" pathInfo - * directly by calling req.getPathInfo() if it is unable to strip off the contextPath or - * servletPath. 
- * - * @throws javax.servlet.ServletException if resulting pathInfo is empty - */ protected static String extractPathInfo(HttpServletRequest req) throws ServletException { - // For "http:hostname:8080/contextPath/servletPath/pathInfo" the RequestURI is - // "/contextPath/servletPath/pathInfo" - // where the contextPath, servletPath and pathInfo parts all contain their leading slash. - - // stripping contextPath and servletPath this way is not fully compatible with the HTTP spec. - // If a - // request for, say "/%75scp-proxy/reso%75rces" is made (where %75 decodes to 'u') - // the stripping off of contextPath and servletPath will fail because the requestUri string will - // include the encoded char but the contextPath and servletPath strings will not. String requestUri = req.getRequestURI(); - String contextPath = req.getContextPath(); - StringBuilder builder = new StringBuilder(); - if (contextPath != null) { - builder.append(contextPath); - } + String contextPath = Optional.ofNullable(req.getContextPath()).orElse(""); + String servletPath = Optional.ofNullable(req.getServletPath()).orElse(""); - String servletPath = req.getServletPath(); - if (servletPath != null) { - builder.append(servletPath); - } - String prefix = builder.toString(); - String pathInfo; - if (prefix.length() == 0) { + String prefix = contextPath + servletPath; + String pathInfo = null; + + if (prefix.isEmpty()) { pathInfo = requestUri; - } else if (requestUri.startsWith(prefix)) { + } else if (servletPath.startsWith("/gms") && requestUri.startsWith(prefix)) { pathInfo = requestUri.substring(prefix.length()); - } else { - _log.warn( - "Unable to extract 'non decoded' pathInfo, returning 'decoded' pathInfo instead. This may cause issues processing request URIs containing special characters. 
requestUri=" - + requestUri); - return req.getPathInfo(); } - if (pathInfo.length() == 0) { - // We prefer to keep servlet mapping trivial with R2 and have R2 - // TransportDispatchers make most of the routing decisions based on the 'pathInfo' - // and query parameters in the URI. - // If pathInfo is null, it's highly likely that the servlet was mapped to an exact - // path or to a file extension, making such R2-based services too reliant on the - // servlet container for routing - throw new ServletException( - "R2 servlet should only be mapped via wildcard path mapping e.g. /r2/*. " - + "Exact path matching (/r2) and file extension mappings (*.r2) are currently not supported"); + if (pathInfo == null || pathInfo.isEmpty()) { + log.debug( + "Previously invalid servlet mapping detected. Request details: method='{}', requestUri='{}', contextPath='{}', " + + "servletPath='{}', prefix='{}', pathInfo='{}', queryString='{}', protocol='{}', remoteAddr='{}', " + + "serverName='{}', serverPort={}, contentType='{}', characterEncoding='{}'", + req.getMethod(), + requestUri, + contextPath, + servletPath, + prefix, + pathInfo, + req.getQueryString(), + req.getProtocol(), + req.getRemoteAddr(), + req.getServerName(), + req.getServerPort(), + req.getContentType(), + req.getCharacterEncoding()); + + /* NOTE: Working around what was previously considered an error. + * throw new ServletException( + * "R2 servlet must be mapped using wildcard path mapping (e.g., /r2/*). 
" + + * "Exact path matching (/r2) and file extension mappings (*.r2) are not supported."); + **/ + + pathInfo = requestUri; } return pathInfo; diff --git a/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/RAPJakartaServlet.java b/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/RAPJakartaServlet.java index 75e808d5245c83..f0db3cd01ffcf9 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/RAPJakartaServlet.java +++ b/metadata-service/factories/src/main/java/com/linkedin/r2/transport/http/server/RAPJakartaServlet.java @@ -1,44 +1,21 @@ -/* - Copyright (c) 2012 LinkedIn Corp. - Copyright (c) 2023 Acryl Data, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - MODIFICATIONS: - Changed javax dependencies to jakarta for Spring Boot 3 support -*/ - -/** $Id: $ */ package com.linkedin.r2.transport.http.server; import com.linkedin.r2.transport.common.bridge.server.TransportDispatcher; +import java.time.Duration; +import lombok.Getter; -/** - * @author Steven Ihde - * @version $Revision: $ - */ public class RAPJakartaServlet extends AbstractJakartaR2Servlet { private static final long serialVersionUID = 0L; - private final HttpDispatcher _dispatcher; + @Getter private final HttpDispatcher dispatcher; - public RAPJakartaServlet(HttpDispatcher dispatcher) { - super(Long.MAX_VALUE); - _dispatcher = dispatcher; + public RAPJakartaServlet(HttpDispatcher dispatcher, int timeoutSeconds) { + super(Duration.ofSeconds(timeoutSeconds)); + this.dispatcher = dispatcher; } - public RAPJakartaServlet(TransportDispatcher dispatcher) { - this(HttpDispatcherFactory.create((dispatcher))); + public RAPJakartaServlet(TransportDispatcher dispatcher, int timeoutSeconds) { + this(HttpDispatcherFactory.create((dispatcher)), timeoutSeconds); } /** @@ -48,22 +25,7 @@ public RAPJakartaServlet(TransportDispatcher dispatcher) { */ public RAPJakartaServlet( HttpDispatcher dispatcher, boolean useContinuations, int timeOut, int timeOutDelta) { - super(timeOut); - _dispatcher = dispatcher; - } - - /** - * Initialize the RAPJakartaServlet. 
- * - * @see #AbstractJakartaR2Servlet - */ - public RAPJakartaServlet( - TransportDispatcher dispatcher, boolean useContinuations, int timeOut, int timeOutDelta) { - this(HttpDispatcherFactory.create((dispatcher)), useContinuations, timeOut, timeOutDelta); - } - - @Override - protected HttpDispatcher getDispatcher() { - return _dispatcher; + super(Duration.ofSeconds(timeOut)); + this.dispatcher = dispatcher; } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/restli/server/RAPServletFactory.java b/metadata-service/factories/src/main/java/com/linkedin/restli/server/RAPServletFactory.java index be060477aeb1f6..73db7ec73f4d7e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/restli/server/RAPServletFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/restli/server/RAPServletFactory.java @@ -26,6 +26,9 @@ public class RAPServletFactory { @Value("#{systemEnvironment['RESTLI_SERVLET_THREADS']}") private Integer environmentThreads; + @Value("${RESTLI_TIMEOUT_SECONDS:60}") + private int restliTimeoutSeconds; + @Value("${" + INGESTION_MAX_SERIALIZED_STRING_LENGTH + ":16000000}") private int maxSerializedStringLength; @@ -71,6 +74,7 @@ public RAPJakartaServlet rapServlet( RestLiServer restLiServer = new RestLiServer(config, springInjectResourceFactory, parseqEngine); return new RAPJakartaServlet( new FilterChainDispatcher( - new DelegatingTransportDispatcher(restLiServer, restLiServer), FilterChains.empty())); + new DelegatingTransportDispatcher(restLiServer, restLiServer), FilterChains.empty()), + restliTimeoutSeconds); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/restli/server/RestliHandlerServlet.java b/metadata-service/factories/src/main/java/com/linkedin/restli/server/RestliHandlerServlet.java index bfc25b7ddaef50..4a38b331d95fed 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/restli/server/RestliHandlerServlet.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/restli/server/RestliHandlerServlet.java @@ -8,26 +8,25 @@ import java.io.IOException; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.HttpRequestHandler; import org.springframework.web.context.support.HttpRequestHandlerServlet; @Slf4j @AllArgsConstructor public class RestliHandlerServlet extends HttpRequestHandlerServlet implements HttpRequestHandler { - @Autowired private RAPJakartaServlet _r2Servlet; + private final RAPJakartaServlet r2Servlet; @Override public void init(ServletConfig config) throws ServletException { log.info("Initializing RestliHandlerServlet"); - this._r2Servlet.init(config); + this.r2Servlet.init(config); log.info("Initialized RestliHandlerServlet"); } @Override public void service(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - _r2Servlet.service(req, res); + r2Servlet.service(req, res); } @Override diff --git a/metadata-service/factories/src/test/java/com/linkedin/gms/factory/kafka/common/KafkaInitializationManagerTest.java b/metadata-service/factories/src/test/java/com/linkedin/gms/factory/kafka/common/KafkaInitializationManagerTest.java new file mode 100644 index 00000000000000..6404f238b5cf93 --- /dev/null +++ b/metadata-service/factories/src/test/java/com/linkedin/gms/factory/kafka/common/KafkaInitializationManagerTest.java @@ -0,0 +1,165 @@ +package com.linkedin.gms.factory.kafka.common; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.availability.AvailabilityChangeEvent; +import org.springframework.boot.availability.ReadinessState; +import org.springframework.boot.test.context.SpringBootTest; +import 
org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.kafka.annotation.EnableKafka; +import org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.kafka.listener.MessageListenerContainer; +import org.springframework.test.annotation.DirtiesContext; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +@SpringBootTest +@ContextConfiguration(classes = {KafkaInitializationManager.class}) +@EnableKafka +@DirtiesContext +public class KafkaInitializationManagerTest extends AbstractTestNGSpringContextTests { + + @Autowired private KafkaInitializationManager manager; + + @MockBean private KafkaListenerEndpointRegistry registry; + + private MessageListenerContainer container1; + private MessageListenerContainer container2; + private List containers; + + @BeforeMethod + public void setUp() { + // Reset the initialization state before each test + // This is necessary because Spring maintains the same instance across tests + try { + java.lang.reflect.Field field = + KafkaInitializationManager.class.getDeclaredField("isInitialized"); + field.setAccessible(true); + ((java.util.concurrent.atomic.AtomicBoolean) field.get(manager)).set(false); + } catch (Exception e) { + throw new RuntimeException("Failed to reset initialization state", e); + } + + // Create fresh mocks for each test + container1 = mock(MessageListenerContainer.class); + container2 = mock(MessageListenerContainer.class); + containers = Arrays.asList(container1, container2); + + // Clear any previous mock interactions + reset(registry); + } + + @Test + public void testInitialize_WhenNotInitialized() { + // Arrange + when(registry.getAllListenerContainers()).thenReturn(containers); + when(container1.isRunning()).thenReturn(false); + when(container2.isRunning()).thenReturn(false); + 
when(container1.getListenerId()).thenReturn("container1"); + when(container2.getListenerId()).thenReturn("container2"); + + // Act + manager.initialize("TestInitializer"); + + // Assert + verify(container1, times(1)).start(); + verify(container2, times(1)).start(); + assertTrue(manager.isInitialized()); + } + + @Test + public void testInitialize_WhenAlreadyInitialized() { + // Arrange + when(registry.getAllListenerContainers()).thenReturn(containers); + + // First initialization + manager.initialize("FirstInitializer"); + + // Clear previous interactions + clearInvocations(container1, container2); + + // Act + manager.initialize("SecondInitializer"); + + // Assert + verify(container1, never()).start(); + verify(container2, never()).start(); + assertTrue(manager.isInitialized()); + } + + @Test + public void testInitialize_WithNoContainers() { + // Arrange + when(registry.getAllListenerContainers()).thenReturn(Collections.emptyList()); + + // Act + manager.initialize("TestInitializer"); + + // Assert + assertTrue(manager.isInitialized()); + } + + @Test + public void testInitialize_WithAlreadyRunningContainer() { + // Arrange + when(registry.getAllListenerContainers()).thenReturn(Collections.singletonList(container1)); + when(container1.isRunning()).thenReturn(true); + when(container1.getListenerId()).thenReturn("container1"); + + // Act + manager.initialize("TestInitializer"); + + // Assert + verify(container1, never()).start(); + assertTrue(manager.isInitialized()); + } + + @Test + public void testOnStateChange_WhenAcceptingTraffic() { + // Arrange + AvailabilityChangeEvent event = + new AvailabilityChangeEvent<>(this, ReadinessState.ACCEPTING_TRAFFIC); + when(registry.getAllListenerContainers()).thenReturn(containers); + when(container1.isRunning()).thenReturn(false); + when(container2.isRunning()).thenReturn(false); + when(container1.getListenerId()).thenReturn("container1"); + when(container2.getListenerId()).thenReturn("container2"); + + // Act + 
manager.onStateChange(event); + + // Assert + assertTrue(manager.isInitialized()); + // The method is called twice in the implementation - once for size() and once for iteration + verify(registry, times(2)).getAllListenerContainers(); + verify(container1).start(); + verify(container2).start(); + } + + @Test + public void testOnStateChange_WhenNotAcceptingTraffic() { + // Arrange + AvailabilityChangeEvent event = + new AvailabilityChangeEvent<>(this, ReadinessState.REFUSING_TRAFFIC); + + // Act + manager.onStateChange(event); + + // Assert + assertFalse(manager.isInitialized()); + verify(registry, never()).getAllListenerContainers(); + } + + @Test + public void testIsInitialized_DefaultState() { + // Assert + assertFalse(manager.isInitialized()); + } +} diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStepTest.java new file mode 100644 index 00000000000000..4f4c4b8bb68ae5 --- /dev/null +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestRetentionPoliciesStepTest.java @@ -0,0 +1,257 @@ +package com.linkedin.metadata.boot.steps; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.boot.BootstrapStep; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.RetentionService; +import com.linkedin.retention.DataHubRetentionConfig; +import io.datahubproject.metadata.context.OperationContext; +import java.io.ByteArrayInputStream; +import org.mockito.Mockito; +import org.springframework.core.io.Resource; +import org.springframework.core.io.support.PathMatchingResourcePatternResolver; +import org.testng.annotations.Test; + +public class 
IngestRetentionPoliciesStepTest { + + private static final String UPGRADE_ID = "ingest-retention-policies"; + private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID); + + private static final String DEFAULT_RETENTION_YAML = + "- entity: \"dataHubExecutionRequest\"\n" + + " aspect: \"dataHubExecutionRequestResult\"\n" + + " config:\n" + + " retention:\n" + + " version:\n" + + " maxVersions: 1\n" + + "- entity: \"*\"\n" + + " aspect: \"*\"\n" + + " config:\n" + + " retention:\n" + + " version:\n" + + " maxVersions: 20\n"; + + @Test + public void testExecuteWhenRetentionIsDisabled() throws Exception { + final EntityService mockEntityService = Mockito.mock(EntityService.class); + final RetentionService mockRetentionService = Mockito.mock(RetentionService.class); + final OperationContext mockContext = mock(OperationContext.class); + final PathMatchingResourcePatternResolver mockResolver = + mock(PathMatchingResourcePatternResolver.class); + + final IngestRetentionPoliciesStep retentionStep = + new IngestRetentionPoliciesStep( + mockRetentionService, mockEntityService, false, false, "", mockResolver); + + retentionStep.execute(mockContext); + + // Verify that no interactions occur with retention service when retention is disabled + Mockito.verify(mockRetentionService, Mockito.times(0)).setRetention(any(), any(), any(), any()); + Mockito.verify(mockRetentionService, Mockito.times(0)).batchApplyRetention(any(), any()); + } + + @Test + public void testExecuteWhenAlreadyApplied() throws Exception { + final EntityService mockEntityService = Mockito.mock(EntityService.class); + final RetentionService mockRetentionService = Mockito.mock(RetentionService.class); + final OperationContext mockContext = mock(OperationContext.class); + final PathMatchingResourcePatternResolver mockResolver = + mock(PathMatchingResourcePatternResolver.class); + + when(mockEntityService.exists(mockContext, UPGRADE_ID_URN, true)).thenReturn(true); + + final 
IngestRetentionPoliciesStep retentionStep = + new IngestRetentionPoliciesStep( + mockRetentionService, mockEntityService, true, true, "", mockResolver); + + retentionStep.execute(mockContext); + + Mockito.verify(mockRetentionService, Mockito.times(0)).setRetention(any(), any(), any(), any()); + Mockito.verify(mockRetentionService, Mockito.times(0)).batchApplyRetention(any(), any()); + } + + @Test + public void testExecuteWithDefaultRetentionOnly() throws Exception { + final EntityService mockEntityService = Mockito.mock(EntityService.class); + final RetentionService mockRetentionService = Mockito.mock(RetentionService.class); + final OperationContext mockContext = mock(OperationContext.class); + final PathMatchingResourcePatternResolver mockResolver = + mock(PathMatchingResourcePatternResolver.class); + final Resource mockResource = mock(Resource.class); + + when(mockEntityService.exists(mockContext, UPGRADE_ID_URN, true)).thenReturn(false); + + // Mock resource loading + when(mockResolver.getResource("classpath:boot/retention.yaml")).thenReturn(mockResource); + when(mockResource.exists()).thenReturn(true); + when(mockResource.getInputStream()) + .thenReturn(new ByteArrayInputStream(DEFAULT_RETENTION_YAML.getBytes())); + + when(mockRetentionService.setRetention( + any(OperationContext.class), + eq("dataHubExecutionRequest"), + eq("dataHubExecutionRequestResult"), + any(DataHubRetentionConfig.class))) + .thenReturn(true); + + when(mockRetentionService.setRetention( + any(OperationContext.class), eq("*"), eq("*"), any(DataHubRetentionConfig.class))) + .thenReturn(true); + + final IngestRetentionPoliciesStep retentionStep = + new IngestRetentionPoliciesStep( + mockRetentionService, mockEntityService, true, true, "", mockResolver); + + retentionStep.execute(mockContext); + + Mockito.verify(mockRetentionService, Mockito.times(1)) + .setRetention( + any(OperationContext.class), + eq("dataHubExecutionRequest"), + eq("dataHubExecutionRequestResult"), + 
any(DataHubRetentionConfig.class)); + + Mockito.verify(mockRetentionService, Mockito.times(1)) + .setRetention( + any(OperationContext.class), eq("*"), eq("*"), any(DataHubRetentionConfig.class)); + + Mockito.verify(mockRetentionService, Mockito.times(1)).batchApplyRetention(null, null); + + Mockito.verify(mockEntityService, Mockito.times(1)) + .ingestProposal(any(), any(), any(), Mockito.eq(false)); + } + + @Test + public void testExecuteWithoutBatchApply() throws Exception { + final EntityService mockEntityService = Mockito.mock(EntityService.class); + final RetentionService mockRetentionService = Mockito.mock(RetentionService.class); + final OperationContext mockContext = mock(OperationContext.class); + final PathMatchingResourcePatternResolver mockResolver = + mock(PathMatchingResourcePatternResolver.class); + final Resource mockResource = mock(Resource.class); + + when(mockEntityService.exists(mockContext, UPGRADE_ID_URN, true)).thenReturn(false); + + // Mock resource loading + when(mockResolver.getResource("classpath:boot/retention.yaml")).thenReturn(mockResource); + when(mockResource.exists()).thenReturn(true); + when(mockResource.getInputStream()) + .thenReturn(new ByteArrayInputStream(DEFAULT_RETENTION_YAML.getBytes())); + + when(mockRetentionService.setRetention( + any(OperationContext.class), + eq("dataHubExecutionRequest"), + eq("dataHubExecutionRequestResult"), + any(DataHubRetentionConfig.class))) + .thenReturn(true); + + when(mockRetentionService.setRetention( + any(OperationContext.class), eq("*"), eq("*"), any(DataHubRetentionConfig.class))) + .thenReturn(true); + + final IngestRetentionPoliciesStep retentionStep = + new IngestRetentionPoliciesStep( + mockRetentionService, mockEntityService, true, false, "", mockResolver); + + retentionStep.execute(mockContext); + + Mockito.verify(mockRetentionService, Mockito.times(1)) + .setRetention( + any(OperationContext.class), + eq("dataHubExecutionRequest"), + eq("dataHubExecutionRequestResult"), + 
any(DataHubRetentionConfig.class)); + + Mockito.verify(mockRetentionService, Mockito.times(1)) + .setRetention( + any(OperationContext.class), eq("*"), eq("*"), any(DataHubRetentionConfig.class)); + + Mockito.verify(mockRetentionService, Mockito.times(0)).batchApplyRetention(any(), any()); + } + + @Test + public void testExecuteWithPluginPath() throws Exception { + final EntityService mockEntityService = Mockito.mock(EntityService.class); + final RetentionService mockRetentionService = Mockito.mock(RetentionService.class); + final OperationContext mockContext = mock(OperationContext.class); + final PathMatchingResourcePatternResolver mockResolver = + mock(PathMatchingResourcePatternResolver.class); + final Resource mockDefaultResource = mock(Resource.class); + final Resource mockPluginResource = mock(Resource.class); + + when(mockEntityService.exists(mockContext, UPGRADE_ID_URN, true)).thenReturn(false); + + // Mock resource loading + when(mockResolver.getResource("classpath:boot/retention.yaml")).thenReturn(mockDefaultResource); + when(mockResolver.getResources("file:/test/plugins/**/*.{yaml,yml}")) + .thenReturn(new Resource[] {mockPluginResource}); + + // Mock default resource loading + when(mockDefaultResource.exists()).thenReturn(true); + when(mockDefaultResource.getInputStream()) + .thenReturn(new ByteArrayInputStream(DEFAULT_RETENTION_YAML.getBytes())); + + // Mock plugin resource loading + String pluginYaml = + "- entity: \"container\"\n" + + " aspect: \"containerProperties\"\n" + + " config:\n" + + " retention:\n" + + " version:\n" + + " maxVersions: 5\n"; + + when(mockPluginResource.exists()).thenReturn(true); + when(mockPluginResource.getInputStream()) + .thenReturn(new ByteArrayInputStream(pluginYaml.getBytes())); + + when(mockRetentionService.setRetention( + any(OperationContext.class), + eq("dataHubExecutionRequest"), + eq("dataHubExecutionRequestResult"), + any(DataHubRetentionConfig.class))) + .thenReturn(true); + + 
when(mockRetentionService.setRetention( + any(OperationContext.class), eq("*"), eq("*"), any(DataHubRetentionConfig.class))) + .thenReturn(true); + + when(mockRetentionService.setRetention( + any(OperationContext.class), + eq("container"), + eq("containerProperties"), + any(DataHubRetentionConfig.class))) + .thenReturn(true); + + final IngestRetentionPoliciesStep retentionStep = + new IngestRetentionPoliciesStep( + mockRetentionService, mockEntityService, true, true, "/test/plugins", mockResolver); + + retentionStep.execute(mockContext); + + // Verify retention operations occur for both default and plugin configs + Mockito.verify(mockRetentionService, Mockito.times(1)) + .setRetention( + any(OperationContext.class), + eq("dataHubExecutionRequest"), + eq("dataHubExecutionRequestResult"), + any(DataHubRetentionConfig.class)); + Mockito.verify(mockRetentionService, Mockito.times(1)) + .setRetention( + any(OperationContext.class), eq("*"), eq("*"), any(DataHubRetentionConfig.class)); + Mockito.verify(mockRetentionService, Mockito.times(1)) + .setRetention( + any(OperationContext.class), + eq("container"), + eq("containerProperties"), + any(DataHubRetentionConfig.class)); + + Mockito.verify(mockRetentionService, Mockito.times(1)).batchApplyRetention(null, null); + Mockito.verify(mockEntityService, Mockito.times(1)) + .ingestProposal(any(), any(), any(), Mockito.eq(false)); + } +} diff --git a/metadata-service/factories/src/test/java/com/linkedin/r2/transport/http/server/RAPJakartaServletTest.java b/metadata-service/factories/src/test/java/com/linkedin/r2/transport/http/server/RAPJakartaServletTest.java new file mode 100644 index 00000000000000..09b582b7c7c179 --- /dev/null +++ b/metadata-service/factories/src/test/java/com/linkedin/r2/transport/http/server/RAPJakartaServletTest.java @@ -0,0 +1,303 @@ +package com.linkedin.r2.transport.http.server; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static 
org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import com.linkedin.r2.filter.R2Constants; +import com.linkedin.r2.message.RequestContext; +import com.linkedin.r2.message.rest.RestRequest; +import com.linkedin.r2.message.rest.RestResponse; +import com.linkedin.r2.message.rest.RestStatus; +import com.linkedin.r2.transport.common.bridge.common.TransportCallback; +import com.linkedin.r2.transport.common.bridge.common.TransportResponse; +import com.linkedin.r2.transport.common.bridge.common.TransportResponseImpl; +import com.linkedin.r2.transport.common.bridge.server.TransportDispatcher; +import jakarta.servlet.ServletException; +import jakarta.servlet.ServletOutputStream; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.security.cert.X509Certificate; +import java.util.Collections; +import org.mockito.ArgumentCaptor; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class RAPJakartaServletTest { + private static final int TIMEOUT_SECONDS = 5; + private static final String REMOTE_ADDRESS = "192.168.1.1"; + private static final int REMOTE_PORT = 12345; + + private HttpDispatcher mockHttpDispatcher; + private TransportDispatcher mockTransportDispatcher; + private HttpServletRequest mockRequest; + private HttpServletResponse mockResponse; + private ServletOutputStream mockOutputStream; + + @BeforeMethod + public void setUp() throws IOException { + mockHttpDispatcher = mock(HttpDispatcher.class); + mockTransportDispatcher = mock(TransportDispatcher.class); + mockRequest = mock(HttpServletRequest.class); + mockResponse = mock(HttpServletResponse.class); + 
mockOutputStream = mock(ServletOutputStream.class); + + // Basic request setup + when(mockRequest.getMethod()).thenReturn("GET"); + when(mockRequest.getRequestURI()).thenReturn("/test"); + when(mockRequest.getContextPath()).thenReturn(""); + when(mockRequest.getServletPath()).thenReturn(""); + when(mockRequest.getPathInfo()).thenReturn("/test"); + + // Protocol and server info + when(mockRequest.getProtocol()).thenReturn("HTTP/1.1"); + when(mockRequest.getScheme()).thenReturn("http"); + when(mockRequest.getServerName()).thenReturn("localhost"); + when(mockRequest.getServerPort()).thenReturn(8080); + + // Remote client info + when(mockRequest.getRemoteAddr()).thenReturn(REMOTE_ADDRESS); + when(mockRequest.getRemotePort()).thenReturn(REMOTE_PORT); + + // Security defaults (non-secure) + when(mockRequest.isSecure()).thenReturn(false); + when(mockRequest.getAttribute("javax.servlet.request.X509Certificate")).thenReturn(null); + when(mockRequest.getAttribute("javax.servlet.request.cipher_suite")).thenReturn(null); + + // Headers and content + when(mockRequest.getHeaderNames()).thenReturn(Collections.enumeration(Collections.emptyList())); + when(mockRequest.getInputStream()) + .thenReturn(new TestServletInputStream(new ByteArrayInputStream(new byte[0]))); + when(mockResponse.getOutputStream()).thenReturn(mockOutputStream); + } + + @Test + public void testConstructorWithHttpDispatcher() { + RAPJakartaServlet servlet = new RAPJakartaServlet(mockHttpDispatcher, TIMEOUT_SECONDS); + assertEquals(servlet.getDispatcher(), mockHttpDispatcher); + } + + @Test + public void testConstructorWithTransportDispatcher() { + RAPJakartaServlet servlet = new RAPJakartaServlet(mockTransportDispatcher, TIMEOUT_SECONDS); + assertNotNull(servlet.getDispatcher()); + } + + @Test + public void testSuccessfulRequest() throws ServletException, IOException { + // Setup + RAPJakartaServlet servlet = new RAPJakartaServlet(mockHttpDispatcher, TIMEOUT_SECONDS); + RestResponse mockRestResponse = 
RestStatus.responseForStatus(200, "OK"); + TransportResponse transportResponse = + TransportResponseImpl.success(mockRestResponse); + + doAnswer( + invocation -> { + TransportCallback callback = invocation.getArgument(2); + callback.onResponse(transportResponse); + return null; + }) + .when(mockHttpDispatcher) + .handleRequest( + any(RestRequest.class), any(RequestContext.class), any(TransportCallback.class)); + + // Execute + servlet.service(mockRequest, mockResponse); + + // Verify status code + verify(mockResponse).setStatus(200); + + // Capture and verify the output stream write + ArgumentCaptor dataCaptor = ArgumentCaptor.forClass(byte[].class); + ArgumentCaptor offsetCaptor = ArgumentCaptor.forClass(Integer.class); + ArgumentCaptor lengthCaptor = ArgumentCaptor.forClass(Integer.class); + + verify(mockOutputStream) + .write(dataCaptor.capture(), offsetCaptor.capture(), lengthCaptor.capture()); + + // Convert captured bytes to string for readable verification + byte[] capturedData = dataCaptor.getValue(); + int offset = offsetCaptor.getValue(); + int length = lengthCaptor.getValue(); + + String responseBody = new String(capturedData, offset, length); + System.out.println("Actual success response body: " + responseBody); // For debugging + + assertEquals(responseBody, "OK", "Expected response body to be 'OK' but got: " + responseBody); + } + + @Test + public void testSecureRequest() throws ServletException, IOException { + // Setup secure request + X509Certificate[] certs = new X509Certificate[1]; + certs[0] = mock(X509Certificate.class); + String cipherSuite = "TLS_AES_256_GCM_SHA384"; + + when(mockRequest.isSecure()).thenReturn(true); + when(mockRequest.getAttribute("javax.servlet.request.X509Certificate")).thenReturn(certs); + when(mockRequest.getAttribute("javax.servlet.request.cipher_suite")).thenReturn(cipherSuite); + + RAPJakartaServlet servlet = new RAPJakartaServlet(mockHttpDispatcher, TIMEOUT_SECONDS); + + ArgumentCaptor contextCaptor = 
ArgumentCaptor.forClass(RequestContext.class); + + // Execute + servlet.service(mockRequest, mockResponse); + + // Verify + verify(mockHttpDispatcher) + .handleRequest( + any(RestRequest.class), contextCaptor.capture(), any(TransportCallback.class)); + + RequestContext capturedContext = contextCaptor.getValue(); + assertTrue((Boolean) capturedContext.getLocalAttr(R2Constants.IS_SECURE)); + assertEquals(capturedContext.getLocalAttr(R2Constants.CIPHER_SUITE), cipherSuite); + assertEquals(capturedContext.getLocalAttr(R2Constants.CLIENT_CERT), certs[0]); + } + + @Test + public void testBadRequest() throws ServletException, IOException { + // Setup with invalid URI components that will cause URISyntaxException + when(mockRequest.getRequestURI()).thenReturn("/test"); + when(mockRequest.getQueryString()).thenReturn("invalid=%%test"); // Invalid percent encoding + when(mockRequest.getContextPath()).thenReturn(""); + when(mockRequest.getServletPath()).thenReturn(""); + + RAPJakartaServlet servlet = new RAPJakartaServlet(mockHttpDispatcher, TIMEOUT_SECONDS); + + // Execute + servlet.service(mockRequest, mockResponse); + + // Verify + verify(mockResponse).setStatus(400); + + // Capture all write invocations to the output stream + ArgumentCaptor dataCaptor = ArgumentCaptor.forClass(byte[].class); + ArgumentCaptor offsetCaptor = ArgumentCaptor.forClass(Integer.class); + ArgumentCaptor lengthCaptor = ArgumentCaptor.forClass(Integer.class); + + verify(mockOutputStream) + .write(dataCaptor.capture(), offsetCaptor.capture(), lengthCaptor.capture()); + + // Convert captured bytes to string for readable verification + byte[] capturedData = dataCaptor.getValue(); + int offset = offsetCaptor.getValue(); + int length = lengthCaptor.getValue(); + + String responseBody = new String(capturedData, offset, length); + System.out.println("Actual response body: " + responseBody); // For debugging + + assertTrue( + responseBody.contains("Invalid URI"), + "Expected response to contain 'Invalid 
URI' but got: " + responseBody); + } + + @Test + public void testRequestTimeout() throws ServletException, IOException { + // Setup with very short timeout + RAPJakartaServlet servlet = new RAPJakartaServlet(mockHttpDispatcher, 1); + + doAnswer( + invocation -> { + Thread.sleep(2000); + return null; + }) + .when(mockHttpDispatcher) + .handleRequest( + any(RestRequest.class), any(RequestContext.class), any(TransportCallback.class)); + + // Execute + servlet.service(mockRequest, mockResponse); + + // Verify status code + verify(mockResponse).setStatus(500); + + // Capture and verify the output stream write + ArgumentCaptor dataCaptor = ArgumentCaptor.forClass(byte[].class); + ArgumentCaptor offsetCaptor = ArgumentCaptor.forClass(Integer.class); + ArgumentCaptor lengthCaptor = ArgumentCaptor.forClass(Integer.class); + + verify(mockOutputStream) + .write(dataCaptor.capture(), offsetCaptor.capture(), lengthCaptor.capture()); + + // Convert captured bytes to string for readable verification + byte[] capturedData = dataCaptor.getValue(); + int offset = offsetCaptor.getValue(); + int length = lengthCaptor.getValue(); + + String responseBody = new String(capturedData, offset, length); + System.out.println("Actual timeout response body: " + responseBody); // For debugging + + assertTrue( + responseBody.contains("Server timeout after 1 seconds"), + "Expected response to contain timeout message but got: " + responseBody); + } + + @DataProvider(name = "headerTestCases") + public Object[][] headerTestCases() { + return new Object[][] { + {"Content-Type", "application/json"}, + {"Accept", "text/plain"}, + {"X-Custom-Header", "custom-value"} + }; + } + + @Test(dataProvider = "headerTestCases") + public void testHeaderHandling(String headerName, String headerValue) + throws ServletException, IOException { + // Setup + RAPJakartaServlet servlet = new RAPJakartaServlet(mockHttpDispatcher, TIMEOUT_SECONDS); + when(mockRequest.getHeaderNames()) + 
.thenReturn(Collections.enumeration(Collections.singletonList(headerName))); + when(mockRequest.getHeaders(headerName)) + .thenReturn(Collections.enumeration(Collections.singletonList(headerValue))); + + ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(RestRequest.class); + + // Execute + servlet.service(mockRequest, mockResponse); + + // Verify + verify(mockHttpDispatcher) + .handleRequest( + requestCaptor.capture(), any(RequestContext.class), any(TransportCallback.class)); + RestRequest capturedRequest = requestCaptor.getValue(); + assertEquals(capturedRequest.getHeader(headerName), headerValue); + } + + private static class TestServletInputStream extends jakarta.servlet.ServletInputStream { + private final ByteArrayInputStream basis; + + public TestServletInputStream(ByteArrayInputStream basis) { + this.basis = basis; + } + + @Override + public int read() throws IOException { + return basis.read(); + } + + @Override + public boolean isFinished() { + return basis.available() == 0; + } + + @Override + public boolean isReady() { + return true; + } + + @Override + public void setReadListener(jakarta.servlet.ReadListener readListener) { + // Not implemented for test + } + } +} diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java index 2f66b30f55844c..6e02db6eb65239 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java @@ -37,10 +37,12 @@ import org.springframework.web.HttpRequestMethodNotSupportedException; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; @Slf4j 
@RestController +@RequestMapping("/api") public class GraphQLController { public GraphQLController() { diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java index 61f2720c6cfca4..0461af4db11cc8 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphiQLController.java @@ -32,7 +32,7 @@ public GraphiQLController() { } } - @GetMapping(value = "/graphiql", produces = MediaType.TEXT_HTML_VALUE) + @GetMapping(value = "/api/graphiql", produces = MediaType.TEXT_HTML_VALUE) @ResponseBody CompletableFuture graphiQL() { return GraphQLConcurrencyUtils.supplyAsync( diff --git a/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache b/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache index ddf1c130d7be69..fbeb0ba7454a8b 100644 --- a/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache +++ b/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache @@ -62,7 +62,7 @@ import java.util.concurrent.Callable; {{/isJava8or11}} {{>generatedAnnotation}} @Controller -@RequestMapping("/v2/analytics") +@RequestMapping("/openapi/v2/analytics") {{#operations}} public class {{classname}}Controller implements {{classname}} { diff --git a/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache index fbf354ff91688f..78bb061b5d50a4 100644 --- a/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache +++ 
b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache @@ -80,7 +80,7 @@ import java.util.concurrent.Callable; {{/useOas2}} {{#operations}} @RestController -@RequestMapping("/v2/entity") +@RequestMapping("/openapi/v2/entity") public class {{classname}}Controller implements {{classname}} { private static final Logger log = LoggerFactory.getLogger({{classname}}Controller.class); diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java index 31b35b65ea1a8c..fc91b0990a333a 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImplTest.java @@ -301,7 +301,7 @@ public void customModelTest() throws Exception { mockMvc .perform( - MockMvcRequestBuilders.post("/v2/entity/dataset") + MockMvcRequestBuilders.post("/openapi/v2/entity/dataset") .content(body) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON)) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java index 01493d71643481..b78121573811f6 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java @@ -18,6 +18,7 @@ import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ControllerAdvice; import 
org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.servlet.NoHandlerFoundException; import org.springframework.web.servlet.mvc.support.DefaultHandlerExceptionResolver; @Slf4j @@ -88,4 +89,13 @@ public ResponseEntity> handleGenericException( return new ResponseEntity<>( Map.of("error", "Internal server error occurred"), HttpStatus.INTERNAL_SERVER_ERROR); } + + @ExceptionHandler(NoHandlerFoundException.class) + public static ResponseEntity> handleNoHandlerFoundException( + NoHandlerFoundException ex, HttpServletRequest request) { + String message = String.format("No endpoint %s %s.", ex.getHttpMethod(), ex.getRequestURL()); + + log.error("No handler found for request: " + request.getRequestURI()); + return new ResponseEntity<>(Map.of("error", message), HttpStatus.NOT_FOUND); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java index c4b4431e77c4ef..c6d5f1452fea91 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java @@ -1,15 +1,7 @@ package io.datahubproject.openapi.config; -import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; -import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.StreamReadConstraints; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.EntityRegistry; -import io.datahubproject.openapi.converter.StringToChangeCategoryConverter; import 
io.datahubproject.openapi.v3.OpenAPIV3Generator; import io.swagger.v3.oas.annotations.OpenAPIDefinition; import io.swagger.v3.oas.annotations.info.Info; @@ -18,29 +10,24 @@ import io.swagger.v3.oas.models.OpenAPI; import java.util.Collections; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; -import javax.annotation.Nonnull; import org.springdoc.core.models.GroupedOpenApi; -import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.format.FormatterRegistry; -import org.springframework.http.converter.ByteArrayHttpMessageConverter; -import org.springframework.http.converter.FormHttpMessageConverter; -import org.springframework.http.converter.HttpMessageConverter; -import org.springframework.http.converter.StringHttpMessageConverter; -import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; -import org.springframework.web.servlet.config.annotation.AsyncSupportConfigurer; +import org.springframework.core.annotation.Order; +import org.springframework.web.servlet.config.annotation.EnableWebMvc; +import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry; import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; +@EnableWebMvc @OpenAPIDefinition( info = @Info(title = "DataHub OpenAPI", version = "2.0.0"), servers = {@Server(url = "/openapi/", description = "Default Server URL")}) +@Order(2) @Configuration public class SpringWebConfig implements WebMvcConfigurer { private static final Set OPERATIONS_PACKAGES = @@ -52,35 +39,6 @@ public class SpringWebConfig implements WebMvcConfigurer { private static final Set OPENLINEAGE_PACKAGES = Set.of("io.datahubproject.openapi.openlineage"); - 
@Value("${datahub.gms.async.request-timeout-ms}") - private long asyncTimeoutMilliseconds; - - @Override - public void configureMessageConverters(List> messageConverters) { - messageConverters.add(new StringHttpMessageConverter()); - messageConverters.add(new ByteArrayHttpMessageConverter()); - messageConverters.add(new FormHttpMessageConverter()); - - ObjectMapper objectMapper = new ObjectMapper(); - int maxSize = - Integer.parseInt( - System.getenv() - .getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); - objectMapper - .getFactory() - .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); - objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); - objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - MappingJackson2HttpMessageConverter jsonConverter = - new MappingJackson2HttpMessageConverter(objectMapper); - messageConverters.add(jsonConverter); - } - - @Override - public void addFormatters(FormatterRegistry registry) { - registry.addConverter(new StringToChangeCategoryConverter()); - } - @Bean public GroupedOpenApi v3OpenApiGroup( final EntityRegistry entityRegistry, final ConfigurationProvider configurationProvider) { @@ -153,6 +111,13 @@ public GroupedOpenApi openlineageOpenApiGroup() { .build(); } + @Override + public void addResourceHandlers(ResourceHandlerRegistry registry) { + registry + .addResourceHandler("/swagger-ui/**") + .addResourceLocations("classpath:/META-INF/resources/webjars/swagger-ui/"); + } + /** Concatenates two maps. 
*/ private Map concat(Supplier> a, Supplier> b) { return a.get() == null @@ -167,10 +132,4 @@ private Map concat(Supplier> a, Supplier> b) { (v1, v2) -> v2, LinkedHashMap::new)); } - - @Override - public void configureAsyncSupport(@Nonnull AsyncSupportConfigurer configurer) { - WebMvcConfigurer.super.configureAsyncSupport(configurer); - configurer.setDefaultTimeout(asyncTimeoutMilliseconds); - } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index 6151b866e5208b..64333009dda7a9 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -60,7 +60,7 @@ import org.springframework.web.bind.annotation.RestController; @RestController -@RequestMapping("/operations/elasticSearch") +@RequestMapping("/openapi/operations/elasticSearch") @Slf4j @Tag( name = "ElasticSearchOperations", diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java index 99d3879ab9a320..9be9526afac340 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/test/IdController.java @@ -19,7 +19,7 @@ import org.springframework.web.bind.annotation.RestController; @RestController -@RequestMapping("/operations/identity") +@RequestMapping("/openapi/operations/identity") @Slf4j @Tag(name = "Identity", description = "An API for checking identity") public class IdController { diff --git 
a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java index 3c44d94428f428..5db2183d3acd62 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java @@ -28,7 +28,7 @@ import org.springframework.web.bind.annotation.RestController; @RestController -@RequestMapping("/operations/throttle") +@RequestMapping("/openapi/operations/throttle") @Slf4j @Tag(name = "GMS Throttle Control", description = "An API for GMS throttle control.") public class ThrottleController { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java index 03050868efdcab..dd359cbc464319 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java @@ -61,7 +61,7 @@ */ @Deprecated @RestController -@RequestMapping("/entities/v1") +@RequestMapping("/openapi/entities/v1") @Slf4j @Tag( name = "Entities", diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java index b2b7eb557ca32a..f778bf54aaeac7 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java +++ 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java @@ -50,7 +50,7 @@ @Deprecated @RestController @RequiredArgsConstructor -@RequestMapping("/relationships/v1") +@RequestMapping("/openapi/relationships/v1") @Slf4j @Tag(name = "Relationships", description = "APIs for accessing relationships of entities") public class RelationshipsController { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java index 30cdb632d54773..47b558f6a9b3a3 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java @@ -38,7 +38,7 @@ @Deprecated @RestController @AllArgsConstructor -@RequestMapping("/timeline/v1") +@RequestMapping("/openapi/timeline/v1") @Tag( name = "Timeline", description = diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index b1c5709ef01470..bafa8c43f60f55 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -73,7 +73,7 @@ @RestController @RequiredArgsConstructor -@RequestMapping("/v2/entity") +@RequestMapping("/openapi/v2/entity") @Slf4j public class EntityController extends GenericEntitiesController< diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java index 6c99d972dde03f..bfd1f952f95f74 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java @@ -39,7 +39,7 @@ @RestController @RequiredArgsConstructor -@RequestMapping("/v2/platform/entities/v1") +@RequestMapping("/openapiv2/platform/entities/v1") @Slf4j @Tag( name = "Platform Entities", diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java index a0412676b5cbce..1e431bd291ca17 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java @@ -9,7 +9,7 @@ @RestController @RequiredArgsConstructor -@RequestMapping("/v2/relationship") +@RequestMapping("/openapi/v2/relationship") @Slf4j @Tag( name = "Generic Relationships", diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java index f3d0d5188b1e7e..0245ae289f07ee 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java @@ -34,7 +34,7 @@ @RestController @AllArgsConstructor -@RequestMapping("/v2/timeline/v1") +@RequestMapping("/openapi/v2/timeline/v1") @Tag( name = 
"Timeline", description = diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java index 3b896dc5000822..212084f25ddd5d 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java @@ -38,7 +38,7 @@ import org.springframework.web.bind.annotation.RestController; @RestController -@RequestMapping("/v2/timeseries") +@RequestMapping("/openapi/v2/timeseries") @Slf4j @Tag( name = "Generic Timeseries Aspects", diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index a4583082d57c7f..573feec64d2283 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -93,7 +93,7 @@ @RestController("EntityControllerV3") @RequiredArgsConstructor -@RequestMapping("/v3/entity") +@RequestMapping("/openapi/v3/entity") @Slf4j @Hidden public class EntityController diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/RelationshipController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/RelationshipController.java index 8f317e86227239..3b7b897b55d1c5 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/RelationshipController.java +++ 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/RelationshipController.java @@ -9,7 +9,7 @@ @RestController("RelationshipControllerV3") @RequiredArgsConstructor -@RequestMapping("/v3/relationship") +@RequestMapping("/openapi/v3/relationship") @Slf4j @Tag( name = "Generic Relationships", diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/config/SpringWebConfigTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/config/SpringWebConfigTest.java new file mode 100644 index 00000000000000..c449ed20e47ba4 --- /dev/null +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/config/SpringWebConfigTest.java @@ -0,0 +1,77 @@ +package io.datahubproject.openapi.config; + +import static org.mockito.Mockito.mock; +import static org.testng.AssertJUnit.assertEquals; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.spring.YamlPropertySourceFactory; +import io.datahubproject.openapi.v3.OpenAPIV3Generator; +import io.swagger.v3.oas.models.Components; +import io.swagger.v3.oas.models.OpenAPI; +import io.swagger.v3.oas.models.Paths; +import io.swagger.v3.oas.models.media.Schema; +import java.util.HashMap; +import java.util.Map; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.PropertySource; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +@Import(ConfigurationProvider.class) +@PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) +public class SpringWebConfigTest extends AbstractTestNGSpringContextTests { + @Autowired private ConfigurationProvider 
configurationProvider; + + @Test + void testComponentsMergeWithDuplicateKeys() { + // Setup + SpringWebConfig config = new SpringWebConfig(); + EntityRegistry entityRegistry = mock(EntityRegistry.class); + + // Create test schemas with duplicate keys + Map schemas1 = new HashMap<>(); + schemas1.put("TestSchema", new Schema().type("string").description("First schema")); + + Map schemas2 = new HashMap<>(); + schemas2.put("TestSchema", new Schema().type("object").description("Second schema")); + + // Create OpenAPI objects with proper initialization + OpenAPI openApi1 = + new OpenAPI().components(new Components().schemas(schemas1)).paths(new Paths()); + + OpenAPI openApi2 = + new OpenAPI().components(new Components().schemas(schemas2)).paths(new Paths()); + + // Mock OpenAPIV3Generator + try (MockedStatic mockedGenerator = + Mockito.mockStatic(OpenAPIV3Generator.class)) { + mockedGenerator + .when( + () -> + OpenAPIV3Generator.generateOpenApiSpec( + Mockito.any(EntityRegistry.class), Mockito.any(ConfigurationProvider.class))) + .thenReturn(openApi2); + + // Get the GroupedOpenApi + var groupedApi = config.v3OpenApiGroup(entityRegistry, configurationProvider); + + // Execute the customizer + groupedApi.getOpenApiCustomizers().get(0).customise(openApi1); + + // Verify the merged components + Map mergedSchemas = openApi1.getComponents().getSchemas(); + + // Assert that we have the expected number of schemas + assertEquals(1, mergedSchemas.size()); + + // Assert that the duplicate key contains the second schema (v2 value) + Schema resultSchema = mergedSchemas.get("TestSchema"); + assertEquals("object", resultSchema.getType()); + assertEquals("Second schema", resultSchema.getDescription()); + } + } +} diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 
e82ab50a0defeb..5080f6a12cdd28 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -178,7 +178,7 @@ public void testSearchOrderPreserved() throws Exception { // test ASCENDING mockMvc .perform( - MockMvcRequestBuilders.get("/v3/entity/dataset") + MockMvcRequestBuilders.get("/openapi/v3/entity/dataset") .param("sortOrder", "ASCENDING") .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is2xxSuccessful()) @@ -192,7 +192,7 @@ public void testSearchOrderPreserved() throws Exception { // test DESCENDING mockMvc .perform( - MockMvcRequestBuilders.get("/v3/entity/dataset") + MockMvcRequestBuilders.get("/openapi/v3/entity/dataset") .accept(MediaType.APPLICATION_JSON) .param("sortOrder", "DESCENDING")) .andExpect(status().is2xxSuccessful()) @@ -211,14 +211,14 @@ public void testDeleteEntity() throws Exception { // test delete entity mockMvc .perform( - MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + MockMvcRequestBuilders.delete(String.format("/openapi/v3/entity/dataset/%s", TEST_URN)) .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is2xxSuccessful()); // test delete entity by aspect key mockMvc .perform( - MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + MockMvcRequestBuilders.delete(String.format("/openapi/v3/entity/dataset/%s", TEST_URN)) .param("aspects", "datasetKey") .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is2xxSuccessful()); @@ -229,7 +229,7 @@ public void testDeleteEntity() throws Exception { reset(mockEntityService); mockMvc .perform( - MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + MockMvcRequestBuilders.delete(String.format("/openapi/v3/entity/dataset/%s", TEST_URN)) .param("aspects", "status") .accept(MediaType.APPLICATION_JSON)) 
.andExpect(status().is2xxSuccessful()); @@ -240,7 +240,7 @@ public void testDeleteEntity() throws Exception { reset(mockEntityService); mockMvc .perform( - MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + MockMvcRequestBuilders.delete(String.format("/openapi/v3/entity/dataset/%s", TEST_URN)) .param("clear", "true") .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is2xxSuccessful()); @@ -374,7 +374,7 @@ public void testTimeseriesAspect() throws Exception { // test timeseries latest aspect mockMvc .perform( - MockMvcRequestBuilders.get("/v3/entity/dataset/{urn}/datasetprofile", TEST_URN) + MockMvcRequestBuilders.get("/openapi/v3/entity/dataset/{urn}/datasetprofile", TEST_URN) .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is2xxSuccessful()) .andExpect(MockMvcResultMatchers.jsonPath("$.value.rowCount").value(10)) @@ -384,7 +384,7 @@ public void testTimeseriesAspect() throws Exception { // test oldd aspect mockMvc .perform( - MockMvcRequestBuilders.get("/v3/entity/dataset/{urn}/datasetprofile", TEST_URN) + MockMvcRequestBuilders.get("/openapi/v3/entity/dataset/{urn}/datasetprofile", TEST_URN) .param("version", "150") .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is2xxSuccessful()) @@ -471,7 +471,7 @@ public void testGetEntityBatchWithMultipleEntities() throws Exception { mockMvc .perform( - MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + MockMvcRequestBuilders.post("/openapi/v3/entity/dataset/batchGet") .content(requestBody) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON)) @@ -486,7 +486,7 @@ public void testGetEntityBatchWithInvalidUrn() throws Exception { mockMvc .perform( - MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + MockMvcRequestBuilders.post("/openapi/v3/entity/dataset/batchGet") .content(requestBody) .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON)) @@ -531,7 +531,7 @@ public void 
testScrollEntitiesWithMultipleSortFields() throws Exception { mockMvc .perform( - MockMvcRequestBuilders.post("/v3/entity/scroll") + MockMvcRequestBuilders.post("/openapi/v3/entity/scroll") .content("{\"entities\":[\"dataset\"]}") .param("sortCriteria", "name", "urn") .contentType(MediaType.APPLICATION_JSON) @@ -575,7 +575,7 @@ public void testScrollEntitiesWithPitKeepAlive() throws Exception { mockMvc .perform( - MockMvcRequestBuilders.post("/v3/entity/scroll") + MockMvcRequestBuilders.post("/openapi/v3/entity/scroll") .content("{\"entities\":[\"dataset\"]}") .param("pitKeepAlive", "10m") .contentType(MediaType.APPLICATION_JSON) @@ -598,7 +598,7 @@ public void testEntityVersioningFeatureFlagDisabled() throws Exception { .perform( MockMvcRequestBuilders.post( String.format( - "/v3/entity/versioning/%s/relationship/versionOf/%s", + "/openapi/v3/entity/versioning/%s/relationship/versionOf/%s", VERSION_SET_URN, TEST_URN)) .content("{}") .contentType(MediaType.APPLICATION_JSON) @@ -610,7 +610,7 @@ public void testEntityVersioningFeatureFlagDisabled() throws Exception { .perform( MockMvcRequestBuilders.delete( String.format( - "/v3/entity/versioning/%s/relationship/versionOf/%s", + "/openapi/v3/entity/versioning/%s/relationship/versionOf/%s", VERSION_SET_URN, TEST_URN)) .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is4xxClientError()); @@ -630,7 +630,7 @@ public void testInvalidVersionSetUrn() throws Exception { .perform( MockMvcRequestBuilders.post( String.format( - "/v3/entity/versioning/%s/relationship/versionOf/%s", + "/openapi/v3/entity/versioning/%s/relationship/versionOf/%s", INVALID_VERSION_SET_URN, TEST_URN)) .content("{}") .contentType(MediaType.APPLICATION_JSON) @@ -642,7 +642,7 @@ public void testInvalidVersionSetUrn() throws Exception { .perform( MockMvcRequestBuilders.delete( String.format( - "/v3/entity/versioning/%s/relationship/versionOf/%s", + "/openapi/v3/entity/versioning/%s/relationship/versionOf/%s", INVALID_VERSION_SET_URN, TEST_URN)) 
.accept(MediaType.APPLICATION_JSON)) .andExpect(status().is4xxClientError()); diff --git a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java index 09043c6dd5e87e..145ecae92ff0b6 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java @@ -2,7 +2,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; -import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.metadata.registry.SchemaRegistryService; import io.datahubproject.schema_registry.openapi.generated.CompatibilityCheckResponse; import io.datahubproject.schema_registry.openapi.generated.Config; @@ -29,10 +28,12 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import javax.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.core.Ordered; +import org.springframework.core.annotation.Order; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.RequestMapping; @@ -42,11 +43,9 @@ /** DataHub Rest Controller implementation for Confluent's Schema Registry OpenAPI spec. 
*/ @Slf4j @RestController -@RequestMapping("/api") +@RequestMapping("/schema-registry/api") +@Order(Ordered.HIGHEST_PRECEDENCE) @RequiredArgsConstructor -@ConditionalOnProperty( - name = "kafka.schemaRegistry.type", - havingValue = InternalSchemaRegistryFactory.TYPE) public class SchemaRegistryController implements CompatibilityApi, ConfigApi, @@ -66,6 +65,11 @@ public class SchemaRegistryController @Qualifier("schemaRegistryService") private final SchemaRegistryService _schemaRegistryService; + @PostConstruct + public void init() { + log.info("SchemaRegistryController initialized with base path: /schema-registry/api"); + } + @Override public Optional getObjectMapper() { return Optional.ofNullable(objectMapper); diff --git a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java index 5f7d8d9604de6e..c2200eef6e60f5 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java @@ -1,23 +1,50 @@ package io.datahubproject.openapi.schema.registry.config; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; +import com.linkedin.metadata.registry.SchemaRegistryService; +import io.datahubproject.openapi.schema.registry.SchemaRegistryController; import io.swagger.v3.oas.annotations.OpenAPIDefinition; import io.swagger.v3.oas.annotations.info.Info; import io.swagger.v3.oas.annotations.servers.Server; +import jakarta.servlet.http.HttpServletRequest; import java.util.List; +import lombok.extern.slf4j.Slf4j; +import 
org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; +import org.springframework.core.annotation.Order; import org.springframework.http.converter.ByteArrayHttpMessageConverter; import org.springframework.http.converter.FormHttpMessageConverter; import org.springframework.http.converter.HttpMessageConverter; import org.springframework.http.converter.StringHttpMessageConverter; import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; +import org.springframework.web.servlet.config.annotation.EnableWebMvc; import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; +@Slf4j +@EnableWebMvc @OpenAPIDefinition( info = @Info(title = "DataHub OpenAPI", version = "1.0.0"), servers = {@Server(url = "/schema-registry/", description = "Schema Registry Server URL")}) +@Order(3) +@ConditionalOnProperty( + name = "kafka.schemaRegistry.type", + havingValue = InternalSchemaRegistryFactory.TYPE) @Configuration +@ComponentScan(basePackages = {"io.datahubproject.openapi.schema.registry"}) public class SpringWebSchemaRegistryConfig implements WebMvcConfigurer { + @Bean + public SchemaRegistryController schemaRegistryController( + ObjectMapper objectMapper, + HttpServletRequest request, + @Qualifier("schemaRegistryService") SchemaRegistryService schemaRegistryService) { + return new SchemaRegistryController(objectMapper, request, schemaRegistryService); + } + @Override public void configureMessageConverters(List> messageConverters) { messageConverters.add(new StringHttpMessageConverter()); diff --git a/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTest.java 
b/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTest.java index e8deed00672da7..fd862d65fb2ef4 100644 --- a/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTest.java +++ b/metadata-service/schema-registry-servlet/src/test/java/io/datahubproject/openapi/test/SchemaRegistryControllerTest.java @@ -74,7 +74,7 @@ static void kafkaProperties(DynamicPropertyRegistry registry) { kafka.start(); registry.add("kafka.bootstrapServers", kafka::getBootstrapServers); registry.add("kafka.schemaRegistry.type", () -> "INTERNAL"); - registry.add("kafka.schemaRegistry.url", () -> "http://localhost:53222/api/"); + registry.add("kafka.schemaRegistry.url", () -> "http://localhost:53222/schema-registry/api/"); } @Autowired EventProducer _producer; diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index 5e00207c1726ee..d83b29c4ad82bc 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -1,6 +1,7 @@ plugins { - id 'war' + id 'org.springframework.boot' id 'com.palantir.docker' + id 'java' } apply from: '../../gradle/coverage/java-coverage.gradle' @@ -31,6 +32,12 @@ dependencies { runtimeOnly externalDependency.mariadbConnector runtimeOnly externalDependency.mysqlConnector runtimeOnly externalDependency.postgresql + + implementation(externalDependency.springBootStarterWeb) { + exclude module: "spring-boot-starter-tomcat" + } + implementation externalDependency.springBootStarterJetty + implementation externalDependency.jettyJmx implementation externalDependency.springWebMVC implementation externalDependency.springBootAutoconfigure implementation externalDependency.servletApi @@ -38,6 +45,8 @@ dependencies { implementation spec.product.pegasus.restliDocgen implementation spec.product.pegasus.restliSpringBridge + compileOnly externalDependency.lombok + annotationProcessor externalDependency.lombok 
runtimeOnly externalDependency.log4jCore runtimeOnly externalDependency.log4j2Api runtimeOnly externalDependency.logbackClassic @@ -51,31 +60,30 @@ dependencies { } configurations.all{ exclude group: "com.charleskorn.kaml", module:"kaml" - -} -configurations { - jetty11 } -dependencies { - jetty11 "org.eclipse.jetty:jetty-runner:11.0.19" +tasks.register('run') { + group = 'application' // Add a group for better organization + description = 'Runs the application with Gretty' + dependsOn 'bootRun' // spring task } -task run(type: JavaExec, dependsOn: build) { - main = "org.eclipse.jetty.runner.Runner" - args = ["${projectDir}/build/libs/war.war"] - classpath configurations.jetty11 +bootJar { + // backwards compatible with old war archive name + archiveFileName = 'war.war' + mainClass = 'com.linkedin.gms.GMSApplication' } -war { - archiveFileName = "war.war" +bootRun { + environment "SCHEMA_REGISTRY_TYPE", "INTERNAL" + environment "KAFKA_SCHEMAREGISTRY_URL", "http://localhost:8080/schema-registry/api" } docker { name "${docker_registry}/${docker_repo}:v${version}" version "v${version}" dockerfile file("${rootProject.projectDir}/docker/${docker_repo}/Dockerfile") - files war.outputs.files + files bootJar.outputs.files files fileTree(rootProject.projectDir) { include '.dockerignore' include 'docker/monitoring/*' @@ -107,7 +115,7 @@ docker { buildArgs(dockerBuildArgs) } } -tasks.getByPath(":metadata-service:war:docker").dependsOn([build, war]) +tasks.getByPath(":metadata-service:war:docker").dependsOn([bootJar]) task cleanLocalDockerImages { doLast { @@ -115,3 +123,12 @@ task cleanLocalDockerImages { } } dockerClean.finalizedBy(cleanLocalDockerImages) + +test { + jacoco { + // This contains quickstart tested code for jetty startup + excludes = ["com.linkedin.gms.CommonApplicationConfig", + "com.linkedin.gms.ServletConfig", + "com.linkedin.gms.GMSApplication"] + } +} \ No newline at end of file diff --git 
a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java index 69fb9df2b04dbb..3f3ed9f75cceb3 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java @@ -1,11 +1,22 @@ package com.linkedin.gms; -import com.datahub.auth.authentication.filter.AuthenticationFilter; import com.linkedin.metadata.spring.YamlPropertySourceFactory; +import java.lang.management.ManagementFactory; +import javax.management.MBeanServer; +import org.eclipse.jetty.jmx.MBeanContainer; +import org.eclipse.jetty.server.Connector; +import org.eclipse.jetty.server.HttpConfiguration; +import org.eclipse.jetty.server.HttpConnectionFactory; +import org.eclipse.jetty.server.ServerConnector; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.web.embedded.jetty.JettyServletWebServerFactory; +import org.springframework.boot.web.server.WebServerFactoryCustomizer; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.PropertySource; +import org.springframework.core.env.Environment; /** * Common configuration for all servlets. 
Generally this list also includes dependencies of the @@ -43,12 +54,39 @@ "com.linkedin.gms.factory.notifications", "com.linkedin.gms.factory.telemetry" }) -@PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) @Configuration +@PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) public class CommonApplicationConfig { - @Bean("authenticationFilter") - public AuthenticationFilter authenticationFilter() { - return new AuthenticationFilter(); + @Autowired private Environment environment; + + @Bean + public WebServerFactoryCustomizer jettyCustomizer() { + return factory -> { + // Configure HTTP + factory.addServerCustomizers( + server -> { + // HTTP Configuration + HttpConfiguration httpConfig = new HttpConfiguration(); + httpConfig.setRequestHeaderSize(32768); + + // HTTP Connector + ServerConnector connector = + new ServerConnector(server, new HttpConnectionFactory(httpConfig)); + + // Get port from environment directly + int port = environment.getProperty("server.port", Integer.class, 8080); + connector.setPort(port); + + // Set connectors + server.setConnectors(new Connector[] {connector}); + + // JMX Configuration + MBeanServer mBeanServer = ManagementFactory.getPlatformMBeanServer(); + MBeanContainer mBeanContainer = new MBeanContainer(mBeanServer); + mBeanContainer.beanAdded(null, LoggerFactory.getILoggerFactory()); + server.addBean(mBeanContainer); + }); + }; } } diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/GMSApplication.java b/metadata-service/war/src/main/java/com/linkedin/gms/GMSApplication.java new file mode 100644 index 00000000000000..ec8aaeffbba1d0 --- /dev/null +++ b/metadata-service/war/src/main/java/com/linkedin/gms/GMSApplication.java @@ -0,0 +1,21 @@ +package com.linkedin.gms; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import 
org.springframework.boot.builder.SpringApplicationBuilder; +import org.springframework.boot.web.servlet.support.SpringBootServletInitializer; +import org.springframework.context.annotation.Import; + +@SpringBootApplication +@Import({CommonApplicationConfig.class, ServletConfig.class}) +public class GMSApplication extends SpringBootServletInitializer { + + @Override + protected SpringApplicationBuilder configure(SpringApplicationBuilder application) { + return application.sources(GMSApplication.class); + } + + public static void main(String[] args) { + SpringApplication.run(GMSApplication.class, args); + } +} diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/ServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/ServletConfig.java new file mode 100644 index 00000000000000..fbd8d72b76c8f2 --- /dev/null +++ b/metadata-service/war/src/main/java/com/linkedin/gms/ServletConfig.java @@ -0,0 +1,158 @@ +package com.linkedin.gms; + +import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; +import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; + +import com.datahub.auth.authentication.filter.AuthenticationFilter; +import com.datahub.gms.servlet.Config; +import com.datahub.gms.servlet.ConfigSearchExport; +import com.datahub.gms.servlet.HealthCheck; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.StreamReadConstraints; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.r2.transport.http.server.RAPJakartaServlet; +import com.linkedin.restli.server.RestliHandlerServlet; +import io.datahubproject.openapi.converter.StringToChangeCategoryConverter; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.web.servlet.FilterRegistrationBean; +import 
org.springframework.boot.web.servlet.ServletRegistrationBean; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.Ordered; +import org.springframework.core.annotation.Order; +import org.springframework.format.FormatterRegistry; +import org.springframework.http.converter.ByteArrayHttpMessageConverter; +import org.springframework.http.converter.FormHttpMessageConverter; +import org.springframework.http.converter.HttpMessageConverter; +import org.springframework.http.converter.StringHttpMessageConverter; +import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; +import org.springframework.web.servlet.config.annotation.AsyncSupportConfigurer; +import org.springframework.web.servlet.config.annotation.EnableWebMvc; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +/** + * Common configuration for all servlets. Generally this list also includes dependencies of the + * embedded MAE/MCE consumers. 
+ */ +@Slf4j +@Configuration +@Order(1) +@EnableWebMvc +@ComponentScan( + basePackages = {"io.datahubproject.openapi.schema.registry.config", "com.linkedin.gms.servlet"}) +public class ServletConfig implements WebMvcConfigurer { + + @Value("${datahub.gms.async.request-timeout-ms}") + private long asyncTimeoutMilliseconds; + + @Bean + public FilterRegistrationBean authFilter(AuthenticationFilter filter) { + FilterRegistrationBean registration = new FilterRegistrationBean<>(); + registration.setFilter(filter); + registration.setOrder(Ordered.HIGHEST_PRECEDENCE); + registration.setAsyncSupported(true); + + // Register filter for all paths - exclusions are handled by shouldNotFilter() + registration.addUrlPatterns("/*"); + + return registration; + } + + @Bean + public ServletRegistrationBean healthCheckServlet() { + ServletRegistrationBean registration = + new ServletRegistrationBean<>(new HealthCheck()); + registration.setName("healthCheck"); + registration.addUrlMappings("/health"); + registration.setLoadOnStartup(15); + registration.setAsyncSupported(true); + return registration; + } + + @Bean + public ServletRegistrationBean configServlet() { + ServletRegistrationBean registration = new ServletRegistrationBean<>(new Config()); + registration.setName("config"); + registration.addUrlMappings("/config"); + registration.setLoadOnStartup(15); + registration.setAsyncSupported(true); + return registration; + } + + @Bean + public ServletRegistrationBean configSearchExportServlet() { + ServletRegistrationBean registration = + new ServletRegistrationBean<>(new ConfigSearchExport()); + registration.setName("config-search-export"); + registration.addUrlMappings("/config/search/export"); + registration.setLoadOnStartup(15); + registration.setAsyncSupported(true); + return registration; + } + + /** + * SpringBoot is now the default, explicitly map these to legacy rest.li servlet. 
Additions are + * more likely to be built on the Spring side so we're preventing unexpected behavior for the most + * likely changes. Rest.li API is intended to be deprecated and removed. + * + * @param r2Servlet the restli servlet + * @return registration + */ + @Bean + public ServletRegistrationBean restliServletRegistration( + RAPJakartaServlet r2Servlet) { + ServletRegistrationBean registration = + new ServletRegistrationBean<>(new RestliHandlerServlet(r2Servlet)); + registration.addUrlMappings( + "/aspects/*", + "/entities/*", + "/entitiesV2/*", + "/entitiesVersionedV2/*", + "/usageStats/*", + "/platform/*", + "/relationships/*", + "/analytics/*", + "/operations/*", + "/runs/*"); + registration.setLoadOnStartup(2); + registration.setOrder(Integer.MAX_VALUE); // lowest priority + return registration; + } + + @Override + public void configureMessageConverters(List> messageConverters) { + messageConverters.add(new StringHttpMessageConverter()); + messageConverters.add(new ByteArrayHttpMessageConverter()); + messageConverters.add(new FormHttpMessageConverter()); + + ObjectMapper objectMapper = new ObjectMapper(); + int maxSize = + Integer.parseInt( + System.getenv() + .getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); + objectMapper + .getFactory() + .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); + objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + MappingJackson2HttpMessageConverter jsonConverter = + new MappingJackson2HttpMessageConverter(objectMapper); + messageConverters.add(jsonConverter); + } + + @Override + public void addFormatters(FormatterRegistry registry) { + registry.addConverter(new StringToChangeCategoryConverter()); + } + + @Override + public void configureAsyncSupport(@Nonnull AsyncSupportConfigurer configurer) { + 
WebMvcConfigurer.super.configureAsyncSupport(configurer); + configurer.setDefaultTimeout(asyncTimeoutMilliseconds); + } +} diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/SpringApplicationInitializer.java b/metadata-service/war/src/main/java/com/linkedin/gms/SpringApplicationInitializer.java deleted file mode 100644 index 821d1bf0ad6f8c..00000000000000 --- a/metadata-service/war/src/main/java/com/linkedin/gms/SpringApplicationInitializer.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.linkedin.gms; - -import javax.annotation.Nonnull; -import org.springframework.context.ApplicationContextInitializer; -import org.springframework.context.ConfigurableApplicationContext; - -public class SpringApplicationInitializer - implements ApplicationContextInitializer { - @Override - public void initialize(@Nonnull ConfigurableApplicationContext applicationContext) {} -} diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/WebApplicationInitializer.java b/metadata-service/war/src/main/java/com/linkedin/gms/WebApplicationInitializer.java deleted file mode 100644 index 4ed84e48a5049f..00000000000000 --- a/metadata-service/war/src/main/java/com/linkedin/gms/WebApplicationInitializer.java +++ /dev/null @@ -1,166 +0,0 @@ -package com.linkedin.gms; - -import static com.linkedin.metadata.boot.OnBootApplicationListener.SCHEMA_REGISTRY_SERVLET_NAME; - -import com.datahub.auth.authentication.filter.AuthenticationFilter; -import com.datahub.gms.servlet.Config; -import com.datahub.gms.servlet.ConfigSearchExport; -import com.datahub.gms.servlet.HealthCheck; -import com.linkedin.gms.servlet.AuthServletConfig; -import com.linkedin.gms.servlet.GraphQLServletConfig; -import com.linkedin.gms.servlet.OpenAPIServletConfig; -import com.linkedin.gms.servlet.RestliServletConfig; -import com.linkedin.gms.servlet.SchemaRegistryServletConfig; -import jakarta.servlet.DispatcherType; -import jakarta.servlet.FilterRegistration; -import jakarta.servlet.ServletContext; -import 
jakarta.servlet.ServletRegistration; -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.List; -import org.springframework.web.context.ContextLoaderListener; -import org.springframework.web.context.support.AnnotationConfigWebApplicationContext; -import org.springframework.web.context.support.HttpRequestHandlerServlet; -import org.springframework.web.servlet.DispatcherServlet; - -/** This class is before Spring Context is loaded, previously web.xml based */ -public class WebApplicationInitializer - implements org.springframework.web.WebApplicationInitializer { - @Override - public void onStartup(ServletContext container) { - AnnotationConfigWebApplicationContext rootContext = new AnnotationConfigWebApplicationContext(); - rootContext.register(CommonApplicationConfig.class); - - ContextLoaderListener contextLoaderListener = new ContextLoaderListener(rootContext); - container.addListener(contextLoaderListener); - container.setInitParameter( - "contextInitializerClasses", "com.linkedin.gms.SpringApplicationInitializer"); - - // Auth filter - List servletNames = new ArrayList<>(); - - // Independent dispatcher - schemaRegistryServlet(container); - - // Spring Dispatcher servlets - DispatcherServlet dispatcherServlet = new DispatcherServlet(rootContext); - servletNames.add(authServlet(rootContext, dispatcherServlet, container)); - servletNames.add(graphQLServlet(rootContext, dispatcherServlet, container)); - servletNames.add(openAPIServlet(rootContext, dispatcherServlet, container)); - // Restli non-Dispatcher default - servletNames.add(restliServlet(rootContext, container)); - - FilterRegistration.Dynamic filterRegistration = - container.addFilter("authenticationFilter", AuthenticationFilter.class); - filterRegistration.setAsyncSupported(true); - filterRegistration.addMappingForServletNames( - EnumSet.of(DispatcherType.ASYNC, DispatcherType.REQUEST), - false, - servletNames.toArray(String[]::new)); - - // Non-Spring servlets - 
healthCheckServlet(container); - configServlet(container); - } - - /* - * This is a servlet exclusive to DataHub's implementation of the Confluent OpenAPI spec which is not behind - * DataHub's authentication layer as it is not compatible with confluent consumers & producers. - */ - private void schemaRegistryServlet(ServletContext container) { - AnnotationConfigWebApplicationContext webContext = new AnnotationConfigWebApplicationContext(); - webContext.setId(SCHEMA_REGISTRY_SERVLET_NAME); - webContext.register(SchemaRegistryServletConfig.class); - - DispatcherServlet dispatcherServlet = new DispatcherServlet(webContext); - ServletRegistration.Dynamic registration = - container.addServlet(SCHEMA_REGISTRY_SERVLET_NAME, dispatcherServlet); - registration.addMapping("/schema-registry/*"); - registration.setLoadOnStartup(1); - registration.setAsyncSupported(true); - } - - private String authServlet( - AnnotationConfigWebApplicationContext rootContext, - DispatcherServlet dispatcherServlet, - ServletContext container) { - final String servletName = "dispatcher-auth"; - rootContext.register(AuthServletConfig.class); - - ServletRegistration.Dynamic registration = container.addServlet(servletName, dispatcherServlet); - registration.addMapping("/auth/*"); - registration.setLoadOnStartup(5); - registration.setAsyncSupported(true); - - return servletName; - } - - private String graphQLServlet( - AnnotationConfigWebApplicationContext rootContext, - DispatcherServlet dispatcherServlet, - ServletContext container) { - final String servletName = "dispatcher-graphql"; - rootContext.register(GraphQLServletConfig.class); - - ServletRegistration.Dynamic registration = container.addServlet(servletName, dispatcherServlet); - registration.addMapping("/api/*"); - registration.setLoadOnStartup(5); - registration.setAsyncSupported(true); - - return servletName; - } - - private String openAPIServlet( - AnnotationConfigWebApplicationContext rootContext, - DispatcherServlet dispatcherServlet, 
- ServletContext container) { - final String servletName = "dispatcher-openapi"; - rootContext.register(OpenAPIServletConfig.class); - - ServletRegistration.Dynamic registration = container.addServlet(servletName, dispatcherServlet); - registration.addMapping("/openapi/*"); - registration.setLoadOnStartup(5); - registration.setAsyncSupported(true); - - return servletName; - } - - private String restliServlet( - AnnotationConfigWebApplicationContext rootContext, ServletContext container) { - final String servletName = "restliRequestHandler"; - - rootContext.register(RestliServletConfig.class); - - ServletRegistration.Dynamic registration = - container.addServlet(servletName, HttpRequestHandlerServlet.class); - registration.addMapping("/*"); - registration.setLoadOnStartup(10); - registration.setAsyncSupported(true); - registration.setInitParameter( - "org.springframework.web.servlet.FrameworkServlet.ORDER", - String.valueOf(Integer.MAX_VALUE - 1)); - - return servletName; - } - - private void healthCheckServlet(ServletContext container) { - ServletRegistration.Dynamic registration = - container.addServlet("healthCheck", new HealthCheck()); - registration.addMapping("/health"); - registration.setLoadOnStartup(15); - registration.setAsyncSupported(true); - } - - private void configServlet(ServletContext container) { - ServletRegistration.Dynamic registration = container.addServlet("config", new Config()); - registration.addMapping("/config"); - registration.setLoadOnStartup(15); - registration.setAsyncSupported(true); - - ServletRegistration.Dynamic registration2 = - container.addServlet("config-search-export", new ConfigSearchExport()); - registration2.addMapping("/config/search/export"); - registration2.setLoadOnStartup(15); - registration2.setAsyncSupported(true); - } -} diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/AuthServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/AuthServletConfig.java index 
437c8722a747f5..08074c26d44c55 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/AuthServletConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/AuthServletConfig.java @@ -2,9 +2,7 @@ import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; -import org.springframework.web.servlet.config.annotation.EnableWebMvc; -@EnableWebMvc @ComponentScan(basePackages = {"com.datahub.auth.authentication"}) @Configuration public class AuthServletConfig {} diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java index 42413df0757e64..9f08306518482a 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java @@ -2,9 +2,7 @@ import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; -import org.springframework.web.servlet.config.annotation.EnableWebMvc; -@EnableWebMvc @ComponentScan( basePackages = { "com.datahub.graphql", diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/OpenAPIServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/OpenAPIServletConfig.java index d259f763d3d440..ebab789b058799 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/OpenAPIServletConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/OpenAPIServletConfig.java @@ -2,9 +2,7 @@ import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; -import org.springframework.web.servlet.config.annotation.EnableWebMvc; -@EnableWebMvc @ComponentScan( basePackages = { "io.datahubproject.openapi.config", diff --git 
a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/RestliServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/RestliServletConfig.java index 222e2356dfb1ca..b079efac4a8c5b 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/RestliServletConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/RestliServletConfig.java @@ -1,20 +1,8 @@ package com.linkedin.gms.servlet; -import com.linkedin.metadata.spring.YamlPropertySourceFactory; -import com.linkedin.r2.transport.http.server.RAPJakartaServlet; -import com.linkedin.restli.server.RestliHandlerServlet; -import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.PropertySource; -import org.springframework.web.context.support.HttpRequestHandlerServlet; @ComponentScan(basePackages = {"com.linkedin.restli.server"}) -@PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) @Configuration -public class RestliServletConfig { - @Bean("restliRequestHandler") - public HttpRequestHandlerServlet restliHandlerServlet(final RAPJakartaServlet r2Servlet) { - return new RestliHandlerServlet(r2Servlet); - } -} +public class RestliServletConfig {} diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/SchemaRegistryServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/SchemaRegistryServletConfig.java deleted file mode 100644 index beb633888bd349..00000000000000 --- a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/SchemaRegistryServletConfig.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.linkedin.gms.servlet; - -import org.springframework.context.annotation.ComponentScan; -import org.springframework.context.annotation.Configuration; -import 
org.springframework.web.servlet.config.annotation.EnableWebMvc; - -@EnableWebMvc -@ComponentScan(basePackages = {"io.datahubproject.openapi.schema.registry"}) -@Configuration -public class SchemaRegistryServletConfig {} diff --git a/metadata-service/war/src/test/java/com/linkedin/gms/SpringTest.java b/metadata-service/war/src/test/java/com/linkedin/gms/SpringTest.java index 27dabf1e33ddd8..74d2f2c8107148 100644 --- a/metadata-service/war/src/test/java/com/linkedin/gms/SpringTest.java +++ b/metadata-service/war/src/test/java/com/linkedin/gms/SpringTest.java @@ -3,30 +3,51 @@ import static org.testng.AssertJUnit.assertNotNull; import com.linkedin.gms.factory.telemetry.DailyReport; -import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.boot.BootstrapManager; import com.linkedin.metadata.models.registry.EntityRegistry; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import io.ebean.Database; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.context.TestConfiguration; import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Primary; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; @SpringBootTest( webEnvironment = SpringBootTest.WebEnvironment.MOCK, - properties = {"telemetry.enabledServer=true"}) -@ContextConfiguration(classes = CommonApplicationConfig.class) + properties = { + "telemetry.enabledServer=true", + "spring.main.allow-bean-definition-overriding=true" + }) +@ContextConfiguration(classes = {CommonApplicationConfig.class, SpringTest.TestBeans.class}) public class SpringTest extends AbstractTestNGSpringContextTests { // Mock Beans take precedence, we add these 
to avoid needing to configure data sources etc. while // still testing prod config @MockBean private Database database; - @MockBean private ConfigEntityRegistry configEntityRegistry; - @MockBean private EntityRegistry entityRegistry; + @MockBean private BootstrapManager bootstrapManager; @Test public void testTelemetry() { DailyReport dailyReport = this.applicationContext.getBean(DailyReport.class); assertNotNull(dailyReport); } + + @TestConfiguration + public static class TestBeans { + @Bean + public OperationContext systemOperationContext() { + return TestOperationContexts.systemContextNoSearchAuthorization(); + } + + @Primary + @Bean + public EntityRegistry entityRegistry(OperationContext systemOperationContext) { + return systemOperationContext.getEntityRegistry(); + } + } } From f97880d8dd127f785ebb07b8d2cc09a6b00e632c Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Thu, 23 Jan 2025 12:38:24 -0500 Subject: [PATCH 156/249] DataHub Cloud release notes v0.3.8 (#12271) Co-authored-by: Jay <159848059+jayacryl@users.noreply.github.com> Co-authored-by: RyanHolstien Co-authored-by: Andrew Sikowitz Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com> Co-authored-by: Harshal Sheth Co-authored-by: John Joyce Co-authored-by: Anton Kuraev --- docs-website/sidebars.js | 1 + docs/managed-datahub/release-notes/v_0_3_8.md | 81 +++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 docs/managed-datahub/release-notes/v_0_3_8.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index bcb06affedcff0..fbd35b60aedba9 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -368,6 +368,7 @@ module.exports = { }, { "DataHub Cloud Release History": [ + "docs/managed-datahub/release-notes/v_0_3_8", "docs/managed-datahub/release-notes/v_0_3_7", "docs/managed-datahub/release-notes/v_0_3_6", "docs/managed-datahub/release-notes/v_0_3_5", diff --git a/docs/managed-datahub/release-notes/v_0_3_8.md 
b/docs/managed-datahub/release-notes/v_0_3_8.md new file mode 100644 index 00000000000000..75bba525688d89 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_3_8.md @@ -0,0 +1,81 @@ +# v0.3.8 +--- + +Release Availability Date +--- +21-Jan-2025 + +Recommended CLI/SDK +--- +0.15.0.1 + +## Known Issues + +- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. + +## Release Changelog +--- + +### v0.3.8 + +- All changes in https://github.com/datahub-project/datahub/releases/tag/v0.14.1 + - Note Breaking Changes: https://datahubproject.io/docs/how/updating-datahub/#0141 + +- Breaking Changes + - Soft-deleted and SCIM suspended users are no longer able to access the UI/APIs with a pre-existing valid token + +- Bug Fixes + - Ingestion page sorts across all sources rather than just the ones loaded on the page. 
+ - Data Products Preview cards now render correctly in scenarios where there are several fully filled out products in a list + - Snowflake tag propagation looks for case-insensitive table match on tables with special characters + - Lineage patch properly encodes special characters + - Fixed multiple bugs around lineage visualization's handling of graph cycles + - Fixed lineage filter node search on schema field level lineage + - Search csv export properly includes data products + - Properly show lineage to an upstream dbt source that has no further upstreams + - Soft-deleted & suspended users with valid tokens are rejected + - Fixes scroll behavior on quality tab with small screens + - Add backwards compatibility to the UI for old policy filters in the UI + - Fixes nesting logic in Properties tab for structured properties that prefix matched each other's qualified names + - Show error when editing help link if there is an error + - Fix UI bug on schema table when clicking hides part of table + - Fix rendering schema field references as structured property values to show field name, not urn + - Partially fix a bug where dbt sync back would generate invalid YAML. This won't happen anymore, but we might miss some changes because of it + - Stability and error message improvements for AI docs generation + - Various quality and performance improvements to AI classification + - Fix searching for nested columns in the Columns tab of Dataset profile page + - Remote Executor: make cron schedules in ingestions/assertions scheduler POSIX-compliant. + - Remote Executor: limit the number of automated restarts of aborted tasks. + - Remote Executor: always update task status synchronously. + + +- Product + - [BETA] Introducing the new Navigation Bar / App redesign! Starting with this release, you can try out a new look and feel to your DataHub app. To enable this BETA feature, reach out to your Acryl representative. 
+ - [BETA] Add ability to create tag propagation automations to downstream and sibling assets via the UI in the Automations tab + - Support the Notes feature for all entity types with profile pages + - Test notifications button is now easier to discover. + - UX polishes to the Assertions tab + - UX polishes to the Column sidebar + - UX polishes to column and table deprecation, including deprecation visible in lineage graph + - Add the ability to do AND for filters of a given type. For example, we now allow you to filter for assets that have tag1 AND tag2 + - Combine sibling entities on home page insights + - Hardened structured properties feature by supporting broken references and adding better validation to the ID/qualified name + - Hardened forms feature by supporting invalid / unresolvable references + - Recurrent metadata sharing: Any assets shared between DataHub assets will be re-shared automatically once per day. + + +- Platform + - Optional config to reduce Slack app permissions + - Support batch processing for MCP consumption to increase throughput of the Kafka consumer significantly + - Resumability for DataHub Upgrade jobs + - Async actions for Metadata Tests corner case where some actions could be missed has been fixed + - Structured Properties now have a stricter validation for no spaces in the qualified name + - Urn Validation logic has been improved to reduce delayed error conditions + - Database transaction and locking updates to fix conditions where retryable persistence exceptions would cause ingestion batches to fail + - Fix for Metadata Tests to support primitive PDL Typeref fields like timestamps + - Entity Versioning initial feature support, disabled by default and can be turned on via feature flag + - OpenAPI fixes for some faulty parameters + - Improved memory performance of the search ranking system ingestion source + - Remote Executor: Added registration/discovery mechanism. 
+ - Remote Executor: Added weight-based task scheduling to allow variable concurrency for memory-greedy tasks. + From cb47577ac58f658189b24407c26a770685ca7c21 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 23 Jan 2025 11:45:22 -0800 Subject: [PATCH 157/249] docs(ingest/tableau): clarify impact of lower permissions (#12452) --- metadata-ingestion/docs/sources/tableau/tableau_pre.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/docs/sources/tableau/tableau_pre.md b/metadata-ingestion/docs/sources/tableau/tableau_pre.md index 65ff08367fdc8f..a3ac85818a51aa 100644 --- a/metadata-ingestion/docs/sources/tableau/tableau_pre.md +++ b/metadata-ingestion/docs/sources/tableau/tableau_pre.md @@ -16,9 +16,12 @@ Either way, the user/token must have the **Site Administrator Explorer** site ro :::info -We need the `Site Administrator Explorer` site role in order to get complete metadata from Tableau. +We need the **Site Administrator Explorer** site role in order to get complete metadata from Tableau. -With any lower role, the Tableau Metadata API returns missing/partial metadata. This particularly affects data source fields and definitions, which impacts our ability to extract columns and generate column lineage. As such, other site roles like `Viewer` are insufficient with the current Tableau Metadata API. +With any lower role, the Tableau Metadata API returns missing/partial metadata. +This particularly affects data source fields and definitions, which impacts our ability to extract most columns and generate column lineage. Some table-level lineage is also impacted. + +Other site roles, including Site Administrator Creator and Viewer, are insufficient due to these limitations in the current Tableau Metadata API. 
::: From 7ac0dc65e15a9ef7c8280bce63f344e799346ca3 Mon Sep 17 00:00:00 2001 From: ryota-cloud Date: Thu, 23 Jan 2025 17:58:06 -0800 Subject: [PATCH 158/249] Adding smoke test for batch ingestion throwing exception (#12453) --- smoke-test/tests/restli/restli_test.py | 2 +- .../restli/test_restli_batch_ingestion.py | 54 ++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/smoke-test/tests/restli/restli_test.py b/smoke-test/tests/restli/restli_test.py index a0c9a26750c0b0..c10ae3fe584f08 100644 --- a/smoke-test/tests/restli/restli_test.py +++ b/smoke-test/tests/restli/restli_test.py @@ -51,7 +51,7 @@ def make_mcp(self) -> MetadataChangeProposalClass: return mcp -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def ingest_cleanup_data(auth_session, graph_client, request): yield delete_urns(graph_client, generated_urns) diff --git a/smoke-test/tests/restli/test_restli_batch_ingestion.py b/smoke-test/tests/restli/test_restli_batch_ingestion.py index 0e92988ed64703..ab33a2b26605db 100644 --- a/smoke-test/tests/restli/test_restli_batch_ingestion.py +++ b/smoke-test/tests/restli/test_restli_batch_ingestion.py @@ -3,6 +3,7 @@ import pytest +import datahub.metadata.schema_classes as models from datahub.emitter.mce_builder import make_dashboard_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.serialization_helper import pre_json_transform @@ -12,6 +13,7 @@ ChangeAuditStampsClass, DashboardInfoClass, ) +from datahub.metadata.urns import MlModelUrn from tests.consistency_utils import wait_for_writes_to_sync from tests.restli.restli_test import MetadataChangeProposalInvalidWrapper from tests.utils import delete_urns @@ -19,7 +21,7 @@ generated_urns: List[str] = [] -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def ingest_cleanup_data(auth_session, graph_client, request): yield delete_urns(graph_client, generated_urns) @@ -84,6 +86,29 @@ def 
_create_invalid_dashboard_mcp() -> MetadataChangeProposalClass: return mcp_invalid.make_mcp() +def _create_invalid_dataset_mcps() -> List[MetadataChangeProposalWrapper]: + dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:kafka,my_dataset,PROD)" + model_urn = MlModelUrn("mlflow", "my_model", "PROD").urn() + bad_mcps = [ + MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=models.StatusClass(removed=False), + ), + MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=models.UpstreamLineageClass( + upstreams=[ + models.UpstreamClass( + dataset=model_urn, + type=models.DatasetLineageTypeClass.TRANSFORMED, + ) + ] + ), + ), + ] + return bad_mcps + + def test_restli_batch_ingestion_sync(graph_client): # Positive Test (all valid MetadataChangeProposal) mcps = _create_valid_dashboard_mcps() @@ -133,3 +158,30 @@ def test_restli_batch_ingestion_async(graph_client): assert aspect.title == "Dummy Title For Testing" assert aspect.description == "Dummy Description For Testing" assert aspect.lastModified is not None + + +def test_restli_batch_ingestion_exception_sync(graph_client): + """ + Test Batch ingestion when an exception occurs in sync mode + """ + bad_mcps = _create_invalid_dataset_mcps() + generated_urns.extend([mcp.entityUrn for mcp in bad_mcps if mcp.entityUrn]) + + try: + graph_client.emit_mcps(bad_mcps, async_flag=False) + raise AssertionError("should have thrown an exception") + except Exception as e: + if isinstance(e, AssertionError): + raise e + print(f"Error emitting MCPs due to {e}") + + +def test_restli_batch_ingestion_exception_async(graph_client): + """ + Test Batch ingestion when an exception occurs in async mode + """ + bad_mcps = _create_invalid_dataset_mcps() + generated_urns.extend([mcp.entityUrn for mcp in bad_mcps if mcp.entityUrn]) + # TODO expectation is that it throws exception, but it doesn't currently.this test case need to change after fix. 
+ ret = graph_client.emit_mcps(bad_mcps, async_flag=True) + assert ret >= 0 From 86ed40c9041133e5da18655d76b8b2127d94bc4e Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Fri, 24 Jan 2025 07:27:32 +0000 Subject: [PATCH 159/249] fix(docs): Add links to new datahub cloud event source (#12450) --- docs/actions/sources/datahub-cloud-event-source.md | 2 +- docs/managed-datahub/datahub-api/entity-events-api.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/actions/sources/datahub-cloud-event-source.md b/docs/actions/sources/datahub-cloud-event-source.md index d1751ae2568676..656fe4a3a6329f 100644 --- a/docs/actions/sources/datahub-cloud-event-source.md +++ b/docs/actions/sources/datahub-cloud-event-source.md @@ -38,7 +38,7 @@ If you've configured your Action pipeline `failure_mode` to be `THROW`, then eve The DataHub Cloud Event Source produces -- [Entity Change Event V1](../events/entity-change-event.md) +- [Entity Change Event V1](../../managed-datahub/datahub-api/entity-events-api.md) Note that the DataHub Cloud Event Source does _not_ yet support the full [Metadata Change Log V1](../events/metadata-change-log-event.md) event stream. 
diff --git a/docs/managed-datahub/datahub-api/entity-events-api.md b/docs/managed-datahub/datahub-api/entity-events-api.md index e59f1650c7d766..377f2fd01e813b 100644 --- a/docs/managed-datahub/datahub-api/entity-events-api.md +++ b/docs/managed-datahub/datahub-api/entity-events-api.md @@ -15,6 +15,7 @@ The Events API allows you to integrate changes happening on the DataHub Metadata ### Supported Integrations * [AWS EventBridge](docs/managed-datahub/operator-guide/setting-up-events-api-on-aws-eventbridge.md) +* [DataHub Cloud Event Source](docs/actions/sources/datahub-cloud-event-source.md) ### Use Cases From f80d58d297653bb628f5b6e6cb4969b691af44eb Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 24 Jan 2025 00:42:33 -0800 Subject: [PATCH 160/249] fix(cli): ignore prereleases when suggesting upgrades (#12424) --- metadata-ingestion/src/datahub/upgrade/upgrade.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py index fb14514588e5fc..7872681797d6fe 100644 --- a/metadata-ingestion/src/datahub/upgrade/upgrade.py +++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py @@ -93,11 +93,11 @@ async def get_github_stats(): async with aiohttp.ClientSession( headers={"Accept": "application/vnd.github.v3+json"} ) as session: - gh_url = "https://api.github.com/repos/datahub-project/datahub/releases" + gh_url = "https://api.github.com/repos/datahub-project/datahub/releases/latest" async with session.get(gh_url) as gh_response: gh_response_json = await gh_response.json() - latest_server_version = Version(gh_response_json[0].get("tag_name")) - latest_server_date = gh_response_json[0].get("published_at") + latest_server_version = Version(gh_response_json.get("tag_name")) + latest_server_date = gh_response_json.get("published_at") return (latest_server_version, latest_server_date) From b701e0714ed4b08646d877dce4c0b3e430e0c257 Mon Sep 17 00:00:00 2001 
From: Harshal Sheth Date: Fri, 24 Jan 2025 00:42:48 -0800 Subject: [PATCH 161/249] fix(ingest/clickhouse): remove unused lineage_properties code path (#12442) --- .../ingestion/source/sql/clickhouse.py | 48 ++----------------- 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index a8208ca807ed02..a2db116cf20912 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -53,7 +53,6 @@ ) from datahub.metadata.schema_classes import ( DatasetLineageTypeClass, - DatasetPropertiesClass, DatasetSnapshotClass, UpstreamClass, ) @@ -418,41 +417,11 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit dataset_snapshot: DatasetSnapshotClass = wu.metadata.proposedSnapshot assert dataset_snapshot - lineage_mcp, lineage_properties_aspect = self.get_lineage_mcp( - wu.metadata.proposedSnapshot.urn - ) + lineage_mcp = self.get_lineage_mcp(wu.metadata.proposedSnapshot.urn) if lineage_mcp is not None: yield lineage_mcp.as_workunit() - if lineage_properties_aspect: - aspects = dataset_snapshot.aspects - if aspects is None: - aspects = [] - - dataset_properties_aspect: Optional[DatasetPropertiesClass] = None - - for aspect in aspects: - if isinstance(aspect, DatasetPropertiesClass): - dataset_properties_aspect = aspect - - if dataset_properties_aspect is None: - dataset_properties_aspect = DatasetPropertiesClass() - aspects.append(dataset_properties_aspect) - - custom_properties = ( - { - **dataset_properties_aspect.customProperties, - **lineage_properties_aspect.customProperties, - } - if dataset_properties_aspect.customProperties - else lineage_properties_aspect.customProperties - ) - dataset_properties_aspect.customProperties = custom_properties - dataset_snapshot.aspects = aspects - - 
dataset_snapshot.aspects.append(dataset_properties_aspect) - # Emit the work unit from super. yield wu @@ -656,19 +625,16 @@ def _populate_lineage(self) -> None: def get_lineage_mcp( self, dataset_urn: str - ) -> Tuple[ - Optional[MetadataChangeProposalWrapper], Optional[DatasetPropertiesClass] - ]: + ) -> Optional[MetadataChangeProposalWrapper]: dataset_key = mce_builder.dataset_urn_to_key(dataset_urn) if dataset_key is None: - return None, None + return None if not self._lineage_map: self._populate_lineage() assert self._lineage_map is not None upstream_lineage: List[UpstreamClass] = [] - custom_properties: Dict[str, str] = {} if dataset_key.name in self._lineage_map: item = self._lineage_map[dataset_key.name] @@ -684,16 +650,12 @@ def get_lineage_mcp( ) upstream_lineage.append(upstream_table) - properties = None - if custom_properties: - properties = DatasetPropertiesClass(customProperties=custom_properties) - if not upstream_lineage: - return None, properties + return None mcp = MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=UpstreamLineage(upstreams=upstream_lineage), ) - return mcp, properties + return mcp From f8149084a1826bfd67c5708372ce5e64eaa0c05d Mon Sep 17 00:00:00 2001 From: Saketh Varma Date: Fri, 24 Jan 2025 14:14:34 +0530 Subject: [PATCH 162/249] fix(ui): fetch Data Products always from the network (#11165) Co-authored-by: Aseem Bansal --- .../src/app/entity/domain/DataProductsTab/DataProductsTab.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx b/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx index 15cc99127f3500..39f89979dd95a0 100644 --- a/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx +++ b/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx @@ -68,6 +68,7 @@ export default function DataProductsTab() { searchFlags: { skipCache: true }, }, }, + fetchPolicy: 'no-cache', }); 
const totalResults = data?.searchAcrossEntities?.total || 0; const searchResults = data?.searchAcrossEntities?.searchResults?.map((r) => r.entity) || []; From 0f538d8df2ce26bdc7f46757e95c2c9735d40182 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 24 Jan 2025 14:51:52 +0530 Subject: [PATCH 163/249] fix(ingest): fix reporting for missing secure view lineage (#12430) Co-authored-by: Harshal Sheth --- .../source/snowflake/snowflake_schema_gen.py | 24 +++++--- .../snowflake/test_snowflake_failures.py | 60 +++++++++++++++++++ 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index a2d69d9e552916..04bc51f1ebd3f5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -491,15 +491,25 @@ def fetch_secure_view_definition( try: view_definitions = self.data_dictionary.get_secure_view_definitions() return view_definitions[db_name][schema_name][table_name] + except KeyError: + # Received secure view definitions but the view is not present in results + self.structured_reporter.info( + title="Secure view definition not found", + message="Lineage will be missing for the view.", + context=f"{db_name}.{schema_name}.{table_name}", + ) + return None except Exception as e: - if isinstance(e, SnowflakePermissionError): - error_msg = ( - "Failed to get secure views definitions. Please check permissions." - ) - else: - error_msg = "Failed to get secure views definitions" + action_msg = ( + "Please check permissions." + if isinstance(e, SnowflakePermissionError) + else "" + ) + self.structured_reporter.warning( - error_msg, + title="Failed to get secure views definitions", + message=f"Lineage will be missing for the view. 
{action_msg}", + context=f"{db_name}.{schema_name}.{table_name}", exc=e, ) return None diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index de6e996a52642b..4cb6cec4906efa 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -260,3 +260,63 @@ def test_snowflake_missing_snowflake_operations_permission_causes_pipeline_failu assert "usage-permission-error" in [ failure.message for failure in pipeline.source.get_report().failures ] + + +@freeze_time(FROZEN_TIME) +def test_snowflake_missing_snowflake_secure_view_definitions_raises_pipeline_info( + pytestconfig, + snowflake_pipeline_config, +): + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Empty secure view definitions + sf_cursor.execute.side_effect = query_permission_response_override( + default_query_results, + [snowflake_query.SnowflakeQuery.get_secure_view_definitions()], + [], + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + + pipeline.raise_from_status(raise_warnings=True) + assert pipeline.source.get_report().infos.as_obj() == [ + { + "title": "Secure view definition not found", + "message": "Lineage will be missing for the view.", + "context": ["TEST_DB.TEST_SCHEMA.VIEW_1"], + } + ] + + +@freeze_time(FROZEN_TIME) +def test_snowflake_failed_secure_view_definitions_query_raises_pipeline_warning( + pytestconfig, + snowflake_pipeline_config, +): + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + # Error in 
getting secure view definitions + sf_cursor.execute.side_effect = query_permission_error_override( + default_query_results, + [snowflake_query.SnowflakeQuery.get_secure_view_definitions()], + "Database 'SNOWFLAKE' does not exist or not authorized.", + ) + pipeline = Pipeline(snowflake_pipeline_config) + pipeline.run() + assert pipeline.source.get_report().warnings.as_obj() == [ + { + "title": "Failed to get secure views definitions", + "message": "Lineage will be missing for the view. Please check permissions.", + "context": [ + "TEST_DB.TEST_SCHEMA.VIEW_1 : Database 'SNOWFLAKE' does not exist or not authorized." + ], + } + ] From a8d6c54965006462cf5f2fc5f9029ff517fb5a40 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 24 Jan 2025 11:35:39 -0800 Subject: [PATCH 164/249] feat(sdk): move version info to dedicated file (#12456) --- .../datahub-ingestion-base/smoke.Dockerfile | 6 +- docker/datahub-ingestion/Dockerfile | 8 +- docker/datahub-ingestion/Dockerfile-slim-only | 4 +- .../airflow-plugin/.gitignore | 1 - .../airflow-plugin/scripts/release.sh | 25 +++--- .../airflow-plugin/setup.py | 2 +- .../src/datahub_airflow_plugin/__init__.py | 19 +---- .../src/datahub_airflow_plugin/_version.py | 3 + .../datahub_airflow_plugin/datahub_plugin.py | 2 +- .../dagster-plugin/.gitignore | 1 - .../dagster-plugin/scripts/release.sh | 25 +++--- .../dagster-plugin/setup.py | 2 +- .../src/datahub_dagster_plugin/__init__.py | 22 +---- .../src/datahub_dagster_plugin/_version.py | 3 + .../gx-plugin/.gitignore | 1 - .../gx-plugin/scripts/release.sh | 25 +++--- metadata-ingestion-modules/gx-plugin/setup.py | 2 +- .../src/datahub_gx_plugin/__init__.py | 22 +---- .../src/datahub_gx_plugin/_version.py | 3 + .../prefect-plugin/.gitignore | 1 - .../prefect-plugin/scripts/release.sh | 25 +++--- .../prefect-plugin/setup.py | 6 +- .../src/prefect_datahub/__init__.py | 22 +---- .../src/prefect_datahub/_version.py | 3 + metadata-ingestion/.gitignore | 1 - 
metadata-ingestion/scripts/release.sh | 25 +++--- metadata-ingestion/setup.py | 17 ++-- metadata-ingestion/src/datahub/__init__.py | 26 +----- metadata-ingestion/src/datahub/_version.py | 13 +++ .../src/datahub/cli/check_cli.py | 2 +- .../src/datahub/cli/cli_utils.py | 6 +- .../src/datahub/cli/ingest_cli.py | 4 +- .../src/datahub/emitter/rest_emitter.py | 2 +- metadata-ingestion/src/datahub/entrypoints.py | 10 +-- .../src/datahub/ingestion/api/registry.py | 2 +- .../datahub_ingestion_run_summary_provider.py | 2 +- .../src/datahub/ingestion/run/connection.py | 2 +- .../src/datahub/ingestion/run/pipeline.py | 6 +- .../datahub/ingestion/source/unity/proxy.py | 4 +- .../src/datahub/telemetry/telemetry.py | 8 +- .../src/datahub/testing/check_imports.py | 28 ++++++ .../src/datahub/upgrade/upgrade.py | 2 +- .../tests/unit/test_packages.py | 6 +- .../tests/unit/test_packaging.py | 4 +- python-build/generate_release_scripts.py | 85 +++++++++++++++++++ 45 files changed, 277 insertions(+), 211 deletions(-) create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_version.py create mode 100644 metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/_version.py create mode 100644 metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/_version.py create mode 100644 metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/_version.py create mode 100644 metadata-ingestion/src/datahub/_version.py create mode 100644 python-build/generate_release_scripts.py diff --git a/docker/datahub-ingestion-base/smoke.Dockerfile b/docker/datahub-ingestion-base/smoke.Dockerfile index 34654faaad729d..81a6bd0e20cacc 100644 --- a/docker/datahub-ingestion-base/smoke.Dockerfile +++ b/docker/datahub-ingestion-base/smoke.Dockerfile @@ -20,9 +20,9 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ COPY . 
/datahub-src ARG RELEASE_VERSION RUN cd /datahub-src && \ - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion/src/datahub/__init__.py && \ - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ - cat metadata-ingestion/src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion/src/datahub/_version.py && \ + sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_version.py && \ + cat metadata-ingestion/src/datahub/_version.py && \ ./gradlew :metadata-ingestion:codegen && \ pip install file:metadata-ingestion-modules/airflow-plugin#egg=acryl-datahub-airflow-plugin file:metadata-ingestion#egg=acryl-datahub diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index ee0333e1cb1d1f..a9fd3a6662d1bb 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -26,10 +26,10 @@ COPY --chown=datahub ./metadata-ingestion-modules/airflow-plugin /metadata-inges ARG RELEASE_VERSION WORKDIR /metadata-ingestion -RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \ - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ - cat src/datahub/__init__.py | grep __version__ && \ - cat airflow-plugin/src/datahub_airflow_plugin/__init__.py | grep __version__ +RUN sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/_version.py && \ + sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed 
s/-/+/)\"/" airflow-plugin/src/datahub_airflow_plugin/_version.py && \ + cat src/datahub/_version.py | grep __version__ && \ + cat airflow-plugin/src/datahub_airflow_plugin/_version.py | grep __version__ FROM base AS slim-install diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only index 6ade262f2feded..80abff204df9f2 100644 --- a/docker/datahub-ingestion/Dockerfile-slim-only +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -15,8 +15,8 @@ COPY --chown=datahub ./metadata-ingestion /metadata-ingestion ARG RELEASE_VERSION WORKDIR /metadata-ingestion -RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \ - cat src/datahub/__init__.py +RUN sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/_version.py && \ + cat src/datahub/_version.py FROM base as slim-install diff --git a/metadata-ingestion-modules/airflow-plugin/.gitignore b/metadata-ingestion-modules/airflow-plugin/.gitignore index d0108e8361a060..6801b785ea1e4b 100644 --- a/metadata-ingestion-modules/airflow-plugin/.gitignore +++ b/metadata-ingestion-modules/airflow-plugin/.gitignore @@ -1,5 +1,4 @@ .envrc -src/datahub_airflow_plugin/__init__.py.bak .vscode/ output pvenv36/ diff --git a/metadata-ingestion-modules/airflow-plugin/scripts/release.sh b/metadata-ingestion-modules/airflow-plugin/scripts/release.sh index 8f23f72082c2ca..994c1ae145ce55 100755 --- a/metadata-ingestion-modules/airflow-plugin/scripts/release.sh +++ b/metadata-ingestion-modules/airflow-plugin/scripts/release.sh @@ -1,26 +1,31 @@ #!/bin/bash +# Auto-generated by python-build/generate_release_scripts.py. Do not edit manually. + set -euxo pipefail +ROOT=../.. +MODULE=datahub_airflow_plugin + if [[ ! ${RELEASE_SKIP_TEST:-} ]] && [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew build # also runs tests + ${ROOT}/gradlew build # also runs tests elif [[ ! 
${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew install + ${ROOT}/gradlew install fi -MODULE=datahub_airflow_plugin - # Check packaging constraint. python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' -if [[ ${RELEASE_VERSION:-} ]]; then - # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/__init__.py -else - vim src/${MODULE}/__init__.py + +# Update the release version. +if [[ ! ${RELEASE_VERSION:-} ]]; then + echo "RELEASE_VERSION is not set" + exit 1 fi +sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/_version.py +# Build and upload the release. rm -rf build dist || true python -m build if [[ ! ${RELEASE_SKIP_UPLOAD:-} ]]; then python -m twine upload 'dist/*' fi -mv src/${MODULE}/__init__.py.bak src/${MODULE}/__init__.py +mv src/${MODULE}/_version.py.bak src/${MODULE}/_version.py diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 58c04158957ccd..d03ed824c9a261 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -5,7 +5,7 @@ import setuptools package_metadata: dict = {} -with open("./src/datahub_airflow_plugin/__init__.py") as fp: +with open("./src/datahub_airflow_plugin/_version.py") as fp: exec(fp.read(), package_metadata) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py index e4040e3a17dfdc..7743c8ab2bab1a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py @@ 
-1,23 +1,12 @@ -# Published at https://pypi.org/project/acryl-datahub/. -__package_name__ = "acryl-datahub-airflow-plugin" -__version__ = "1!0.0.0.dev0" +from datahub_airflow_plugin._version import __package_name__, __version__ -def is_dev_mode() -> bool: - return __version__.endswith("dev0") - - -def nice_version_name() -> str: - if is_dev_mode(): - return "unavailable (installed in develop mode)" - return __version__ - - -def get_provider_info(): +def get_provider_info() -> dict: + # Register our hooks with Airflow. return { "package-name": f"{__package_name__}", "name": f"{__package_name__}", - "description": "Datahub metadata collector plugin", + "description": "DataHub metadata collector plugin", "connection-types": [ { "hook-class-name": "datahub_airflow_plugin.hooks.datahub.DatahubRestHook", diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_version.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_version.py new file mode 100644 index 00000000000000..efda3f6bf31247 --- /dev/null +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_version.py @@ -0,0 +1,3 @@ +# Published at https://pypi.org/project/acryl-datahub-airflow-plugin/. 
+__package_name__ = "acryl-datahub-airflow-plugin" +__version__ = "1!0.0.0.dev0" diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index 7638720db023ac..2aeaaee4a542d4 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -4,12 +4,12 @@ from airflow.plugins_manager import AirflowPlugin -from datahub_airflow_plugin import __package_name__ from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED from datahub_airflow_plugin._airflow_shims import ( HAS_AIRFLOW_LISTENER_API, NEEDS_AIRFLOW_LISTENER_MODULE, ) +from datahub_airflow_plugin._version import __package_name__ assert AIRFLOW_PATCHED logger = logging.getLogger(__name__) diff --git a/metadata-ingestion-modules/dagster-plugin/.gitignore b/metadata-ingestion-modules/dagster-plugin/.gitignore index 4ff42af3e16cff..6801b785ea1e4b 100644 --- a/metadata-ingestion-modules/dagster-plugin/.gitignore +++ b/metadata-ingestion-modules/dagster-plugin/.gitignore @@ -1,5 +1,4 @@ .envrc -src/datahub_dagster_plugin/__init__.py.bak .vscode/ output pvenv36/ diff --git a/metadata-ingestion-modules/dagster-plugin/scripts/release.sh b/metadata-ingestion-modules/dagster-plugin/scripts/release.sh index 10cb816d9ffc04..ffd5201574891c 100755 --- a/metadata-ingestion-modules/dagster-plugin/scripts/release.sh +++ b/metadata-ingestion-modules/dagster-plugin/scripts/release.sh @@ -1,26 +1,31 @@ #!/bin/bash +# Auto-generated by python-build/generate_release_scripts.py. Do not edit manually. + set -euxo pipefail +ROOT=../.. +MODULE=datahub_dagster_plugin + if [[ ! ${RELEASE_SKIP_TEST:-} ]] && [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew build # also runs tests + ${ROOT}/gradlew build # also runs tests elif [[ ! 
${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew install + ${ROOT}/gradlew install fi -MODULE=datahub_dagster_plugin - # Check packaging constraint. python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' -if [[ ${RELEASE_VERSION:-} ]]; then - # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/__init__.py -else - vim src/${MODULE}/__init__.py + +# Update the release version. +if [[ ! ${RELEASE_VERSION:-} ]]; then + echo "RELEASE_VERSION is not set" + exit 1 fi +sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/_version.py +# Build and upload the release. rm -rf build dist || true python -m build if [[ ! ${RELEASE_SKIP_UPLOAD:-} ]]; then python -m twine upload 'dist/*' fi -mv src/${MODULE}/__init__.py.bak src/${MODULE}/__init__.py +mv src/${MODULE}/_version.py.bak src/${MODULE}/_version.py diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index 09859b6c4344e3..6e2e013f719f51 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -4,7 +4,7 @@ import setuptools package_metadata: dict = {} -with open("./src/datahub_dagster_plugin/__init__.py") as fp: +with open("./src/datahub_dagster_plugin/_version.py") as fp: exec(fp.read(), package_metadata) diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py index 1c7d60666a085f..20baf254135355 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/__init__.py @@ 
-1,21 +1 @@ -# Published at https://pypi.org/project/acryl-datahub/. -__package_name__ = "acryl-datahub-dagster-plugin" -__version__ = "1!0.0.0.dev0" - - -def is_dev_mode() -> bool: - return __version__.endswith("dev0") - - -def nice_version_name() -> str: - if is_dev_mode(): - return "unavailable (installed in develop mode)" - return __version__ - - -def get_provider_info(): - return { - "package-name": f"{__package_name__}", - "name": f"{__package_name__}", - "description": "Datahub metadata collector plugin", - } +from datahub_dagster_plugin._version import __package_name__, __version__ diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/_version.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/_version.py new file mode 100644 index 00000000000000..e287b6bf32f5dc --- /dev/null +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/_version.py @@ -0,0 +1,3 @@ +# Published at https://pypi.org/project/acryl-datahub-dagster-plugin/. +__package_name__ = "acryl-datahub-dagster-plugin" +__version__ = "1!0.0.0.dev0" diff --git a/metadata-ingestion-modules/gx-plugin/.gitignore b/metadata-ingestion-modules/gx-plugin/.gitignore index 8c01744589e35e..6801b785ea1e4b 100644 --- a/metadata-ingestion-modules/gx-plugin/.gitignore +++ b/metadata-ingestion-modules/gx-plugin/.gitignore @@ -1,5 +1,4 @@ .envrc -src/datahub_gx_plugin/__init__.py.bak .vscode/ output pvenv36/ diff --git a/metadata-ingestion-modules/gx-plugin/scripts/release.sh b/metadata-ingestion-modules/gx-plugin/scripts/release.sh index 058add495821cb..06605f03a78aa3 100755 --- a/metadata-ingestion-modules/gx-plugin/scripts/release.sh +++ b/metadata-ingestion-modules/gx-plugin/scripts/release.sh @@ -1,26 +1,31 @@ #!/bin/bash +# Auto-generated by python-build/generate_release_scripts.py. Do not edit manually. + set -euxo pipefail +ROOT=../.. +MODULE=datahub_gx_plugin + if [[ ! ${RELEASE_SKIP_TEST:-} ]] && [[ ! 
${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew build # also runs tests + ${ROOT}/gradlew build # also runs tests elif [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew install + ${ROOT}/gradlew install fi -MODULE=datahub_gx_plugin - # Check packaging constraint. python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' -if [[ ${RELEASE_VERSION:-} ]]; then - # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/__init__.py -else - vim src/${MODULE}/__init__.py + +# Update the release version. +if [[ ! ${RELEASE_VERSION:-} ]]; then + echo "RELEASE_VERSION is not set" + exit 1 fi +sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/_version.py +# Build and upload the release. rm -rf build dist || true python -m build if [[ ! 
${RELEASE_SKIP_UPLOAD:-} ]]; then python -m twine upload 'dist/*' fi -mv src/${MODULE}/__init__.py.bak src/${MODULE}/__init__.py +mv src/${MODULE}/_version.py.bak src/${MODULE}/_version.py diff --git a/metadata-ingestion-modules/gx-plugin/setup.py b/metadata-ingestion-modules/gx-plugin/setup.py index fbc4097388993f..43495673a7ff12 100644 --- a/metadata-ingestion-modules/gx-plugin/setup.py +++ b/metadata-ingestion-modules/gx-plugin/setup.py @@ -4,7 +4,7 @@ import setuptools package_metadata: dict = {} -with open("./src/datahub_gx_plugin/__init__.py") as fp: +with open("./src/datahub_gx_plugin/_version.py") as fp: exec(fp.read(), package_metadata) diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/__init__.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/__init__.py index a7689be82a5d99..b3f8638c28088d 100644 --- a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/__init__.py +++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/__init__.py @@ -1,21 +1 @@ -# Published at https://pypi.org/project/acryl-datahub/. -__package_name__ = "acryl-datahub-gx-plugin" -__version__ = "1!0.0.0.dev0" - - -def is_dev_mode() -> bool: - return __version__.endswith("dev0") - - -def nice_version_name() -> str: - if is_dev_mode(): - return "unavailable (installed in develop mode)" - return __version__ - - -def get_provider_info(): - return { - "package-name": f"{__package_name__}", - "name": f"{__package_name__}", - "description": "Datahub metadata collector plugin", - } +from datahub_gx_plugin._version import __package_name__, __version__ diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/_version.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/_version.py new file mode 100644 index 00000000000000..02dbb70a9b4833 --- /dev/null +++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/_version.py @@ -0,0 +1,3 @@ +# Published at https://pypi.org/project/acryl-datahub-gx-plugin/. 
+__package_name__ = "acryl-datahub-gx-plugin" +__version__ = "1!0.0.0.dev0" diff --git a/metadata-ingestion-modules/prefect-plugin/.gitignore b/metadata-ingestion-modules/prefect-plugin/.gitignore index 1d2916d00eabde..6801b785ea1e4b 100644 --- a/metadata-ingestion-modules/prefect-plugin/.gitignore +++ b/metadata-ingestion-modules/prefect-plugin/.gitignore @@ -1,5 +1,4 @@ .envrc -src/prefect_datahub/__init__.py.bak .vscode/ output pvenv36/ diff --git a/metadata-ingestion-modules/prefect-plugin/scripts/release.sh b/metadata-ingestion-modules/prefect-plugin/scripts/release.sh index f398db98b60290..b3b99d61c904a3 100755 --- a/metadata-ingestion-modules/prefect-plugin/scripts/release.sh +++ b/metadata-ingestion-modules/prefect-plugin/scripts/release.sh @@ -1,26 +1,31 @@ #!/bin/bash +# Auto-generated by python-build/generate_release_scripts.py. Do not edit manually. + set -euxo pipefail +ROOT=../.. +MODULE=prefect_datahub + if [[ ! ${RELEASE_SKIP_TEST:-} ]] && [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew build # also runs tests + ${ROOT}/gradlew build # also runs tests elif [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then - ../../gradlew install + ${ROOT}/gradlew install fi -MODULE=prefect_datahub - # Check packaging constraint. python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' -if [[ ${RELEASE_VERSION:-} ]]; then - # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/__init__.py -else - vim src/${MODULE}/__init__.py + +# Update the release version. +if [[ ! ${RELEASE_VERSION:-} ]]; then + echo "RELEASE_VERSION is not set" + exit 1 fi +sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/_version.py +# Build and upload the release. 
rm -rf build dist || true python -m build if [[ ! ${RELEASE_SKIP_UPLOAD:-} ]]; then python -m twine upload 'dist/*' fi -mv src/${MODULE}/__init__.py.bak src/${MODULE}/__init__.py \ No newline at end of file +mv src/${MODULE}/_version.py.bak src/${MODULE}/_version.py diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py index 1d56cae8d938a2..87feb810b8e5a6 100644 --- a/metadata-ingestion-modules/prefect-plugin/setup.py +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -4,7 +4,7 @@ import setuptools package_metadata: dict = {} -with open("./src/prefect_datahub/__init__.py") as fp: +with open("./src/prefect_datahub/_version.py") as fp: exec(fp.read(), package_metadata) @@ -30,9 +30,7 @@ def get_long_description(): # Temporary pinning to 2.0.0 until we can upgrade to 3.0.0 "prefect >= 2.0.0,<3.0.0", *rest_common, - # Ignoring the dependency below because it causes issues with the vercel built wheel install - # f"acryl-datahub[datahub-rest]{_self_pin}", - "acryl-datahub[datahub-rest]", + f"acryl-datahub[datahub-rest]{_self_pin}", } diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py index 8cc65f9010613d..f38863a1f31e28 100644 --- a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/__init__.py @@ -1,21 +1 @@ -# Published at https://pypi.org/project/acryl-datahub/. 
-__package_name__ = "prefect-datahub" -__version__ = "1!0.0.0.dev0" - - -def is_dev_mode() -> bool: - return __version__.endswith("dev0") - - -def nice_version_name() -> str: - if is_dev_mode(): - return "unavailable (installed in develop mode)" - return __version__ - - -def get_provider_info(): - return { - "package-name": f"{__package_name__}", - "name": f"{__package_name__}", - "description": "Datahub prefect block to capture executions and send to Datahub", - } +from prefect_datahub._version import __package_name__, __version__ diff --git a/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/_version.py b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/_version.py new file mode 100644 index 00000000000000..8c45e7d04b3679 --- /dev/null +++ b/metadata-ingestion-modules/prefect-plugin/src/prefect_datahub/_version.py @@ -0,0 +1,3 @@ +# Published at https://pypi.org/project/prefect-datahub/. +__package_name__ = "prefect-datahub" +__version__ = "1!0.0.0.dev0" diff --git a/metadata-ingestion/.gitignore b/metadata-ingestion/.gitignore index acc15c45988698..c7a781ded1d688 100644 --- a/metadata-ingestion/.gitignore +++ b/metadata-ingestion/.gitignore @@ -1,5 +1,4 @@ .envrc -src/datahub/__init__.py.bak .vscode/ output pvenv36/ diff --git a/metadata-ingestion/scripts/release.sh b/metadata-ingestion/scripts/release.sh index a18dd6f934b431..9b4e62aef581ae 100755 --- a/metadata-ingestion/scripts/release.sh +++ b/metadata-ingestion/scripts/release.sh @@ -1,26 +1,31 @@ #!/bin/bash +# Auto-generated by python-build/generate_release_scripts.py. Do not edit manually. + set -euxo pipefail +ROOT=.. +MODULE=datahub + if [[ ! ${RELEASE_SKIP_TEST:-} ]] && [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then - ../gradlew build # also runs tests + ${ROOT}/gradlew build # also runs tests elif [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then - ../gradlew install + ${ROOT}/gradlew install fi -MODULE=datahub - # Check packaging constraint. 
python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' -if [[ ${RELEASE_VERSION:-} ]]; then - # Replace version with RELEASE_VERSION env variable - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/__init__.py -else - vim src/${MODULE}/__init__.py + +# Update the release version. +if [[ ! ${RELEASE_VERSION:-} ]]; then + echo "RELEASE_VERSION is not set" + exit 1 fi +sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/${MODULE}/_version.py +# Build and upload the release. rm -rf build dist || true python -m build if [[ ! ${RELEASE_SKIP_UPLOAD:-} ]]; then python -m twine upload 'dist/*' fi -mv src/${MODULE}/__init__.py.bak src/${MODULE}/__init__.py +mv src/${MODULE}/_version.py.bak src/${MODULE}/_version.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c91dbf709e6d86..b3175989309849 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -3,7 +3,7 @@ import setuptools package_metadata: dict = {} -with open("./src/datahub/__init__.py") as fp: +with open("./src/datahub/_version.py") as fp: exec(fp.read(), package_metadata) _version: str = package_metadata["__version__"] @@ -312,7 +312,10 @@ powerbi_report_server = {"requests", "requests_ntlm"} -slack = {"slack-sdk==3.18.1", "tenacity>=8.0.1",} +slack = { + "slack-sdk==3.18.1", + "tenacity>=8.0.1", +} databricks = { # 0.1.11 appears to have authentication issues with azure databricks @@ -505,12 +508,10 @@ "starburst-trino-usage": sql_common | usage_common | trino, "nifi": {"requests", "packaging", "requests-gssapi"}, "powerbi": ( - ( - microsoft_common - | {"lark[regex]==1.1.4", "sqlparse", "more-itertools"} - | sqlglot_lib - | threading_timeout_common - ) + microsoft_common + | {"lark[regex]==1.1.4", "sqlparse", "more-itertools"} + 
| sqlglot_lib + | threading_timeout_common ), "powerbi-report-server": powerbi_report_server, "vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.8.2"}, diff --git a/metadata-ingestion/src/datahub/__init__.py b/metadata-ingestion/src/datahub/__init__.py index b254deb7fa30e5..8b8ef52d27bb92 100644 --- a/metadata-ingestion/src/datahub/__init__.py +++ b/metadata-ingestion/src/datahub/__init__.py @@ -1,25 +1 @@ -import sys -import warnings - -# Published at https://pypi.org/project/acryl-datahub/. -__package_name__ = "acryl-datahub" -__version__ = "1!0.0.0.dev0" - - -def is_dev_mode() -> bool: - return __version__.endswith("dev0") - - -def nice_version_name() -> str: - if is_dev_mode(): - return "unavailable (installed in develop mode)" - return __version__ - - -if sys.version_info < (3, 8): - warnings.warn( - "DataHub requires Python 3.8 or newer. " - "Please upgrade your Python version to continue using DataHub.", - FutureWarning, - stacklevel=2, - ) +from datahub._version import __package_name__, __version__ diff --git a/metadata-ingestion/src/datahub/_version.py b/metadata-ingestion/src/datahub/_version.py new file mode 100644 index 00000000000000..a34748ac942a17 --- /dev/null +++ b/metadata-ingestion/src/datahub/_version.py @@ -0,0 +1,13 @@ +# Published at https://pypi.org/project/acryl-datahub/. 
+__package_name__ = "acryl-datahub" +__version__ = "1!0.0.0.dev0" + + +def is_dev_mode() -> bool: + return __version__.endswith("dev0") + + +def nice_version_name() -> str: + if is_dev_mode(): + return "unavailable (installed in develop mode)" + return __version__ diff --git a/metadata-ingestion/src/datahub/cli/check_cli.py b/metadata-ingestion/src/datahub/cli/check_cli.py index fbe07b64f0e154..6b3124fc37393a 100644 --- a/metadata-ingestion/src/datahub/cli/check_cli.py +++ b/metadata-ingestion/src/datahub/cli/check_cli.py @@ -9,7 +9,7 @@ import click -from datahub import __package_name__ +from datahub._version import __package_name__ from datahub.cli.json_file import check_mce_file from datahub.configuration import config_loader from datahub.configuration.common import AllowDenyPattern diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 1f13391644c6c8..26f4117e151f9e 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -9,7 +9,7 @@ import requests from requests.sessions import Session -import datahub +import datahub._version as datahub_version from datahub.cli import config_utils from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -422,5 +422,5 @@ def ensure_has_system_metadata( if metadata.properties is None: metadata.properties = {} props = metadata.properties - props["clientId"] = datahub.__package_name__ - props["clientVersion"] = datahub.__version__ + props["clientId"] = datahub_version.__package_name__ + props["clientVersion"] = datahub_version.__version__ diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index c9eaccbc65ee21..e2a2f35a36631b 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -12,7 +12,7 @@ from 
click_default_group import DefaultGroup from tabulate import tabulate -import datahub as datahub_package +from datahub._version import nice_version_name from datahub.cli import cli_utils from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH from datahub.configuration.common import ConfigModel, GraphError @@ -147,7 +147,7 @@ def run_pipeline_to_completion(pipeline: Pipeline) -> int: return ret # main function begins - logger.info("DataHub CLI version: %s", datahub_package.nice_version_name()) + logger.info("DataHub CLI version: %s", nice_version_name()) pipeline_config = load_config_file( config, diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index 7271f784bf881e..4e7a152204da89 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -22,7 +22,7 @@ from requests.adapters import HTTPAdapter, Retry from requests.exceptions import HTTPError, RequestException -from datahub import nice_version_name +from datahub._version import nice_version_name from datahub.cli import config_utils from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url, get_or_else from datahub.cli.env_utils import get_boolean_env_variable diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 182084e4794259..73d35381d5df29 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -6,7 +6,7 @@ import click -import datahub as datahub_package +import datahub._version as datahub_version from datahub.cli.check_cli import check from datahub.cli.cli_utils import ( fixup_gms_url, @@ -74,8 +74,8 @@ help="Write debug-level logs to a file.", ) @click.version_option( - version=datahub_package.nice_version_name(), - prog_name=datahub_package.__package_name__, + version=datahub_version.nice_version_name(), + 
prog_name=datahub_version.__package_name__, ) def datahub( debug: bool, @@ -112,7 +112,7 @@ def datahub( def version(include_server: bool = False) -> None: """Print version number and exit.""" - click.echo(f"DataHub CLI version: {datahub_package.nice_version_name()}") + click.echo(f"DataHub CLI version: {datahub_version.nice_version_name()}") click.echo(f"Models: {model_version_name()}") click.echo(f"Python version: {sys.version}") if include_server: @@ -223,7 +223,7 @@ def main(**kwargs): logger.exception(f"Command failed: {exc}") logger.debug( - f"DataHub CLI version: {datahub_package.__version__} at {datahub_package.__file__}" + f"DataHub CLI version: {datahub_version.__version__} at {__file__}" ) logger.debug( f"Python version: {sys.version} at {sys.executable} on {platform.platform()}" diff --git a/metadata-ingestion/src/datahub/ingestion/api/registry.py b/metadata-ingestion/src/datahub/ingestion/api/registry.py index 5e372a964c7e6c..91ee98865e78e4 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/registry.py +++ b/metadata-ingestion/src/datahub/ingestion/api/registry.py @@ -17,7 +17,7 @@ import typing_inspect -from datahub import __package_name__ +from datahub._version import __package_name__ from datahub.configuration.common import ConfigurationError if sys.version_info < (3, 10): diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py index c143a8b49f4b7c..fc790535cfe03d 100644 --- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py @@ -3,7 +3,7 @@ import time from typing import Any, Dict, Optional -from datahub import nice_version_name +from datahub._version import nice_version_name from datahub.configuration.common import ( ConfigModel, DynamicTypedConfig, diff 
--git a/metadata-ingestion/src/datahub/ingestion/run/connection.py b/metadata-ingestion/src/datahub/ingestion/run/connection.py index 54b0ab9f22c65e..d42a1ba8767f96 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/connection.py +++ b/metadata-ingestion/src/datahub/ingestion/run/connection.py @@ -1,6 +1,6 @@ import logging -from datahub import __version__ +from datahub._version import __version__ from datahub.ingestion.api.source import TestableSource, TestConnectionReport from datahub.ingestion.source.source_registry import source_registry diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 25cbd340c9674b..120cf6a79bc02c 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -15,7 +15,7 @@ import humanfriendly import psutil -import datahub +from datahub._version import nice_version_name from datahub.configuration.common import ( ConfigModel, IgnorableError, @@ -144,8 +144,8 @@ def _add_init_error_context(step: str) -> Iterator[None]: @dataclass class CliReport(Report): - cli_version: str = datahub.nice_version_name() - cli_entry_location: str = datahub.__file__ + cli_version: str = nice_version_name() + cli_entry_location: str = __file__ models_version: str = model_version_name() py_version: str = sys.version py_exec_path: str = sys.executable diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py index fd6fa8a50f707b..86e577febf454c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy.py @@ -26,7 +26,7 @@ ) from databricks.sdk.service.workspace import ObjectType -import datahub +from datahub._version import nice_version_name from datahub.emitter.mce_builder import parse_ts_millis from 
datahub.ingestion.source.unity.hive_metastore_proxy import HiveMetastoreProxy from datahub.ingestion.source.unity.proxy_profiling import ( @@ -103,7 +103,7 @@ def __init__( host=workspace_url, token=personal_access_token, product="datahub", - product_version=datahub.nice_version_name(), + product_version=nice_version_name(), ) self.warehouse_id = warehouse_id or "" self.report = report diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index 22b2cb6a101af9..c64c133fbf456f 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -12,7 +12,7 @@ from mixpanel import Consumer, Mixpanel from typing_extensions import ParamSpec -import datahub as datahub_package +from datahub._version import __version__, nice_version_name from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER from datahub.cli.env_utils import get_boolean_env_variable from datahub.configuration.common import ExceptionWithProps @@ -106,7 +106,7 @@ def _default_telemetry_properties() -> Dict[str, Any]: return { - "datahub_version": datahub_package.nice_version_name(), + "datahub_version": nice_version_name(), "python_version": platform.python_version(), "os": platform.system(), "arch": platform.machine(), @@ -132,7 +132,7 @@ def __init__(self): sentry_sdk.init( dsn=SENTRY_DSN, environment=SENTRY_ENVIRONMENT, - release=datahub_package.__version__, + release=__version__, ) except Exception as e: # We need to print initialization errors to stderr, since logger is not initialized yet @@ -277,7 +277,7 @@ def init_capture_exception(self) -> None: "environment", { "environment": SENTRY_ENVIRONMENT, - "datahub_version": datahub_package.nice_version_name(), + "datahub_version": nice_version_name(), "os": platform.system(), "python_version": platform.python_version(), }, diff --git a/metadata-ingestion/src/datahub/testing/check_imports.py 
b/metadata-ingestion/src/datahub/testing/check_imports.py index e4bf07882b36ae..b65b3aa90dca35 100644 --- a/metadata-ingestion/src/datahub/testing/check_imports.py +++ b/metadata-ingestion/src/datahub/testing/check_imports.py @@ -1,4 +1,5 @@ import pathlib +import re from typing import List @@ -32,3 +33,30 @@ def ensure_no_indirect_model_imports(dirs: List[pathlib.Path]) -> None: f"Disallowed import found in {file}: `{line.rstrip()}`. " f"Import from {replacement} instead." ) + + +def ban_direct_datahub_imports(dirs: List[pathlib.Path]) -> None: + # We also want to ban all direct imports of datahub. + # The base `datahub` package is used to export public-facing classes. + # If we import it directly, we'll likely end up with circular imports. + + banned_strings = [ + r"^import datahub[\s$]", + r"^from datahub import", + ] + ignored_files = { + __file__, + } + for dir in dirs: + for file in dir.rglob("*.py"): + if str(file) in ignored_files: + continue + + file_contents = file.read_text() + + for banned_string in banned_strings: + if re.search(banned_string, file_contents, re.MULTILINE): + raise ValueError( + f"Disallowed bare datahub import found in {file}. " + f"Do not import datahub directly; instead import from the underlying file." 
+ ) diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py index 7872681797d6fe..276f4ccd54a4a1 100644 --- a/metadata-ingestion/src/datahub/upgrade/upgrade.py +++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py @@ -10,7 +10,7 @@ from packaging.version import Version from pydantic import BaseModel -from datahub import __version__ +from datahub._version import __version__ from datahub.cli.config_utils import load_client_config from datahub.ingestion.graph.client import DataHubGraph from datahub.utilities.perf_timer import PerfTimer diff --git a/metadata-ingestion/tests/unit/test_packages.py b/metadata-ingestion/tests/unit/test_packages.py index f4045bac6e6ef4..ab538cf0c1ed09 100644 --- a/metadata-ingestion/tests/unit/test_packages.py +++ b/metadata-ingestion/tests/unit/test_packages.py @@ -1,7 +1,10 @@ import pytest import setuptools -from datahub.testing.check_imports import ensure_no_indirect_model_imports +from datahub.testing.check_imports import ( + ban_direct_datahub_imports, + ensure_no_indirect_model_imports, +) from datahub.testing.check_str_enum import ensure_no_enum_mixin @@ -16,6 +19,7 @@ def test_check_import_paths(pytestconfig: pytest.Config) -> None: root = pytestconfig.rootpath ensure_no_indirect_model_imports([root / "src", root / "tests"]) + ban_direct_datahub_imports([root / "src", root / "tests"]) def test_check_str_enum_usage(pytestconfig: pytest.Config) -> None: diff --git a/metadata-ingestion/tests/unit/test_packaging.py b/metadata-ingestion/tests/unit/test_packaging.py index 4b99be750a4da7..f9a3ae9562d3eb 100644 --- a/metadata-ingestion/tests/unit/test_packaging.py +++ b/metadata-ingestion/tests/unit/test_packaging.py @@ -1,6 +1,6 @@ import pytest -import datahub as datahub_metadata +import datahub._version as datahub_version @pytest.mark.filterwarnings( @@ -10,4 +10,4 @@ def test_datahub_version(): # Simply importing pkg_resources checks for unsatisfied dependencies. 
import pkg_resources - assert pkg_resources.get_distribution(datahub_metadata.__package_name__).version + assert pkg_resources.get_distribution(datahub_version.__package_name__).version diff --git a/python-build/generate_release_scripts.py b/python-build/generate_release_scripts.py new file mode 100644 index 00000000000000..36253a24cfa3b0 --- /dev/null +++ b/python-build/generate_release_scripts.py @@ -0,0 +1,85 @@ +import dataclasses +import pathlib + +REPO_ROOT = pathlib.Path(__file__).parent.parent + + +@dataclasses.dataclass +class Package: + # TODO: This doesn't have the actual package names. + directory: str + main_module_name: str + + def root_from_directory(self) -> str: + ups = self.directory.count("/") + 1 + + return "/".join([".."] * ups) + + +packages = [ + Package(directory="metadata-ingestion", main_module_name="datahub"), + Package( + directory="metadata-ingestion-modules/airflow-plugin", + main_module_name="datahub_airflow_plugin", + ), + Package( + directory="metadata-ingestion-modules/dagster-plugin", + main_module_name="datahub_dagster_plugin", + ), + Package( + directory="metadata-ingestion-modules/gx-plugin", + main_module_name="datahub_gx_plugin", + ), + Package( + directory="metadata-ingestion-modules/prefect-plugin", + main_module_name="prefect_datahub", + ), +] + +generation_header = f"# Auto-generated by {pathlib.Path(__file__).relative_to(REPO_ROOT)}. Do not edit manually." + +template = """\ +#!/bin/bash +%s + +set -euxo pipefail + +ROOT=%s +MODULE=%s + +if [[ ! ${RELEASE_SKIP_TEST:-} ]] && [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then + ${ROOT}/gradlew build # also runs tests +elif [[ ! ${RELEASE_SKIP_INSTALL:-} ]]; then + ${ROOT}/gradlew install +fi + +# Check packaging constraint. +python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"' + +# Update the release version. +if [[ ! 
${RELEASE_VERSION:-} ]]; then + echo "RELEASE_VERSION is not set" + exit 1 +fi +sed -i.bak "s/__version__ = .*$/__version__ = \\"$(echo $RELEASE_VERSION|sed s/-/+/)\\"/" src/${MODULE}/_version.py + +# Build and upload the release. +rm -rf build dist || true +python -m build +if [[ ! ${RELEASE_SKIP_UPLOAD:-} ]]; then + python -m twine upload 'dist/*' +fi +mv src/${MODULE}/_version.py.bak src/${MODULE}/_version.py +""" + +for package in packages: + script_path = REPO_ROOT / package.directory / "scripts/release.sh" + + script_path.write_text( + template + % ( + generation_header, + package.root_from_directory(), + package.main_module_name, + ) + ) From a4f8d170f9c232989edd5e6235b6b062b7b326db Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Sat, 25 Jan 2025 10:25:43 -0600 Subject: [PATCH 165/249] misc(search-explain): set default value (#12463) --- .../operations/elastic/OperationsController.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index 64333009dda7a9..ea437f4cf35114 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -231,7 +231,10 @@ public ResponseEntity explainSearchQuery( @Nullable List sortCriteria, @Parameter(name = "searchFlags", description = "Optional configuration flags.") - @RequestParam(value = "searchFlags", required = false) + @RequestParam( + value = "searchFlags", + required = false, + defaultValue = "{\"fulltext\":true}") @Nullable String searchFlags) throws JsonProcessingException { @@ -338,7 +341,10 @@ public 
ResponseEntity explainSearchQueryDiff( @Nullable List sortCriteria, @Parameter(name = "searchFlags", description = "Optional configuration flags.") - @RequestParam(value = "searchFlags", required = false) + @RequestParam( + value = "searchFlags", + required = false, + defaultValue = "{\"fulltext\":true}") @Nullable String searchFlags) throws JsonProcessingException { From 3e9e6e4fe07d68d3926f73080f4898d142720a58 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Mon, 27 Jan 2025 15:37:31 +0530 Subject: [PATCH 166/249] fix(lookml/ingestion): Skip unreferenced or improperly loaded Lookml view files (#12351) --- .../ingestion/source/looker/lookml_config.py | 5 +- .../ingestion/source/looker/lookml_source.py | 56 ++++++++++++++++ .../lkml_unreachable_views/data.model.lkml | 10 +++ .../employee_income_source.view.lkml | 40 ++++++++++++ .../employee_total_income.view.lkml | 18 ++++++ .../unreachable_view.view.lkml | 18 ++++++ .../tests/integration/lookml/test_lookml.py | 64 ++++++++++++++++--- 7 files changed, 201 insertions(+), 10 deletions(-) create mode 100644 metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/data.model.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_income_source.view.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_total_income.view.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/unreachable_view.view.lkml diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index 7ffb895349ed29..4d3255c3c0715b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -139,7 +139,10 @@ class LookMLSourceConfig( ) 
emit_reachable_views_only: bool = Field( True, - description="When enabled, only views that are reachable from explores defined in the model files are emitted", + description=( + "When enabled, only views that are reachable from explores defined in the model files are emitted. " + "If set to False, all views imported in model files are emitted. Views that are unreachable i.e. not explicitly defined in the model files are currently not emitted however reported as warning for debugging purposes." + ), ) populate_sql_logic_for_missing_descriptions: bool = Field( False, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index a8575c84b510d5..9a937840a5012f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -59,6 +59,7 @@ from datahub.ingestion.source.looker.lookml_config import ( BASE_PROJECT_NAME, MODEL_FILE_EXTENSION, + VIEW_FILE_EXTENSION, LookerConnectionDefinition, LookMLSourceConfig, LookMLSourceReport, @@ -884,6 +885,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 view_urn = maybe_looker_view.id.get_urn( self.source_config ) + view_connection_mapping = view_connection_map.get( view_urn ) @@ -939,6 +941,9 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 str(maybe_looker_view.id) ) + if not self.source_config.emit_reachable_views_only: + self.report_skipped_unreachable_views(viewfile_loader, processed_view_map) + if ( self.source_config.tag_measures_and_dimensions and self.reporter.events_produced != 0 @@ -966,5 +971,56 @@ def gen_project_workunits(self, project_name: str) -> Iterable[MetadataWorkUnit] ), ).as_workunit() + def report_skipped_unreachable_views( + self, + viewfile_loader: LookerViewFileLoader, + processed_view_map: Dict[str, Set[str]] = {}, + ) -> None: + view_files: 
Dict[str, List[pathlib.Path]] = {} + for project, folder_path in self.base_projects_folder.items(): + folder = pathlib.Path(folder_path) + view_files[project] = list(folder.glob(f"**/*{VIEW_FILE_EXTENSION}")) + + skipped_view_paths: Dict[str, List[str]] = {} + for project, views in view_files.items(): + skipped_paths: Set[str] = set() + + for view_path in views: + # Check if the view is already in processed_view_map + if not any( + str(view_path) in view_set + for view_set in processed_view_map.values() + ): + looker_viewfile = viewfile_loader.load_viewfile( + path=str(view_path), + project_name=project, + connection=None, + reporter=self.reporter, + ) + + if looker_viewfile is not None: + for raw_view in looker_viewfile.views: + raw_view_name = raw_view.get("name", "") + + if ( + raw_view_name + and self.source_config.view_pattern.allowed( + raw_view_name + ) + ): + skipped_paths.add(str(view_path)) + + skipped_view_paths[project] = list(skipped_paths) + + for project, view_paths in skipped_view_paths.items(): + for path in view_paths: + self.reporter.report_warning( + title="Skipped View File", + message=( + "The Looker view file was skipped because it may not be referenced by any models." 
+ ), + context=(f"Project: {project}, View File Path: {path}"), + ) + def get_report(self): return self.reporter diff --git a/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/data.model.lkml b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/data.model.lkml new file mode 100644 index 00000000000000..b19135659b07a1 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/data.model.lkml @@ -0,0 +1,10 @@ +connection: "my_connection" + +include: "employee_income_source.view.lkml" +include: "employee_total_income.view.lkml" + +explore: employee_income_source { +} + +explore: employee_total_income { +} diff --git a/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_income_source.view.lkml b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_income_source.view.lkml new file mode 100644 index 00000000000000..f4a443ab115374 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_income_source.view.lkml @@ -0,0 +1,40 @@ +view: employee_income_source { + derived_table: { + sql: SELECT + employee_id, + employee_name, + {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %} + prod_core.data.r_metric_summary_v2 + {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %} + prod_core.data.r_metric_summary_v3 + {% else %} + 'default_table' as source + {% endif %}, + employee_income + FROM source_table + WHERE + {% condition source_region %} source_table.region {% endcondition %} + ;; + } + + dimension: id { + type: number + sql: ${TABLE}.employee_id;; + } + + dimension: name { + type: string + sql: ${TABLE}.employee_name;; + } + + dimension: source { + type: string + sql: ${TABLE}.source ;; + } + + dimension: income { + type: number + sql: ${TABLE}.employee_income ;; + } + +} diff --git a/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_total_income.view.lkml 
b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_total_income.view.lkml new file mode 100644 index 00000000000000..18a1ab660b3a1b --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/employee_total_income.view.lkml @@ -0,0 +1,18 @@ +view: employee_total_income { + sql_table_name: ${employee_income_source.SQL_TABLE_NAME} ;; + + dimension: id { + type: number + sql: ${TABLE}.id;; + } + + dimension: name { + type: string + sql: ${TABLE}.name;; + } + + measure: total_income { + type: sum + sql: ${TABLE}.income;; + } +} diff --git a/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/unreachable_view.view.lkml b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/unreachable_view.view.lkml new file mode 100644 index 00000000000000..5c75abe41cfce0 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/lkml_unreachable_views/unreachable_view.view.lkml @@ -0,0 +1,18 @@ +view: employee_unreachable { + sql_table_name: ${employee_income_source.SQL_TABLE_NAME} ;; + + dimension: id { + type: number + sql: ${TABLE}.id;; + } + + dimension: name { + type: string + sql: ${TABLE}.name;; + } + + measure: total_income { + type: sum + sql: ${TABLE}.income;; + } +} diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index d803b8498104fd..ac011324684189 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -10,6 +10,8 @@ from freezegun import freeze_time from looker_sdk.sdk.api40.models import DBConnection +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.file import read_metadata_file from datahub.ingestion.source.looker.looker_dataclasses import LookerModel @@ -20,6 +22,7 
@@ ) from datahub.ingestion.source.looker.lookml_config import LookMLSourceConfig from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver +from datahub.ingestion.source.looker.lookml_source import LookMLSource from datahub.metadata.schema_classes import ( DatasetSnapshotClass, MetadataChangeEventClass, @@ -78,7 +81,8 @@ def test_lookml_ingest(pytestconfig, tmp_path, mock_time): ) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 mce_helpers.check_golden_file( pytestconfig, @@ -112,7 +116,8 @@ def test_lookml_refinement_ingest(pytestconfig, tmp_path, mock_time): pipeline = Pipeline.create(new_recipe) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 golden_path = test_resources_dir / "refinements_ingestion_golden.json" mce_helpers.check_golden_file( @@ -142,7 +147,8 @@ def test_lookml_refinement_include_order(pytestconfig, tmp_path, mock_time): pipeline = Pipeline.create(new_recipe) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 golden_path = test_resources_dir / "refinement_include_order_golden.json" mce_helpers.check_golden_file( @@ -332,7 +338,8 @@ def test_lookml_ingest_offline(pytestconfig, tmp_path, mock_time): ) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 mce_helpers.check_golden_file( pytestconfig, @@ -377,7 +384,8 @@ def 
test_lookml_ingest_offline_with_model_deny(pytestconfig, tmp_path, mock_time ) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 mce_helpers.check_golden_file( pytestconfig, @@ -424,7 +432,8 @@ def test_lookml_ingest_offline_platform_instance(pytestconfig, tmp_path, mock_ti ) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 mce_helpers.check_golden_file( pytestconfig, @@ -507,7 +516,8 @@ def ingestion_test( ) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 mce_helpers.check_golden_file( pytestconfig, @@ -553,7 +563,8 @@ def test_lookml_git_info(pytestconfig, tmp_path, mock_time): ) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 mce_helpers.check_golden_file( pytestconfig, @@ -668,7 +679,8 @@ def test_hive_platform_drops_ids(pytestconfig, tmp_path, mock_time): ) pipeline.run() pipeline.pretty_print_summary() - pipeline.raise_from_status(raise_warnings=True) + pipeline.raise_from_status(raise_warnings=False) + assert pipeline.source.get_report().warnings.total_elements == 1 events = read_metadata_file(tmp_path / mce_out) for mce in events: @@ -1051,3 +1063,37 @@ def test_gms_schema_resolution(pytestconfig, tmp_path, mock_time): output_path=tmp_path / mce_out_file, golden_path=golden_path, ) + + +@freeze_time(FROZEN_TIME) +def test_unreachable_views(pytestconfig): + test_resources_dir = pytestconfig.rootpath / 
"tests/integration/lookml" + + config = { + "base_folder": f"{test_resources_dir}/lkml_unreachable_views", + "connection_to_platform_map": {"my_connection": "postgres"}, + "parse_table_names_from_sql": True, + "tag_measures_and_dimensions": False, + "project_name": "lkml_samples", + "model_pattern": {"deny": ["data2"]}, + "emit_reachable_views_only": False, + "liquid_variable": { + "order_region": "ap-south-1", + "source_region": "ap-south-1", + "dw_eff_dt_date": { + "_is_selected": True, + }, + }, + } + + source = LookMLSource( + LookMLSourceConfig.parse_obj(config), + ctx=PipelineContext(run_id="lookml-source-test"), + ) + wu: List[MetadataWorkUnit] = [*source.get_workunits_internal()] + assert len(wu) == 15 + assert source.reporter.warnings.total_elements == 1 + assert ( + "The Looker view file was skipped because it may not be referenced by any models." + in [failure.message for failure in source.get_report().warnings] + ) From 6ab2c702b7a7b9b829a2093827244970e2ee50f9 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 27 Jan 2025 16:13:13 +0530 Subject: [PATCH 167/249] docs: add beta labels for Automation (#12459) --- docs/automations/bigquery-metadata-sync.md | 6 ++++++ docs/automations/docs-propagation.md | 6 ++++++ docs/automations/glossary-term-propagation.md | 6 ++++++ docs/automations/snowflake-tag-propagation.md | 7 ++++++- 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/automations/bigquery-metadata-sync.md b/docs/automations/bigquery-metadata-sync.md index 78bdbdd453e9f7..705c3951c060da 100644 --- a/docs/automations/bigquery-metadata-sync.md +++ b/docs/automations/bigquery-metadata-sync.md @@ -4,6 +4,12 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; +:::info + +This feature is currently in open beta in Acryl Cloud. Reach out to your Acryl representative to get access. 
+ +::: + ## Introduction BigQuery Metadata Sync is an automation that synchronizes DataHub Tags, Table and Column descriptions, and Column Glossary Terms with diff --git a/docs/automations/docs-propagation.md b/docs/automations/docs-propagation.md index 9f389028941919..af553b9b84a7eb 100644 --- a/docs/automations/docs-propagation.md +++ b/docs/automations/docs-propagation.md @@ -1,5 +1,11 @@ # Documentation Propagation Automation +:::info + +This feature is currently in open beta in Acryl Cloud. Reach out to your Acryl representative to get access. + +::: + ## Introduction Documentation Propagation is an automation automatically propagates column and asset (coming soon) descriptions based on downstream column-level lineage and sibling relationships. diff --git a/docs/automations/glossary-term-propagation.md b/docs/automations/glossary-term-propagation.md index 90e8e75ea44ef6..5a0f20eb79db21 100644 --- a/docs/automations/glossary-term-propagation.md +++ b/docs/automations/glossary-term-propagation.md @@ -2,6 +2,12 @@ +:::info + +This feature is currently in open beta in Acryl Cloud. Reach out to your Acryl representative to get access. + +::: + ## Introduction Glossary Term Propagation is an automation feature that propagates classification labels (Glossary Terms) across column and assets based on downstream lineage and sibling relationships. diff --git a/docs/automations/snowflake-tag-propagation.md b/docs/automations/snowflake-tag-propagation.md index 8eded451644cce..dd920c247bbfc7 100644 --- a/docs/automations/snowflake-tag-propagation.md +++ b/docs/automations/snowflake-tag-propagation.md @@ -4,7 +4,12 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -> Note that this Automation in currently in open **Beta**. With any questions or issues, please reach out to your Acryl representative. +:::info + +This feature is currently in open beta in Acryl Cloud. Reach out to your Acryl representative to get access. 
+ +::: + ## Introduction From 0c98cdce2e017a2066d72cdc64c641aebd97d214 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 27 Jan 2025 17:24:11 +0530 Subject: [PATCH 168/249] fix(ingest/glue): add info in report (#12470) --- metadata-ingestion/src/datahub/ingestion/source/aws/glue.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 2ace71b6ff6c1d..214b14a2b6c100 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -218,6 +218,7 @@ def platform_validator(cls, v: str) -> str: @dataclass class GlueSourceReport(StaleEntityRemovalSourceReport): + catalog_id: Optional[str] = None tables_scanned = 0 filtered: List[str] = dataclass_field(default_factory=list) databases: EntityFilterReport = EntityFilterReport.field(type="database") @@ -315,6 +316,7 @@ def __init__(self, config: GlueSourceConfig, ctx: PipelineContext): self.extract_owners = config.extract_owners self.source_config = config self.report = GlueSourceReport() + self.report.catalog_id = self.source_config.catalog_id self.glue_client = config.glue_client self.s3_client = config.s3_client self.extract_transforms = config.extract_transforms From 1ca95cc2abe3de16d9c9bdf29a5c1adf58bd0d6e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 27 Jan 2025 08:55:23 -0800 Subject: [PATCH 169/249] docs(ingest/tableau): tweak permissions docs (#12460) --- metadata-ingestion/docs/sources/tableau/tableau_pre.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/docs/sources/tableau/tableau_pre.md b/metadata-ingestion/docs/sources/tableau/tableau_pre.md index a3ac85818a51aa..2cc9ed23513222 100644 --- a/metadata-ingestion/docs/sources/tableau/tableau_pre.md +++ b/metadata-ingestion/docs/sources/tableau/tableau_pre.md @@ -12,16 +12,15 @@ DataHub supports two 
authentication methods: 1. Username/Password 2. [Personal Access Token](https://help.tableau.com/current/pro/desktop/en-us/useracct.htm#create-and-revoke-personal-access-tokens) -Either way, the user/token must have the **Site Administrator Explorer** site role. +Either way, the user/token must have at least the **Site Administrator Explorer** site role. :::info -We need the **Site Administrator Explorer** site role in order to get complete metadata from Tableau. +We need at least the **Site Administrator Explorer** site role in order to get complete metadata from Tableau. Roles with higher privileges, like **Site Administrator Creator** or **Server Admin** also work. With any lower role, the Tableau Metadata API returns missing/partial metadata. This particularly affects data source fields and definitions, which impacts our ability to extract most columns and generate column lineage. Some table-level lineage is also impacted. - -Other site roles, including Site Administrator Creator and Viewer, are insufficient due to these limitations in the current Tableau Metadata API. +Other site roles, like Viewer or Explorer, are insufficient due to these limitations in the current Tableau Metadata API. ::: From d8ac6cd2586e041a0cb7b18a6c1f04207932dbe2 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Mon, 27 Jan 2025 19:49:12 -0800 Subject: [PATCH 170/249] gql: add data platform instance to search fragment (#12472) --- datahub-web-react/src/graphql/search.graphql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 9edd6754022866..d12193b471d469 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -954,6 +954,9 @@ fragment searchResultsWithoutSchemaField on Entity { ...versionProperties } } + ... 
on DataPlatformInstance { + ...dataPlatformInstanceFields + } } fragment searchResultFields on Entity { From 563656c4d52ee8ab2ee373b78c4f896c643e8def Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Tue, 28 Jan 2025 12:25:45 +0530 Subject: [PATCH 171/249] feat(ingestion/lookml): resolve access notation for LookML Constant (#12277) Co-authored-by: Siddique Bagwan Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- .../docs/sources/looker/looker_recipe.yml | 14 + .../docs/sources/looker/lookml_post.md | 54 +- .../source/looker/looker_dataclasses.py | 8 + .../source/looker/looker_file_loader.py | 13 +- .../source/looker/looker_template_language.py | 118 +++- .../ingestion/source/looker/lookml_config.py | 18 +- .../ingestion/source/looker/lookml_source.py | 76 ++- .../tests/integration/lookml/test_lookml.py | 143 ++++- .../data.model.lkml | 2 +- .../data.model.lkml | 10 + .../manifest.lkml | 15 + .../star_award_winner.view.lkml | 12 + .../star_award_winner_dev.view.lkml | 17 + .../vv_lineage_lookml_constant_golden.json | 514 ++++++++++++++++++ 14 files changed, 962 insertions(+), 52 deletions(-) create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/data.model.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/manifest.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner.view.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner_dev.view.lkml create mode 100644 metadata-ingestion/tests/integration/lookml/vv_lineage_lookml_constant_golden.json diff --git a/metadata-ingestion/docs/sources/looker/looker_recipe.yml b/metadata-ingestion/docs/sources/looker/looker_recipe.yml index 42209f8cc68092..0939b6546411d2 100644 --- 
a/metadata-ingestion/docs/sources/looker/looker_recipe.yml +++ b/metadata-ingestion/docs/sources/looker/looker_recipe.yml @@ -8,4 +8,18 @@ source: client_id: ${LOOKER_CLIENT_ID} client_secret: ${LOOKER_CLIENT_SECRET} + # Liquid variables + # liquid_variables: + # _user_attributes: + # looker_env: "dev" + # dev_database_prefix: "employee" + # dev_schema_prefix: "public" + # dw_eff_dt_date: + # _is_selected: true + # source_region: "ap-south-1" + # db: "test-db" + + # LookML Constants + # lookml_constants: + # star_award_winner_year: "public.winner_2025" # sink configs diff --git a/metadata-ingestion/docs/sources/looker/lookml_post.md b/metadata-ingestion/docs/sources/looker/lookml_post.md index 8a4bf823ffc27d..fdbe7f3e1217d4 100644 --- a/metadata-ingestion/docs/sources/looker/lookml_post.md +++ b/metadata-ingestion/docs/sources/looker/lookml_post.md @@ -1,11 +1,49 @@ -#### Configuration Notes - -1. If a view contains a liquid template (e.g. `sql_table_name: {{ user_attributes['db']}}.kafka_streaming.events }}`, with `db=ANALYTICS_PROD`), then you will need to specify the values of those variables in the `liquid_variable` config as shown below: - ```yml - liquid_variable: - user_attributes: - db: ANALYTICS_PROD - ``` +### Configuration Notes + +1. Handling Liquid Templates + + If a view contains a liquid template, for example: + + ``` + sql_table_name: {{ user_attributes['db'] }}.kafka_streaming.events + ``` + + where `db=ANALYTICS_PROD`, you need to specify the values of those variables in the liquid_variables configuration as shown below: + + ```yml + liquid_variables: + user_attributes: + db: ANALYTICS_PROD + ``` + +2. 
Resolving LookML Constants + + If a view contains a LookML constant, for example: + + ``` + sql_table_name: @{db}.kafka_streaming.events; + ``` + + Ingestion attempts to resolve its value by looking at project manifest files + + ```yml + manifest.lkml + constant: db { + value: "ANALYTICS_PROD" + } + ``` + + - If the constant's value is not resolved or incorrectly resolved, you can specify `lookml_constants` configuration in ingestion recipe as shown below. The constant value in recipe takes precedence over constant values resolved from manifest. + + ```yml + lookml_constants: + db: ANALYTICS_PROD + ``` + + +**Additional Notes** + +Although liquid variables and LookML constants can be used anywhere in LookML code, their values are currently resolved only for LookML views by DataHub LookML ingestion. This behavior is sufficient since LookML ingestion processes only views and their upstream dependencies. ### Multi-Project LookML (Advanced) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py index d771821a14d88d..e928c25e22fbd0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_dataclasses.py @@ -32,6 +32,12 @@ class LookerField: sql: Optional[str] +@dataclass +class LookerConstant: + name: str + value: str + + @dataclass class LookerModel: connection: str @@ -75,6 +81,7 @@ def from_looker_dict( try: parsed = load_and_preprocess_file( path=included_file, + reporter=reporter, source_config=source_config, ) included_explores = parsed.get("explores", []) @@ -217,6 +224,7 @@ def resolve_includes( try: parsed = load_and_preprocess_file( path=included_file, + reporter=reporter, source_config=source_config, ) seen_so_far.add(included_file) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py
b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py index 9fac0b52fde0dd..bd6a37fe4b4e24 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py @@ -4,7 +4,10 @@ from typing import Dict, Optional from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition -from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile +from datahub.ingestion.source.looker.looker_dataclasses import ( + LookerConstant, + LookerViewFile, +) from datahub.ingestion.source.looker.looker_template_language import ( load_and_preprocess_file, ) @@ -30,12 +33,14 @@ def __init__( base_projects_folder: Dict[str, pathlib.Path], reporter: LookMLSourceReport, source_config: LookMLSourceConfig, + manifest_constants: Dict[str, LookerConstant] = {}, ) -> None: self.viewfile_cache: Dict[str, Optional[LookerViewFile]] = {} self._root_project_name = root_project_name self._base_projects_folder = base_projects_folder self.reporter = reporter self.source_config = source_config + self.manifest_constants = manifest_constants def _load_viewfile( self, project_name: str, path: str, reporter: LookMLSourceReport @@ -71,9 +76,15 @@ def _load_viewfile( try: logger.debug(f"Loading viewfile {path}") + # load_and_preprocess_file is called multiple times for loading view file from multiple flows. + # Flag resolve_constants is a hack to avoid passing around manifest_constants from all of the flows. + # This is fine as rest of flows do not need resolution of constants.
parsed = load_and_preprocess_file( path=path, + reporter=self.reporter, source_config=self.source_config, + resolve_constants=True, + manifest_constants=self.manifest_constants, ) looker_viewfile = LookerViewFile.from_looker_dict( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 2bcae4d46b8d52..60983f04bafa05 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -2,7 +2,7 @@ import pathlib import re from abc import ABC, abstractmethod -from typing import Any, ClassVar, Dict, List, Optional, Set, Union +from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Set, Union from deepmerge import always_merger from liquid import Undefined @@ -27,8 +27,12 @@ from datahub.ingestion.source.looker.lookml_config import ( DERIVED_VIEW_PATTERN, LookMLSourceConfig, + LookMLSourceReport, ) +if TYPE_CHECKING: + from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant + logger = logging.getLogger(__name__) @@ -82,7 +86,12 @@ def liquid_variable_with_default(self, text: str) -> dict: return self._create_new_liquid_variables_with_default(variables=variables) -def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: +def resolve_liquid_variable( + text: str, + view_name: str, + liquid_variable: Dict[Any, Any], + report: LookMLSourceReport, +) -> str: # Set variable value to NULL if not present in liquid_variable dictionary Undefined.__str__ = lambda instance: "NULL" # type: ignore try: @@ -96,6 +105,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: # Resolve liquid template return create_template(text).render(liquid_variable) except LiquidSyntaxError as e: + # TODO: Will add warning once we get rid of duplicate warning message
for same view logger.warning(f"Unsupported liquid template encountered. error [{e.message}]") # TODO: There are some tag specific to looker and python-liquid library does not understand them. currently # we are not parsing such liquid template. @@ -103,6 +113,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: # See doc: https://cloud.google.com/looker/docs/templated-filters and look for { % condition region %} # order.region { % endcondition %} except CustomTagException as e: + # TODO: Will add warning once we get rid of duplicate warning message for same view logger.warning(e) logger.debug(e, exc_info=e) @@ -192,15 +203,20 @@ class LookMLViewTransformer(ABC): source_config: LookMLSourceConfig - def __init__(self, source_config: LookMLSourceConfig): + def __init__( + self, + source_config: LookMLSourceConfig, + reporter: LookMLSourceReport, + ): self.source_config = source_config + self.reporter = reporter def transform(self, view: dict) -> dict: value_to_transform: Optional[str] = None - # is_attribute_supported check is required because not all transformer works on all attributes in current - # case mostly all transformer works on sql_table_name and derived.sql attributes, - # however IncompleteSqlTransformer only transform the derived.sql attribute + # is_attribute_supported check is required because not all transformers work on all attributes in the current + # case, mostly all transformers work on sql_table_name and derived.sql attributes; + # however, IncompleteSqlTransformer only transforms the derived.sql attribute if SQL_TABLE_NAME in view and self.is_attribute_supported(SQL_TABLE_NAME): # Give precedence to already processed transformed view.sql_table_name to apply more transformation value_to_transform = view.get( @@ -252,7 +268,9 @@ class LiquidVariableTransformer(LookMLViewTransformer): def _apply_transformation(self, value: str, view: dict) -> str: return resolve_liquid_variable( text=value, -
liquid_variable=self.source_config.liquid_variable, + liquid_variable=self.source_config.liquid_variables, + view_name=view["name"], + report=self.reporter, ) @@ -287,7 +305,7 @@ def _apply_transformation(self, value: str, view: dict) -> str: class DropDerivedViewPatternTransformer(LookMLViewTransformer): """ - drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values. + drop ${} from datahub_transformed_sql_table_name and view["derived_table"]["datahub_transformed_sql_table_name"] values. Example: transform ${employee_income_source.SQL_TABLE_NAME} to employee_income_source.SQL_TABLE_NAME """ @@ -308,8 +326,8 @@ class LookMlIfCommentTransformer(LookMLViewTransformer): evaluate_to_true_regx: str remove_if_comment_line_regx: str - def __init__(self, source_config: LookMLSourceConfig): - super().__init__(source_config=source_config) + def __init__(self, source_config: LookMLSourceConfig, reporter: LookMLSourceReport): + super().__init__(source_config=source_config, reporter=reporter) # This regx will keep whatever after -- if looker_environment -- self.evaluate_to_true_regx = r"-- if {} --".format( @@ -335,6 +353,61 @@ def _apply_transformation(self, value: str, view: dict) -> str: return self._apply_regx(value) +class LookmlConstantTransformer(LookMLViewTransformer): + """ + Replace LookML constants @{constant} from the manifest/configuration. + """ + + CONSTANT_PATTERN = r"@{(\w+)}" # Matches @{constant} + + def __init__( + self, + source_config: LookMLSourceConfig, + reporter: LookMLSourceReport, + manifest_constants: Dict[str, "LookerConstant"], + ): + super().__init__(source_config=source_config, reporter=reporter) + self.manifest_constants = manifest_constants + + def resolve_lookml_constant(self, text: str, view_name: Optional[str]) -> str: + """ + Resolves LookML constants (@{ }) from manifest or config. + Logs warnings for misplaced or missing variables. 
+ """ + + def replace_constants(match): + key = match.group(1) + # Resolve constant from config + if key in self.source_config.lookml_constants: + return str(self.source_config.lookml_constants.get(key)) + + # Resolve constant from manifest + if key in self.manifest_constants: + return self.manifest_constants[key].value + + # Check if it's a misplaced lookml constant + if key in self.source_config.liquid_variables: + self.reporter.warning( + title="Misplaced lookml constant", + message="Use 'lookml_constants' instead of 'liquid_variables'.", + context=f"Key {key}", + ) + return f"@{{{key}}}" + + self.reporter.warning( + title="LookML constant not found", + message="The constant is missing. Either add it under 'lookml_constants' in the config or define it in `manifest.lkml`.", + context=f"view-name: {view_name}, constant: {key}", + ) + return f"@{{{key}}}" + + # Resolve @{} (constant) + return re.sub(self.CONSTANT_PATTERN, replace_constants, text) + + def _apply_transformation(self, value: str, view: dict) -> str: + return self.resolve_lookml_constant(text=value, view_name=view.get("name")) + + class TransformedLookMlView: """ TransformedLookMlView is collecting output of LookMLViewTransformer and creating a new transformed LookML view. @@ -390,24 +463,35 @@ def view(self) -> dict: def process_lookml_template_language( source_config: LookMLSourceConfig, view_lkml_file_dict: dict, + reporter: LookMLSourceReport, + manifest_constants: Dict[str, "LookerConstant"] = {}, + resolve_constants: bool = False, ) -> None: if "views" not in view_lkml_file_dict: return transformers: List[LookMLViewTransformer] = [ LookMlIfCommentTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # First evaluate the -- if -- comments. 
Looker does the same LiquidVariableTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # Now resolve liquid variables DropDerivedViewPatternTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # Remove any ${} symbol IncompleteSqlTransformer( - source_config=source_config + source_config=source_config, reporter=reporter ), # complete any incomplete sql ] + if resolve_constants: + transformers.append( + LookmlConstantTransformer( + source_config=source_config, + manifest_constants=manifest_constants, + reporter=reporter, + ), # Resolve @{} constant with its corresponding value + ) transformed_views: List[dict] = [] @@ -422,12 +506,18 @@ def process_lookml_template_language( def load_and_preprocess_file( path: Union[str, pathlib.Path], source_config: LookMLSourceConfig, + reporter: LookMLSourceReport, + manifest_constants: Dict[str, "LookerConstant"] = {}, + resolve_constants: bool = False, ) -> dict: parsed = load_lkml(path) process_lookml_template_language( view_lkml_file_dict=parsed, + reporter=reporter, source_config=source_config, + manifest_constants=manifest_constants, + resolve_constants=resolve_constants, ) return parsed diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index 4d3255c3c0715b..75de6f1fe3c6e1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -161,13 +161,27 @@ class LookMLSourceConfig( description="When enabled, looker refinement will be processed to adapt an existing view.", ) - liquid_variable: Dict[Any, Any] = Field( + liquid_variables: Dict[Any, Any] = Field( {}, - description="A dictionary containing Liquid variables and their corresponding values, utilized in SQL-defined " + description="A dictionary containing Liquid variables with 
their corresponding values, utilized in SQL-defined " "derived views. The Liquid template will be resolved in view.derived_table.sql and " "view.sql_table_name. Defaults to an empty dictionary.", ) + _liquid_variable_deprecated = pydantic_renamed_field( + old_name="liquid_variable", new_name="liquid_variables", print_warning=True + ) + + lookml_constants: Dict[str, str] = Field( + {}, + description=( + "A dictionary containing LookML constants (`@{constant_name}`) and their values. " + "If a constant is defined in the `manifest.lkml` file, its value will be used. " + "If not found in the manifest, the value from this config will be used instead. " + "Defaults to an empty dictionary." + ), + ) + looker_environment: Literal["prod", "dev"] = Field( "prod", description="A looker prod or dev environment. " diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 9a937840a5012f..5f39821ee6c2e3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -43,6 +43,7 @@ from datahub.ingestion.source.looker.looker_connection import ( get_connection_def_based_on_connection_string, ) +from datahub.ingestion.source.looker.looker_dataclasses import LookerConstant from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI from datahub.ingestion.source.looker.looker_template_language import ( load_and_preprocess_file, @@ -254,6 +255,7 @@ class LookerManifest: # This must be set if the manifest has local_dependency entries. 
# See https://cloud.google.com/looker/docs/reference/param-manifest-project-name project_name: Optional[str] + constants: Optional[List[Dict[str, str]]] local_dependencies: List[str] remote_dependencies: List[LookerRemoteDependency] @@ -310,11 +312,14 @@ def __init__(self, config: LookMLSourceConfig, ctx: PipelineContext): "manage_models permission enabled on this API key." ) from err + self.manifest_constants: Dict[str, "LookerConstant"] = {} + def _load_model(self, path: str) -> LookerModel: logger.debug(f"Loading model from file {path}") parsed = load_and_preprocess_file( path=path, + reporter=self.reporter, source_config=self.source_config, ) @@ -500,27 +505,33 @@ def get_project_name(self, model_name: str) -> str: def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]: manifest_file = folder / "manifest.lkml" - if manifest_file.exists(): - manifest_dict = load_and_preprocess_file( - path=manifest_file, source_config=self.source_config - ) - manifest = LookerManifest( - project_name=manifest_dict.get("project_name"), - local_dependencies=[ - x["project"] for x in manifest_dict.get("local_dependencys", []) - ], - remote_dependencies=[ - LookerRemoteDependency( - name=x["name"], url=x["url"], ref=x.get("ref") - ) - for x in manifest_dict.get("remote_dependencys", []) - ], + if not manifest_file.exists(): + self.reporter.info( + message="manifest.lkml file missing from project", + context=str(manifest_file), ) - return manifest - else: return None + manifest_dict = load_and_preprocess_file( + path=manifest_file, + source_config=self.source_config, + reporter=self.reporter, + ) + + manifest = LookerManifest( + project_name=manifest_dict.get("project_name"), + constants=manifest_dict.get("constants", []), + local_dependencies=[ + x["project"] for x in manifest_dict.get("local_dependencys", []) + ], + remote_dependencies=[ + LookerRemoteDependency(name=x["name"], url=x["url"], ref=x.get("ref")) + for x in 
manifest_dict.get("remote_dependencys", []) + ], + ) + return manifest + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), @@ -575,7 +586,10 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.base_projects_folder[project] = p_ref self._recursively_check_manifests( - tmp_dir, BASE_PROJECT_NAME, visited_projects + tmp_dir, + BASE_PROJECT_NAME, + visited_projects, + self.manifest_constants, ) yield from self.get_internal_workunits() @@ -588,7 +602,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) def _recursively_check_manifests( - self, tmp_dir: str, project_name: str, project_visited: Set[str] + self, + tmp_dir: str, + project_name: str, + project_visited: Set[str], + manifest_constants: Dict[str, "LookerConstant"], ) -> None: if project_name in project_visited: return @@ -605,6 +623,14 @@ def _recursively_check_manifests( if not manifest: return + if manifest.constants: + for constant in manifest.constants: + if constant.get("name") and constant.get("value"): + manifest_constants[constant["name"]] = LookerConstant( + name=constant["name"], + value=constant["value"], + ) + # Special case handling if the root project has a name in the manifest file. 
if project_name == BASE_PROJECT_NAME and manifest.project_name: if ( @@ -664,21 +690,27 @@ def _recursively_check_manifests( project_visited.add(project_name) else: self._recursively_check_manifests( - tmp_dir, remote_project.name, project_visited + tmp_dir, + remote_project.name, + project_visited, + manifest_constants, ) for project in manifest.local_dependencies: - self._recursively_check_manifests(tmp_dir, project, project_visited) + self._recursively_check_manifests( + tmp_dir, project, project_visited, manifest_constants + ) def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 assert self.source_config.base_folder - viewfile_loader = LookerViewFileLoader( self.source_config.project_name, self.base_projects_folder, self.reporter, self.source_config, + self.manifest_constants, ) + logger.debug(f"LookML Constants : {', '.join(self.manifest_constants.keys())}") # Some views can be mentioned by multiple 'include' statements and can be included via different connections. 
diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index ac011324684189..7baaccbbaa664b 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -14,13 +14,20 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.file import read_metadata_file -from datahub.ingestion.source.looker.looker_dataclasses import LookerModel +from datahub.ingestion.source.looker.looker_dataclasses import ( + LookerConstant, + LookerModel, +) from datahub.ingestion.source.looker.looker_template_language import ( + LookmlConstantTransformer, SpecialVariable, load_and_preprocess_file, resolve_liquid_variable, ) -from datahub.ingestion.source.looker.lookml_config import LookMLSourceConfig +from datahub.ingestion.source.looker.lookml_config import ( + LookMLSourceConfig, + LookMLSourceReport, +) from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver from datahub.ingestion.source.looker.lookml_source import LookMLSource from datahub.metadata.schema_classes import ( @@ -835,8 +842,7 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None: manifest_file = test_resources_dir / "lkml_manifest_samples/complex-manifest.lkml" manifest = load_and_preprocess_file( - path=manifest_file, - source_config=MagicMock(), + path=manifest_file, source_config=MagicMock(), reporter=LookMLSourceReport() ) assert manifest @@ -900,6 +906,31 @@ def test_view_to_view_lineage_and_liquid_template(pytestconfig, tmp_path, mock_t ) +@freeze_time(FROZEN_TIME) +def test_view_to_view_lineage_and_lookml_constant(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" + mce_out_file = "vv_lineage_lookml_constant_golden.json" + + new_recipe = get_default_recipe( + 
f"{tmp_path}/{mce_out_file}", + f"{test_resources_dir}/vv-lineage-and-lookml-constant", + ) + + new_recipe["source"]["config"]["lookml_constants"] = {"winner_table": "dev"} + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.pretty_print_summary() + assert pipeline.source.get_report().warnings.total_elements == 1 + + golden_path = test_resources_dir / "vv_lineage_lookml_constant_golden.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / mce_out_file, + golden_path=golden_path, + ) + + @freeze_time(FROZEN_TIME) def test_special_liquid_variables(): text: str = """{% assign source_table_variable = "source_table" | sql_quote | non_existing_filter_where_it_should_not_fail %} @@ -966,6 +997,8 @@ def test_special_liquid_variables(): actual_text = resolve_liquid_variable( text=text, liquid_variable=input_liquid_variable, + report=LookMLSourceReport(), + view_name="test", ) expected_text: str = ( @@ -976,6 +1009,108 @@ def test_special_liquid_variables(): assert actual_text == expected_text +@pytest.mark.parametrize( + "view, expected_result, warning_expected", + [ + # Case 1: Single constant replacement in sql_table_name + ( + {"sql_table_name": "@{constant1}.kafka_streaming.events"}, + {"datahub_transformed_sql_table_name": "value1.kafka_streaming.events"}, + False, + ), + # Case 2: Single constant replacement with config-defined constant + ( + {"sql_table_name": "SELECT * FROM @{constant2}"}, + {"datahub_transformed_sql_table_name": "SELECT * FROM value2"}, + False, + ), + # Case 3: Multiple constants in a derived_table SQL query + ( + {"derived_table": {"sql": "SELECT @{constant1}, @{constant3}"}}, + { + "derived_table": { + "datahub_transformed_sql": "SELECT value1, manifest_value3" + } + }, + False, + ), + # Case 4: Non-existent constant in sql_table_name + ( + {"sql_table_name": "SELECT * FROM @{nonexistent}"}, + {"datahub_transformed_sql_table_name": "SELECT * FROM @{nonexistent}"}, + False, + ), + # Case 5: View with 
unsupported attribute + ({"unsupported_attribute": "SELECT * FROM @{constant1}"}, {}, False), + # Case 6: View with no transformable attributes + ( + {"sql_table_name": "SELECT * FROM table_name"}, + {"datahub_transformed_sql_table_name": "SELECT * FROM table_name"}, + False, + ), + # Case 7: Constants only in manifest_constants + ( + {"sql_table_name": "SELECT @{constant3}"}, + {"datahub_transformed_sql_table_name": "SELECT manifest_value3"}, + False, + ), + # Case 8: Constants only in lookml_constants + ( + {"sql_table_name": "SELECT @{constant2}"}, + {"datahub_transformed_sql_table_name": "SELECT value2"}, + False, + ), + # Case 9: Multiple unsupported attributes + ( + { + "unsupported_attribute": "SELECT @{constant1}", + "another_unsupported_attribute": "SELECT @{constant2}", + }, + {}, + False, + ), + # Case 10: Misplaced lookml constant + ( + {"sql_table_name": "@{constant1}.@{constant2}.@{constant4}"}, + {"datahub_transformed_sql_table_name": "value1.value2.@{constant4}"}, + True, + ), + ], +) +@freeze_time(FROZEN_TIME) +def test_lookml_constant_transformer(view, expected_result, warning_expected): + """ + Test LookmlConstantTransformer with various view structures. 
+ """ + config = MagicMock() + report = MagicMock() + config.lookml_constants = { + "constant1": "value1", + "constant2": "value2", + } + config.liquid_variables = { + "constant4": "liquid_value1", + } + + transformer = LookmlConstantTransformer( + source_config=config, + reporter=report, + manifest_constants={ + "constant1": LookerConstant(name="constant1", value="manifest_value1"), + "constant3": LookerConstant(name="constant3", value="manifest_value3"), + }, + ) + + result = transformer.transform(view) + assert result == expected_result + if warning_expected: + report.warning.assert_called_once_with( + title="Misplaced lookml constant", + message="Use 'lookml_constants' instead of 'liquid_variables'.", + context="Key constant4", + ) + + @freeze_time(FROZEN_TIME) def test_field_tag_ingest(pytestconfig, tmp_path, mock_time): test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml index d570e0ecdb5b22..4de4df34e15d1e 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml @@ -39,4 +39,4 @@ explore: rent_as_employee_income_source { } explore: child_view { -} \ No newline at end of file +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/data.model.lkml new file mode 100644 index 00000000000000..6f425c469c9546 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/data.model.lkml @@ -0,0 +1,10 @@ +connection: "my_connection" + +include: "star_award_winner.view.lkml" +include: "star_award_winner_dev.view.lkml" + +explore: star_award_winner { +} 
+ +explore: star_award_winner_dev { +} diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/manifest.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/manifest.lkml new file mode 100644 index 00000000000000..fcdd71a6262945 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/manifest.lkml @@ -0,0 +1,15 @@ +constant: customer_support_db { + value: "star_award_winner_year" + export: none +} + +constant: customer_support_schema { + value: "public" + export: none +} + +constant: customer_support_table { + value: "winner" + export: none +} + diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner.view.lkml new file mode 100644 index 00000000000000..fd0fcf33c376e7 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner.view.lkml @@ -0,0 +1,12 @@ +view: star_award_winner { + sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{invalid_constant};; + + + dimension: id { + label: "id" + primary_key: yes + type: number + sql: ${TABLE}.id ;; + } + +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner_dev.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner_dev.view.lkml new file mode 100644 index 00000000000000..0c2417251fc15c --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-lookml-constant/star_award_winner_dev.view.lkml @@ -0,0 +1,17 @@ +view: star_award_winner_dev { + sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{winner_table};; + + + dimension: id { + label: "id" + primary_key: yes + type: number + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + 
sql: ${TABLE}.name;; + } + +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_lookml_constant_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_lookml_constant_golden.json new file mode 100644 index 00000000000000..296f09b697ee4d --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_lookml_constant_golden.json @@ -0,0 +1,514 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "project_name": "lkml_samples" + }, + "name": "lkml_samples", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: star_award_winner {\n sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{invalid_constant};;\n\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "aspects": [ + { + 
"com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.@{invalid_constant},PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.@{invalid_constant},PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD),id)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "star_award_winner", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "id", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "star_award_winner.view.lkml", + "looker.model": "data" + }, + "name": "star_award_winner", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: star_award_winner_dev {\n sql_table_name: @{customer_support_db}.@{customer_support_schema}.@{winner_table};;\n\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n\n dimension: name {\n type: string\n sql: ${TABLE}.name;;\n }\n\n}", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + 
"systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.dev,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.dev,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,star_award_winner_year.public.dev,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD),name)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "star_award_winner_dev", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + 
"rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "description": "", + "label": "id", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": true + }, + { + "fieldPath": "name", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [ + "id" + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "star_award_winner_dev.view.lkml", + "looker.model": "data" + }, + "name": "star_award_winner_dev", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.star_award_winner_dev,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Dimension" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file From 
7870b13490e684e1179c8df7f95ec52c8ea3b090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 28 Jan 2025 10:26:19 +0100 Subject: [PATCH 172/249] feat(snowflake): set is_temp_table and is_allowed_table function for SqlParsingAggregator in SnowflakeV2Source (#12438) --- .../source/snowflake/snowflake_v2.py | 45 +++++++++++++++++-- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index b4ef2180d71d45..7d63f41f4bcf03 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -5,6 +5,7 @@ import os import os.path import platform +import re from dataclasses import dataclass from typing import Dict, Iterable, List, Optional, Union @@ -33,6 +34,7 @@ from datahub.ingestion.source.snowflake.constants import ( GENERIC_PERMISSION_ERROR_KEY, SnowflakeEdition, + SnowflakeObjectDomain, ) from datahub.ingestion.source.snowflake.snowflake_assertion import ( SnowflakeAssertionsHandler, @@ -162,6 +164,8 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): self.data_dictionary = SnowflakeDataDictionary(connection=self.connection) self.lineage_extractor: Optional[SnowflakeLineageExtractor] = None + self.discovered_datasets: Optional[List[str]] = None + self.aggregator: SqlParsingAggregator = self._exit_stack.enter_context( SqlParsingAggregator( platform=self.identifiers.platform, @@ -182,6 +186,8 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): generate_usage_statistics=False, generate_operations=False, format_queries=self.config.format_sql_queries, + is_temp_table=self._is_temp_table, + is_allowed_table=self._is_allowed_table, ) ) self.report.sql_aggregator = self.aggregator.report @@ -444,6 +450,34 @@ class SnowflakePrivilege: return _report 
+ def _is_temp_table(self, name: str) -> bool: + if any( + re.match(pattern, name, flags=re.IGNORECASE) + for pattern in self.config.temporary_tables_pattern + ): + return True + + # This is also a temp table if + # 1. this name would be allowed by the dataset patterns, and + # 2. we have a list of discovered tables, and + # 3. it's not in the discovered tables list + if ( + self.filters.is_dataset_pattern_allowed(name, SnowflakeObjectDomain.TABLE) + and self.discovered_datasets + and name not in self.discovered_datasets + ): + return True + + return False + + def _is_allowed_table(self, name: str) -> bool: + if self.discovered_datasets and name not in self.discovered_datasets: + return False + + return self.filters.is_dataset_pattern_allowed( + name, SnowflakeObjectDomain.TABLE + ) + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), @@ -513,7 +547,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) return - discovered_datasets = discovered_tables + discovered_views + self.discovered_datasets = discovered_tables + discovered_views if self.config.use_queries_v2: with self.report.new_stage(f"*: {VIEW_PARSING}"): @@ -538,13 +572,14 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: filters=self.filters, identifiers=self.identifiers, schema_resolver=schema_resolver, - discovered_tables=discovered_datasets, + discovered_tables=self.discovered_datasets, graph=self.ctx.graph, ) # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs # but a shared schema resolver. That's fine for now though - once we remove the old lineage/usage extractors, # it should be pretty straightforward to refactor this and only initialize the aggregator once. + # This also applies for the _is_temp_table and _is_allowed_table methods above, duplicated from SnowflakeQueriesExtractor. 
self.report.queries_extractor = queries_extractor.report yield from queries_extractor.get_workunits_internal() queries_extractor.close() @@ -568,12 +603,14 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if ( self.config.include_usage_stats or self.config.include_operational_stats ) and self.usage_extractor: - yield from self.usage_extractor.get_usage_workunits(discovered_datasets) + yield from self.usage_extractor.get_usage_workunits( + self.discovered_datasets + ) if self.config.include_assertion_results: yield from SnowflakeAssertionsHandler( self.config, self.report, self.connection, self.identifiers - ).get_assertion_workunits(discovered_datasets) + ).get_assertion_workunits(self.discovered_datasets) self.connection.close() From 79aa40f1e69632ab6e0d63c1c9554a682a99424c Mon Sep 17 00:00:00 2001 From: skrydal Date: Tue, 28 Jan 2025 13:06:24 +0100 Subject: [PATCH 173/249] log(ingest/lookml): view file missing/parsing as warnings (#12448) --- .../src/datahub/ingestion/source/looker/looker_config.py | 4 +++- .../src/datahub/ingestion/source/looker/looker_file_loader.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 3ed3186399588e..0f8d86a2cbd295 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -177,7 +177,9 @@ def _get_generic_definition( class LookerConnectionDefinition(ConfigModel): platform: str default_db: str - default_schema: Optional[str] # Optional since some sources are two-level only + default_schema: Optional[str] = ( + None # Optional since some sources are two-level only + ) platform_instance: Optional[str] = None platform_env: Optional[str] = Field( default=None, diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py index bd6a37fe4b4e24..ba7b62a1281c02 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py @@ -65,7 +65,7 @@ def _load_viewfile( with open(path) as file: raw_file_content = file.read() except Exception as e: - self.reporter.failure( + self.reporter.report_warning( title="LKML File Loading Error", message="A lookml file is not present on local storage or GitHub", context=f"file path: {path}", @@ -101,7 +101,7 @@ def _load_viewfile( self.viewfile_cache[path] = looker_viewfile return looker_viewfile except Exception as e: - self.reporter.failure( + self.reporter.report_warning( title="LKML File Parsing Error", message="The input file is not lookml file", context=f"file path: {path}", From 15c3783532ceb489f783baf1f7a43d3b98a362a8 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Tue, 28 Jan 2025 13:57:42 -0800 Subject: [PATCH 174/249] docs(entity-change-events): include add/remove/update examples (#12388) --- docs/actions/events/entity-change-event.md | 69 +++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/docs/actions/events/entity-change-event.md b/docs/actions/events/entity-change-event.md index 27277a97ad1994..4a7264c20bcc62 100644 --- a/docs/actions/events/entity-change-event.md +++ b/docs/actions/events/entity-change-event.md @@ -219,6 +219,73 @@ This event is emitted when an existing owner has been removed from an entity on } ``` +### Add Structured Property Event + +This event is emitted when a Structured Property has been added to an entity on DataHub. 
+ +#### Sample Event +```json +{ + "entityUrn": "urn:li:dataset:abc", + "entityType": "dataset", + "category": "STRUCTURED_PROPERTY", + "operation": "ADD", + "modifier": "urn:li:structuredProperty:prop1", + "parameters": { + "propertyUrn": "urn:li:structuredProperty:prop1", + "propertyValues": "[\"value1\"]" + }, + "version": 0, + "auditStamp": { + "actor": "urn:li:corpuser:jdoe", + "time": 1649953100653 + } +} +``` + +### Remove Structured Property Event + +This event is emitted when a Structured Property has been removed from an entity on DataHub. + +#### Sample Event +```json +{ + "entityUrn": "urn:li:dataset:abc", + "entityType": "dataset", + "category": "STRUCTURED_PROPERTY", + "operation": "REMOVE", + "modifier": "urn:li:structuredProperty:prop1", + "version": 0, + "auditStamp": { + "actor": "urn:li:corpuser:jdoe", + "time": 1649953100653 + } +} +``` + +### Modify Structured Property Event + +This event is emitted when a Structured Property's values have been modified on an entity in DataHub. + +#### Sample Event +```json +{ + "entityUrn": "urn:li:dataset:abc", + "entityType": "dataset", + "category": "STRUCTURED_PROPERTY", + "operation": "MODIFY", + "modifier": "urn:li:structuredProperty:prop1", + "parameters": { + "propertyUrn": "urn:li:structuredProperty:prop1", + "propertyValues": "[\"value1\",\"value2\"]" + }, + "version": 0, + "auditStamp": { + "actor": "urn:li:corpuser:jdoe", + "time": 1649953100653 + } +} +``` ### Modify Deprecation Event @@ -349,4 +416,4 @@ This event is emitted when a new entity has been hard-deleted on DataHub. 
"time": 1649953100653 } } -``` \ No newline at end of file +``` From dbd57c972f79ae1469adc0c8bde77fd6821fb819 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Tue, 28 Jan 2025 15:59:01 -0600 Subject: [PATCH 175/249] fix(ci): fix datahub-ingestion release/tag publishing (#12466) --- .github/workflows/docker-unified.yml | 14 +++++++------- docker/datahub-ingestion-base/build.gradle | 2 +- docker/datahub-ingestion/build.gradle | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 5f944c8e28769f..a756b27a38e848 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -44,6 +44,7 @@ jobs: tag: ${{ steps.tag.outputs.tag }} slim_tag: ${{ steps.tag.outputs.slim_tag }} full_tag: ${{ steps.tag.outputs.full_tag }} + short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy unique_tag: ${{ steps.tag.outputs.unique_tag }} unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }} unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }} @@ -65,6 +66,8 @@ jobs: postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }} elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }} smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }} + integrations_service_change: "false" + datahub_executor_change: "false" steps: - name: Check out the repo uses: acryldata/sane-checkout-action@v3 @@ -864,7 +867,8 @@ jobs: context: . 
file: ./docker/datahub-ingestion/Dockerfile platforms: linux/amd64,linux/arm64/v8 - depot-project: ${{ vars.DEPOT_PROJECT_ID }} + # Workaround 2025-01-25 - Depot publishing errors + depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }} - name: Compute Tag id: tag run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT" @@ -963,7 +967,8 @@ jobs: context: . file: ./docker/datahub-ingestion/Dockerfile platforms: linux/amd64,linux/arm64/v8 - depot-project: ${{ vars.DEPOT_PROJECT_ID }} + # Workaround 2025-01-25 - Depot publishing errors + depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }} - name: Compute Tag (Full) id: tag run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT" @@ -1178,11 +1183,6 @@ jobs: docker pull '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head' docker tag '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head' '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}' fi - if [ '${{ needs.setup.outputs.integrations_service_change }}' == 'false' ]; then - echo 'datahub-integration-service head images' - docker pull '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head' - docker tag '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head' '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}' - fi - name: CI Slim Head Images run: | if [ '${{ needs.setup.outputs.ingestion_change }}' == 'false' ]; then diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle index f19faa227ca612..b3ed6463b9f6c8 100644 --- a/docker/datahub-ingestion-base/build.gradle +++ b/docker/datahub-ingestion-base/build.gradle @@ -12,7 +12,7 @@ ext { docker_target = project.getProperties().getOrDefault("dockerTarget", 
"slim") docker_version = "${version}${docker_target == 'slim' ? '-slim' : ''}" - revision = 8 // increment to trigger rebuild + revision = 10 // increment to trigger rebuild } docker { diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index b236a53c288f7f..b0b666f75eb5a6 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -12,7 +12,7 @@ ext { docker_target = project.getProperties().getOrDefault("dockerTarget", "slim") docker_version = "${version}${docker_target == 'slim' ? '-slim' : ''}" - revision = 9 // increment to trigger rebuild + revision = 10 // increment to trigger rebuild } dependencies { From 47134c272bd82ff8d00b6a30c725fbde4165335c Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 29 Jan 2025 11:41:56 +0900 Subject: [PATCH 176/249] feat: update ml system UI (#12334) Co-authored-by: Andrew Sikowitz Co-authored-by: RyanHolstien Co-authored-by: Shirshanka Das Co-authored-by: ryota-cloud --- .../mappers/MLModelGroupPropertiesMapper.java | 33 +++ .../mappers/MLModelPropertiesMapper.java | 15 ++ .../src/main/resources/lineage.graphql | 29 +++ .../MLModelGroupPropertiesMapperTest.java | 68 ++++++ .../mappers/MLModelPropertiesMapperTest.java | 187 ++++++++++++++++ .../src/app/entity/EntityPage.tsx | 1 + .../DataProcessInstanceEntity.tsx | 72 ++---- .../dataProcessInstance/preview/Preview.tsx | 20 +- .../profile/DataProcessInstanceSummary.tsx | 102 +++++++++ .../src/app/entity/mlModel/MLModelEntity.tsx | 2 +- .../app/entity/mlModel/preview/Preview.tsx | 3 +- .../entity/mlModel/profile/MLModelSummary.tsx | 96 +++++++- .../mlModelGroup/MLModelGroupEntity.tsx | 2 +- .../entity/mlModelGroup/preview/Preview.tsx | 3 +- .../mlModelGroup/profile/ModelGroupModels.tsx | 208 ++++++++++++++++-- .../search/EmbeddedListSearchSection.tsx | 27 ++- .../src/app/entity/shared/constants.ts | 4 + .../DataProcessInstanceRightColumn.tsx | 87 ++++++++ 
.../src/app/preview/DefaultPreviewCard.tsx | 12 +- .../src/app/shared/time/timeUtils.tsx | 38 ++++ .../src/graphql/fragments.graphql | 12 + datahub-web-react/src/graphql/lineage.graphql | 32 +++ datahub-web-react/src/graphql/mlModel.graphql | 17 ++ datahub-web-react/src/graphql/search.graphql | 9 + 24 files changed, 989 insertions(+), 90 deletions(-) create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapperTest.java create mode 100644 datahub-web-react/src/app/entity/dataProcessInstance/profile/DataProcessInstanceSummary.tsx create mode 100644 datahub-web-react/src/app/preview/DataProcessInstanceRightColumn.tsx diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java index a6cfded9865d90..2da2fa2a58a6af 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapper.java @@ -3,8 +3,11 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.MLModelGroupProperties; +import com.linkedin.datahub.graphql.generated.MLModelLineageInfo; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper; import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -33,10 +36,40 @@ public 
MLModelGroupProperties apply( result.setVersion(VersionTagMapper.map(context, mlModelGroupProperties.getVersion())); } result.setCreatedAt(mlModelGroupProperties.getCreatedAt()); + if (mlModelGroupProperties.hasCreated()) { + result.setCreated( + TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getCreated())); + } + if (mlModelGroupProperties.getName() != null) { + result.setName(mlModelGroupProperties.getName()); + } else { + // backfill name from URN for backwards compatibility + result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here + } + + if (mlModelGroupProperties.hasLastModified()) { + result.setLastModified( + TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getLastModified())); + } result.setCustomProperties( CustomPropertiesMapper.map(mlModelGroupProperties.getCustomProperties(), entityUrn)); + final MLModelLineageInfo lineageInfo = new MLModelLineageInfo(); + if (mlModelGroupProperties.hasTrainingJobs()) { + lineageInfo.setTrainingJobs( + mlModelGroupProperties.getTrainingJobs().stream() + .map(urn -> urn.toString()) + .collect(Collectors.toList())); + } + if (mlModelGroupProperties.hasDownstreamJobs()) { + lineageInfo.setDownstreamJobs( + mlModelGroupProperties.getDownstreamJobs().stream() + .map(urn -> urn.toString()) + .collect(Collectors.toList())); + } + result.setMlModelLineageInfo(lineageInfo); + return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java index 7b00fe88f2d683..1f1003dea720c3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java @@ -5,6 +5,7 @@ import com.linkedin.common.urn.Urn; import 
com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.MLModelGroup; +import com.linkedin.datahub.graphql.generated.MLModelLineageInfo; import com.linkedin.datahub.graphql.generated.MLModelProperties; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper; @@ -87,6 +88,20 @@ public MLModelProperties apply( .collect(Collectors.toList())); } result.setTags(mlModelProperties.getTags()); + final MLModelLineageInfo lineageInfo = new MLModelLineageInfo(); + if (mlModelProperties.hasTrainingJobs()) { + lineageInfo.setTrainingJobs( + mlModelProperties.getTrainingJobs().stream() + .map(urn -> urn.toString()) + .collect(Collectors.toList())); + } + if (mlModelProperties.hasDownstreamJobs()) { + lineageInfo.setDownstreamJobs( + mlModelProperties.getDownstreamJobs().stream() + .map(urn -> urn.toString()) + .collect(Collectors.toList())); + } + result.setMlModelLineageInfo(lineageInfo); return result; } diff --git a/datahub-graphql-core/src/main/resources/lineage.graphql b/datahub-graphql-core/src/main/resources/lineage.graphql index 975d013a448058..abb1446421858f 100644 --- a/datahub-graphql-core/src/main/resources/lineage.graphql +++ b/datahub-graphql-core/src/main/resources/lineage.graphql @@ -25,3 +25,32 @@ input LineageEdge { """ upstreamUrn: String! } + +""" +Represents lineage information for ML entities. +""" +type MLModelLineageInfo { + """ + List of jobs or processes used to train the model. + """ + trainingJobs: [String!] + + """ + List of jobs or processes that use this model. + """ + downstreamJobs: [String!] 
+} + +extend type MLModelProperties { + """ + Information related to lineage to this model group + """ + mlModelLineageInfo: MLModelLineageInfo +} + +extend type MLModelGroupProperties { + """ + Information related to lineage to this model group + """ + mlModelLineageInfo: MLModelLineageInfo +} \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapperTest.java new file mode 100644 index 00000000000000..fc738837c09d17 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupPropertiesMapperTest.java @@ -0,0 +1,68 @@ +package com.linkedin.datahub.graphql.types.mlmodel.mappers; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + +import com.linkedin.common.urn.Urn; +import com.linkedin.ml.metadata.MLModelGroupProperties; +import java.net.URISyntaxException; +import org.testng.annotations.Test; + +public class MLModelGroupPropertiesMapperTest { + + @Test + public void testMapMLModelGroupProperties() throws URISyntaxException { + // Create backend ML Model Group Properties + MLModelGroupProperties input = new MLModelGroupProperties(); + + // Set description + input.setDescription("a ml trust model group"); + + // Set Name + input.setName("ML trust model group"); + + // Create URN + Urn groupUrn = + Urn.createFromString( + "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)"); + + // Map the properties + com.linkedin.datahub.graphql.generated.MLModelGroupProperties result = + MLModelGroupPropertiesMapper.map(null, input, groupUrn); + + // Verify mapped properties + assertNotNull(result); + assertEquals(result.getDescription(), "a ml trust model group"); + 
assertEquals(result.getName(), "ML trust model group"); + + // Verify lineage info is null as in the mock data + assertNotNull(result.getMlModelLineageInfo()); + assertNull(result.getMlModelLineageInfo().getTrainingJobs()); + assertNull(result.getMlModelLineageInfo().getDownstreamJobs()); + } + + @Test + public void testMapWithMinimalProperties() throws URISyntaxException { + // Create backend ML Model Group Properties with minimal information + MLModelGroupProperties input = new MLModelGroupProperties(); + + // Create URN + Urn groupUrn = + Urn.createFromString( + "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)"); + + // Map the properties + com.linkedin.datahub.graphql.generated.MLModelGroupProperties result = + MLModelGroupPropertiesMapper.map(null, input, groupUrn); + + // Verify basic mapping with minimal properties + assertNotNull(result); + assertNull(result.getDescription()); + + // Verify lineage info is null + assertNotNull(result.getMlModelLineageInfo()); + assertNull(result.getMlModelLineageInfo().getTrainingJobs()); + assertNull(result.getMlModelLineageInfo().getDownstreamJobs()); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapperTest.java new file mode 100644 index 00000000000000..17fa7a0abe1396 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapperTest.java @@ -0,0 +1,187 @@ +package com.linkedin.datahub.graphql.types.mlmodel.mappers; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + +import com.linkedin.common.MLFeatureUrnArray; +import com.linkedin.common.TimeStamp; +import com.linkedin.common.VersionTag; +import com.linkedin.common.url.Url; +import 
com.linkedin.common.urn.MLFeatureUrn; +import com.linkedin.common.urn.MLModelUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringArray; +import com.linkedin.data.template.StringMap; +import com.linkedin.ml.metadata.MLHyperParam; +import com.linkedin.ml.metadata.MLHyperParamArray; +import com.linkedin.ml.metadata.MLMetric; +import com.linkedin.ml.metadata.MLMetricArray; +import com.linkedin.ml.metadata.MLModelProperties; +import java.net.URISyntaxException; +import org.testng.annotations.Test; + +public class MLModelPropertiesMapperTest { + + @Test + public void testMapMLModelProperties() throws URISyntaxException { + MLModelProperties input = new MLModelProperties(); + + // Set basic properties + input.setName("TestModel"); + input.setDescription("A test ML model"); + input.setType("Classification"); + + // Set version + VersionTag versionTag = new VersionTag(); + versionTag.setVersionTag("1.0.0"); + input.setVersion(versionTag); + + // Set external URL + Url externalUrl = new Url("https://example.com/model"); + input.setExternalUrl(externalUrl); + + // Set created and last modified timestamps + TimeStamp createdTimeStamp = new TimeStamp(); + createdTimeStamp.setTime(1000L); + Urn userUrn = Urn.createFromString("urn:li:corpuser:test"); + createdTimeStamp.setActor(userUrn); + input.setCreated(createdTimeStamp); + + TimeStamp lastModifiedTimeStamp = new TimeStamp(); + lastModifiedTimeStamp.setTime(2000L); + lastModifiedTimeStamp.setActor(userUrn); + input.setLastModified(lastModifiedTimeStamp); + + // Set custom properties + StringMap customProps = new StringMap(); + customProps.put("key1", "value1"); + customProps.put("key2", "value2"); + input.setCustomProperties(customProps); + + // Set hyper parameters + MLHyperParamArray hyperParams = new MLHyperParamArray(); + MLHyperParam hyperParam1 = new MLHyperParam(); + hyperParam1.setName("learning_rate"); + hyperParam1.setValue("0.01"); + hyperParams.add(hyperParam1); + 
input.setHyperParams(hyperParams); + + // Set training metrics + MLMetricArray trainingMetrics = new MLMetricArray(); + MLMetric metric1 = new MLMetric(); + metric1.setName("accuracy"); + metric1.setValue("0.95"); + trainingMetrics.add(metric1); + input.setTrainingMetrics(trainingMetrics); + + // Set ML features + MLFeatureUrnArray mlFeatures = new MLFeatureUrnArray(); + MLFeatureUrn featureUrn = MLFeatureUrn.createFromString("urn:li:mlFeature:(dataset,feature)"); + mlFeatures.add(featureUrn); + input.setMlFeatures(mlFeatures); + + // Set tags + StringArray tags = new StringArray(); + tags.add("tag1"); + tags.add("tag2"); + input.setTags(tags); + + // Set training and downstream jobs + input.setTrainingJobs( + new com.linkedin.common.UrnArray(Urn.createFromString("urn:li:dataJob:train"))); + input.setDownstreamJobs( + new com.linkedin.common.UrnArray(Urn.createFromString("urn:li:dataJob:predict"))); + + // Create ML Model URN + MLModelUrn modelUrn = + MLModelUrn.createFromString( + "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,unittestmodel,PROD)"); + + // Map the properties + com.linkedin.datahub.graphql.generated.MLModelProperties result = + MLModelPropertiesMapper.map(null, input, modelUrn); + + // Verify mapped properties + assertNotNull(result); + assertEquals(result.getName(), "TestModel"); + assertEquals(result.getDescription(), "A test ML model"); + assertEquals(result.getType(), "Classification"); + assertEquals(result.getVersion(), "1.0.0"); + assertEquals(result.getExternalUrl(), "https://example.com/model"); + + // Verify audit stamps + assertNotNull(result.getCreated()); + assertEquals(result.getCreated().getTime().longValue(), 1000L); + assertEquals(result.getCreated().getActor(), userUrn.toString()); + + assertNotNull(result.getLastModified()); + assertEquals(result.getLastModified().getTime().longValue(), 2000L); + assertEquals(result.getLastModified().getActor(), userUrn.toString()); + + // Verify custom properties + 
assertNotNull(result.getCustomProperties()); + + // Verify hyper parameters + assertNotNull(result.getHyperParams()); + assertEquals(result.getHyperParams().size(), 1); + assertEquals(result.getHyperParams().get(0).getName(), "learning_rate"); + assertEquals(result.getHyperParams().get(0).getValue(), "0.01"); + + // Verify training metrics + assertNotNull(result.getTrainingMetrics()); + assertEquals(result.getTrainingMetrics().size(), 1); + assertEquals(result.getTrainingMetrics().get(0).getName(), "accuracy"); + assertEquals(result.getTrainingMetrics().get(0).getValue(), "0.95"); + + // Verify ML features + assertNotNull(result.getMlFeatures()); + assertEquals(result.getMlFeatures().size(), 1); + assertEquals(result.getMlFeatures().get(0), featureUrn.toString()); + + // Verify tags + assertNotNull(result.getTags()); + assertEquals(result.getTags().get(0), "tag1"); + assertEquals(result.getTags().get(1), "tag2"); + + // Verify lineage info + assertNotNull(result.getMlModelLineageInfo()); + assertEquals(result.getMlModelLineageInfo().getTrainingJobs().size(), 1); + assertEquals(result.getMlModelLineageInfo().getTrainingJobs().get(0), "urn:li:dataJob:train"); + assertEquals(result.getMlModelLineageInfo().getDownstreamJobs().size(), 1); + assertEquals( + result.getMlModelLineageInfo().getDownstreamJobs().get(0), "urn:li:dataJob:predict"); + } + + @Test + public void testMapWithMissingName() throws URISyntaxException { + MLModelProperties input = new MLModelProperties(); + MLModelUrn modelUrn = + MLModelUrn.createFromString( + "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,missingnamemodel,PROD)"); + + com.linkedin.datahub.graphql.generated.MLModelProperties result = + MLModelPropertiesMapper.map(null, input, modelUrn); + + // Verify that name is extracted from URN when not present in input + assertEquals(result.getName(), "missingnamemodel"); + } + + @Test + public void testMapWithMinimalProperties() throws URISyntaxException { + MLModelProperties input = new 
MLModelProperties(); + MLModelUrn modelUrn = + MLModelUrn.createFromString( + "urn:li:mlModel:(urn:li:dataPlatform:sagemaker,minimalmodel,PROD)"); + + com.linkedin.datahub.graphql.generated.MLModelProperties result = + MLModelPropertiesMapper.map(null, input, modelUrn); + + // Verify basic mapping with minimal properties + assertNotNull(result); + assertEquals(result.getName(), "minimalmodel"); + assertNull(result.getDescription()); + assertNull(result.getType()); + assertNull(result.getVersion()); + } +} diff --git a/datahub-web-react/src/app/entity/EntityPage.tsx b/datahub-web-react/src/app/entity/EntityPage.tsx index 916fa417954126..d05f75694ab94e 100644 --- a/datahub-web-react/src/app/entity/EntityPage.tsx +++ b/datahub-web-react/src/app/entity/EntityPage.tsx @@ -66,6 +66,7 @@ export const EntityPage = ({ entityType }: Props) => { entityType === EntityType.MlfeatureTable || entityType === EntityType.MlmodelGroup || entityType === EntityType.GlossaryTerm || + entityType === EntityType.DataProcessInstance || entityType === EntityType.GlossaryNode; return ( diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx index 9bb9bd745d1ee6..bdf77959e97c7f 100644 --- a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx +++ b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx @@ -1,12 +1,7 @@ import React from 'react'; import { ApiOutlined } from '@ant-design/icons'; -import { - DataProcessInstance, - Entity as GeneratedEntity, - EntityType, - OwnershipType, - SearchResult, -} from '../../../types.generated'; +import { Entity as GraphQLEntity } from '@types'; +import { DataProcessInstance, EntityType, OwnershipType, SearchResult } from '../../../types.generated'; import { Preview } from './preview/Preview'; import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from 
'../Entity'; import { EntityProfile } from '../shared/containers/profile/EntityProfile'; @@ -23,32 +18,21 @@ import { EntityMenuItems } from '../shared/EntityDropdown/EntityDropdown'; import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; import { getDataProduct } from '../shared/utils'; -// import SummaryTab from './profile/DataProcessInstaceSummary'; +import SummaryTab from './profile/DataProcessInstanceSummary'; -// const getProcessPlatformName = (data?: DataProcessInstance): string => { -// return ( -// data?.dataPlatformInstance?.platform?.properties?.displayName || -// capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) || -// '' -// ); -// }; - -const getParentEntities = (data: DataProcessInstance): GeneratedEntity[] => { +const getParentEntities = (data: DataProcessInstance): GraphQLEntity[] => { const parentEntity = data?.relationships?.relationships?.find( (rel) => rel.type === 'InstanceOf' && rel.entity?.type === EntityType.DataJob, ); - if (!parentEntity?.entity) return []; + if (!parentEntity || !parentEntity.entity) { + return []; + } - // Convert to GeneratedEntity - return [ - { - type: parentEntity.entity.type, - urn: (parentEntity.entity as any).urn, // Make sure urn exists - relationships: (parentEntity.entity as any).relationships, - }, - ]; + // First cast to unknown, then to Entity with proper type + return [parentEntity.entity]; }; + /** * Definition of the DataHub DataProcessInstance entity. 
*/ @@ -97,18 +81,13 @@ export class DataProcessInstanceEntity implements Entity { urn={urn} entityType={EntityType.DataProcessInstance} useEntityQuery={this.useEntityQuery} - // useUpdateQuery={useUpdateDataProcessInstanceMutation} getOverrideProperties={this.getOverridePropertiesFromEntity} headerDropdownItems={new Set([EntityMenuItems.UPDATE_DEPRECATION, EntityMenuItems.RAISE_INCIDENT])} tabs={[ - // { - // name: 'Documentation', - // component: DocumentationTab, - // }, - // { - // name: 'Summary', - // component: SummaryTab, - // }, + { + name: 'Summary', + component: SummaryTab, + }, { name: 'Lineage', component: LineageTab, @@ -117,14 +96,6 @@ export class DataProcessInstanceEntity implements Entity { name: 'Properties', component: PropertiesTab, }, - // { - // name: 'Incidents', - // component: IncidentTab, - // getDynamicName: (_, processInstance) => { - // const activeIncidentCount = processInstance?.dataProcessInstance?.activeIncidents.total; - // return `Incidents${(activeIncidentCount && ` (${activeIncidentCount})`) || ''}`; - // }, - // }, ]} sidebarSections={this.getSidebarSections()} /> @@ -181,13 +152,11 @@ export class DataProcessInstanceEntity implements Entity { platformLogo={data?.dataPlatformInstance?.platform?.properties?.logoUrl} owners={null} globalTags={null} - // domain={data.domain?.domain} dataProduct={getDataProduct(genericProperties?.dataProduct)} externalUrl={data.properties?.externalUrl} parentContainers={data.parentContainers} parentEntities={parentEntities} container={data.container || undefined} - // health={data.health} /> ); }; @@ -196,6 +165,9 @@ export class DataProcessInstanceEntity implements Entity { const data = result.entity as DataProcessInstance; const genericProperties = this.getGenericEntityProperties(data); const parentEntities = getParentEntities(data); + + const firstState = data?.state && data.state.length > 0 ? 
data.state[0] : undefined; + return ( { platformInstanceId={data.dataPlatformInstance?.instanceId} owners={null} globalTags={null} - // domain={data.domain?.domain} dataProduct={getDataProduct(genericProperties?.dataProduct)} - // deprecation={data.deprecation} insights={result.insights} externalUrl={data.properties?.externalUrl} degree={(result as any).degree} @@ -220,10 +190,9 @@ export class DataProcessInstanceEntity implements Entity { parentContainers={data.parentContainers} parentEntities={parentEntities} container={data.container || undefined} - // duration={data?.state?.[0]?.durationMillis} - // status={data?.state?.[0]?.result?.resultType} - // startTime={data?.state?.[0]?.timestampMillis} - // health={data.health} + duration={firstState?.durationMillis} + status={firstState?.result?.resultType} + startTime={firstState?.timestampMillis} /> ); }; @@ -237,7 +206,6 @@ export class DataProcessInstanceEntity implements Entity { icon: entity?.dataPlatformInstance?.platform?.properties?.logoUrl || undefined, platform: entity?.dataPlatformInstance?.platform, container: entity?.container, - // health: entity?.health || undefined, }; }; diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx index 3a3b0340695d96..9a2acbe11c0845 100644 --- a/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx @@ -39,10 +39,10 @@ export const Preview = ({ health, parentEntities, parentContainers, -}: // duration, -// status, -// startTime, -{ + duration, + status, + startTime, +}: { urn: string; name: string; subType?: string | null; @@ -64,9 +64,9 @@ export const Preview = ({ health?: Health[] | null; parentEntities?: Array | null; parentContainers?: ParentContainersResult | null; - // duration?: number | null; - // status?: string | null; - // startTime?: number | null; + 
duration?: number | null; + status?: string | null; + startTime?: number | null; }): JSX.Element => { const entityRegistry = useEntityRegistry(); return ( @@ -95,9 +95,9 @@ export const Preview = ({ paths={paths} health={health || undefined} parentEntities={parentEntities} - // duration={duration} - // status={status} - // startTime={startTime} + duration={duration} + status={status} + startTime={startTime} /> ); }; diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/profile/DataProcessInstanceSummary.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/profile/DataProcessInstanceSummary.tsx new file mode 100644 index 00000000000000..c6591d4f5faa1d --- /dev/null +++ b/datahub-web-react/src/app/entity/dataProcessInstance/profile/DataProcessInstanceSummary.tsx @@ -0,0 +1,102 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Space, Table, Typography } from 'antd'; +import { formatDetailedDuration } from '@src/app/shared/time/timeUtils'; +import { capitalize } from 'lodash'; +import moment from 'moment'; +import { MlHyperParam, MlMetric, DataProcessInstanceRunResultType } from '../../../../types.generated'; +import { useBaseEntity } from '../../shared/EntityContext'; +import { InfoItem } from '../../shared/components/styled/InfoItem'; +import { GetDataProcessInstanceQuery } from '../../../../graphql/dataProcessInstance.generated'; +import { Pill } from '../../../../alchemy-components/components/Pills'; + +const TabContent = styled.div` + padding: 16px; +`; + +const InfoItemContainer = styled.div<{ justifyContent }>` + display: flex; + position: relative; + justify-content: ${(props) => props.justifyContent}; + padding: 0px 2px; +`; + +const InfoItemContent = styled.div` + padding-top: 8px; + width: 100px; +`; + +const propertyTableColumns = [ + { + title: 'Name', + dataIndex: 'name', + width: 450, + }, + { + title: 'Value', + dataIndex: 'value', + }, +]; + +export default function MLModelSummary() { + const baseEntity 
= useBaseEntity(); + const dpi = baseEntity?.dataProcessInstance; + + const formatStatus = (state) => { + if (!state || state.length === 0) return '-'; + const result = state[0]?.result?.resultType; + const statusColor = result === DataProcessInstanceRunResultType.Success ? 'green' : 'red'; + return ; + }; + + const formatDuration = (state) => { + if (!state || state.length === 0) return '-'; + return formatDetailedDuration(state[0]?.durationMillis); + }; + + return ( + + + Details + + + + {dpi?.properties?.created?.time + ? moment(dpi.properties.created.time).format('YYYY-MM-DD HH:mm:ss') + : '-'} + + + + {formatStatus(dpi?.state)} + + + {formatDuration(dpi?.state)} + + + {dpi?.mlTrainingRunProperties?.id} + + + {dpi?.properties?.created?.actor} + + + + + {dpi?.mlTrainingRunProperties?.outputUrls} + + + Training Metrics + + Hyper Parameters +
+ + + ); +} diff --git a/datahub-web-react/src/app/entity/mlModel/MLModelEntity.tsx b/datahub-web-react/src/app/entity/mlModel/MLModelEntity.tsx index b77f6a19436a51..5e75b4680e427f 100644 --- a/datahub-web-react/src/app/entity/mlModel/MLModelEntity.tsx +++ b/datahub-web-react/src/app/entity/mlModel/MLModelEntity.tsx @@ -151,7 +151,7 @@ export class MLModelEntity implements Entity { }; displayName = (data: MlModel) => { - return data.name || data.urn; + return data.properties?.name || data.name || data.urn; }; getGenericEntityProperties = (mlModel: MlModel) => { diff --git a/datahub-web-react/src/app/entity/mlModel/preview/Preview.tsx b/datahub-web-react/src/app/entity/mlModel/preview/Preview.tsx index 4b57976dfe1a27..7ea33ba4c15f6f 100644 --- a/datahub-web-react/src/app/entity/mlModel/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/mlModel/preview/Preview.tsx @@ -21,7 +21,8 @@ export const Preview = ({ return ( ` + display: flex; + position: relative; + justify-content: ${(props) => props.justifyContent}; + padding: 0px 2px; +`; + +const InfoItemContent = styled.div` + padding-top: 8px; + width: 100px; + display: flex; + flex-wrap: wrap; + gap: 5px; +`; + +const JobLink = styled(Link)` + color: ${colors.blue[700]}; + &:hover { + text-decoration: underline; + } +`; + export default function MLModelSummary() { const baseEntity = useBaseEntity(); const model = baseEntity?.mlModel; + const entityRegistry = useEntityRegistry(); const propertyTableColumns = [ { @@ -26,9 +55,72 @@ export default function MLModelSummary() { }, ]; + const renderTrainingJobs = () => { + const trainingJobs = + model?.trainedBy?.relationships?.map((relationship) => relationship.entity).filter(notEmpty) || []; + + if (trainingJobs.length === 0) return '-'; + + return ( +
+ {trainingJobs.map((job, index) => { + const { urn, name } = job as { urn: string; name?: string }; + return ( + + + {name || urn} + + {index < trainingJobs.length - 1 && ', '} + + ); + })} +
+ ); + }; + return ( + Model Details + + + {model?.versionProperties?.version?.versionTag} + + + + {model?.properties?.created?.time + ? moment(model.properties.created.time).format('YYYY-MM-DD HH:mm:ss') + : '-'} + + + + + {model?.properties?.lastModified?.time + ? moment(model.properties.lastModified.time).format('YYYY-MM-DD HH:mm:ss') + : '-'} + + + + {model?.properties?.created?.actor} + + + + + + {model?.versionProperties?.aliases?.map((alias) => ( + + ))} + + + + {renderTrainingJobs()} + + Training Metrics
{ }; displayName = (data: MlModelGroup) => { - return data.name || data.urn; + return data.properties?.name || data.name || data.urn; }; getGenericEntityProperties = (mlModelGroup: MlModelGroup) => { diff --git a/datahub-web-react/src/app/entity/mlModelGroup/preview/Preview.tsx b/datahub-web-react/src/app/entity/mlModelGroup/preview/Preview.tsx index 910397af899f57..76ad9c06daece3 100644 --- a/datahub-web-react/src/app/entity/mlModelGroup/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/mlModelGroup/preview/Preview.tsx @@ -19,7 +19,8 @@ export const Preview = ({ return ( ` + display: flex; + position: relative; + justify-content: ${(props) => props.justifyContent}; + padding: 12px 2px 20px 2px; +`; + +const InfoItemContent = styled.div` + padding-top: 8px; + width: 100px; +`; + +const NameContainer = styled.div` + display: flex; + align-items: center; +`; + +const NameLink = styled.a` + font-weight: 700; + color: inherit; + font-size: 0.9rem; + &:hover { + color: ${colors.blue[400]} !important; + } +`; + +const TagContainer = styled.div` + display: inline-flex; + margin-left: 0px; + margin-top: 3px; + flex-wrap: wrap; + margin-right: 8px; + backgroundcolor: white; + gap: 5px; +`; + +const StyledTable = styled(Table)` + &&& .ant-table-cell { + padding: 16px; + } +` as typeof Table; + +const ModelsContainer = styled.div` + width: 100%; + padding: 20px; +`; + +const VersionContainer = styled.div` + display: flex; + align-items: center; +`; export default function MLGroupModels() { const baseEntity = useBaseEntity(); - const models = baseEntity?.mlModelGroup?.incoming?.relationships?.map((relationship) => relationship.entity) || []; - const entityRegistry = useEntityRegistry(); + const modelGroup = baseEntity?.mlModelGroup; + + const models = + baseEntity?.mlModelGroup?.incoming?.relationships + ?.map((relationship) => relationship.entity) + .filter(notEmpty) || []; + + const columns = [ + { + title: 'Name', + dataIndex: 'name', + key: 'name', + width: 300, + 
render: (_: any, record) => ( + + + {record?.properties?.propertiesName || record?.name} + + + ), + }, + { + title: 'Version', + key: 'version', + width: 70, + render: (_: any, record: any) => ( + {record.versionProperties?.version?.versionTag || '-'} + ), + }, + { + title: 'Created At', + key: 'createdAt', + width: 150, + render: (_: any, record: any) => ( + + {record.properties?.createdTS?.time + ? moment(record.properties.createdTS.time).format('YYYY-MM-DD HH:mm:ss') + : '-'} + + ), + }, + { + title: 'Aliases', + key: 'aliases', + width: 200, + render: (_: any, record: any) => { + const aliases = record.versionProperties?.aliases || []; + + return ( + + {aliases.map((alias) => ( + + ))} + + ); + }, + }, + { + title: 'Tags', + key: 'tags', + width: 200, + render: (_: any, record: any) => { + const tags = record.properties?.tags || []; + + return ( + + {tags.map((tag) => ( + + ))} + + ); + }, + }, + { + title: 'Description', + dataIndex: 'description', + key: 'description', + width: 300, + render: (_: any, record: any) => { + const editableDesc = record.editableProperties?.description; + const originalDesc = record.description; + + return {editableDesc || originalDesc || '-'}; + }, + }, + ]; return ( - <> - - Models} - renderItem={(item) => ( - - {entityRegistry.renderPreview(EntityType.Mlmodel, PreviewType.PREVIEW, item)} - - )} - /> - - + + Model Group Details + + + + {modelGroup?.properties?.created?.time + ? moment(modelGroup.properties.created.time).format('YYYY-MM-DD HH:mm:ss') + : '-'} + + + + + {modelGroup?.properties?.lastModified?.time + ? 
moment(modelGroup.properties.lastModified.time).format('YYYY-MM-DD HH:mm:ss') + : '-'} + + + {modelGroup?.properties?.created?.actor && ( + + {modelGroup.properties.created?.actor} + + )} + + Models + , + }} + /> + ); } diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx index 9648aaf852bbe3..9da7b5d0ffb0c9 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchSection.tsx @@ -2,7 +2,7 @@ import React from 'react'; import * as QueryString from 'query-string'; import { useHistory, useLocation } from 'react-router'; import { ApolloError } from '@apollo/client'; -import { FacetFilterInput } from '../../../../../../types.generated'; +import { EntityType, FacetFilterInput } from '../../../../../../types.generated'; import useFilters from '../../../../../search/utils/useFilters'; import { navigateToEntitySearchUrl } from './navigateToEntitySearchUrl'; import { FilterSet, GetSearchResultsParams, SearchResultsInterface } from './types'; @@ -16,6 +16,30 @@ import { } from '../../../../../search/utils/types'; const FILTER = 'filter'; +const SEARCH_ENTITY_TYPES = [ + EntityType.Dataset, + EntityType.Dashboard, + EntityType.Chart, + EntityType.Mlmodel, + EntityType.MlmodelGroup, + EntityType.MlfeatureTable, + EntityType.Mlfeature, + EntityType.MlprimaryKey, + EntityType.DataFlow, + EntityType.DataJob, + EntityType.GlossaryTerm, + EntityType.GlossaryNode, + EntityType.Tag, + EntityType.Role, + EntityType.CorpUser, + EntityType.CorpGroup, + EntityType.Container, + EntityType.Domain, + EntityType.DataProduct, + EntityType.Notebook, + EntityType.BusinessAttribute, + EntityType.DataProcessInstance, +]; function getParamsWithoutFilters(params: QueryString.ParsedQuery) { const paramsCopy = { 
...params }; @@ -137,6 +161,7 @@ export const EmbeddedListSearchSection = ({ return ( ; + duration: Maybe; + status: Maybe; +} + +export default function DataProcessInstanceRightColumn({ startTime, duration, status }: Props) { + const statusPillColor = status === DataProcessInstanceRunResultType.Success ? 'green' : 'red'; + + return ( + <> + {startTime && ( + {toLocalDateTimeString(startTime)}} + title={Start Time} + trigger="hover" + overlayInnerStyle={popoverStyles.overlayInnerStyle} + overlayStyle={popoverStyles.overlayStyle} + > + {toRelativeTimeString(startTime)} + + )} + {duration && ( + {formatDetailedDuration(duration)}} + title={Duration} + trigger="hover" + overlayInnerStyle={popoverStyles.overlayInnerStyle} + overlayStyle={popoverStyles.overlayStyle} + > + {formatDuration(duration)} + + )} + {status && ( + <> + + + + + )} + + ); +} diff --git a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx index a19862e83ae510..42a32a5a1951ff 100644 --- a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx +++ b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx @@ -1,8 +1,8 @@ +import DataProcessInstanceRightColumn from '@app/preview/DataProcessInstanceRightColumn'; import React, { ReactNode, useState } from 'react'; import { Divider, Tooltip, Typography } from 'antd'; import { Link } from 'react-router-dom'; import styled from 'styled-components'; - import { GlobalTags, Owner, @@ -200,6 +200,9 @@ interface Props { paths?: EntityPath[]; health?: Health[]; parentDataset?: Dataset; + startTime?: number | null; + duration?: number | null; + status?: string | null; } export default function DefaultPreviewCard({ @@ -243,6 +246,9 @@ export default function DefaultPreviewCard({ paths, health, parentDataset, + startTime, + duration, + status, }: Props) { // sometimes these lists will be rendered inside an entity container (for example, in the case of impact analysis) // in those cases, we may want 
to enrich the preview w/ context about the container entity @@ -270,7 +276,8 @@ export default function DefaultPreviewCard({ event.stopPropagation(); }; - const shouldShowRightColumn = (topUsers && topUsers.length > 0) || (owners && owners.length > 0); + const shouldShowRightColumn = + (topUsers && topUsers.length > 0) || (owners && owners.length > 0) || startTime || duration || status; const uniqueOwners = getUniqueOwners(owners); return ( @@ -380,6 +387,7 @@ export default function DefaultPreviewCard({ {shouldShowRightColumn && ( + {topUsers && topUsers?.length > 0 && ( <> diff --git a/datahub-web-react/src/app/shared/time/timeUtils.tsx b/datahub-web-react/src/app/shared/time/timeUtils.tsx index 26d768a204be6f..4ff6ffedf65337 100644 --- a/datahub-web-react/src/app/shared/time/timeUtils.tsx +++ b/datahub-web-react/src/app/shared/time/timeUtils.tsx @@ -206,3 +206,41 @@ export function getTimeRangeDescription(startDate: moment.Moment | null, endDate return 'Unknown time range'; } + +export function formatDuration(durationMs: number): string { + const duration = moment.duration(durationMs); + const hours = Math.floor(duration.asHours()); + const minutes = duration.minutes(); + const seconds = duration.seconds(); + + if (hours === 0 && minutes === 0) { + return `${seconds} secs`; + } + + if (hours === 0) { + return minutes === 1 ? `${minutes} min` : `${minutes} mins`; + } + + const minuteStr = minutes > 0 ? ` ${minutes} mins` : ''; + return hours === 1 ? `${hours} hr${minuteStr}` : `${hours} hrs${minuteStr}`; +} + +export function formatDetailedDuration(durationMs: number): string { + const duration = moment.duration(durationMs); + const hours = Math.floor(duration.asHours()); + const minutes = duration.minutes(); + const seconds = duration.seconds(); + + const parts: string[] = []; + + if (hours > 0) { + parts.push(hours === 1 ? `${hours} hr` : `${hours} hrs`); + } + if (minutes > 0) { + parts.push(minutes === 1 ? 
`${minutes} min` : `${minutes} mins`); + } + if (seconds > 0) { + parts.push(`${seconds} secs`); + } + return parts.join(' '); +} diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index ecac2997489354..e94fc207fefd97 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -897,6 +897,10 @@ fragment nonRecursiveMLModel on MLModel { key value } + mlModelLineageInfo { + trainingJobs + downstreamJobs + } } globalTags { ...globalTagsFields @@ -971,6 +975,14 @@ fragment nonRecursiveMLModelGroupFields on MLModelGroup { time actor } + lastModified { + time + actor + } + mlModelLineageInfo { + trainingJobs + downstreamJobs + } } browsePathV2 { ...browsePathV2Fields diff --git a/datahub-web-react/src/graphql/lineage.graphql b/datahub-web-react/src/graphql/lineage.graphql index 457936ed62cd2e..f387c0c050668f 100644 --- a/datahub-web-react/src/graphql/lineage.graphql +++ b/datahub-web-react/src/graphql/lineage.graphql @@ -272,6 +272,7 @@ fragment lineageNodeProperties on EntityWithRelationships { removed } properties { + propertiesName: name createdTS: created { time actor @@ -296,6 +297,9 @@ fragment lineageNodeProperties on EntityWithRelationships { name description origin + tags { + ...globalTagsFields + } platform { ...platformFields } @@ -305,6 +309,34 @@ fragment lineageNodeProperties on EntityWithRelationships { status { removed } + versionProperties { + versionSet { + urn + type + } + version { + versionTag + } + aliases { + versionTag + } + comment + } + properties { + propertiesName: name + createdTS: created { + time + actor + } + tags + customProperties { + key + value + } + } + editableProperties { + description + } structuredProperties { properties { ...structuredPropertiesFields diff --git a/datahub-web-react/src/graphql/mlModel.graphql b/datahub-web-react/src/graphql/mlModel.graphql index ad97c7c6f530a1..ba10a243e6f9b3 100644 --- 
a/datahub-web-react/src/graphql/mlModel.graphql +++ b/datahub-web-react/src/graphql/mlModel.graphql @@ -20,6 +20,23 @@ query getMLModel($urn: String!) { } } } + trainedBy: relationships(input: { types: ["TrainedBy"], direction: OUTGOING, start: 0, count: 100 }) { + start + count + total + relationships { + type + direction + entity { + ... on DataProcessInstance { + urn + name + type + ...dataProcessInstanceFields + } + } + } + } privileges { ...entityPrivileges } diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index d12193b471d469..be72ff31a4f264 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -886,6 +886,9 @@ fragment searchResultsWithoutSchemaField on Entity { ...structuredPropertiesFields } } + properties { + propertiesName: name + } } ... on MLModelGroup { name @@ -908,6 +911,9 @@ fragment searchResultsWithoutSchemaField on Entity { ...structuredPropertiesFields } } + properties { + propertiesName: name + } } ... on Tag { name @@ -954,6 +960,9 @@ fragment searchResultsWithoutSchemaField on Entity { ...versionProperties } } + ... on DataProcessInstance { + ...dataProcessInstanceFields + } ... 
on DataPlatformInstance { ...dataPlatformInstanceFields } From 22e012a2093e336d27e37ad55789c2f05744c0b7 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 29 Jan 2025 16:44:31 +0100 Subject: [PATCH 177/249] feat(ingestion/airflow): Add support for mutiple datahub emitter (#12398) --- .../src/datahub_airflow_plugin/_config.py | 32 ++++++++++--- .../datahub_plugin_v22.py | 20 +++++---- .../datahub_airflow_plugin/hooks/datahub.py | 45 ++++++++++++++++++- .../tests/integration/test_plugin.py | 40 +++++++++++++++-- .../src/datahub/emitter/composite_emitter.py | 36 +++++++++++++++ .../datahub/emitter/test_composite_emitter.py | 42 +++++++++++++++++ 6 files changed, 197 insertions(+), 18 deletions(-) create mode 100644 metadata-ingestion/src/datahub/emitter/composite_emitter.py create mode 100644 metadata-ingestion/tests/unit/datahub/emitter/test_composite_emitter.py diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py index 6d6ba601556788..473cf9f907ba84 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -1,14 +1,18 @@ from enum import Enum -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Dict, List, Optional, Union from airflow.configuration import conf +from pydantic import root_validator from pydantic.fields import Field import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel if TYPE_CHECKING: - from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook + from datahub_airflow_plugin.hooks.datahub import ( + DatahubCompositeHook, + DatahubGenericHook, + ) class DatajobUrl(Enum): @@ -27,6 +31,8 @@ class DatahubLineageConfig(ConfigModel): # DataHub hook connection ID. 
datahub_conn_id: str + _datahub_connection_ids: List[str] + # Cluster to associate with the pipelines and tasks. Defaults to "prod". cluster: str = builder.DEFAULT_FLOW_CLUSTER @@ -68,11 +74,25 @@ class DatahubLineageConfig(ConfigModel): disable_openlineage_plugin: bool = True - def make_emitter_hook(self) -> "DatahubGenericHook": + def make_emitter_hook(self) -> Union["DatahubGenericHook", "DatahubCompositeHook"]: # This is necessary to avoid issues with circular imports. - from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook - - return DatahubGenericHook(self.datahub_conn_id) + from datahub_airflow_plugin.hooks.datahub import ( + DatahubCompositeHook, + DatahubGenericHook, + ) + + if len(self._datahub_connection_ids) == 1: + return DatahubGenericHook(self._datahub_connection_ids[0]) + else: + return DatahubCompositeHook(self._datahub_connection_ids) + + @root_validator(skip_on_failure=True) + def split_conn_ids(cls, values: Dict) -> Dict: + if not values.get("datahub_conn_id"): + raise ValueError("datahub_conn_id is required") + conn_ids = values.get("datahub_conn_id", "").split(",") + cls._datahub_connection_ids = [conn_id.strip() for conn_id in conn_ids] + return values def get_lineage_config() -> DatahubLineageConfig: diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index 99b0a40fd3c13e..b7e6e109e8567c 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -29,6 +29,11 @@ TASK_ON_RETRY_CALLBACK = "on_retry_callback" +def load_config_v22(): + plugin_config = get_lineage_config() + return plugin_config + + def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]: # TODO: Fix for 
https://github.com/apache/airflow/commit/1b1f3fabc5909a447a6277cafef3a0d4ef1f01ae # in Airflow 2.4. @@ -99,9 +104,7 @@ def datahub_task_status_callback(context, status): task_inlets = get_task_inlets_advanced(task, context) task_outlets = get_task_outlets(task) - emitter = ( - DatahubGenericHook(config.datahub_conn_id).get_underlying_hook().make_emitter() - ) + emitter = config.make_emitter_hook().make_emitter() dataflow = AirflowGenerator.generate_dataflow( config=config, @@ -217,7 +220,7 @@ def datahub_pre_execution(context): def _wrap_pre_execution(pre_execution): def custom_pre_execution(context): - config = get_lineage_config() + config = load_config_v22() if config.enabled: context["_datahub_config"] = config datahub_pre_execution(context) @@ -231,7 +234,7 @@ def custom_pre_execution(context): def _wrap_on_failure_callback(on_failure_callback): def custom_on_failure_callback(context): - config = get_lineage_config() + config = load_config_v22() if config.enabled: context["_datahub_config"] = config try: @@ -251,7 +254,7 @@ def custom_on_failure_callback(context): def _wrap_on_success_callback(on_success_callback): def custom_on_success_callback(context): - config = get_lineage_config() + config = load_config_v22() if config.enabled: context["_datahub_config"] = config try: @@ -271,7 +274,8 @@ def custom_on_success_callback(context): def _wrap_on_retry_callback(on_retry_callback): def custom_on_retry_callback(context): - config = get_lineage_config() + config = load_config_v22() + if config.enabled: context["_datahub_config"] = config try: @@ -363,7 +367,7 @@ def _patch_datahub_policy(): _patch_policy(settings) - plugin_config = get_lineage_config() + plugin_config = load_config_v22() telemetry.telemetry_instance.ping( "airflow-plugin-init", { diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py index 
26c5026c075bd7..e2e4c9fef6f16e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py @@ -1,8 +1,9 @@ -from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Union from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook +from datahub.emitter.composite_emitter import CompositeEmitter from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( @@ -303,3 +304,45 @@ def emit( # Retained for backwards compatibility. emit_mces = emit + + +class DatahubCompositeHook(BaseHook): + """ + A hook that can emit metadata to multiple DataHub instances. + + :param datahub_conn_ids: References to the DataHub connections. + :type datahub_conn_ids: List[str] + """ + + hooks: List[DatahubGenericHook] = [] + + def __init__(self, datahub_conn_ids: List[str]) -> None: + self.datahub_conn_ids = datahub_conn_ids + + def make_emitter(self) -> CompositeEmitter: + print(f"Create emitters for {self.datahub_conn_ids}") + return CompositeEmitter( + [ + self._get_underlying_hook(conn_id).make_emitter() + for conn_id in self.datahub_conn_ids + ] + ) + + def emit( + self, + items: Sequence[ + Union[ + MetadataChangeEvent, + MetadataChangeProposal, + MetadataChangeProposalWrapper, + ] + ], + ) -> None: + emitter = self.make_emitter() + + for item in items: + print(f"emitting item {item}") + emitter.emit(item) + + def _get_underlying_hook(self, conn_id: str) -> DatahubGenericHook: + return DatahubGenericHook(conn_id) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 2744c26021cde3..8d0bc9748ff5fd 100644 
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -50,6 +50,7 @@ class AirflowInstance: password: str metadata_file: pathlib.Path + metadata_file2: pathlib.Path @property def airflow_url(self) -> str: @@ -178,6 +179,7 @@ def _run_airflow( tmp_path: pathlib.Path, dags_folder: pathlib.Path, is_v1: bool, + multiple_connections: bool, ) -> Iterator[AirflowInstance]: airflow_home = tmp_path / "airflow_home" print(f"Using airflow home: {airflow_home}") @@ -189,7 +191,9 @@ def _run_airflow( print(f"Using airflow port: {airflow_port}") datahub_connection_name = "datahub_file_default" + datahub_connection_name_2 = "datahub_file_default_2" meta_file = tmp_path / "datahub_metadata.json" + meta_file2 = tmp_path / "datahub_metadata_2.json" environment = { **os.environ, @@ -204,7 +208,9 @@ def _run_airflow( "AIRFLOW__API__AUTH_BACKEND": "airflow.api.auth.backend.basic_auth", # Configure the datahub plugin and have it write the MCPs to a file. 
"AIRFLOW__CORE__LAZY_LOAD_PLUGINS": "False" if is_v1 else "True", - "AIRFLOW__DATAHUB__CONN_ID": datahub_connection_name, + "AIRFLOW__DATAHUB__CONN_ID": f"{datahub_connection_name}, {datahub_connection_name_2}" + if multiple_connections + else datahub_connection_name, "AIRFLOW__DATAHUB__DAG_FILTER_STR": f'{{ "deny": ["{DAG_TO_SKIP_INGESTION}"] }}', f"AIRFLOW_CONN_{datahub_connection_name.upper()}": Connection( conn_id="datahub_file_default", @@ -251,6 +257,13 @@ def _run_airflow( "SQLALCHEMY_SILENCE_UBER_WARNING": "1", } + if multiple_connections: + environment[f"AIRFLOW_CONN_{datahub_connection_name_2.upper()}"] = Connection( + conn_id="datahub_file_default2", + conn_type="datahub-file", + host=str(meta_file2), + ).get_uri() + if not HAS_AIRFLOW_STANDALONE_CMD: raise pytest.skip("Airflow standalone command is not available") @@ -315,6 +328,7 @@ def _run_airflow( username=airflow_username, password=airflow_password, metadata_file=meta_file, + metadata_file2=meta_file2, ) yield airflow_instance @@ -355,10 +369,11 @@ class DagTestCase: success: bool = True v2_only: bool = False + multiple_connections: bool = False test_cases = [ - DagTestCase("simple_dag"), + DagTestCase("simple_dag", multiple_connections=True), DagTestCase("basic_iolets"), DagTestCase("dag_to_skip", v2_only=True), DagTestCase("snowflake_operator", success=False, v2_only=True), @@ -441,7 +456,10 @@ def test_airflow_plugin( dag_id = test_case.dag_id with _run_airflow( - tmp_path, dags_folder=DAGS_FOLDER, is_v1=is_v1 + tmp_path, + dags_folder=DAGS_FOLDER, + is_v1=is_v1, + multiple_connections=test_case.multiple_connections, ) as airflow_instance: print(f"Running DAG {dag_id}...") _wait_for_dag_to_load(airflow_instance, dag_id) @@ -491,6 +509,21 @@ def test_airflow_plugin( ], ) + if test_case.multiple_connections: + _sanitize_output_file(airflow_instance.metadata_file2) + check_golden_file( + pytestconfig=pytestconfig, + output_path=airflow_instance.metadata_file2, + golden_path=golden_path, + 
ignore_paths=[ + # TODO: If we switched to Git urls, maybe we could get this to work consistently. + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['datahub_sql_parser_error'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['openlineage_.*'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['log_url'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\['externalUrl'\]", + ], + ) + def _sanitize_output_file(output_path: pathlib.Path) -> None: # Overwrite some custom properties in the output file to make it easier to compare. @@ -534,6 +567,7 @@ def _sanitize(obj: Any) -> None: tmp_path=pathlib.Path(tempfile.mkdtemp("airflow-plugin-test")), dags_folder=DAGS_FOLDER, is_v1=not HAS_AIRFLOW_LISTENER_API, + multiple_connections=False, ) as airflow_instance: # input("Press enter to exit...") breakpoint() diff --git a/metadata-ingestion/src/datahub/emitter/composite_emitter.py b/metadata-ingestion/src/datahub/emitter/composite_emitter.py new file mode 100644 index 00000000000000..fe35f74be11cb3 --- /dev/null +++ b/metadata-ingestion/src/datahub/emitter/composite_emitter.py @@ -0,0 +1,36 @@ +from typing import Callable, List, Optional, Union + +from datahub.emitter.generic_emitter import Emitter +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( + MetadataChangeEvent, + MetadataChangeProposal, +) + + +# Experimental composite emitter that allows multiple emitters to be used in a single ingestion job +class CompositeEmitter(Emitter): + def __init__(self, emitters: List[Emitter]) -> None: + self.emitters = emitters + + def emit( + self, + item: Union[ + MetadataChangeEvent, + MetadataChangeProposal, + MetadataChangeProposalWrapper, + ], + callback: Optional[Callable[[Exception, str], None]] = None, + ) -> None: + callback_called = False + for emitter in self.emitters: + if not callback_called: + # We want to ensure that the callback is only called once and we tie it to the 
first emitter + emitter.emit(item, callback) + callback_called = True + else: + emitter.emit(item) + + def flush(self) -> None: + for emitter in self.emitters: + emitter.flush() diff --git a/metadata-ingestion/tests/unit/datahub/emitter/test_composite_emitter.py b/metadata-ingestion/tests/unit/datahub/emitter/test_composite_emitter.py new file mode 100644 index 00000000000000..fdab1224d77d4a --- /dev/null +++ b/metadata-ingestion/tests/unit/datahub/emitter/test_composite_emitter.py @@ -0,0 +1,42 @@ +from unittest.mock import MagicMock + +import pytest + +from datahub.emitter.composite_emitter import CompositeEmitter +from datahub.emitter.generic_emitter import Emitter +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile + + +@pytest.fixture +def mock_emitters(): + return [MagicMock(spec=Emitter), MagicMock(spec=Emitter)] + + +def test_composite_emitter_emit(mock_emitters): + composite_emitter = CompositeEmitter(mock_emitters) + item = MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:mysql,User.UserAccount,PROD)", + aspect=DatasetProfile( + rowCount=2000, + columnCount=15, + timestampMillis=1626995099686, + ), + ) + callback = MagicMock() + + composite_emitter.emit(item, callback) + + mock_emitters[0].emit.assert_called_once_with(item, callback) + mock_emitters[1].emit.assert_called_once_with(item) + assert mock_emitters[0].emit.call_count == 1 + assert mock_emitters[1].emit.call_count == 1 + + +def test_composite_emitter_flush(mock_emitters): + composite_emitter = CompositeEmitter(mock_emitters) + + composite_emitter.flush() + + for emitter in mock_emitters: + emitter.flush.assert_called_once() From cc0d43f5a82d9e4bf78a50d7af26f8dee66dfe86 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 29 Jan 2025 21:37:17 +0530 Subject: [PATCH 178/249] feat(ingest): add datahub apply source (#12482) --- metadata-ingestion/setup.py | 1 + 
.../src/datahub/cli/container_cli.py | 65 +---- .../ingestion/source/apply/__init__.py | 0 .../ingestion/source/apply/datahub_apply.py | 223 ++++++++++++++++++ 4 files changed, 225 insertions(+), 64 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/apply/__init__.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/apply/datahub_apply.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index b3175989309849..e603b5f6ac1d30 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -743,6 +743,7 @@ "looker = datahub.ingestion.source.looker.looker_source:LookerDashboardSource", "lookml = datahub.ingestion.source.looker.lookml_source:LookMLSource", "datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource", + "datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource", "datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource", "datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource", "mlflow = datahub.ingestion.source.mlflow:MLflowSource", diff --git a/metadata-ingestion/src/datahub/cli/container_cli.py b/metadata-ingestion/src/datahub/cli/container_cli.py index 5f1dc35edd3ee1..d44eda15c6f8c8 100644 --- a/metadata-ingestion/src/datahub/cli/container_cli.py +++ b/metadata-ingestion/src/datahub/cli/container_cli.py @@ -1,19 +1,8 @@ import logging -from typing import Any, List import click -import progressbar -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import get_default_graph -from datahub.metadata.schema_classes import ( - DomainsClass, - GlossaryTermAssociationClass, - OwnerClass, - OwnershipTypeClass, - TagAssociationClass, -) -from datahub.specific.dataset import DatasetPatchBuilder +from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container logger = logging.getLogger(__name__) @@ -24,58 
+13,6 @@ def container() -> None: pass -def apply_association_to_container( - container_urn: str, - association_urn: str, - association_type: str, -) -> None: - """ - Common function to add either tags, terms, domains, or owners to child datasets (for now). - - Args: - container_urn: The URN of the container - association_urn: The URN of the tag, term, or user to apply - association_type: One of 'tag', 'term', 'domain' or 'owner' - """ - urns: List[str] = [] - graph = get_default_graph() - logger.info(f"Using {graph}") - urns.extend( - graph.get_urns_by_filter( - container=container_urn, batch_size=1000, entity_types=["dataset"] - ) - ) - - all_patches: List[Any] = [] - for urn in urns: - builder = DatasetPatchBuilder(urn) - patches: List[Any] = [] - if association_type == "tag": - patches = builder.add_tag(TagAssociationClass(association_urn)).build() - elif association_type == "term": - patches = builder.add_term( - GlossaryTermAssociationClass(association_urn) - ).build() - elif association_type == "owner": - patches = builder.add_owner( - OwnerClass( - owner=association_urn, - type=OwnershipTypeClass.TECHNICAL_OWNER, - ) - ).build() - elif association_type == "domain": - patches = [ - MetadataChangeProposalWrapper( - entityUrn=urn, - aspect=DomainsClass(domains=[association_urn]), - ) - ] - all_patches.extend(patches) - mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True) - for mcp in mcps_iter: - graph.emit(mcp) - - @container.command() @click.option("--container-urn", required=True, type=str) @click.option("--tag-urn", required=True, type=str) diff --git a/metadata-ingestion/src/datahub/ingestion/source/apply/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/apply/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/ingestion/source/apply/datahub_apply.py b/metadata-ingestion/src/datahub/ingestion/source/apply/datahub_apply.py new file mode 100644 index 
00000000000000..6e80fa5972bc56 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/apply/datahub_apply.py @@ -0,0 +1,223 @@ +import logging +from functools import partial +from typing import Any, Iterable, List, Optional, Union + +import progressbar +from pydantic import Field + +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SupportStatus, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source_helpers import auto_workunit_reporter +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph, get_default_graph +from datahub.metadata.schema_classes import ( + DomainsClass, + GlossaryTermAssociationClass, + MetadataChangeProposalClass, + OwnerClass, + OwnershipTypeClass, + TagAssociationClass, +) +from datahub.specific.dataset import DatasetPatchBuilder + +logger = logging.getLogger(__name__) + + +def apply_association_to_container( + container_urn: str, + association_urn: str, + association_type: str, + emit: bool = True, + graph: Optional[DataHubGraph] = None, +) -> Optional[List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]]: + """ + Common function to add either tags, terms, domains, or owners to child datasets (for now). 
+ + Args: + container_urn: The URN of the container + association_urn: The URN of the tag, term, or user to apply + association_type: One of 'tag', 'term', 'domain' or 'owner' + """ + urns: List[str] = [container_urn] + if not graph: + graph = get_default_graph() + logger.info(f"Using {graph}") + urns.extend( + graph.get_urns_by_filter( + container=container_urn, + batch_size=1000, + entity_types=["dataset", "container"], + ) + ) + + all_patches: List[Any] = [] + for urn in urns: + builder = DatasetPatchBuilder(urn) + patches: List[Any] = [] + if association_type == "tag": + patches = builder.add_tag(TagAssociationClass(association_urn)).build() + elif association_type == "term": + patches = builder.add_term( + GlossaryTermAssociationClass(association_urn) + ).build() + elif association_type == "owner": + patches = builder.add_owner( + OwnerClass( + owner=association_urn, + type=OwnershipTypeClass.TECHNICAL_OWNER, + ) + ).build() + elif association_type == "domain": + patches = [ + MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=DomainsClass(domains=[association_urn]), + ) + ] + all_patches.extend(patches) + if emit: + mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True) + for mcp in mcps_iter: + graph.emit(mcp) + return None + else: + return all_patches + + +class DomainApplyConfig(ConfigModel): + assets: List[str] = Field( + default_factory=list, + description="List of assets to apply domain hierarchichaly. Currently only containers and datasets are supported", + ) + domain_urn: str = Field(default="") + + +class TagApplyConfig(ConfigModel): + assets: List[str] = Field( + default_factory=list, + description="List of assets to apply tag hierarchichaly. Currently only containers and datasets are supported", + ) + tag_urn: str = Field(default="") + + +class TermApplyConfig(ConfigModel): + assets: List[str] = Field( + default_factory=list, + description="List of assets to apply term hierarchichaly. 
Currently only containers and datasets are supported", + ) + term_urn: str = Field(default="") + + +class OwnerApplyConfig(ConfigModel): + assets: List[str] = Field( + default_factory=list, + description="List of assets to apply owner hierarchichaly. Currently only containers and datasets are supported", + ) + owner_urn: str = Field(default="") + + +class DataHubApplyConfig(ConfigModel): + domain_apply: Optional[List[DomainApplyConfig]] = Field( + default=None, + description="List to apply domains to assets", + ) + tag_apply: Optional[List[TagApplyConfig]] = Field( + default=None, + description="List to apply tags to assets", + ) + term_apply: Optional[List[TermApplyConfig]] = Field( + default=None, + description="List to apply terms to assets", + ) + owner_apply: Optional[List[OwnerApplyConfig]] = Field( + default=None, + description="List to apply owners to assets", + ) + + +@platform_name("DataHubApply") +@config_class(DataHubApplyConfig) +@support_status(SupportStatus.TESTING) +class DataHubApplySource(Source): + """ + This source is a helper over CLI + so people can use the helper to apply various metadata changes to DataHub + via Managed Ingestion + """ + + def __init__(self, ctx: PipelineContext, config: DataHubApplyConfig): + self.ctx = ctx + self.config = config + self.report = SourceReport() + self.graph = ctx.require_graph() + + def _yield_workunits( + self, + proposals: List[ + Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass] + ], + ) -> Iterable[MetadataWorkUnit]: + for proposal in proposals: + if isinstance(proposal, MetadataChangeProposalWrapper): + yield proposal.as_workunit() + else: + yield MetadataWorkUnit( + id=MetadataWorkUnit.generate_workunit_id(proposal), + mcp_raw=proposal, + ) + + def _handle_assets( + self, assets: List[str], apply_urn: str, apply_type: str + ) -> Iterable[MetadataWorkUnit]: + for asset in assets: + change_proposals = apply_association_to_container( + asset, apply_urn, apply_type, emit=False, 
graph=self.graph + ) + assert change_proposals is not None + yield from self._yield_workunits(change_proposals) + + def _yield_domain(self) -> Iterable[MetadataWorkUnit]: + if not self.config.domain_apply: + return + for apply in self.config.domain_apply: + yield from self._handle_assets(apply.assets, apply.domain_urn, "domain") + + def _yield_tag(self) -> Iterable[MetadataWorkUnit]: + if not self.config.tag_apply: + return + for apply in self.config.tag_apply: + yield from self._handle_assets(apply.assets, apply.tag_urn, "tag") + + def _yield_term(self) -> Iterable[MetadataWorkUnit]: + if not self.config.term_apply: + return + for apply in self.config.term_apply: + yield from self._handle_assets(apply.assets, apply.term_urn, "term") + + def _yield_owner(self) -> Iterable[MetadataWorkUnit]: + if not self.config.owner_apply: + return + for apply in self.config.owner_apply: + yield from self._handle_assets(apply.assets, apply.owner_urn, "owner") + + def get_workunits_internal( + self, + ) -> Iterable[MetadataWorkUnit]: + yield from self._yield_domain() + yield from self._yield_tag() + yield from self._yield_term() + yield from self._yield_owner() + + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [partial(auto_workunit_reporter, self.get_report())] + + def get_report(self) -> SourceReport: + return self.report From 412600a163df1eac3bd3b364a16aaedc40f8bb70 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:30:44 -0600 Subject: [PATCH 179/249] feat(telemetry): cross-component async write tracing (#12405) --- build.gradle | 7 +- .../resolvers/group/EntityCountsResolver.java | 2 +- .../mutate/MutableTypeBatchResolver.java | 41 +- .../ListRecommendationsResolver.java | 2 +- .../resolvers/search/SearchResolver.java | 2 +- .../upgrade/UpgradeCliApplication.java | 6 +- .../upgrade/config/SystemUpdateConfig.java | 1 + .../upgrade/impl/DefaultUpgradeManager.java | 
93 +- docs-website/sidebars.js | 1 + docs/advanced/api-tracing.md | 332 +++++++ .../linkedin/metadata/aspect/ReadItem.java | 7 + .../metadata/aspect/SystemAspect.java | 6 + .../metadata/aspect/batch/MCLItem.java | 5 + .../test/metadata/aspect/batch/TestMCP.java | 2 +- .../dao/producer/KafkaEventProducer.java | 72 +- .../java/com/linkedin/mxe/ConsumerGroups.java | 8 + .../com/linkedin/metadata/EventUtils.java | 24 +- metadata-io/build.gradle | 1 + .../entity/ebean/batch/AspectsBatchImpl.java | 8 +- .../entity/ebean/batch/ChangeItemImpl.java | 28 +- .../entity/ebean/batch/DeleteItemImpl.java | 6 + .../entity/ebean/batch/PatchItemImpl.java | 10 +- .../entity/ebean/batch/ProposedItem.java | 5 + .../aspect/utils/DefaultAspectsUtil.java | 14 +- .../metadata/client/JavaEntityClient.java | 2 +- .../metadata/entity/EntityServiceImpl.java | 904 ++++++++++-------- .../cassandra/CassandraRetentionService.java | 2 +- .../entity/ebean/EbeanRetentionService.java | 2 +- .../entity/validation/ValidationUtils.java | 237 ++--- .../metadata/event/EventProducer.java | 88 +- .../graph/elastic/ESGraphQueryDAO.java | 81 +- .../elastic/ElasticSearchGraphService.java | 2 +- .../graph/neo4j/Neo4jGraphService.java | 39 +- .../candidatesource/MostPopularSource.java | 42 +- .../candidatesource/RecentlyEditedSource.java | 42 +- .../candidatesource/RecentlyViewedSource.java | 42 +- .../metadata/search/LineageSearchService.java | 2 +- .../metadata/search/SearchService.java | 15 +- .../search/cache/CacheableSearcher.java | 138 +-- .../search/cache/EntityDocCountCache.java | 2 +- .../client/CachingEntitySearchService.java | 250 ++--- .../elasticsearch/query/ESBrowseDAO.java | 96 +- .../elasticsearch/query/ESSearchDAO.java | 214 +++-- .../request/AggregationQueryBuilder.java | 2 +- .../query/request/SearchRequestHandler.java | 2 +- .../service/UpdateIndicesService.java | 132 ++- .../systemmetadata/ESSystemMetadataDAO.java | 48 +- .../ElasticSearchSystemMetadataService.java | 79 +- 
.../SystemMetadataMappingsBuilder.java | 3 + .../ElasticSearchTimeseriesAspectService.java | 69 +- .../timeseries/elastic/UsageServiceUtil.java | 28 +- .../metadata/trace/KafkaTraceReader.java | 460 +++++++++ .../metadata/trace/MCLTraceReader.java | 43 + .../metadata/trace/MCPFailedTraceReader.java | 45 + .../metadata/trace/MCPTraceReader.java | 43 + .../metadata/trace/TraceServiceImpl.java | 484 ++++++++++ .../metadata/entity/EntityServiceTest.java | 194 +++- .../graph/neo4j/Neo4jGraphServiceTest.java | 3 +- .../TimeseriesAspectServiceUnitTest.java | 93 +- .../trace/BaseKafkaTraceReaderTest.java | 239 +++++ .../metadata/trace/MCLTraceReaderTest.java | 93 ++ .../trace/MCPFailedTraceReaderTest.java | 98 ++ .../metadata/trace/MCPTraceReaderTest.java | 93 ++ .../metadata/trace/TraceServiceImplTest.java | 350 +++++++ .../kafka/MAEOpenTelemetryConfig.java | 21 + metadata-jobs/mae-consumer/build.gradle | 1 + .../kafka/DataHubUsageEventsProcessor.java | 59 +- .../metadata/kafka/MCLKafkaListener.java | 92 +- .../MCLSpringCommonTestConfiguration.java | 1 + .../kafka/MCEOpenTelemetryConfig.java | 21 + metadata-jobs/mce-consumer/build.gradle | 1 + .../kafka/MetadataChangeEventsProcessor.java | 55 +- .../MetadataChangeProposalsProcessor.java | 81 +- ...BatchMetadataChangeProposalsProcessor.java | 107 ++- .../kafka/util/KafkaListenerUtil.java | 42 - .../datahub/event/PlatformEventProcessor.java | 100 +- .../metadata/run/AspectRowSummary.pdl | 1 + metadata-operation-context/build.gradle | 4 +- .../metadata/context/OperationContext.java | 95 +- .../metadata/context/RequestContext.java | 8 + .../metadata/context/TraceContext.java | 414 ++++++++ .../metadata/context/TraceIdGenerator.java | 48 + .../metadata/exception/TraceException.java | 40 + .../context/TestOperationContexts.java | 58 +- .../context/OperationContextTest.java | 183 ++++ .../metadata/context/TraceContextTest.java | 293 ++++++ .../authorization/DataHubAuthorizerTest.java | 1 + .../src/main/resources/application.yaml 
| 3 + metadata-service/factories/build.gradle | 5 + .../common/Neo4jGraphServiceFactory.java | 10 +- .../SystemOperationContextFactory.java | 9 +- .../kafka/common/AdminClientFactory.java | 30 + .../kafka/throttle/KafkaThrottleFactory.java | 25 +- .../kafka/trace/KafkaTraceReaderFactory.java | 196 ++++ .../OpenTelemetryBaseFactory.java | 81 ++ .../factory/trace/TraceServiceFactory.java | 37 + .../kafka/DataHubUpgradeKafkaListener.java | 85 +- .../OpenAPIAnalyticsTestConfiguration.java | 2 + .../OpenAPIEntityTestConfiguration.java | 8 + metadata-service/openapi-servlet/build.gradle | 4 + .../openapi-servlet/models/build.gradle | 1 + .../openapi/v1/models/TraceRequestV1.java | 17 + .../openapi/v1/models/TraceResponseV1.java | 22 + .../openapi/config/SpringWebConfig.java | 9 + .../openapi/config/TracingInterceptor.java | 93 ++ .../controller/GenericEntitiesController.java | 98 +- .../operations/v1/TraceController.java | 149 +++ .../openapi/util/MappingUtil.java | 3 - .../openapi/util/RequestInputUtil.java | 136 +++ .../v1/entities/EntitiesController.java | 6 +- .../RelationshipsController.java | 4 +- .../v3/controller/EntityController.java | 13 + .../java/entities/EntitiesControllerTest.java | 3 +- .../operations/v1/TraceControllerTest.java | 275 ++++++ .../v3/controller/EntityControllerTest.java | 82 ++ .../com.linkedin.entity.aspects.snapshot.json | 38 +- ...com.linkedin.entity.entities.snapshot.json | 56 +- .../com.linkedin.entity.runs.snapshot.json | 42 +- ...nkedin.operations.operations.snapshot.json | 38 +- ...m.linkedin.platform.platform.snapshot.json | 52 +- .../resources/entity/AspectResource.java | 12 +- .../entity/BatchIngestionRunResource.java | 8 +- .../resources/entity/EntityResource.java | 40 +- .../resources/entity/EntityV2Resource.java | 6 +- .../entity/EntityVersionedV2Resource.java | 4 +- .../resources/lineage/Relationships.java | 6 +- .../operations/OperationsResource.java | 10 +- .../resources/platform/PlatformResource.java | 2 +- 
.../resources/restli/RestliUtils.java | 30 +- .../metadata/resources/usage/UsageStats.java | 8 +- .../resources/entity/AspectResourceTest.java | 75 +- .../metadata/entity/IngestAspectsResult.java | 62 ++ .../metadata/entity/IngestProposalResult.java | 11 - .../RecommendationsService.java | 2 +- .../EntitySearchAggregationSource.java | 2 +- .../RecentlySearchedSource.java | 38 +- .../candidatesource/RecommendationSource.java | 2 +- .../systemmetadata/SystemMetadataService.java | 4 + .../metadata/systemmetadata/TraceService.java | 38 + .../metadata/systemmetadata/TraceStatus.java | 16 + .../systemmetadata/TraceStorageStatus.java | 52 + .../systemmetadata/TraceWriteStatus.java | 18 + .../linkedin/gms/CommonApplicationConfig.java | 4 +- .../config/GMSOpenTelemetryConfig.java | 21 + metadata-utils/build.gradle | 1 + .../utils/metrics/MetricSpanExporter.java | 58 ++ .../metadata/utils/metrics/MetricUtils.java | 21 +- smoke-test/requirements.txt | 3 +- smoke-test/tests/trace/__init__.py | 0 smoke-test/tests/trace/test_api_trace.py | 486 ++++++++++ 150 files changed, 8050 insertions(+), 1832 deletions(-) create mode 100644 docs/advanced/api-tracing.md create mode 100644 metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/ConsumerGroups.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/trace/KafkaTraceReader.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/trace/MCLTraceReader.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/trace/MCPFailedTraceReader.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/trace/MCPTraceReader.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/trace/TraceServiceImpl.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/trace/BaseKafkaTraceReaderTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/trace/MCLTraceReaderTest.java create mode 100644 
metadata-io/src/test/java/com/linkedin/metadata/trace/MCPFailedTraceReaderTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/trace/MCPTraceReaderTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/trace/TraceServiceImplTest.java create mode 100644 metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MAEOpenTelemetryConfig.java create mode 100644 metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MCEOpenTelemetryConfig.java create mode 100644 metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceContext.java create mode 100644 metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceIdGenerator.java create mode 100644 metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/TraceException.java create mode 100644 metadata-operation-context/src/test/java/io/datahubproject/metadata/context/TraceContextTest.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/common/AdminClientFactory.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/trace/KafkaTraceReaderFactory.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/system_telemetry/OpenTelemetryBaseFactory.java create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/trace/TraceServiceFactory.java create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceRequestV1.java create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceResponseV1.java create mode 100644 metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/TracingInterceptor.java create mode 100644 metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/v1/TraceController.java create 
mode 100644 metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/RequestInputUtil.java create mode 100644 metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/operations/v1/TraceControllerTest.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestAspectsResult.java delete mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceService.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStatus.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStorageStatus.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceWriteStatus.java create mode 100644 metadata-service/war/src/main/java/com/linkedin/gms/factory/config/GMSOpenTelemetryConfig.java create mode 100644 metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricSpanExporter.java create mode 100644 smoke-test/tests/trace/__init__.py create mode 100644 smoke-test/tests/trace/test_api_trace.py diff --git a/build.gradle b/build.gradle index 2984812bda13b8..73555a72dd26da 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ buildscript { ext.springVersion = '6.1.14' ext.springBootVersion = '3.2.9' ext.springKafkaVersion = '3.1.6' - ext.openTelemetryVersion = '1.18.0' + ext.openTelemetryVersion = '1.45.0' ext.neo4jVersion = '5.20.0' ext.neo4jTestVersion = '5.20.0' ext.neo4jApocVersion = '5.20.0' @@ -220,7 +220,10 @@ project.ext.externalDependency = [ 'neo4jApocCore': 'org.neo4j.procedure:apoc-core:' + neo4jApocVersion, 'neo4jApocCommon': 'org.neo4j.procedure:apoc-common:' + neo4jApocVersion, 'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:' + openTelemetryVersion, - 'opentelemetryAnnotations': 
'io.opentelemetry:opentelemetry-extension-annotations:' + openTelemetryVersion, + 'opentelemetrySdk': 'io.opentelemetry:opentelemetry-sdk:' + openTelemetryVersion, + 'opentelemetrySdkTrace': 'io.opentelemetry:opentelemetry-sdk-trace:' + openTelemetryVersion, + 'opentelemetryAutoConfig': 'io.opentelemetry:opentelemetry-sdk-extension-autoconfigure:' + openTelemetryVersion, + 'opentelemetryAnnotations': 'io.opentelemetry.instrumentation:opentelemetry-instrumentation-annotations:2.11.0', 'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15', 'parquet': 'org.apache.parquet:parquet-avro:1.12.3', 'parquetHadoop': 'org.apache.parquet:parquet-hadoop:1.13.1', diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java index ac195ca5d82520..d97141b84588c6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/EntityCountsResolver.java @@ -12,7 +12,7 @@ import com.linkedin.metadata.service.ViewService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java index d647374b8e1efc..b343a78412ccc6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/MutableTypeBatchResolver.java @@ -2,13 +2,14 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; -import com.codahale.metrics.Timer; +import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.types.BatchMutableType; import com.linkedin.metadata.utils.metrics.MetricUtils; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; import java.util.List; import java.util.concurrent.CompletableFuture; import org.slf4j.Logger; @@ -33,25 +34,29 @@ public MutableTypeBatchResolver(final BatchMutableType batchMutableType @Override public CompletableFuture> get(DataFetchingEnvironment environment) throws Exception { + final QueryContext context = environment.getContext(); + final OperationContext opContext = context.getOperationContext(); + final B[] input = bindArgument(environment.getArgument("input"), _batchMutableType.batchInputClass()); - return GraphQLConcurrencyUtils.supplyAsync( - () -> { - Timer.Context timer = MetricUtils.timer(this.getClass(), "batchMutate").time(); - - try { - return _batchMutableType.batchUpdate(input, environment.getContext()); - } catch (AuthorizationException e) { - throw e; - } catch (Exception e) { - _logger.error("Failed to perform batchUpdate", e); - throw new IllegalArgumentException(e); - } finally { - timer.stop(); - } - }, - this.getClass().getSimpleName(), - "get"); + return opContext.withSpan( + "batchMutate", + () -> + GraphQLConcurrencyUtils.supplyAsync( + () -> { + try { + return _batchMutableType.batchUpdate(input, environment.getContext()); + } catch (AuthorizationException e) { + throw e; + } catch (Exception e) { + _logger.error("Failed to perform batchUpdate", e); + throw 
new IllegalArgumentException(e); + } + }, + this.getClass().getSimpleName(), + "get"), + MetricUtils.DROPWIZARD_METRIC, + "true"); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index 77f6eb285ecc5b..e613f4fc5849e7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -27,7 +27,7 @@ import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Collections; import java.util.List; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java index 45751fc6eb8cb2..bbf59234247e95 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java @@ -19,7 +19,7 @@ import com.linkedin.metadata.query.SearchFlags; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.Collections; import java.util.concurrent.CompletableFuture; import lombok.RequiredArgsConstructor; diff --git 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java index e17ac6be79face..b1f601761212a4 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java @@ -5,7 +5,9 @@ import com.linkedin.gms.factory.graphql.GraphQLEngineFactory; import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; import com.linkedin.gms.factory.kafka.SimpleKafkaConsumerFactory; +import com.linkedin.gms.factory.kafka.trace.KafkaTraceReaderFactory; import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory; +import com.linkedin.gms.factory.trace.TraceServiceFactory; import org.springframework.boot.WebApplicationType; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration; @@ -30,7 +32,9 @@ DataHubAuthorizerFactory.class, SimpleKafkaConsumerFactory.class, KafkaEventConsumerFactory.class, - GraphQLEngineFactory.class + GraphQLEngineFactory.class, + KafkaTraceReaderFactory.class, + TraceServiceFactory.class }) }) public class UpgradeCliApplication { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index d0493019a40af2..2600ea2300dc9c 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -195,6 +195,7 @@ protected OperationContext javaSystemOperationContext( .alternateValidation( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) .build(), + null, true); 
entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java index 443042049e8856..8142c04ddf600d 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/impl/DefaultUpgradeManager.java @@ -1,7 +1,6 @@ package com.linkedin.datahub.upgrade.impl; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.linkedin.datahub.upgrade.Upgrade; import com.linkedin.datahub.upgrade.UpgradeCleanupStep; import com.linkedin.datahub.upgrade.UpgradeContext; @@ -119,44 +118,60 @@ private UpgradeResult executeInternal(UpgradeContext context) { } private UpgradeStepResult executeStepInternal(UpgradeContext context, UpgradeStep step) { - int retryCount = step.retryCount(); - UpgradeStepResult result = null; - int maxAttempts = retryCount + 1; - for (int i = 0; i < maxAttempts; i++) { - try (Timer.Context completionTimer = - MetricUtils.timer(MetricRegistry.name(step.id(), "completionTime")).time()) { - try (Timer.Context executionTimer = - MetricUtils.timer(MetricRegistry.name(step.id(), "executionTime")).time()) { - result = step.executable().apply(context); - } - - if (result == null) { - // Failed to even retrieve a result. Create a default failure result. 
- result = new DefaultUpgradeStepResult(step.id(), DataHubUpgradeState.FAILED); - context - .report() - .addLine(String.format("Retrying %s more times...", maxAttempts - (i + 1))); - MetricUtils.counter(MetricRegistry.name(step.id(), "retry")).inc(); - } - - if (DataHubUpgradeState.SUCCEEDED.equals(result.result())) { - MetricUtils.counter(MetricRegistry.name(step.id(), "succeeded")).inc(); - break; - } - } catch (Exception e) { - log.error("Caught exception during attempt {} of Step with id {}", i, step.id(), e); - context - .report() - .addLine( - String.format( - "Caught exception during attempt %s of Step with id %s: %s", i, step.id(), e)); - MetricUtils.counter(MetricRegistry.name(step.id(), "failed")).inc(); - result = new DefaultUpgradeStepResult(step.id(), DataHubUpgradeState.FAILED); - context.report().addLine(String.format("Retrying %s more times...", maxAttempts - (i + 1))); - } - } - - return result; + return context + .opContext() + .withSpan( + "completionTime", + () -> { + int retryCount = step.retryCount(); + UpgradeStepResult result = null; + int maxAttempts = retryCount + 1; + for (int i = 0; i < maxAttempts; i++) { + try { + result = + context + .opContext() + .withSpan( + "executionTime", + () -> step.executable().apply(context), + "step.id", + step.id(), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(step.id(), "executionTime")); + + if (result == null) { + // Failed to even retrieve a result. Create a default failure result. 
+ result = new DefaultUpgradeStepResult(step.id(), DataHubUpgradeState.FAILED); + context + .report() + .addLine(String.format("Retrying %s more times...", maxAttempts - (i + 1))); + MetricUtils.counter(MetricRegistry.name(step.id(), "retry")).inc(); + } + + if (DataHubUpgradeState.SUCCEEDED.equals(result.result())) { + MetricUtils.counter(MetricRegistry.name(step.id(), "succeeded")).inc(); + break; + } + } catch (Exception e) { + log.error( + "Caught exception during attempt {} of Step with id {}", i, step.id(), e); + context + .report() + .addLine( + String.format( + "Caught exception during attempt %s of Step with id %s: %s", + i, step.id(), e)); + MetricUtils.counter(MetricRegistry.name(step.id(), "failed")).inc(); + result = new DefaultUpgradeStepResult(step.id(), DataHubUpgradeState.FAILED); + context + .report() + .addLine(String.format("Retrying %s more times...", maxAttempts - (i + 1))); + } + } + return result; + }, + MetricUtils.DROPWIZARD_METRIC, + "true"); } private void executeCleanupInternal(UpgradeContext context, UpgradeResult result) { diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index fbd35b60aedba9..b53a7a4f2be56a 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -702,6 +702,7 @@ module.exports = { "docs/browseV2/browse-paths-v2", "docs/plugins", "docs/advanced/bootstrap-mcps", + "docs/advanced/api-tracing", ], }, { diff --git a/docs/advanced/api-tracing.md b/docs/advanced/api-tracing.md new file mode 100644 index 00000000000000..589693209359b1 --- /dev/null +++ b/docs/advanced/api-tracing.md @@ -0,0 +1,332 @@ +# API Tracing + +## Introduction + +DataHub's asynchronous APIs enable high-volume data operations, particularly for bulk ingestion processes. While these +APIs optimize throughput, they previously lacked built-in validation mechanisms for operation status. Consequently, +detecting processing issues required direct monitoring of backend system metrics and logs. 
+ +To address this limitation, DataHub implemented a trace/request ID system that enables end-to-end tracking of write +operations. This tracing mechanism is particularly crucial given DataHub's multi-stage write architecture, where data +propagates through multiple components and persists across distinct storage systems. The trace ID maintains continuity +throughout this complex processing pipeline, providing visibility into the operation's status at each stage of execution. + +The system effectively balances the performance benefits of asynchronous processing with the operational necessity of +request tracking and validation. This enhancement significantly improves observability without compromising the +throughput advantages of bulk operations. + +## Architecture Overview + +Shown below is the write path for an asynchronous write within DataHub. For more information about MCPs please see +the documentation on [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md). + +

+ +

+ +A successful write operation requires data persistence in at least one storage system, though typically both primary and +search storage systems must be updated. The storage architecture consists of two main components: + +* Primary Storage: Comprises MySQL, Postgres, or Cassandra, serving as the persistent store for all non-Timeseries aspects. +* Search Storage: Utilizes either Elasticsearch or OpenSearch systems. + +In most operational scenarios, write operations must successfully complete across both storage layers to maintain system +consistency and ensure complete data availability. + +## Trace API + +The trace API's status retrieval functionality requires three key identifiers to locate specific write operations: +the trace ID (unique to the request), the URN, and the aspect name. This combination of identifiers ensures precise +operation tracking within the system. + +For batch operations involving multiple URNs and aspects, a single trace ID is assigned to monitor the entire request. +In asynchronous mode, the system maintains independent status tracking for each aspect within the batch, allowing for +granular operation monitoring. + +The API returns a comprehensive status report that includes: + +* Per-aspect success/failure status +* Detailed status breakdowns for each storage system +* Write states as defined in the [Write States](#Write-States) documentation +* Error information from MCP processing, when applicable, to facilitate debugging + +This structured approach to status reporting enables precise monitoring of complex write operations across the system's +various components. 
+ +### Retrieving the `trace id` + +DataHub's asynchronous APIs provide trace ID information through two distinct mechanisms: + +* HTTP Response Header: A W3C-compliant `traceparent` header is included in all API responses +The complete header value serves as a valid trace ID +* System Metadata: For OpenAPI v3 APIs and those returning systemMetadata, the trace ID is accessible via the +`telemetryTraceId` property within systemMetadata + +While these two trace ID formats differ structurally—with the `traceparent` adhering to W3C's Trace Context +specification—both formats are fully compatible with the Trace API for operation tracking purposes. + +Header Example: +```text +traceparent: 00-00062c53a468cbd8077e7dd079846870-9199effb49910b4e-01 +``` + +`SystemMetadata` Example: +```json +[ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", + "status": { + "value": { + "removed": false + }, + "systemMetadata": { + "properties": { + "telemetryLog": "false", + "telemetryQueueSpanId": "ee9e40edcb66ce4f", + "telemetryTraceId": "00062c53a468cbd8077e7dd079846870", + "telemetryEnqueuedAt": "1737587612508" + } + } + } + } +] +``` + +### Write States + +As mentioned earlier, there are multiple states for an aspect write both storage systems. These states are as follows: + +| Write State | Description | +|--------------------------|--------------------------------------------------------------------------------------------------------| +| `ERROR` | This state indicates an error occurred when processing the write request. | +| `PENDING` | A pending state indicates that the write is queued and the consumer has not yet processed the message. | +| `ACTIVE_STATE` | The write was successful and is the current value. | +| `HISTORIC_STATE` | The write was successful, however it has been overwritten by a newer value. | +| `NO_OP` | The write is not applicable for a given storage system. 
| +| `UNKNOWN` | We are unable to determine the state of the write and no record of its failure exists either. | +| `TRACE_NOT_IMPLEMENTED` | We have not yet implemented tracing a particular aspect type. This applies to Timeseries aspects. | + +### Using the Trace API + +The Trace API is implemented as an OpenAPI endpoint and can be used both programmatically and through the Swagger UI. + +Required Values: +* `traceId` - The `trace id` associated with the write request. See the previous [Retrieving the `trace id`](#retrieving-the-trace-id) section for how to find this id. +* URN/Aspect names - These are passed as a POST body and should represent at least a subset of the URN/aspects from the initial request. + An example is shown here for a single URN and 2 aspects [`datasetInfo`, `status`]. + ```json + { + "urn:li:dataset:(urn:li:dataPlatform:bigquery,transactions.user_profile,PROD)": ["datasetInfo", "status"] + } + ``` +* Authorization token + +Optional Parameters: +* `onlyIncludeErrors` (default: `true`) - If this parameter is set to `true`, the response will only include status information on the failed aspects. +* `detailed` (default: `false`) - If set to `true`, will include detailed information from exceptions for failed MCPs. +* `skipCache` (default: `false`) - If set to `true`, will bypass a short-lived cache of the kafka consumer group offsets. + +The following shows a few examples of requests/response pairs. 
+* Successful Write + * Request for URN `urn:li:dataset:(urn:li:dataPlatform:bigquery,transactions.user_profile,PROD)` and aspect `status` + ```shell + curl -v 'http://localhost:8080/openapi/v1/trace/write/00062c2b698bcb28e92508f8f311802d?onlyIncludeErrors=false&detailed=true&skipCache=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer ' \ + -d '{ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,transactions.user_profile,PROD)": [ + "status" + ] + }' | jq + ``` + * Example response + ```json + { + "urn:li:dataset:(urn:li:dataPlatform:bigquery,transactions.user_profile,PROD)": { + "status": { + "success": true, + "primaryStorage": { + "writeStatus": "ACTIVE_STATE" + }, + "searchStorage": { + "writeStatus": "ACTIVE_STATE" + } + } + } + } + ``` +* Error with exception details + * Example request + ```shell + curl -v 'http://localhost:8080/openapi/v1/trace/write/00062c543e4550c8400e6f6864471a20?onlyIncludeErrors=true&detailed=true&skipCache=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer ' \ + -d '{"urn:li:dataset:(urn:li:dataPlatform:bigquery,transactions.user_profile,PROD)": ["status"]}' + ``` + * Example response + ```json + { + "urn:li:dataset:(urn:li:dataPlatform:bigquery,transactions.user_profile,PROD)": { + "status": { + "success": false, + "primaryStorage": { + "writeStatus": "ERROR", + "writeExceptions": [ + { + "message": "Expected version -100000, actual version -1", + "exceptionClass": "com.linkedin.metadata.aspect.plugins.validation.AspectValidationException", + "stackTrace": [ + "com.linkedin.metadata.aspect.plugins.validation.AspectValidationException.forPrecondition(AspectValidationException.java:33)", + "com.linkedin.metadata.aspect.plugins.validation.AspectValidationException.forPrecondition(AspectValidationException.java:25)", + 
"com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.validateVersionPrecondition(ConditionalWriteValidator.java:152)", + "com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.lambda$validatePreCommitAspects$2(ConditionalWriteValidator.java:100)", + "java.base/java.util.Optional.flatMap(Optional.java:289)", + "com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.validatePreCommitAspects(ConditionalWriteValidator.java:98)", + "com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator.validatePreCommit(AspectPayloadValidator.java:38)", + "com.linkedin.metadata.aspect.batch.AspectsBatch.lambda$validatePreCommit$4(AspectsBatch.java:129)", + "java.base/java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:273)", + "java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625)", + "java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)", + "java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)", + "java.base/java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)", + "java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)", + "java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)", + "java.base/java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:596)", + "com.linkedin.metadata.aspect.batch.AspectsBatch.validatePreCommit(AspectsBatch.java:130)" + ] + } + ] + }, + "searchStorage": { + "writeStatus": "ERROR", + "writeMessage": "Primary storage write failed." 
+ } + } + } + } + ``` + +## Trace Performance + +The Trace API's performance profile varies based on operation status: + +Successful Operations: + * Optimal performance through direct storage access + * Requires single lookup operations from SQL and Elasticsearch + * Bypasses Kafka interaction entirely + +Error State Operations: +* Performance impact due to required Kafka topic inspection + * Optimization mechanisms implemented: + * Timestamp-based offset seeking for efficient topic traversal + * Parallel trace processing with controlled concurrency + * Offset caching system to enhance response times + * Cache bypass available via skipCache parameter when data currency is critical + +The performance differential between success and error states stems primarily from the additional overhead of Kafka +topic inspection required for error tracking and diagnosis. + +For more detail, please see the [Design Notes](#design-notes) section. + +## Trace Exporters + +At the foundation of the trace instrumentation is OpenTelemetry which has been a part of DataHub for quite some time. As +documented in the [Monitoring](/docs/advanced/monitoring.md) section, OpenTelemetry can be configured to export traces +to external systems. For the Trace API to function, this external system is NOT required. + +### Trace Log Export + +A special log-based OpenTelemetry exporter was implemented for debugging purposes. When selectively activated for a given +request it will print `trace id`s and detailed timing information as the request traverses the different components of DataHub. +The output of these logs is also not required for the Trace API to function, however it leverages the same underlying OpenTelemetry +foundation. 
+ +Activating a trace log is done using one of these methods: +* HTTP Header: `X-Enable-Trace-Log: true` +* Cookie: `enable-trace-log: true` + * javascript: `document.cookie = "enable-trace-log=true";` + +Example logs for a single request with tracing logging enabled: +* GMS +```text +i.d.metadata.context.RequestContext:53 - RequestContext{actorUrn='urn:li:corpuser:datahub', sourceIP='172.18.0.5', requestAPI=OPENAPI, requestID='createAspect([dataset])', userAgent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: a2898a18f9f0c4f1, ParentId: dd746f079d1232ba, Name: ingestTimeseriesProposal, Duration: 0.03 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={async=true, batch.size=1}, capacity=128, totalAddedValues=2} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 02e058ff616e4c99, ParentId: 7ed88659811a8fdb, Name: produceMetadataChangeProposal, Duration: 0.03 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={messaging.destination_kind=topic, messaging.system=kafka, messaging.destination=MetadataChangeProposal_v1, messaging.operation=publish, queue.enqueued_at=1737418391958}, capacity=128, totalAddedValues=5} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 7ed88659811a8fdb, ParentId: dd746f079d1232ba, Name: ingestProposalAsync, Duration: 2.57 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={batch.size=1}, capacity=128, totalAddedValues=1} +``` +* MCE Consumer +```text +c.l.m.k.MetadataChangeProposalsProcessor:89 - Got MCP event key: urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.daily_temperature,PROD), topic: 
MetadataChangeProposal_v1, partition: 0, offset: 75, value size: 412, timestamp: 1737418391959 +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: a65075fe0982d873, ParentId: 02e058ff616e4c99, Name: consume, Duration: 0.01 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={messaging.destination_kind=topic, queue.duration_ms=4, messaging.system=kafka, messaging.destination=MetadataChangeProposal_v1, messaging.operation=receive, queue.enqueued_at=1737418391958}, capacity=128, totalAddedValues=6} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: dd746f079d1232ba, ParentId: 0000000000000000, Name: POST /openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Cclimate.daily_temperature%2CPROD%29/status, Duration: 16.18 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={request.api=OPENAPI, http.status_code=202, user.id=urn:li:corpuser:datahub, http.url=/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Cclimate.daily_temperature%2CPROD%29/status, request.id=createAspect([dataset]), http.method=POST}, capacity=128, totalAddedValues=6} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 94a019b95154c0e7, ParentId: 0cb378fe4f5ad185, Name: ingestProposalSync, Duration: 0.01 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={batch.size=0}, capacity=128, totalAddedValues=1} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 0cb378fe4f5ad185, ParentId: 68df6bc4729dc0a2, Name: ingestTimeseriesProposal, Duration: 0.25 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={async=false, batch.size=1}, 
capacity=128, totalAddedValues=2} +c.l.m.entity.EntityServiceImpl:988 - Ingesting aspects batch to database: AspectsBatchImpl{items=[ChangeMCP{changeType=UPSERT, urn=urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.daily_temperature,PROD), aspectName='status', recordTemplate={removed=false}, systemMetadata={lastObserved=1737418391954, version=1, properties={telemetryLog=true, telemetryQueueSpanId=02e058ff616e4c99, telemetryEnqueu...}]} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 4754a1c02dadec4c, ParentId: ef383b26f0040fc5, Name: retentionService, Duration: 0.09 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={batch.size=1}, capacity=128, totalAddedValues=1} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: ef383b26f0040fc5, ParentId: 7ae629151400fc18, Name: ingestAspectsToLocalDB, Duration: 18.64 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={batch.size=1, dwizName=com.linkedin.metadata.entity.EntityServiceImpl.ingestAspectsToLocalDB}, capacity=128, totalAddedValues=2} +c.l.m.entity.EntityServiceImpl:1900 - Producing MCL for ingested aspect status, urn urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.daily_temperature,PROD) +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: f1a8a1da99f1ae23, ParentId: c5f8b3884060722c, Name: produceMetadataChangeLog, Duration: 0.10 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={messaging.destination_kind=topic, messaging.system=kafka, messaging.destination=MetadataChangeLog_Versioned_v1, messaging.operation=publish, queue.enqueued_at=1737418391982}, capacity=128, totalAddedValues=5} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: c5f8b3884060722c, 
ParentId: 7ae629151400fc18, Name: emitMCL, Duration: 14.32 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={batch.size=1}, capacity=128, totalAddedValues=1} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 7ae629151400fc18, ParentId: 68df6bc4729dc0a2, Name: ingestProposalSync, Duration: 37.90 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={batch.size=1}, capacity=128, totalAddedValues=1} +c.l.m.k.MetadataChangeProposalsProcessor:128 - Successfully processed MCP event urn: urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.daily_temperature,PROD) +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 68df6bc4729dc0a2, ParentId: 02e058ff616e4c99, Name: consume, Duration: 39.11 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={batch.size=1, dwizName=com.linkedin.metadata.kafka.MetadataChangeProposalsProcessor.consume}, capacity=128, totalAddedValues=2} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 04dc44653b634df2, ParentId: 02e058ff616e4c99, Name: consume, Duration: 0.03 ms +``` +* MAE Consumer +```text +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={messaging.destination_kind=topic, queue.duration_ms=22, messaging.system=kafka, messaging.destination=MetadataChangeLog_Versioned_v1, messaging.operation=receive, queue.enqueued_at=1737418391982}, capacity=128, totalAddedValues=6} +c.l.metadata.kafka.MCLKafkaListener:96 - Invoking MCL hooks for consumer: generic-mae-consumer-job-client urn: urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.daily_temperature,PROD), aspect name: status, entity type: dataset, change type: UPSERT +i.d.metadata.context.TraceContext:366 - Trace: 
00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 3c3c055c360dc8e4, ParentId: 1de99215a0e82697, Name: FormAssignmentHook, Duration: 0.06 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={dwizName=com.linkedin.metadata.kafka.MCLKafkaListener.FormAssignmentHook_latency}, capacity=128, totalAddedValues=1} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 8e238d0156baacc4, ParentId: 1de99215a0e82697, Name: IngestionSchedulerHook, Duration: 0.05 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={dwizName=com.linkedin.metadata.kafka.MCLKafkaListener.IngestionSchedulerHook_latency}, capacity=128, totalAddedValues=1} +c.l.m.s.e.update.ESBulkProcessor:85 - Added request id: urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Asnowflake%2Cclimate.daily_temperature%2CPROD%29, operation type: UPDATE, index: datasetindex_v2 +c.l.m.s.e.update.ESBulkProcessor:85 - Added request id: SIHRXj1ktF7qkwPBZO8w0A==, operation type: UPDATE, index: system_metadata_service_v1 +c.l.m.s.e.update.ESBulkProcessor:85 - Added request id: 2p3742l4sFS3wcL82Qh2lQ==, operation type: UPDATE, index: system_metadata_service_v1 +c.l.m.s.e.update.ESBulkProcessor:85 - Added request id: CfZKRLsf25/e3p3mURzlnA==, operation type: UPDATE, index: system_metadata_service_v1 +c.l.m.s.e.update.ESBulkProcessor:85 - Added request id: 8tvhG5ARd5BOdEbqaZkE0g==, operation type: UPDATE, index: system_metadata_service_v1 +c.l.m.s.e.update.ESBulkProcessor:85 - Added request id: rAvQOOBItiKAym622S4dcQ==, operation type: UPDATE, index: system_metadata_service_v1 +c.l.m.s.e.update.ESBulkProcessor:85 - Added request id: YqT6TNy7MAMOAyVXh6abMA==, operation type: UPDATE, index: system_metadata_service_v1 +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 054ac726204b449c, ParentId: 1de99215a0e82697, Name: UpdateIndicesHook, 
Duration: 47.31 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={dwizName=com.linkedin.metadata.kafka.MCLKafkaListener.UpdateIndicesHook_latency}, capacity=128, totalAddedValues=1} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 14d9ded49a94c7b8, ParentId: 1de99215a0e82697, Name: IncidentsSummaryHook, Duration: 0.09 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={dwizName=com.linkedin.metadata.kafka.MCLKafkaListener.IncidentsSummaryHook_latency}, capacity=128, totalAddedValues=1} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: a92d9e54ade6073b, ParentId: 1de99215a0e82697, Name: EntityChangeEventGeneratorHook, Duration: 9.10 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={dwizName=com.linkedin.metadata.kafka.MCLKafkaListener.EntityChangeEventGeneratorHook_latency}, capacity=128, totalAddedValues=1} +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: c06dc7f131e57fca, ParentId: 1de99215a0e82697, Name: SiblingAssociationHook, Duration: 0.07 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: AttributesMap{data={dwizName=com.linkedin.metadata.kafka.MCLKafkaListener.SiblingAssociationHook_latency}, capacity=128, totalAddedValues=1} +c.l.metadata.kafka.MCLKafkaListener:139 - Successfully completed MCL hooks for consumer: generic-mae-consumer-job-client urn: urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.daily_temperature,PROD) +i.d.metadata.context.TraceContext:366 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, SpanId: 1de99215a0e82697, ParentId: 02e058ff616e4c99, Name: consume, Duration: 58.67 ms +i.d.metadata.context.TraceContext:376 - Trace: 00062c2c3e1403109bbaf3d2e39adcd0, Attributes: 
AttributesMap{data={batch.size=1, dwizName=com.linkedin.metadata.kafka.MCLKafkaListener.consume}, capacity=128, totalAddedValues=2} +``` + +## Design Notes + +For the initial implementation no specific OpenTelemetry infrastructure is required; however, existing environment variables +for OpenTelemetry can continue to be used and will export the new spans if configured. + +The Trace API implementation does not rely on any additional external systems or infrastructure. Due to this design +choice, the trace is determined by inspecting the 3 storage systems (Primary Storage (SQL/Cassandra), Elasticsearch/Opensearch, +Kafka topics) for the `trace id` or related timestamps. + +The `trace id` is stored in systemMetadata in both SQL and ES. For ES specifically, the presence of the `trace id` in +the system metadata index is used as a proxy to determine a successful write to ES. + +The tracing feature will additionally fetch messages from the Kafka topics (including the failed MCP topic) for +more detailed error information. Pending states are derived from offsets of the message vs. the current offsets of the +consumer groups.
\ No newline at end of file diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java index 341dec4d4741c7..46e6bf45d654e5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/ReadItem.java @@ -64,6 +64,13 @@ static T getAspect(Class clazz, @Nullable RecordTemplate recordTemplate) @Nullable SystemMetadata getSystemMetadata(); + /** + * Set system metadata on the item + * + * @param systemMetadata + */ + void setSystemMetadata(@Nonnull SystemMetadata systemMetadata); + /** * The entity's schema * diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java index 4c9bf3d4fdbc78..abbc82833c55f8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java @@ -6,6 +6,7 @@ import java.sql.Timestamp; import java.util.Optional; import javax.annotation.Nonnull; +import org.apache.commons.lang3.NotImplementedException; /** * An aspect along with system metadata and creation timestamp. 
Represents an aspect as stored in @@ -36,4 +37,9 @@ default Optional getSystemMetadataVersion() { .map(SystemMetadata::getVersion) .map(Long::parseLong); } + + @Override + default void setSystemMetadata(@Nonnull SystemMetadata systemMetadata) { + throw new NotImplementedException(); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLItem.java index 09da0a52ff0c30..1a9d66581ad523 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLItem.java @@ -37,6 +37,11 @@ default SystemMetadata getSystemMetadata() { return getMetadataChangeLog().getSystemMetadata(); } + @Override + default void setSystemMetadata(@Nonnull SystemMetadata systemMetadata) { + getMetadataChangeLog().setSystemMetadata(systemMetadata); + } + default SystemMetadata getPreviousSystemMetadata() { return getMetadataChangeLog().getPreviousSystemMetadata(); } diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java index d7dd1fab2b6acf..aacd09adc4036a 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java @@ -116,7 +116,7 @@ public static Set ofOneMCP( private Urn urn; private RecordTemplate recordTemplate; - private SystemMetadata systemMetadata; + @Setter private SystemMetadata systemMetadata; private AuditStamp auditStamp; private ChangeType changeType; @Nonnull private final EntitySpec entitySpec; diff --git a/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java 
b/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java index 26b48449c1c2ff..f2434f07dd11c8 100644 --- a/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java +++ b/metadata-dao-impl/kafka-producer/src/main/java/com/linkedin/metadata/dao/producer/KafkaEventProducer.java @@ -6,13 +6,17 @@ import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.mxe.DataHubUpgradeHistoryEvent; +import com.linkedin.mxe.FailedMetadataChangeProposal; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.PlatformEvent; import com.linkedin.mxe.TopicConvention; import com.linkedin.mxe.TopicConventionImpl; -import io.opentelemetry.extension.annotations.WithSpan; +import io.datahubproject.metadata.context.OperationContext; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.io.IOException; +import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -29,7 +33,7 @@ * delimiter of an underscore (_). 
*/ @Slf4j -public class KafkaEventProducer implements EventProducer { +public class KafkaEventProducer extends EventProducer { private final Producer _producer; private final TopicConvention _topicConvention; @@ -69,13 +73,19 @@ record = EventUtils.pegasusToAvroMCL(metadataChangeLog); throw new ModelConversionException("Failed to convert Pegasus MAE to Avro", e); } + String topic = getMetadataChangeLogTopicName(aspectSpec); + return _producer.send( + new ProducerRecord(topic, urn.toString(), record), + _kafkaHealthChecker.getKafkaCallBack("MCL", urn.toString())); + } + + @Override + public String getMetadataChangeLogTopicName(@Nonnull AspectSpec aspectSpec) { String topic = _topicConvention.getMetadataChangeLogVersionedTopicName(); if (aspectSpec.isTimeseries()) { topic = _topicConvention.getMetadataChangeLogTimeseriesTopicName(); } - return _producer.send( - new ProducerRecord(topic, urn.toString(), record), - _kafkaHealthChecker.getKafkaCallBack("MCL", urn.toString())); + return topic; } @Override @@ -102,6 +112,42 @@ record = EventUtils.pegasusToAvroMCP(metadataChangeProposal); _kafkaHealthChecker.getKafkaCallBack("MCP", urn.toString())); } + @Override + public String getMetadataChangeProposalTopicName() { + return _topicConvention.getMetadataChangeProposalTopicName(); + } + + @Override + public Future produceFailedMetadataChangeProposalAsync( + @Nonnull OperationContext opContext, + @Nonnull MetadataChangeProposal mcp, + @Nonnull Set throwables) { + + try { + String topic = _topicConvention.getFailedMetadataChangeProposalTopicName(); + final FailedMetadataChangeProposal failedMetadataChangeProposal = + createFailedMCPEvent(opContext, mcp, throwables); + + final GenericRecord record = EventUtils.pegasusToAvroFailedMCP(failedMetadataChangeProposal); + log.debug( + "Sending FailedMessages to topic - {}", + _topicConvention.getFailedMetadataChangeProposalTopicName()); + log.info( + "Error while processing FMCP: FailedMetadataChangeProposal - {}", + 
failedMetadataChangeProposal); + + return _producer.send( + new ProducerRecord(topic, mcp.getEntityUrn().toString(), record), + _kafkaHealthChecker.getKafkaCallBack("FMCP", mcp.getEntityUrn().toString())); + } catch (IOException e) { + log.error( + "Error while sending FailedMetadataChangeProposal: Exception - {}, FailedMetadataChangeProposal - {}", + e.getStackTrace(), + mcp); + return CompletableFuture.failedFuture(e); + } + } + @Override public Future producePlatformEvent( @Nonnull String name, @Nullable String key, @Nonnull PlatformEvent event) { @@ -121,6 +167,11 @@ record = EventUtils.pegasusToAvroPE(event); _kafkaHealthChecker.getKafkaCallBack("Platform Event", name)); } + @Override + public String getPlatformEventTopicName() { + return _topicConvention.getPlatformEventTopicName(); + } + @Override public void produceDataHubUpgradeHistoryEvent(@Nonnull DataHubUpgradeHistoryEvent event) { GenericRecord record; @@ -141,4 +192,15 @@ record = EventUtils.pegasusToAvroDUHE(event); _kafkaHealthChecker.getKafkaCallBack( "History Event", "Event Version: " + event.getVersion())); } + + @Nonnull + private static FailedMetadataChangeProposal createFailedMCPEvent( + @Nonnull OperationContext opContext, + @Nonnull MetadataChangeProposal event, + @Nonnull Set throwables) { + final FailedMetadataChangeProposal fmcp = new FailedMetadataChangeProposal(); + fmcp.setError(opContext.traceException(throwables)); + fmcp.setMetadataChangeProposal(event); + return fmcp; + } } diff --git a/metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/ConsumerGroups.java b/metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/ConsumerGroups.java new file mode 100644 index 00000000000000..565be1d56e5fd4 --- /dev/null +++ b/metadata-events/mxe-registration/src/main/java/com/linkedin/mxe/ConsumerGroups.java @@ -0,0 +1,8 @@ +package com.linkedin.mxe; + +public class ConsumerGroups { + private ConsumerGroups() {} + + public static final String MCP_CONSUMER_GROUP_ID_VALUE = + 
"${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}"; +} diff --git a/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java index 18005dfb7b2a5d..e40124b6abba82 100644 --- a/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java +++ b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java @@ -38,6 +38,9 @@ public class EventUtils { private static final RecordDataSchema MCP_PEGASUS_SCHEMA = new MetadataChangeProposal().schema(); + private static final RecordDataSchema FMCP_PEGASUS_SCHEMA = + new FailedMetadataChangeProposal().schema(); + private static final RecordDataSchema MCL_PEGASUS_SCHEMA = new MetadataChangeLog().schema(); private static final RecordDataSchema PE_PEGASUS_SCHEMA = new PlatformEvent().schema(); @@ -60,7 +63,7 @@ public class EventUtils { private static final Schema ORIGINAL_MCL_AVRO_SCHEMA = getAvroSchemaFromResource("avro/com/linkedin/mxe/MetadataChangeLog.avsc"); - private static final Schema ORIGINAL_FMCL_AVRO_SCHEMA = + private static final Schema ORIGINAL_FMCP_AVRO_SCHEMA = getAvroSchemaFromResource("avro/com/linkedin/mxe/FailedMetadataChangeProposal.avsc"); private static final Schema ORIGINAL_PE_AVRO_SCHEMA = @@ -175,6 +178,23 @@ public static MetadataChangeProposal avroToPegasusMCP(@Nonnull GenericRecord rec ORIGINAL_MCP_AVRO_SCHEMA)); } + /** + * Converts a {@link GenericRecord} Failed MCP into the equivalent Pegasus model. 
+ * + * @param record the {@link GenericRecord} that contains the MCP in com.linkedin.pegasus2avro + * namespace + * @return the Pegasus {@link FailedMetadataChangeProposal} model + */ + @Nonnull + public static FailedMetadataChangeProposal avroToPegasusFailedMCP(@Nonnull GenericRecord record) + throws IOException { + return new FailedMetadataChangeProposal( + DataTranslator.genericRecordToDataMap( + renameSchemaNamespace(record, RENAMED_FMCP_AVRO_SCHEMA, ORIGINAL_FMCP_AVRO_SCHEMA), + FMCP_PEGASUS_SCHEMA, + ORIGINAL_FMCP_AVRO_SCHEMA)); + } + /** * Converts a {@link GenericRecord} PE into the equivalent Pegasus model. * @@ -323,7 +343,7 @@ public static GenericRecord pegasusToAvroFailedMCP( DataTranslator.dataMapToGenericRecord( failedMetadataChangeProposal.data(), failedMetadataChangeProposal.schema(), - ORIGINAL_FMCL_AVRO_SCHEMA); + ORIGINAL_FMCP_AVRO_SCHEMA); return renameSchemaNamespace(original, RENAMED_FMCP_AVRO_SCHEMA); } diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index b33f19bef95986..8efb96d7238456 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -58,6 +58,7 @@ dependencies { implementation externalDependency.ebeanDdl implementation externalDependency.ebeanAgent implementation externalDependency.opentelemetryAnnotations + implementation externalDependency.opentelemetrySdkTrace implementation externalDependency.resilience4j // Newer Spring libraries require JDK17 classes, allow for JDK11 compileOnly externalDependency.springBootAutoconfigureJdk11 diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index 1af9fc1565a456..9b993ab5fc4abb 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ 
b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -154,8 +154,8 @@ private static BatchItem patchDiscriminator(MCPItem mcpItem, AspectRetriever asp mcpItem.getAuditStamp(), aspectRetriever.getEntityRegistry()); } - return ChangeItemImpl.ChangeItemImplBuilder.build( - mcpItem.getMetadataChangeProposal(), mcpItem.getAuditStamp(), aspectRetriever); + return ChangeItemImpl.builder() + .build(mcpItem.getMetadataChangeProposal(), mcpItem.getAuditStamp(), aspectRetriever); } public static class AspectsBatchImplBuilder { @@ -208,8 +208,8 @@ public AspectsBatchImplBuilder mcps( auditStamp, retrieverContext.getAspectRetriever().getEntityRegistry()); } else { - return ChangeItemImpl.ChangeItemImplBuilder.build( - mcp, auditStamp, retrieverContext.getAspectRetriever()); + return ChangeItemImpl.builder() + .build(mcp, auditStamp, retrieverContext.getAspectRetriever()); } } catch (IllegalArgumentException e) { log.error("Invalid proposal, skipping and proceeding with batch: {}", mcp, e); diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index 64263859e4aadb..94f71ff0897c63 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -150,6 +150,14 @@ public MetadataChangeProposal getMetadataChangeProposal() { } } + @Override + public void setSystemMetadata(@Nonnull SystemMetadata systemMetadata) { + this.systemMetadata = systemMetadata; + if (this.metadataChangeProposal != null) { + this.metadataChangeProposal.setSystemMetadata(systemMetadata); + } + } + @Override public Map getHeaders() { return Optional.ofNullable(metadataChangeProposal) @@ -183,6 +191,10 @@ public ChangeItemImpl 
build(AspectRetriever aspectRetriever) { this.headers = Map.of(); } + if (this.urn == null && this.metadataChangeProposal != null) { + this.urn = this.metadataChangeProposal.getEntityUrn(); + } + ValidationApiUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); log.debug("entity type = {}", this.urn.getEntityType()); @@ -210,7 +222,7 @@ public ChangeItemImpl build(AspectRetriever aspectRetriever) { this.headers); } - public static ChangeItemImpl build( + public ChangeItemImpl build( MetadataChangeProposal mcp, AuditStamp auditStamp, AspectRetriever aspectRetriever) { log.debug("entity type = {}", mcp.getEntityType()); @@ -303,15 +315,17 @@ public String toString() { return "ChangeItemImpl{" + "changeType=" + changeType - + ", urn=" - + urn + + ", auditStamp=" + + auditStamp + + ", systemMetadata=" + + systemMetadata + + ", recordTemplate=" + + recordTemplate + ", aspectName='" + aspectName + '\'' - + ", recordTemplate=" - + recordTemplate - + ", systemMetadata=" - + systemMetadata + + ", urn=" + + urn + '}'; } } diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java index 40bcb0fa8ed2d1..935227e55b6638 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java @@ -23,6 +23,7 @@ import lombok.Setter; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.NotImplementedException; @Slf4j @Getter @@ -61,6 +62,11 @@ public SystemMetadata getSystemMetadata() { return null; } + @Override + public void setSystemMetadata(@Nonnull SystemMetadata systemMetadata) { + throw new NotImplementedException(); + } + @Nullable @Override public MetadataChangeProposal getMetadataChangeProposal() { diff --git 
a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java index 2543d99ac6af37..5e4e36cfe6fbd8 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java @@ -59,7 +59,7 @@ public class PatchItemImpl implements PatchMCP { private final Urn urn; // aspectName name of the aspect being inserted private final String aspectName; - private final SystemMetadata systemMetadata; + private SystemMetadata systemMetadata; private final AuditStamp auditStamp; private final JsonPatch patch; @@ -105,6 +105,14 @@ public MetadataChangeProposal getMetadataChangeProposal() { } } + @Override + public void setSystemMetadata(@Nonnull SystemMetadata systemMetadata) { + this.systemMetadata = systemMetadata; + if (this.metadataChangeProposal != null) { + this.metadataChangeProposal.setSystemMetadata(systemMetadata); + } + } + public ChangeItemImpl applyPatch(RecordTemplate recordTemplate, AspectRetriever aspectRetriever) { ChangeItemImpl.ChangeItemImplBuilder builder = ChangeItemImpl.builder() diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java index 370f1f6f073e65..b5dec0bb06d0e2 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ProposedItem.java @@ -81,6 +81,11 @@ public SystemMetadata getSystemMetadata() { return metadataChangeProposal.getSystemMetadata(); } + @Override + public void setSystemMetadata(@Nonnull SystemMetadata systemMetadata) { + 
metadataChangeProposal.setSystemMetadata(systemMetadata); + } + @Nonnull @Override public ChangeType getChangeType() { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 82bc0ae1409c52..b98e2465e971a8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -133,11 +133,15 @@ public static List getAdditionalChanges( return defaultAspects.stream() .map( entry -> - ChangeItemImpl.ChangeItemImplBuilder.build( - getProposalFromAspectForDefault( - entry.getKey(), entry.getValue(), entityKeyAspect, templateItem), - templateItem.getAuditStamp(), - opContext.getAspectRetriever())) + ChangeItemImpl.builder() + .build( + getProposalFromAspectForDefault( + entry.getKey(), + entry.getValue(), + entityKeyAspect, + templateItem), + templateItem.getAuditStamp(), + opContext.getAspectRetriever())) .filter(Objects::nonNull); }) .collect(Collectors.toList()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 35d133c74c0692..71e1aea59c711a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -58,7 +58,7 @@ import com.linkedin.r2.RemoteInvocationException; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.time.Clock; import java.util.ArrayList; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java 
b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 75f16ae4d981d2..153aa0685f9b26 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -12,8 +12,8 @@ import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import static com.linkedin.metadata.utils.metrics.ExceptionUtils.collectMetrics; +import static com.linkedin.metadata.utils.metrics.MetricUtils.BATCH_SIZE_ATTR; -import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -48,6 +48,7 @@ import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; @@ -84,7 +85,9 @@ import com.linkedin.r2.RemoteInvocationException; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.instrumentation.annotations.WithSpan; import jakarta.persistence.EntityNotFoundException; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; @@ -792,14 +795,17 @@ public List ingestAspects( // Handle throttling APIThrottle.evaluate(opContext, new HashSet<>(throttleEvents.values()), false); - List ingestResults = - ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); 
- - List mclResults = emitMCL(opContext, ingestResults, emitMCL); + IngestAspectsResult ingestResults = ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); + // Produce MCLs & run side effects + List mclResults = + emitMCL(opContext, ingestResults.getUpdateAspectResults(), emitMCL); processPostCommitMCLSideEffects( opContext, mclResults.stream().map(UpdateAspectResult::toMCL).collect(Collectors.toList())); + // Produce FailedMCPs for tracing + produceFailedMCPs(opContext, ingestResults); + return mclResults; } @@ -827,6 +833,7 @@ private void processPostCommitMCLSideEffects( sideEffects -> { long count = ingestProposalAsync( + opContext, AspectsBatchImpl.builder() .items(sideEffects) .retrieverContext(opContext.getRetrieverContext()) @@ -847,254 +854,299 @@ private void processPostCommitMCLSideEffects( * @return Details about the new and old version of the aspect */ @Nonnull - private List ingestAspectsToLocalDB( + private IngestAspectsResult ingestAspectsToLocalDB( @Nonnull OperationContext opContext, @Nonnull final AspectsBatch inputBatch, boolean overwrite) { - if (inputBatch.containsDuplicateAspects()) { - log.warn("Batch contains duplicates: {}", inputBatch.duplicateAspects()); - MetricUtils.counter(EntityServiceImpl.class, "batch_with_duplicate").inc(); - } - - return aspectDao - .runInTransactionWithRetry( - (txContext) -> { - // Generate default aspects within the transaction (they are re-calculated on retry) - AspectsBatch batchWithDefaults = - DefaultAspectsUtil.withAdditionalChanges( - opContext, inputBatch, this, enableBrowseV2); - - // Read before write is unfortunate, however batch it - final Map> urnAspects = batchWithDefaults.getUrnAspectsMap(); - - // read #1 - // READ COMMITED is used in conjunction with SELECT FOR UPDATE (read lock) in order - // to ensure that the aspect's version is not modified outside the transaction. 
- // We rely on the retry mechanism if the row is modified and will re-read (require the - // lock) - Map> databaseAspects = - aspectDao.getLatestAspects(urnAspects, true); - - final Map> batchAspects = - EntityUtils.toSystemAspects(opContext.getRetrieverContext(), databaseAspects); - - // read #2 (potentially) - final Map> nextVersions = - EntityUtils.calculateNextVersions(txContext, aspectDao, batchAspects, urnAspects); - - // 1. Convert patches to full upserts - // 2. Run any entity/aspect level hooks - Pair>, List> updatedItems = - batchWithDefaults.toUpsertBatchItems(batchAspects, nextVersions); - - // Fetch additional information if needed - final List changeMCPs; - - if (!updatedItems.getFirst().isEmpty()) { - // These items are new items from side effects - Map> sideEffects = updatedItems.getFirst(); - - final Map> updatedLatestAspects; - final Map> updatedNextVersions; - - Map> newLatestAspects = - EntityUtils.toSystemAspects( - opContext.getRetrieverContext(), - aspectDao.getLatestAspects(updatedItems.getFirst(), true)); - // merge - updatedLatestAspects = AspectsBatch.merge(batchAspects, newLatestAspects); - - Map> newNextVersions = - EntityUtils.calculateNextVersions( - txContext, aspectDao, updatedLatestAspects, updatedItems.getFirst()); - // merge - updatedNextVersions = AspectsBatch.merge(nextVersions, newNextVersions); - - changeMCPs = - updatedItems.getSecond().stream() - .peek( - changeMCP -> { - // Add previous version to each side-effect - if (sideEffects - .getOrDefault( - changeMCP.getUrn().toString(), Collections.emptySet()) - .contains(changeMCP.getAspectName())) { - - AspectsBatch.incrementBatchVersion( - changeMCP, updatedLatestAspects, updatedNextVersions); - } - }) - .collect(Collectors.toList()); - } else { - changeMCPs = updatedItems.getSecond(); - } - - // No changes, return - if (changeMCPs.isEmpty()) { - MetricUtils.counter(EntityServiceImpl.class, "batch_empty").inc(); - return Collections.emptyList(); - } - - // do final 
pre-commit checks with previous aspect value - ValidationExceptionCollection exceptions = - AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext()); - - if (exceptions.hasFatalExceptions()) { - // IF this is a client request/API request we fail the `transaction batch` - if (opContext.getRequestContext() != null) { - MetricUtils.counter(EntityServiceImpl.class, "batch_request_validation_exception") - .inc(); - throw new ValidationException(collectMetrics(exceptions).toString()); - } + return opContext.withSpan( + "ingestAspectsToLocalDB", + () -> { + if (inputBatch.containsDuplicateAspects()) { + log.warn("Batch contains duplicates: {}", inputBatch.duplicateAspects()); + MetricUtils.counter(EntityServiceImpl.class, "batch_with_duplicate").inc(); + } - MetricUtils.counter(EntityServiceImpl.class, "batch_consumer_validation_exception") - .inc(); - log.error("mce-consumer batch exceptions: {}", collectMetrics(exceptions)); - } + return aspectDao + .runInTransactionWithRetry( + (txContext) -> { + // Generate default aspects within the transaction (they are re-calculated on + // retry) + AspectsBatch batchWithDefaults = + DefaultAspectsUtil.withAdditionalChanges( + opContext, inputBatch, this, enableBrowseV2); + + // Read before write is unfortunate, however batch it + final Map> urnAspects = + batchWithDefaults.getUrnAspectsMap(); + + // read #1 + // READ COMMITED is used in conjunction with SELECT FOR UPDATE (read lock) in + // order + // to ensure that the aspect's version is not modified outside the transaction. 
+ // We rely on the retry mechanism if the row is modified and will re-read + // (require the + // lock) + Map> databaseAspects = + aspectDao.getLatestAspects(urnAspects, true); + + final Map> batchAspects = + EntityUtils.toSystemAspects( + opContext.getRetrieverContext(), databaseAspects); + + // read #2 (potentially) + final Map> nextVersions = + EntityUtils.calculateNextVersions( + txContext, aspectDao, batchAspects, urnAspects); + + // 1. Convert patches to full upserts + // 2. Run any entity/aspect level hooks + Pair>, List> updatedItems = + batchWithDefaults.toUpsertBatchItems(batchAspects, nextVersions); + + // Fetch additional information if needed + final List changeMCPs; + + if (!updatedItems.getFirst().isEmpty()) { + // These items are new items from side effects + Map> sideEffects = updatedItems.getFirst(); + + final Map> updatedLatestAspects; + final Map> updatedNextVersions; + + Map> newLatestAspects = + EntityUtils.toSystemAspects( + opContext.getRetrieverContext(), + aspectDao.getLatestAspects(updatedItems.getFirst(), true)); + // merge + updatedLatestAspects = AspectsBatch.merge(batchAspects, newLatestAspects); + + Map> newNextVersions = + EntityUtils.calculateNextVersions( + txContext, aspectDao, updatedLatestAspects, updatedItems.getFirst()); + // merge + updatedNextVersions = AspectsBatch.merge(nextVersions, newNextVersions); + + changeMCPs = + updatedItems.getSecond().stream() + .peek( + changeMCP -> { + // Add previous version to each side-effect + if (sideEffects + .getOrDefault( + changeMCP.getUrn().toString(), Collections.emptySet()) + .contains(changeMCP.getAspectName())) { + + AspectsBatch.incrementBatchVersion( + changeMCP, updatedLatestAspects, updatedNextVersions); + } + }) + .collect(Collectors.toList()); + } else { + changeMCPs = updatedItems.getSecond(); + } - // Database Upsert successfully validated results - log.info( - "Ingesting aspects batch to database: {}", - AspectsBatch.toAbbreviatedString(changeMCPs, 2048)); - 
Timer.Context ingestToLocalDBTimer = - MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); - List upsertResults = - exceptions - .streamSuccessful(changeMCPs.stream()) - .map( - writeItem -> { - - /* - database*Aspect - should be used for comparisons of before batch operation information - */ - final EntityAspect databaseAspect = - databaseAspects - .getOrDefault(writeItem.getUrn().toString(), Map.of()) - .get(writeItem.getAspectName()); - final EntityAspect.EntitySystemAspect databaseSystemAspect = - databaseAspect == null - ? null - : EntityAspect.EntitySystemAspect.builder() - .build( - writeItem.getEntitySpec(), - writeItem.getAspectSpec(), - databaseAspect); - - /* - This condition is specifically for an older conditional write ingestAspectIfNotPresent() - overwrite is always true otherwise - */ - if (overwrite || databaseAspect == null) { - return Optional.ofNullable( - ingestAspectToLocalDB( - txContext, writeItem, databaseSystemAspect)) - .map( - optResult -> optResult.toBuilder().request(writeItem).build()) - .orElse(null); - } + // No changes, return + if (changeMCPs.isEmpty()) { + MetricUtils.counter(EntityServiceImpl.class, "batch_empty").inc(); + return IngestAspectsResult.EMPTY; + } - return null; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()); + // do final pre-commit checks with previous aspect value + ValidationExceptionCollection exceptions = + AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext()); + + List>> failedUpsertResults = + new ArrayList<>(); + if (exceptions.hasFatalExceptions()) { + // IF this is a client request/API request we fail the `transaction batch` + if (opContext.getRequestContext() != null) { + MetricUtils.counter( + EntityServiceImpl.class, "batch_request_validation_exception") + .inc(); + throw new ValidationException(collectMetrics(exceptions).toString()); + } + + MetricUtils.counter( + EntityServiceImpl.class, "batch_consumer_validation_exception") + .inc(); + 
log.error("mce-consumer batch exceptions: {}", collectMetrics(exceptions)); + failedUpsertResults = + exceptions + .streamExceptions(changeMCPs.stream()) + .map( + writeItem -> + Pair.of( + writeItem, + exceptions.get( + Pair.of( + writeItem.getUrn(), writeItem.getAspectName())))) + .collect(Collectors.toList()); + } - if (!upsertResults.isEmpty()) { - // commit upserts prior to retention or kafka send, if supported by impl - if (txContext != null) { - txContext.commitAndContinue(); - } - long took = TimeUnit.NANOSECONDS.toMillis(ingestToLocalDBTimer.stop()); - if (took > DB_TIMER_LOG_THRESHOLD_MS) { - log.info("Ingestion of aspects batch to database took {} ms", took); - } + // Database Upsert successfully validated results + log.info( + "Ingesting aspects batch to database: {}", + AspectsBatch.toAbbreviatedString(changeMCPs, 2048)); - // Retention optimization and tx - if (retentionService != null) { - List retentionBatch = - upsertResults.stream() - // Only consider retention when there was a previous version - .filter( - result -> - batchAspects.containsKey(result.getUrn().toString()) - && batchAspects - .get(result.getUrn().toString()) - .containsKey(result.getRequest().getAspectName())) - .filter( - result -> { - RecordTemplate oldAspect = result.getOldValue(); - RecordTemplate newAspect = result.getNewValue(); - // Apply retention policies if there was an update to existing - // aspect - // value - return oldAspect != newAspect - && oldAspect != null - && retentionService != null; - }) - .map( - result -> - RetentionService.RetentionContext.builder() - .urn(result.getUrn()) - .aspectName(result.getRequest().getAspectName()) - .maxVersion(Optional.of(result.getMaxVersion())) - .build()) - .collect(Collectors.toList()); - retentionService.applyRetentionWithPolicyDefaults(opContext, retentionBatch); - } else { - log.warn("Retention service is missing!"); - } - } else { - MetricUtils.counter(EntityServiceImpl.class, "batch_empty_transaction").inc(); - // This 
includes no-op batches. i.e. patch removing non-existent items - log.debug("Empty transaction detected"); - } + List upsertResults = + exceptions + .streamSuccessful(changeMCPs.stream()) + .map( + writeItem -> { + + /* + database*Aspect - should be used for comparisons of before batch operation information + */ + final EntityAspect databaseAspect = + databaseAspects + .getOrDefault(writeItem.getUrn().toString(), Map.of()) + .get(writeItem.getAspectName()); + final EntityAspect.EntitySystemAspect databaseSystemAspect = + databaseAspect == null + ? null + : EntityAspect.EntitySystemAspect.builder() + .build( + writeItem.getEntitySpec(), + writeItem.getAspectSpec(), + databaseAspect); + + /* + This condition is specifically for an older conditional write ingestAspectIfNotPresent() + overwrite is always true otherwise + */ + if (overwrite || databaseAspect == null) { + return Optional.ofNullable( + ingestAspectToLocalDB( + opContext, + txContext, + writeItem, + databaseSystemAspect)) + .map( + optResult -> + optResult.toBuilder().request(writeItem).build()) + .orElse(null); + } + + return null; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + if (!upsertResults.isEmpty()) { + // commit upserts prior to retention or kafka send, if supported by impl + if (txContext != null) { + txContext.commitAndContinue(); + } + + // Retention optimization and tx + if (retentionService != null) { + opContext.withSpan( + "retentionService", + () -> { + List retentionBatch = + upsertResults.stream() + // Only consider retention when there was a previous version + .filter( + result -> + batchAspects.containsKey(result.getUrn().toString()) + && batchAspects + .get(result.getUrn().toString()) + .containsKey( + result.getRequest().getAspectName())) + .filter( + result -> { + RecordTemplate oldAspect = result.getOldValue(); + RecordTemplate newAspect = result.getNewValue(); + // Apply retention policies if there was an update to + // existing + // aspect + // value + 
return oldAspect != newAspect + && oldAspect != null + && retentionService != null; + }) + .map( + result -> + RetentionService.RetentionContext.builder() + .urn(result.getUrn()) + .aspectName(result.getRequest().getAspectName()) + .maxVersion(Optional.of(result.getMaxVersion())) + .build()) + .collect(Collectors.toList()); + retentionService.applyRetentionWithPolicyDefaults( + opContext, retentionBatch); + }, + BATCH_SIZE_ATTR, + String.valueOf(upsertResults.size())); + } else { + log.warn("Retention service is missing!"); + } + } else { + MetricUtils.counter(EntityServiceImpl.class, "batch_empty_transaction").inc(); + // This includes no-op batches. i.e. patch removing non-existent items + log.debug("Empty transaction detected"); + } - return upsertResults; - }, - inputBatch, - DEFAULT_MAX_TRANSACTION_RETRY) - .stream() - .filter(Objects::nonNull) - .flatMap(List::stream) - .collect(Collectors.toList()); + return IngestAspectsResult.builder() + .updateAspectResults(upsertResults) + .failedUpdateAspectResults(failedUpsertResults) + .build(); + }, + inputBatch, + DEFAULT_MAX_TRANSACTION_RETRY) + .stream() + .reduce(IngestAspectsResult.EMPTY, IngestAspectsResult::combine); + }, + BATCH_SIZE_ATTR, + String.valueOf(inputBatch.getItems().size()), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "ingestAspectsToLocalDB")); } @Nonnull private List emitMCL( @Nonnull OperationContext opContext, List sqlResults, boolean emitMCL) { - List withEmitMCL = - sqlResults.stream() - .map(result -> emitMCL ? 
conditionallyProduceMCLAsync(opContext, result) : result) - .collect(Collectors.toList()); - - // join futures messages, capture error state - List> statusPairs = - withEmitMCL.stream() - .filter(result -> result.getMclFuture() != null) - .map( - result -> { - try { - result.getMclFuture().get(); - return Pair.of(true, result); - } catch (InterruptedException | ExecutionException e) { - return Pair.of(false, result); - } - }) - .collect(Collectors.toList()); - if (statusPairs.stream().anyMatch(p -> !p.getFirst())) { - log.error( - "Failed to produce MCLs: {}", - statusPairs.stream() - .filter(p -> !p.getFirst()) - .map(Pair::getValue) - .map(v -> v.getRequest().toString()) - .collect(Collectors.toList())); - // TODO restoreIndices? - throw new RuntimeException("Failed to produce MCLs"); - } + return opContext.withSpan( + "emitMCL", + () -> { + List withEmitMCL = + sqlResults.stream() + .map(result -> emitMCL ? conditionallyProduceMCLAsync(opContext, result) : result) + .collect(Collectors.toList()); + + // join futures messages, capture error state + List> statusPairs = + withEmitMCL.stream() + .filter(result -> result.getMclFuture() != null) + .map( + result -> { + try { + result.getMclFuture().get(); + return Pair.of(true, result); + } catch (InterruptedException | ExecutionException e) { + return Pair.of(false, result); + } + }) + .collect(Collectors.toList()); + + if (statusPairs.stream().anyMatch(p -> !p.getFirst())) { + log.error( + "Failed to produce MCLs: {}", + statusPairs.stream() + .filter(p -> !p.getFirst()) + .map(Pair::getValue) + .map(v -> v.getRequest().toString()) + .collect(Collectors.toList())); + // TODO restoreIndices? 
+ throw new RuntimeException("Failed to produce MCLs"); + } - return withEmitMCL; + return withEmitMCL; + }, + BATCH_SIZE_ATTR, + String.valueOf(sqlResults.size())); } /** @@ -1193,7 +1245,9 @@ public List ingestProposal( Stream timeseriesIngestResults = ingestTimeseriesProposal(opContext, aspectsBatch, async); Stream nonTimeseriesIngestResults = - async ? ingestProposalAsync(aspectsBatch) : ingestProposalSync(opContext, aspectsBatch); + async + ? ingestProposalAsync(opContext, aspectsBatch) + : ingestProposalSync(opContext, aspectsBatch); return Stream.concat(nonTimeseriesIngestResults, timeseriesIngestResults) .collect(Collectors.toList()); @@ -1222,89 +1276,99 @@ private Stream ingestTimeseriesProposal( + unsupported.stream().map(BatchItem::getChangeType).collect(Collectors.toSet())); } - if (!async) { - // Handle throttling - APIThrottle.evaluate(opContext, new HashSet<>(throttleEvents.values()), true); + return opContext.withSpan( + "ingestTimeseriesProposal", + () -> { + if (!async) { + // Handle throttling + APIThrottle.evaluate(opContext, new HashSet<>(throttleEvents.values()), true); + + // Create default non-timeseries aspects for timeseries aspects + List timeseriesKeyAspects = + aspectsBatch.getMCPItems().stream() + .filter( + item -> item.getAspectSpec() != null && item.getAspectSpec().isTimeseries()) + .map( + item -> + ChangeItemImpl.builder() + .urn(item.getUrn()) + .aspectName(item.getEntitySpec().getKeyAspectName()) + .changeType(ChangeType.UPSERT) + .entitySpec(item.getEntitySpec()) + .aspectSpec(item.getEntitySpec().getKeyAspectSpec()) + .auditStamp(item.getAuditStamp()) + .systemMetadata(item.getSystemMetadata()) + .recordTemplate( + EntityApiUtils.buildKeyAspect( + opContext.getEntityRegistry(), item.getUrn())) + .build(opContext.getAspectRetriever())) + .collect(Collectors.toList()); + + ingestProposalSync( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(aspectsBatch.getRetrieverContext()) + .items(timeseriesKeyAspects) + 
.build()); + } - // Create default non-timeseries aspects for timeseries aspects - List timeseriesKeyAspects = - aspectsBatch.getMCPItems().stream() - .filter(item -> item.getAspectSpec() != null && item.getAspectSpec().isTimeseries()) + // Emit timeseries MCLs + List, Boolean>>>> timeseriesResults = + aspectsBatch.getItems().stream() + .filter( + item -> item.getAspectSpec() != null && item.getAspectSpec().isTimeseries()) + .map(item -> (MCPItem) item) + .map( + item -> + Pair.of( + item, + conditionallyProduceMCLAsync( + opContext, + null, + null, + item.getRecordTemplate(), + item.getSystemMetadata(), + item.getMetadataChangeProposal(), + item.getUrn(), + item.getAuditStamp(), + item.getAspectSpec()))) + .collect(Collectors.toList()); + + return timeseriesResults.stream() .map( - item -> - ChangeItemImpl.builder() - .urn(item.getUrn()) - .aspectName(item.getEntitySpec().getKeyAspectName()) - .changeType(ChangeType.UPSERT) - .entitySpec(item.getEntitySpec()) - .aspectSpec(item.getEntitySpec().getKeyAspectSpec()) - .auditStamp(item.getAuditStamp()) - .systemMetadata(item.getSystemMetadata()) - .recordTemplate( - EntityApiUtils.buildKeyAspect( - opContext.getEntityRegistry(), item.getUrn())) - .build(opContext.getAspectRetriever())) - .collect(Collectors.toList()); - - ingestProposalSync( - opContext, - AspectsBatchImpl.builder() - .retrieverContext(aspectsBatch.getRetrieverContext()) - .items(timeseriesKeyAspects) - .build()); - } - - // Emit timeseries MCLs - List, Boolean>>>> timeseriesResults = - aspectsBatch.getItems().stream() - .filter(item -> item.getAspectSpec() != null && item.getAspectSpec().isTimeseries()) - .map(item -> (MCPItem) item) - .map( - item -> - Pair.of( - item, - conditionallyProduceMCLAsync( - opContext, - null, - null, - item.getRecordTemplate(), - item.getSystemMetadata(), - item.getMetadataChangeProposal(), - item.getUrn(), - item.getAuditStamp(), - item.getAspectSpec()))) - .collect(Collectors.toList()); - - return 
timeseriesResults.stream() - .map( - result -> { - MCPItem item = result.getFirst(); - Optional, Boolean>> emissionStatus = result.getSecond(); - - emissionStatus.ifPresent( - status -> { - try { - status.getFirst().get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } + result -> { + MCPItem item = result.getFirst(); + Optional, Boolean>> emissionStatus = result.getSecond(); + + emissionStatus.ifPresent( + status -> { + try { + status.getFirst().get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + + return IngestResult.builder() + .urn(item.getUrn()) + .request(item) + .result( + UpdateAspectResult.builder() + .urn(item.getUrn()) + .newValue(item.getRecordTemplate()) + .auditStamp(item.getAuditStamp()) + .newSystemMetadata(item.getSystemMetadata()) + .build()) + .publishedMCL( + emissionStatus.map(status -> status.getFirst() != null).orElse(false)) + .processedMCL(emissionStatus.map(Pair::getSecond).orElse(false)) + .build(); }); - - return IngestResult.builder() - .urn(item.getUrn()) - .request(item) - .result( - UpdateAspectResult.builder() - .urn(item.getUrn()) - .newValue(item.getRecordTemplate()) - .auditStamp(item.getAuditStamp()) - .newSystemMetadata(item.getSystemMetadata()) - .build()) - .publishedMCL( - emissionStatus.map(status -> status.getFirst() != null).orElse(false)) - .processedMCL(emissionStatus.map(Pair::getSecond).orElse(false)) - .build(); - }); + }, + "async", + String.valueOf(async), + BATCH_SIZE_ATTR, + String.valueOf(aspectsBatch.getItems().size())); } /** @@ -1313,81 +1377,100 @@ private Stream ingestTimeseriesProposal( * @param aspectsBatch non-timeseries ingest aspects * @return produced items to the MCP topic */ - private Stream ingestProposalAsync(AspectsBatch aspectsBatch) { - List nonTimeseries = - aspectsBatch.getMCPItems().stream() - .filter(item -> item.getAspectSpec() == null || !item.getAspectSpec().isTimeseries()) - 
.collect(Collectors.toList()); - - List> futures = - nonTimeseries.stream() - .map( - item -> - // When async is turned on, we write to proposal log and return without waiting - producer.produceMetadataChangeProposal( - item.getUrn(), item.getMetadataChangeProposal())) - .filter(Objects::nonNull) - .collect(Collectors.toList()); + private Stream ingestProposalAsync( + OperationContext opContext, AspectsBatch aspectsBatch) { + return opContext.withSpan( + "ingestProposalAsync", + () -> { + List nonTimeseries = + aspectsBatch.getMCPItems().stream() + .filter( + item -> item.getAspectSpec() == null || !item.getAspectSpec().isTimeseries()) + .collect(Collectors.toList()); + + List> futures = + nonTimeseries.stream() + .map( + item -> { + // When async is turned on, we write to proposal log and return without + // waiting + return producer.produceMetadataChangeProposal( + opContext, item.getUrn(), item); + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + futures.forEach( + f -> { + try { + f.get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); - try { - return nonTimeseries.stream() - .map( - item -> - IngestResult.builder() - .urn(item.getUrn()) - .request(item) - .publishedMCP(true) - .build()); - } finally { - futures.forEach( - f -> { - try { - f.get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } - }); - } + return nonTimeseries.stream() + .map( + item -> + IngestResult.builder() + .urn(item.getUrn()) + .request(item) + .publishedMCP(true) + .build()); + }, + BATCH_SIZE_ATTR, + String.valueOf(aspectsBatch.getItems().size())); } private Stream ingestProposalSync( @Nonnull OperationContext opContext, AspectsBatch aspectsBatch) { - AspectsBatchImpl nonTimeseries = - AspectsBatchImpl.builder() - .retrieverContext(aspectsBatch.getRetrieverContext()) - .items( - aspectsBatch.getItems().stream() - .filter(item -> !item.getAspectSpec().isTimeseries()) 
- .collect(Collectors.toList())) - .build(); + return opContext.withSpan( + "ingestProposalSync", + () -> { + AspectsBatchImpl nonTimeseries = + AspectsBatchImpl.builder() + .retrieverContext(aspectsBatch.getRetrieverContext()) + .items( + aspectsBatch.getItems().stream() + .filter(item -> !item.getAspectSpec().isTimeseries()) + .collect(Collectors.toList())) + .build(); - List unsupported = - nonTimeseries.getMCPItems().stream() - .filter(item -> !MCPItem.isValidChangeType(item.getChangeType(), item.getAspectSpec())) - .collect(Collectors.toList()); - if (!unsupported.isEmpty()) { - throw new UnsupportedOperationException( - "ChangeType not supported: " - + unsupported.stream().map(item -> item.getChangeType()).collect(Collectors.toSet())); - } + List unsupported = + nonTimeseries.getMCPItems().stream() + .filter( + item -> + !MCPItem.isValidChangeType(item.getChangeType(), item.getAspectSpec())) + .collect(Collectors.toList()); + if (!unsupported.isEmpty()) { + throw new UnsupportedOperationException( + "ChangeType not supported: " + + unsupported.stream() + .map(item -> item.getChangeType()) + .collect(Collectors.toSet())); + } - List upsertResults = ingestAspects(opContext, nonTimeseries, true, true); + List upsertResults = + ingestAspects(opContext, nonTimeseries, true, true); - return upsertResults.stream() - .map( - result -> { - ChangeMCP item = result.getRequest(); - - return IngestResult.builder() - .urn(item.getUrn()) - .request(item) - .result(result) - .publishedMCL(result.getMclFuture() != null) - .sqlCommitted(true) - .isUpdate(result.getOldValue() != null) - .build(); - }); + return upsertResults.stream() + .map( + result -> { + ChangeMCP item = result.getRequest(); + + return IngestResult.builder() + .urn(item.getUrn()) + .request(item) + .result(result) + .publishedMCL(result.getMclFuture() != null) + .sqlCommitted(true) + .isUpdate(result.getOldValue() != null) + .build(); + }); + }, + BATCH_SIZE_ATTR, + 
String.valueOf(aspectsBatch.getItems().size())); } @Override @@ -1768,8 +1851,10 @@ public Pair, Boolean> alwaysProduceMCLAsync( @Nonnull final Urn urn, @Nonnull final AspectSpec aspectSpec, @Nonnull final MetadataChangeLog metadataChangeLog) { - Future future = producer.produceMetadataChangeLog(urn, aspectSpec, metadataChangeLog); - return Pair.of(future, preprocessEvent(opContext, metadataChangeLog)); + boolean preprocessed = preprocessEvent(opContext, metadataChangeLog); + Future future = + producer.produceMetadataChangeLog(opContext, urn, aspectSpec, metadataChangeLog); + return Pair.of(future, preprocessed); } @Override @@ -1830,6 +1915,19 @@ public Optional, Boolean>> conditionallyProduceMCLAsync( log.debug("Serialized MCL event: {}", metadataChangeLog); Pair, Boolean> emissionStatus = alwaysProduceMCLAsync(opContext, entityUrn, aspectSpec, metadataChangeLog); + + // for tracing propagate properties to system meta + if (newSystemMetadata != null && metadataChangeLog.getSystemMetadata().hasProperties()) { + if (!newSystemMetadata.hasProperties()) { + newSystemMetadata.setProperties( + metadataChangeLog.getSystemMetadata().getProperties(), SetMode.IGNORE_NULL); + } else { + newSystemMetadata + .getProperties() + .putAll(metadataChangeLog.getSystemMetadata().getProperties()); + } + } + return emissionStatus.getFirst() != null ? 
Optional.of(emissionStatus) : Optional.empty(); } else { log.info( @@ -1865,6 +1963,35 @@ private UpdateAspectResult conditionallyProduceMCLAsync( .orElse(result); } + public void produceFailedMCPs( + @Nonnull OperationContext opContext, @Nonnull IngestAspectsResult ingestAspectsResult) { + + if (!ingestAspectsResult.getFailedUpdateAspectResults().isEmpty()) { + Span currentSpan = Span.current(); + currentSpan.recordException( + new IllegalStateException("Batch contains failed aspect validations.")); + currentSpan.setStatus(StatusCode.ERROR, "Batch contains failed aspect validations."); + currentSpan.setAttribute(MetricUtils.ERROR_TYPE, IllegalStateException.class.getName()); + + List> futures = + ingestAspectsResult.getFailedUpdateAspectResults().stream() + .map( + failedItem -> + producer.produceFailedMetadataChangeProposalAsync( + opContext, failedItem.getFirst(), new HashSet<>(failedItem.getSecond()))) + .collect(Collectors.toList()); + + futures.forEach( + f -> { + try { + f.get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + } + } + @Override public void ingestEntities( @Nonnull OperationContext opContext, @@ -2528,6 +2655,7 @@ private Map getEnvelopedAspects( */ @Nullable private UpdateAspectResult ingestAspectToLocalDB( + @Nonnull OperationContext opContext, @Nullable TransactionContext txContext, @Nonnull final ChangeMCP writeItem, @Nullable final EntityAspect.EntitySystemAspect databaseAspect) { @@ -2597,7 +2725,7 @@ private UpdateAspectResult ingestAspectToLocalDB( previousBatchAspect.getCreatedBy(), null, previousBatchAspect.getCreatedOn(), - RecordUtils.toJsonString(latestSystemMetadataDiff.get()), + RecordUtils.toJsonString(opContext.withTraceId(latestSystemMetadataDiff.get())), previousBatchAspect.getVersion(), false); @@ -2663,7 +2791,7 @@ private UpdateAspectResult ingestAspectToLocalDB( ? 
writeItem.getAuditStamp().getImpersonator().toString() : null, new Timestamp(writeItem.getAuditStamp().getTime()), - EntityApiUtils.toJsonAspect(writeItem.getSystemMetadata()), + EntityApiUtils.toJsonAspect(opContext.withTraceId(writeItem.getSystemMetadata())), writeItem.getNextAspectVersion()); // metrics diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index c595e3e07b8342..eb3c5b9cca0671 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -31,7 +31,7 @@ import com.linkedin.retention.TimeBasedRetention; import com.linkedin.retention.VersionBasedRetention; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.sql.Timestamp; import java.time.Clock; import java.util.List; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index 74d0d8b0964de0..5fbac036bd05e6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -26,7 +26,7 @@ import io.ebean.TxScope; import io.ebeaninternal.server.expression.Op; import io.ebeaninternal.server.expression.SimpleExpression; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.sql.Timestamp; import java.time.Clock; import java.util.List; diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java index 6ecac70e13c7e5..7a792efd2984d2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.entity.validation; -import com.codahale.metrics.Timer; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.AbstractArrayTemplate; @@ -38,33 +37,36 @@ public static SearchResult validateSearchResult( @Nonnull OperationContext opContext, final SearchResult searchResult, @Nonnull final EntityService entityService) { - try (Timer.Context ignored = - MetricUtils.timer(ValidationUtils.class, "validateSearchResult").time()) { - if (searchResult == null) { - return null; - } - Objects.requireNonNull(entityService, "entityService must not be null"); - - SearchResult validatedSearchResult = - new SearchResult() - .setFrom(searchResult.getFrom()) - .setMetadata(searchResult.getMetadata()) - .setPageSize(searchResult.getPageSize()) - .setNumEntities(searchResult.getNumEntities()); - - SearchEntityArray validatedEntities = - validateSearchUrns( - opContext, - searchResult.getEntities(), - SearchEntity::getEntity, - entityService, - true, - true) - .collect(Collectors.toCollection(SearchEntityArray::new)); - validatedSearchResult.setEntities(validatedEntities); - - return validatedSearchResult; - } + return opContext.withSpan( + "validateSearchResult", + () -> { + if (searchResult == null) { + return null; + } + Objects.requireNonNull(entityService, "entityService must not be null"); + + SearchResult validatedSearchResult = + new SearchResult() + .setFrom(searchResult.getFrom()) + .setMetadata(searchResult.getMetadata()) + .setPageSize(searchResult.getPageSize()) + 
.setNumEntities(searchResult.getNumEntities()); + + SearchEntityArray validatedEntities = + validateSearchUrns( + opContext, + searchResult.getEntities(), + SearchEntity::getEntity, + entityService, + true, + true) + .collect(Collectors.toCollection(SearchEntityArray::new)); + validatedSearchResult.setEntities(validatedEntities); + + return validatedSearchResult; + }, + MetricUtils.DROPWIZARD_METRIC, + MetricUtils.name(ValidationUtils.class, "validateSearchResult")); } public static ScrollResult validateScrollResult( @@ -104,102 +106,113 @@ public static BrowseResult validateBrowseResult( @Nonnull OperationContext opContext, final BrowseResult browseResult, @Nonnull final EntityService entityService) { - try (Timer.Context ignored = - MetricUtils.timer(ValidationUtils.class, "validateBrowseResult").time()) { - if (browseResult == null) { - return null; - } - Objects.requireNonNull(entityService, "entityService must not be null"); - - BrowseResult validatedBrowseResult = - new BrowseResult() - .setGroups(browseResult.getGroups()) - .setMetadata(browseResult.getMetadata()) - .setFrom(browseResult.getFrom()) - .setPageSize(browseResult.getPageSize()) - .setNumGroups(browseResult.getNumGroups()) - .setNumEntities(browseResult.getNumEntities()) - .setNumElements(browseResult.getNumElements()); - - BrowseResultEntityArray validatedEntities = - validateSearchUrns( - opContext, - browseResult.getEntities(), - BrowseResultEntity::getUrn, - entityService, - true, - true) - .collect(Collectors.toCollection(BrowseResultEntityArray::new)); - validatedBrowseResult.setEntities(validatedEntities); - - return validatedBrowseResult; - } + return opContext.withSpan( + "validateBrowseResult", + () -> { + if (browseResult == null) { + return null; + } + Objects.requireNonNull(entityService, "entityService must not be null"); + + BrowseResult validatedBrowseResult = + new BrowseResult() + .setGroups(browseResult.getGroups()) + .setMetadata(browseResult.getMetadata()) + 
.setFrom(browseResult.getFrom()) + .setPageSize(browseResult.getPageSize()) + .setNumGroups(browseResult.getNumGroups()) + .setNumEntities(browseResult.getNumEntities()) + .setNumElements(browseResult.getNumElements()); + + BrowseResultEntityArray validatedEntities = + validateSearchUrns( + opContext, + browseResult.getEntities(), + BrowseResultEntity::getUrn, + entityService, + true, + true) + .collect(Collectors.toCollection(BrowseResultEntityArray::new)); + validatedBrowseResult.setEntities(validatedEntities); + + return validatedBrowseResult; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(ValidationUtils.class, "validateBrowseResult")); } public static ListResult validateListResult( @Nonnull OperationContext opContext, final ListResult listResult, @Nonnull final EntityService entityService) { - try (Timer.Context ignored = - MetricUtils.timer(ValidationUtils.class, "validateListResult").time()) { - if (listResult == null) { - return null; - } - Objects.requireNonNull(entityService, "entityService must not be null"); - - ListResult validatedListResult = - new ListResult() - .setStart(listResult.getStart()) - .setCount(listResult.getCount()) - .setTotal(listResult.getTotal()); - - UrnArray validatedEntities = - validateSearchUrns( - opContext, - listResult.getEntities(), - Function.identity(), - entityService, - true, - true) - .collect(Collectors.toCollection(UrnArray::new)); - validatedListResult.setEntities(validatedEntities); - - return validatedListResult; - } + + return opContext.withSpan( + "validateListResult", + () -> { + if (listResult == null) { + return null; + } + Objects.requireNonNull(entityService, "entityService must not be null"); + + ListResult validatedListResult = + new ListResult() + .setStart(listResult.getStart()) + .setCount(listResult.getCount()) + .setTotal(listResult.getTotal()); + + UrnArray validatedEntities = + validateSearchUrns( + opContext, + listResult.getEntities(), + Function.identity(), + entityService, + true, + 
true) + .collect(Collectors.toCollection(UrnArray::new)); + validatedListResult.setEntities(validatedEntities); + + return validatedListResult; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(ValidationUtils.class, "validateListResult")); } public static LineageSearchResult validateLineageSearchResult( @Nonnull OperationContext opContext, final LineageSearchResult lineageSearchResult, @Nonnull final EntityService entityService) { - try (Timer.Context ignored = - MetricUtils.timer(ValidationUtils.class, "validateLineageResult").time()) { - if (lineageSearchResult == null) { - return null; - } - Objects.requireNonNull(entityService, "entityService must not be null"); - - LineageSearchResult validatedLineageSearchResult = - new LineageSearchResult() - .setMetadata(lineageSearchResult.getMetadata()) - .setFrom(lineageSearchResult.getFrom()) - .setPageSize(lineageSearchResult.getPageSize()) - .setNumEntities(lineageSearchResult.getNumEntities()); - - LineageSearchEntityArray validatedEntities = - validateSearchUrns( - opContext, - lineageSearchResult.getEntities(), - LineageSearchEntity::getEntity, - entityService, - true, - true) - .collect(Collectors.toCollection(LineageSearchEntityArray::new)); - validatedLineageSearchResult.setEntities(validatedEntities); - - log.debug("Returning validated lineage search results"); - return validatedLineageSearchResult; - } + + return opContext.withSpan( + "validateLineageResult", + () -> { + if (lineageSearchResult == null) { + return null; + } + Objects.requireNonNull(entityService, "entityService must not be null"); + + LineageSearchResult validatedLineageSearchResult = + new LineageSearchResult() + .setMetadata(lineageSearchResult.getMetadata()) + .setFrom(lineageSearchResult.getFrom()) + .setPageSize(lineageSearchResult.getPageSize()) + .setNumEntities(lineageSearchResult.getNumEntities()); + + LineageSearchEntityArray validatedEntities = + validateSearchUrns( + opContext, + lineageSearchResult.getEntities(), + 
LineageSearchEntity::getEntity, + entityService, + true, + true) + .collect(Collectors.toCollection(LineageSearchEntityArray::new)); + validatedLineageSearchResult.setEntities(validatedEntities); + + log.debug("Returning validated lineage search results"); + return validatedLineageSearchResult; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(ValidationUtils.class, "validateLineageResult")); } public static EntityLineageResult validateEntityLineageResult( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/event/EventProducer.java b/metadata-io/src/main/java/com/linkedin/metadata/event/EventProducer.java index a809c7f9a3e31b..15017231eee907 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/event/EventProducer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/event/EventProducer.java @@ -1,19 +1,28 @@ package com.linkedin.metadata.event; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.SetMode; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.DataHubUpgradeHistoryEvent; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.PlatformEvent; -import io.opentelemetry.extension.annotations.WithSpan; +import io.datahubproject.metadata.context.OperationContext; +import io.opentelemetry.instrumentation.annotations.WithSpan; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; /** Interface implemented by producers of {@link com.linkedin.mxe.MetadataAuditEvent}s. 
*/ -public interface EventProducer { +@Slf4j +public abstract class EventProducer { /** * Produces a {@link com.linkedin.mxe.MetadataChangeLog} from a new & previous aspect. @@ -23,22 +32,85 @@ public interface EventProducer { * @param metadataChangeLog metadata change log to push into MCL kafka topic * @return A {@link Future} object that reports when the message has been produced. */ - Future produceMetadataChangeLog( + public Future produceMetadataChangeLog( + @Nonnull OperationContext opContext, + @Nonnull final Urn urn, + @Nonnull AspectSpec aspectSpec, + @Nonnull final MetadataChangeLog metadataChangeLog) { + metadataChangeLog.setSystemMetadata( + opContext.withProducerTrace( + "produceMetadataChangeLog", + metadataChangeLog.getSystemMetadata(), + getMetadataChangeLogTopicName(aspectSpec)), + SetMode.IGNORE_NULL); + return produceMetadataChangeLog(urn, aspectSpec, metadataChangeLog); + } + + public abstract Future produceMetadataChangeLog( @Nonnull final Urn urn, @Nonnull AspectSpec aspectSpec, @Nonnull final MetadataChangeLog metadataChangeLog); + public abstract String getMetadataChangeLogTopicName(@Nonnull AspectSpec aspectSpec); + /** * Produces a {@link com.linkedin.mxe.MetadataChangeProposal} as an async update to an entity * * @param urn the urn associated with the change proposal. - * @param metadataChangeProposal metadata change proposal to push into MCP kafka topic. + * @param item Item which includes the metadata change proposal to push into MCP kafka topic. * @return A {@link Future} object that reports when the message has been produced. 
*/ + public Future produceMetadataChangeProposal( + @Nonnull OperationContext opContext, @Nonnull final Urn urn, @Nonnull MCPItem item) { + item.setSystemMetadata( + opContext.withProducerTrace( + "produceMetadataChangeProposal", + item.getSystemMetadata(), + getMetadataChangeProposalTopicName())); + return produceMetadataChangeProposal(urn, item.getMetadataChangeProposal()); + } + @WithSpan - Future produceMetadataChangeProposal( + public abstract Future produceMetadataChangeProposal( @Nonnull final Urn urn, @Nonnull MetadataChangeProposal metadataChangeProposal); + public abstract String getMetadataChangeProposalTopicName(); + + public Future produceFailedMetadataChangeProposalAsync( + @Nonnull OperationContext opContext, + @Nonnull MCPItem item, + @Nonnull Set throwables) { + return produceFailedMetadataChangeProposalAsync( + opContext, item.getMetadataChangeProposal(), throwables); + } + + public void produceFailedMetadataChangeProposal( + @Nonnull OperationContext opContext, + @Nonnull List mcps, + @Nonnull Throwable throwable) { + List> futures = + mcps.stream() + .map( + event -> + produceFailedMetadataChangeProposalAsync(opContext, event, Set.of(throwable))) + .collect(Collectors.toList()); + + futures.forEach( + f -> { + try { + f.get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + } + + @WithSpan + public abstract Future produceFailedMetadataChangeProposalAsync( + @Nonnull OperationContext opContext, + @Nonnull MetadataChangeProposal mcp, + @Nonnull Set throwables); + /** * Produces a generic platform "event". * @@ -50,14 +122,16 @@ Future produceMetadataChangeProposal( * system event. * @return A {@link Future} object that reports when the message has been produced. 
*/ - Future producePlatformEvent( + public abstract Future producePlatformEvent( @Nonnull String name, @Nullable String key, @Nonnull PlatformEvent payload); + public abstract String getPlatformEventTopicName(); + /** * Creates an entry on the history log of when the indices were last rebuilt with the latest * configuration. * * @param event the history event to send to the DataHub Upgrade history topic */ - void produceDataHubUpgradeHistoryEvent(@Nonnull DataHubUpgradeHistoryEvent event); + public abstract void produceDataHubUpgradeHistoryEvent(@Nonnull DataHubUpgradeHistoryEvent event); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index a801cab81c952f..30a4a1d878995c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -3,7 +3,6 @@ import static com.linkedin.metadata.aspect.models.graph.Edge.*; import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.*; -import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -36,7 +35,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -135,7 +134,10 @@ private static void addFilterToQueryBuilder( } private SearchResponse executeLineageSearchQuery( - @Nonnull final QueryBuilder query, final int offset, final int count) { + @Nonnull OperationContext opContext, + @Nonnull 
final QueryBuilder query, + final int offset, + final int count) { SearchRequest searchRequest = new SearchRequest(); SearchSourceBuilder searchSourceBuilder = sharedSourceBuilder(query, offset, count); @@ -144,13 +146,19 @@ private SearchResponse executeLineageSearchQuery( searchRequest.indices(indexConvention.getIndexName(INDEX_NAME)); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esQuery").time()) { - MetricUtils.counter(this.getClass(), SEARCH_EXECUTIONS_METRIC).inc(); - return client.search(searchRequest, RequestOptions.DEFAULT); - } catch (Exception e) { - log.error("Search query failed", e); - throw new ESQueryException("Search query failed:", e); - } + return opContext.withSpan( + "esQuery", + () -> { + try { + MetricUtils.counter(this.getClass(), SEARCH_EXECUTIONS_METRIC).inc(); + return client.search(searchRequest, RequestOptions.DEFAULT); + } catch (Exception e) { + log.error("Search query failed", e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "esQuery")); } private SearchSourceBuilder sharedSourceBuilder( @@ -168,6 +176,7 @@ private SearchSourceBuilder sharedSourceBuilder( } private SearchResponse executeGroupByLineageSearchQuery( + @Nonnull final OperationContext opContext, @Nonnull final QueryBuilder query, final int offset, final int count, @@ -232,14 +241,19 @@ private SearchResponse executeGroupByLineageSearchQuery( searchRequest.source(searchSourceBuilder); searchRequest.indices(indexConvention.getIndexName(INDEX_NAME)); - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "esLineageGroupByQuery").time()) { - MetricUtils.counter(this.getClass(), SEARCH_EXECUTIONS_METRIC).inc(); - return client.search(searchRequest, RequestOptions.DEFAULT); - } catch (Exception e) { - log.error("Search query failed", e); - throw new ESQueryException("Search query failed:", e); - } + return opContext.withSpan( + "esLineageGroupByQuery", + () 
-> { + try { + MetricUtils.counter(this.getClass(), SEARCH_EXECUTIONS_METRIC).inc(); + return client.search(searchRequest, RequestOptions.DEFAULT); + } catch (Exception e) { + log.error("Search query failed", e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "esLineageGroupByQuery")); } private static BoolQueryBuilder getAggregationFilter( @@ -289,7 +303,7 @@ public SearchResponse getSearchResponse( relationshipTypes, relationshipFilter); - return executeLineageSearchQuery(finalQuery, offset, count); + return executeLineageSearchQuery(opContext, finalQuery, offset, count); } public static BoolQueryBuilder buildQuery( @@ -664,7 +678,7 @@ private List getLineageRelationships( if (lineageFlags != null && lineageFlags.getEntitiesExploredPerHopLimit() != null) { response = executeGroupByLineageSearchQuery( - finalQuery, 0, lineageFlags.getEntitiesExploredPerHopLimit(), validEdges); + opContext, finalQuery, 0, lineageFlags.getEntitiesExploredPerHopLimit(), validEdges); return extractRelationshipsGroupByQuery( entityUrnSet, response, @@ -676,7 +690,9 @@ private List getLineageRelationships( existingPaths, exploreMultiplePaths); } else { - response = executeLineageSearchQuery(finalQuery, 0, graphQueryConfiguration.getMaxResult()); + response = + executeLineageSearchQuery( + opContext, finalQuery, 0, graphQueryConfiguration.getMaxResult()); return extractRelationships( entityUrnSet, response, @@ -1378,10 +1394,11 @@ public SearchResponse getSearchResponse( relationshipTypes, relationshipFilter); - return executeScrollSearchQuery(finalQuery, sortCriteria, scrollId, count); + return executeScrollSearchQuery(opContext, finalQuery, sortCriteria, scrollId, count); } private SearchResponse executeScrollSearchQuery( + @Nonnull final OperationContext opContext, @Nonnull final QueryBuilder query, @Nonnull List sortCriteria, @Nullable String scrollId, @@ -1405,13 +1422,19 @@ private SearchResponse 
executeScrollSearchQuery( searchRequest.indices(indexConvention.getIndexName(INDEX_NAME)); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esQuery").time()) { - MetricUtils.counter(this.getClass(), SEARCH_EXECUTIONS_METRIC).inc(); - return client.search(searchRequest, RequestOptions.DEFAULT); - } catch (Exception e) { - log.error("Search query failed", e); - throw new ESQueryException("Search query failed:", e); - } + return opContext.withSpan( + "esQuery", + () -> { + try { + MetricUtils.counter(this.getClass(), SEARCH_EXECUTIONS_METRIC).inc(); + return client.search(searchRequest, RequestOptions.DEFAULT); + } catch (Exception e) { + log.error("Search query failed", e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "esQuery")); } private static void applyExcludeSoftDelete( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index 1068fae9478e1b..6fd741d30062c5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -39,7 +39,7 @@ import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index ef748ebd232789..42f241186d2a34 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.graph.neo4j; -import com.codahale.metrics.Timer; import com.datahub.util.Statement; import com.datahub.util.exception.RetryLimitReached; import com.google.common.annotations.VisibleForTesting; @@ -66,26 +65,32 @@ public class Neo4jGraphService implements GraphService { private static final int MAX_TRANSACTION_RETRY = 3; - private final LineageRegistry _lineageRegistry; - private final Driver _driver; - private SessionConfig _sessionConfig; + private final LineageRegistry lineageRegistry; + private final Driver driver; + private final OperationContext systemOperationContext; + private SessionConfig sessionConfig; - public Neo4jGraphService(@Nonnull LineageRegistry lineageRegistry, @Nonnull Driver driver) { - this(lineageRegistry, driver, SessionConfig.defaultConfig()); + public Neo4jGraphService( + @Nonnull final OperationContext systemOperationContext, + @Nonnull LineageRegistry lineageRegistry, + @Nonnull Driver driver) { + this(systemOperationContext, lineageRegistry, driver, SessionConfig.defaultConfig()); } public Neo4jGraphService( + @Nonnull final OperationContext systemOperationContext, @Nonnull LineageRegistry lineageRegistry, @Nonnull Driver driver, @Nonnull SessionConfig sessionConfig) { - this._lineageRegistry = lineageRegistry; - this._driver = driver; - this._sessionConfig = sessionConfig; + this.systemOperationContext = systemOperationContext; + this.lineageRegistry = lineageRegistry; + this.driver = driver; + this.sessionConfig = sessionConfig; } @Override public LineageRegistry getLineageRegistry() { - return _lineageRegistry; + return lineageRegistry; } @Override @@ -329,7 +334,7 @@ private String getPathFindingRelationshipFilter( final var filterComponents = new HashSet(); for (final var entityName : entityNames) { if (direction != 
null) { - for (final var edgeInfo : _lineageRegistry.getLineageRelationships(entityName, direction)) { + for (final var edgeInfo : lineageRegistry.getLineageRelationships(entityName, direction)) { final var type = edgeInfo.getType(); if (edgeInfo.getDirection() == RelationshipDirection.INCOMING) { filterComponents.add("<" + type); @@ -342,7 +347,7 @@ private String getPathFindingRelationshipFilter( for (final var direction1 : List.of(LineageDirection.UPSTREAM, LineageDirection.DOWNSTREAM)) { for (final var edgeInfo : - _lineageRegistry.getLineageRelationships(entityName, direction1)) { + lineageRegistry.getLineageRelationships(entityName, direction1)) { filterComponents.add(edgeInfo.getType()); } } @@ -736,7 +741,7 @@ private ExecutionResult executeStatements(@Nonnull List statements) { final StopWatch stopWatch = new StopWatch(); stopWatch.start(); int retry = 0; - try (final Session session = _driver.session(_sessionConfig)) { + try (final Session session = driver.session(sessionConfig)) { for (retry = 0; retry <= MAX_TRANSACTION_RETRY; retry++) { try { session.executeWrite( @@ -773,9 +778,11 @@ private ExecutionResult executeStatements(@Nonnull List statements) { @Nonnull private Result runQuery(@Nonnull Statement statement) { log.debug(String.format("Running Neo4j query %s", statement.toString())); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "runQuery").time()) { - return _driver.session(_sessionConfig).run(statement.getCommandText(), statement.getParams()); - } + return systemOperationContext.withSpan( + "runQuery", + () -> driver.session(sessionConfig).run(statement.getCommandText(), statement.getParams()), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "runQuery")); } // Returns "key:value" String, if value is not primitive, then use toString() and double quote it diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java 
b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java index fea3fafdc845ad..565de23aafb92c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.recommendation.candidatesource; -import com.codahale.metrics.Timer; import com.datahub.authorization.config.ViewAuthorizationConfiguration; import com.datahub.util.exception.ESQueryException; import com.google.common.collect.ImmutableSet; @@ -17,7 +16,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.io.IOException; import java.util.List; import java.util.Optional; @@ -105,22 +104,29 @@ public List getRecommendations( @Nonnull RecommendationRequestContext requestContext, @Nullable Filter filter) { SearchRequest searchRequest = buildSearchRequest(opContext); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getMostPopular").time()) { - final SearchResponse searchResponse = - _searchClient.search(searchRequest, RequestOptions.DEFAULT); - // extract results - ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); - List bucketUrns = - parsedTerms.getBuckets().stream() - .map(MultiBucketsAggregation.Bucket::getKeyAsString) - .collect(Collectors.toList()); - return buildContent(opContext, bucketUrns, _entityService) - .limit(MAX_CONTENT) - .collect(Collectors.toList()); - } catch (Exception e) { - log.error("Search query to get most popular entities failed", e); - throw new ESQueryException("Search query failed:", e); - } + + return opContext.withSpan( + 
"getMostPopular", + () -> { + try { + final SearchResponse searchResponse = + _searchClient.search(searchRequest, RequestOptions.DEFAULT); + // extract results + ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); + List bucketUrns = + parsedTerms.getBuckets().stream() + .map(MultiBucketsAggregation.Bucket::getKeyAsString) + .collect(Collectors.toList()); + return buildContent(opContext, bucketUrns, _entityService) + .limit(MAX_CONTENT) + .collect(Collectors.toList()); + } catch (Exception e) { + log.error("Search query to get most popular entities failed", e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getMostPopular")); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java index afdce0d7145133..e03293c11b36c2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.recommendation.candidatesource; -import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -18,7 +17,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.io.IOException; import java.util.List; import java.util.Set; @@ -107,22 +106,29 @@ public List getRecommendations( SearchRequest searchRequest = buildSearchRequest( 
opContext.getSessionActorContext().getActorUrn(), opContext.getAspectRetriever()); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getRecentlyEdited").time()) { - final SearchResponse searchResponse = - _searchClient.search(searchRequest, RequestOptions.DEFAULT); - // extract results - ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); - List bucketUrns = - parsedTerms.getBuckets().stream() - .map(MultiBucketsAggregation.Bucket::getKeyAsString) - .collect(Collectors.toList()); - return buildContent(opContext, bucketUrns, _entityService) - .limit(MAX_CONTENT) - .collect(Collectors.toList()); - } catch (Exception e) { - log.error("Search query to get most recently edited entities failed", e); - throw new ESQueryException("Search query failed:", e); - } + + return opContext.withSpan( + "getRecentlyEdited", + () -> { + try { + final SearchResponse searchResponse = + _searchClient.search(searchRequest, RequestOptions.DEFAULT); + // extract results + ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); + List bucketUrns = + parsedTerms.getBuckets().stream() + .map(MultiBucketsAggregation.Bucket::getKeyAsString) + .collect(Collectors.toList()); + return buildContent(opContext, bucketUrns, _entityService) + .limit(MAX_CONTENT) + .collect(Collectors.toList()); + } catch (Exception e) { + log.error("Search query to get most recently edited entities failed", e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getRecentlyEdited")); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java index f282470193ae5e..ea3a80c5f60381 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java 
+++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.recommendation.candidatesource; -import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -18,7 +17,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.io.IOException; import java.util.List; import java.util.Set; @@ -107,22 +106,29 @@ public List getRecommendations( SearchRequest searchRequest = buildSearchRequest( opContext.getSessionActorContext().getActorUrn(), opContext.getAspectRetriever()); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getRecentlyViewed").time()) { - final SearchResponse searchResponse = - _searchClient.search(searchRequest, RequestOptions.DEFAULT); - // extract results - ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); - List bucketUrns = - parsedTerms.getBuckets().stream() - .map(MultiBucketsAggregation.Bucket::getKeyAsString) - .collect(Collectors.toList()); - return buildContent(opContext, bucketUrns, _entityService) - .limit(MAX_CONTENT) - .collect(Collectors.toList()); - } catch (Exception e) { - log.error("Search query to get most recently viewed entities failed", e); - throw new ESQueryException("Search query failed:", e); - } + + return opContext.withSpan( + "getRecentlyViewed", + () -> { + try { + final SearchResponse searchResponse = + _searchClient.search(searchRequest, RequestOptions.DEFAULT); + // extract results + ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); + List bucketUrns = + 
parsedTerms.getBuckets().stream() + .map(MultiBucketsAggregation.Bucket::getKeyAsString) + .collect(Collectors.toList()); + return buildContent(opContext, bucketUrns, _entityService) + .limit(MAX_CONTENT) + .collect(Collectors.toList()); + } catch (Exception e) { + log.error("Search query to get most recently viewed entities failed", e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getRecentlyViewed")); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 67ebdf8882b80c..f77b5097db80c9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -33,7 +33,7 @@ import com.linkedin.metadata.search.utils.FilterUtils; import com.linkedin.metadata.search.utils.SearchUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Collections; import java.util.HashMap; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java index ecded1bb9c3846..731517ba3290f1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java @@ -2,7 +2,6 @@ import static com.linkedin.metadata.utils.SearchUtil.*; -import com.codahale.metrics.Timer; import com.linkedin.data.template.LongMap; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -215,20 +214,18 @@ public SearchResult searchAcrossEntities( */ 
public List getEntitiesToSearch( @Nonnull OperationContext opContext, @Nonnull Collection inputEntities, int size) { - List nonEmptyEntities; List lowercaseEntities = inputEntities.stream().map(String::toLowerCase).collect(Collectors.toList()); if (lowercaseEntities.isEmpty()) { - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "getNonEmptyEntities").time()) { - nonEmptyEntities = _entityDocCountCache.getNonEmptyEntities(opContext); - } - } else { - nonEmptyEntities = lowercaseEntities; + return opContext.withSpan( + "getNonEmptyEntities", + () -> _entityDocCountCache.getNonEmptyEntities(opContext), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getNonEmptyEntities")); } - return nonEmptyEntities; + return lowercaseEntities; } /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/cache/CacheableSearcher.java b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/CacheableSearcher.java index 28efa29c9fffa2..28a308c7f16d23 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/cache/CacheableSearcher.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/CacheableSearcher.java @@ -1,13 +1,14 @@ package com.linkedin.metadata.search.cache; import static com.datahub.util.RecordUtils.*; +import static com.linkedin.metadata.utils.metrics.MetricUtils.CACHE_HIT_ATTR; -import com.codahale.metrics.Timer; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; +import io.opentelemetry.api.trace.Span; import java.io.Serializable; import java.util.ArrayList; import java.util.List; @@ -42,41 +43,46 @@ public static class QueryPagination implements Serializable { * corresponds to) */ public SearchResult getSearchResults(@Nonnull OperationContext opContext, int from, int size) 
{ - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getSearchResults").time()) { - int resultsSoFar = 0; - int batchId = 0; - boolean foundStart = false; - List resultEntities = new ArrayList<>(); - SearchResult batchedResult; - // Use do-while to make sure we run at least one batch to fetch metadata - do { - batchedResult = getBatch(opContext, batchId); - int currentBatchSize = batchedResult.getEntities().size(); - // If the number of results in this batch is 0, no need to continue - if (currentBatchSize == 0) { - break; - } - if (resultsSoFar + currentBatchSize > from) { - int startInBatch = foundStart ? 0 : from - resultsSoFar; - int endInBatch = Math.min(currentBatchSize, startInBatch + size - resultEntities.size()); - resultEntities.addAll(batchedResult.getEntities().subList(startInBatch, endInBatch)); - foundStart = true; - } - // If current batch is smaller than the requested batch size, the next batch will return - // empty. - if (currentBatchSize < batchSize) { - break; - } - resultsSoFar += currentBatchSize; - batchId++; - } while (resultsSoFar < from + size); - return new SearchResult() - .setEntities(new SearchEntityArray(resultEntities)) - .setMetadata(batchedResult.getMetadata()) - .setFrom(from) - .setPageSize(size) - .setNumEntities(batchedResult.getNumEntities()); - } + return opContext.withSpan( + "getSearchResults", + () -> { + int resultsSoFar = 0; + int batchId = 0; + boolean foundStart = false; + List resultEntities = new ArrayList<>(); + SearchResult batchedResult; + // Use do-while to make sure we run at least one batch to fetch metadata + do { + batchedResult = getBatch(opContext, batchId); + int currentBatchSize = batchedResult.getEntities().size(); + // If the number of results in this batch is 0, no need to continue + if (currentBatchSize == 0) { + break; + } + if (resultsSoFar + currentBatchSize > from) { + int startInBatch = foundStart ? 
0 : from - resultsSoFar; + int endInBatch = + Math.min(currentBatchSize, startInBatch + size - resultEntities.size()); + resultEntities.addAll(batchedResult.getEntities().subList(startInBatch, endInBatch)); + foundStart = true; + } + // If current batch is smaller than the requested batch size, the next batch will return + // empty. + if (currentBatchSize < batchSize) { + break; + } + resultsSoFar += currentBatchSize; + batchId++; + } while (resultsSoFar < from + size); + return new SearchResult() + .setEntities(new SearchEntityArray(resultEntities)) + .setMetadata(batchedResult.getMetadata()) + .setFrom(from) + .setPageSize(size) + .setNumEntities(batchedResult.getNumEntities()); + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getSearchResults")); } private QueryPagination getBatchQuerySize(int batchId) { @@ -84,37 +90,41 @@ private QueryPagination getBatchQuerySize(int batchId) { } private SearchResult getBatch(@Nonnull OperationContext opContext, int batchId) { - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getBatch").time()) { - QueryPagination batch = getBatchQuerySize(batchId); - SearchResult result; - if (enableCache) { - K cacheKey = cacheKeyGenerator.apply(batch); - if ((opContext.getSearchContext().getSearchFlags().isSkipCache() == null - || !opContext.getSearchContext().getSearchFlags().isSkipCache())) { - try (Timer.Context ignored2 = - MetricUtils.timer(this.getClass(), "getBatch_cache").time()) { - Timer.Context cacheAccess = - MetricUtils.timer(this.getClass(), "getBatch_cache_access").time(); - String json = cache.get(cacheKey, String.class); - result = json != null ? 
toRecordTemplate(SearchResult.class, json) : null; - cacheAccess.stop(); - if (result == null) { - Timer.Context cacheMiss = - MetricUtils.timer(this.getClass(), "getBatch_cache_miss").time(); + + return opContext.withSpan( + "getBatch", + () -> { + QueryPagination batch = getBatchQuerySize(batchId); + SearchResult result; + if (enableCache) { + K cacheKey = cacheKeyGenerator.apply(batch); + if ((opContext.getSearchContext().getSearchFlags().isSkipCache() == null + || !opContext.getSearchContext().getSearchFlags().isSkipCache())) { + + String json = cache.get(cacheKey, String.class); + result = json != null ? toRecordTemplate(SearchResult.class, json) : null; + + if (result == null) { + Span.current().setAttribute(CACHE_HIT_ATTR, false); + result = searcher.apply(batch); + cache.put(cacheKey, toJsonString(result)); + MetricUtils.counter(this.getClass(), "getBatch_cache_miss_count").inc(); + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, true); + } + + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, false); result = searcher.apply(batch); cache.put(cacheKey, toJsonString(result)); - cacheMiss.stop(); - MetricUtils.counter(this.getClass(), "getBatch_cache_miss_count").inc(); } + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, false); + result = searcher.apply(batch); } - } else { - result = searcher.apply(batch); - cache.put(cacheKey, toJsonString(result)); - } - } else { - result = searcher.apply(batch); - } - return result; - } + return result; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getBatch")); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java index 1efaeb2b12f45f..7cea787583b54d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java @@ 
-7,7 +7,7 @@ import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.utils.ConcurrencyUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index cb062e0e3f4483..7272809bb1221a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -2,8 +2,8 @@ import static com.datahub.util.RecordUtils.toJsonString; import static com.datahub.util.RecordUtils.toRecordTemplate; +import static com.linkedin.metadata.utils.metrics.MetricUtils.CACHE_HIT_ATTR; -import com.codahale.metrics.Timer; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.SearchFlags; @@ -15,6 +15,7 @@ import com.linkedin.metadata.search.cache.CacheableSearcher; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; +import io.opentelemetry.api.trace.Span; import java.util.List; import java.util.Optional; import javax.annotation.Nonnull; @@ -184,40 +185,42 @@ public AutoCompleteResult getCachedAutoCompleteResults( @Nullable String field, @Nullable Filter filters, int limit) { - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "getCachedAutoCompleteResults").time()) { - Cache cache = cacheManager.getCache(ENTITY_SEARCH_SERVICE_AUTOCOMPLETE_CACHE_NAME); - AutoCompleteResult result; - if (enableCache(opContext.getSearchContext().getSearchFlags())) { - 
try (Timer.Context ignored2 = - MetricUtils.timer(this.getClass(), "getCachedAutoCompleteResults_cache").time()) { - Timer.Context cacheAccess = - MetricUtils.timer(this.getClass(), "autocomplete_cache_access").time(); - Object cacheKey = - Sextet.with( - opContext.getSearchContextId(), - entityName, - input, - field, - filters != null ? toJsonString(filters) : null, - limit); - String json = cache.get(cacheKey, String.class); - result = json != null ? toRecordTemplate(AutoCompleteResult.class, json) : null; - cacheAccess.stop(); - if (result == null) { - Timer.Context cacheMiss = - MetricUtils.timer(this.getClass(), "autocomplete_cache_miss").time(); + + return opContext.withSpan( + "getAutoCompleteResults", + () -> { + Cache cache = cacheManager.getCache(ENTITY_SEARCH_SERVICE_AUTOCOMPLETE_CACHE_NAME); + AutoCompleteResult result; + if (enableCache(opContext.getSearchContext().getSearchFlags())) { + + Object cacheKey = + Sextet.with( + opContext.getSearchContextId(), + entityName, + input, + field, + filters != null ? toJsonString(filters) : null, + limit); + String json = cache.get(cacheKey, String.class); + result = json != null ? 
toRecordTemplate(AutoCompleteResult.class, json) : null; + + if (result == null) { + result = + getRawAutoCompleteResults(opContext, entityName, input, field, filters, limit); + cache.put(cacheKey, toJsonString(result)); + Span.current().setAttribute(CACHE_HIT_ATTR, false); + MetricUtils.counter(this.getClass(), "autocomplete_cache_miss_count").inc(); + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, true); + } + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, false); result = getRawAutoCompleteResults(opContext, entityName, input, field, filters, limit); - cache.put(cacheKey, toJsonString(result)); - cacheMiss.stop(); - MetricUtils.counter(this.getClass(), "autocomplete_cache_miss_count").inc(); } - } - } else { - result = getRawAutoCompleteResults(opContext, entityName, input, field, filters, limit); - } - return result; - } + return result; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getCachedAutoCompleteResults")); } /** Returns cached browse results. */ @@ -228,40 +231,40 @@ public BrowseResult getCachedBrowseResults( @Nullable Filter filters, int from, int size) { - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "getCachedBrowseResults").time()) { - Cache cache = cacheManager.getCache(ENTITY_SEARCH_SERVICE_BROWSE_CACHE_NAME); - BrowseResult result; - if (enableCache(opContext.getSearchContext().getSearchFlags())) { - try (Timer.Context ignored2 = - MetricUtils.timer(this.getClass(), "getCachedBrowseResults_cache").time()) { - Timer.Context cacheAccess = - MetricUtils.timer(this.getClass(), "browse_cache_access").time(); - Object cacheKey = - Sextet.with( - opContext.getSearchContextId(), - entityName, - path, - filters != null ? toJsonString(filters) : null, - from, - size); - String json = cache.get(cacheKey, String.class); - result = json != null ? 
toRecordTemplate(BrowseResult.class, json) : null; - cacheAccess.stop(); - if (result == null) { - Timer.Context cacheMiss = - MetricUtils.timer(this.getClass(), "browse_cache_miss").time(); + + return opContext.withSpan( + "getBrowseResults", + () -> { + Cache cache = cacheManager.getCache(ENTITY_SEARCH_SERVICE_BROWSE_CACHE_NAME); + BrowseResult result; + if (enableCache(opContext.getSearchContext().getSearchFlags())) { + Object cacheKey = + Sextet.with( + opContext.getSearchContextId(), + entityName, + path, + filters != null ? toJsonString(filters) : null, + from, + size); + String json = cache.get(cacheKey, String.class); + result = json != null ? toRecordTemplate(BrowseResult.class, json) : null; + + if (result == null) { + result = getRawBrowseResults(opContext, entityName, path, filters, from, size); + cache.put(cacheKey, toJsonString(result)); + Span.current().setAttribute(CACHE_HIT_ATTR, false); + MetricUtils.counter(this.getClass(), "browse_cache_miss_count").inc(); + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, true); + } + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, false); result = getRawBrowseResults(opContext, entityName, path, filters, from, size); - cache.put(cacheKey, toJsonString(result)); - cacheMiss.stop(); - MetricUtils.counter(this.getClass(), "browse_cache_miss_count").inc(); } - } - } else { - result = getRawBrowseResults(opContext, entityName, path, filters, from, size); - } - return result; - } + return result; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getCachedBrowseResults")); } /** Returns cached scroll results. 
*/ @@ -274,62 +277,67 @@ public ScrollResult getCachedScrollResults( @Nullable String scrollId, @Nullable String keepAlive, int size) { - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "getCachedScrollResults").time()) { - boolean isFullText = - Boolean.TRUE.equals( - Optional.ofNullable(opContext.getSearchContext().getSearchFlags()) - .orElse(new SearchFlags()) - .isFulltext()); - Cache cache = cacheManager.getCache(ENTITY_SEARCH_SERVICE_SCROLL_CACHE_NAME); - ScrollResult result; - if (enableCache(opContext.getSearchContext().getSearchFlags())) { - Timer.Context cacheAccess = - MetricUtils.timer(this.getClass(), "scroll_cache_access").time(); - Object cacheKey = - Septet.with( - opContext.getSearchContextId(), - entities, - query, - filters != null ? toJsonString(filters) : null, - CollectionUtils.isNotEmpty(sortCriteria) ? toJsonString(sortCriteria) : null, - scrollId, - size); - String json = cache.get(cacheKey, String.class); - result = json != null ? toRecordTemplate(ScrollResult.class, json) : null; - cacheAccess.stop(); - if (result == null) { - Timer.Context cacheMiss = MetricUtils.timer(this.getClass(), "scroll_cache_miss").time(); - result = - getRawScrollResults( - opContext, - entities, - query, - filters, - sortCriteria, - scrollId, - keepAlive, - size, - isFullText); - cache.put(cacheKey, toJsonString(result)); - cacheMiss.stop(); - MetricUtils.counter(this.getClass(), "scroll_cache_miss_count").inc(); - } - } else { - result = - getRawScrollResults( - opContext, - entities, - query, - filters, - sortCriteria, - scrollId, - keepAlive, - size, - isFullText); - } - return result; - } + + return opContext.withSpan( + "getScrollResults", + () -> { + boolean isFullText = + Boolean.TRUE.equals( + Optional.ofNullable(opContext.getSearchContext().getSearchFlags()) + .orElse(new SearchFlags()) + .isFulltext()); + Cache cache = cacheManager.getCache(ENTITY_SEARCH_SERVICE_SCROLL_CACHE_NAME); + ScrollResult result; + if 
(enableCache(opContext.getSearchContext().getSearchFlags())) { + + Object cacheKey = + Septet.with( + opContext.getSearchContextId(), + entities, + query, + filters != null ? toJsonString(filters) : null, + CollectionUtils.isNotEmpty(sortCriteria) ? toJsonString(sortCriteria) : null, + scrollId, + size); + String json = cache.get(cacheKey, String.class); + result = json != null ? toRecordTemplate(ScrollResult.class, json) : null; + + if (result == null) { + result = + getRawScrollResults( + opContext, + entities, + query, + filters, + sortCriteria, + scrollId, + keepAlive, + size, + isFullText); + cache.put(cacheKey, toJsonString(result)); + Span.current().setAttribute(CACHE_HIT_ATTR, false); + MetricUtils.counter(this.getClass(), "scroll_cache_miss_count").inc(); + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, true); + } + } else { + Span.current().setAttribute(CACHE_HIT_ATTR, false); + result = + getRawScrollResults( + opContext, + entities, + query, + filters, + sortCriteria, + scrollId, + keepAlive, + size, + isFullText); + } + return result; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getCachedScrollResults")); } /** Executes the expensive search query using the {@link EntitySearchService} */ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 35f133cc794f2a..3a84d1cb2de489 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -3,7 +3,6 @@ import static com.linkedin.metadata.search.utils.ESUtils.applyDefaultSearchFilters; import static com.linkedin.metadata.search.utils.SearchUtils.applyDefaultSearchFlags; -import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; import 
com.google.common.annotations.VisibleForTesting; import com.linkedin.common.urn.Urn; @@ -30,6 +29,7 @@ import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; @@ -139,13 +139,21 @@ public BrowseResult browse( .getIndexConvention() .getIndexName(opContext.getEntityRegistry().getEntitySpec(entityName)); - final SearchResponse groupsResponse; - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esGroupSearch").time()) { - groupsResponse = - client.search( - constructGroupsSearchRequest(finalOpContext, indexName, path, requestMap), - RequestOptions.DEFAULT); - } + final SearchResponse groupsResponse = + opContext.withSpan( + "esGroupSearch", + () -> { + try { + return client.search( + constructGroupsSearchRequest(finalOpContext, indexName, path, requestMap), + RequestOptions.DEFAULT); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "esGroupSearch")); + final BrowseGroupsResult browseGroupsResult = extractGroupsResponse(groupsResponse, path, from, size); final int numGroups = browseGroupsResult.getTotalGroups(); @@ -156,14 +164,22 @@ public BrowseResult browse( // if numGroups <= from, we should only return entities int entityFrom = Math.max(from - numGroups, 0); int entitySize = Math.min(Math.max(from + size - numGroups, 0), size); - final SearchResponse entitiesResponse; - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esEntitiesSearch").time()) { - entitiesResponse = - client.search( - constructEntitiesSearchRequest( - finalOpContext, indexName, path, requestMap, entityFrom, entitySize), - RequestOptions.DEFAULT); - } + final SearchResponse entitiesResponse = + opContext.withSpan( + "esEntitiesSearch", + () -> { + try { + 
return client.search( + constructEntitiesSearchRequest( + finalOpContext, indexName, path, requestMap, entityFrom, entitySize), + RequestOptions.DEFAULT); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "esEntitiesSearch")); + final int numEntities = (int) entitiesResponse.getHits().getTotalHits().value; final List browseResultEntityList = extractEntitiesResponse(entitiesResponse, path); @@ -441,19 +457,25 @@ public BrowseResultV2 browseV2( int start, int count) { try { - final SearchResponse groupsResponse; final OperationContext finalOpContext = opContext.withSearchFlags( flags -> applyDefaultSearchFlags(flags, path, DEFAULT_BROWSE_SEARCH_FLAGS)); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esGroupSearch").time()) { - final String finalInput = input.isEmpty() ? "*" : input; - groupsResponse = - client.search( - constructGroupsSearchRequestV2( - finalOpContext, entityName, path, filter, finalInput), - RequestOptions.DEFAULT); - } + final SearchResponse groupsResponse = + opContext.withSpan( + "esGroupSearch", + () -> { + try { + return client.search( + constructGroupsSearchRequestV2( + finalOpContext, entityName, path, filter, input.isEmpty() ? "*" : input), + RequestOptions.DEFAULT); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "esGroupSearch")); final BrowseGroupsResultV2 browseGroupsResult = extractGroupsResponseV2(groupsResponse, path, start, count); @@ -483,19 +505,25 @@ public BrowseResultV2 browseV2( int start, int count) { try { - final SearchResponse groupsResponse; final OperationContext finalOpContext = opContext.withSearchFlags( flags -> applyDefaultSearchFlags(flags, path, DEFAULT_BROWSE_SEARCH_FLAGS)); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esGroupSearch").time()) { - final String finalInput = input.isEmpty() ? 
"*" : input; - groupsResponse = - client.search( - constructGroupsSearchRequestBrowseAcrossEntities( - finalOpContext, entities, path, filter, finalInput), - RequestOptions.DEFAULT); - } + final SearchResponse groupsResponse = + opContext.withSpan( + "esGroupSearch", + () -> { + try { + return client.search( + constructGroupsSearchRequestBrowseAcrossEntities( + finalOpContext, entities, path, filter, input.isEmpty() ? "*" : input), + RequestOptions.DEFAULT); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "esGroupSearch")); final BrowseGroupsResultV2 browseGroupsResult = extractGroupsResponseV2(groupsResponse, path, start, count); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index 2d7db075e676ff..a720e0bf815c26 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -4,7 +4,6 @@ import static com.linkedin.metadata.aspect.patch.template.TemplateUtil.*; import static com.linkedin.metadata.utils.SearchUtil.*; -import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; @@ -31,7 +30,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -114,12 +113,19 @@ public long docCount( filter, 
entitySpec.getSearchableFieldTypes(), queryFilterRewriteChain)); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "docCount").time()) { - return client.count(countRequest, RequestOptions.DEFAULT).getCount(); - } catch (IOException e) { - log.error("Count query failed:" + e.getMessage()); - throw new ESQueryException("Count query failed:", e); - } + + return opContext.withSpan( + "docCount", + () -> { + try { + return client.count(countRequest, RequestOptions.DEFAULT).getCount(); + } catch (IOException e) { + log.error("Count query failed:" + e.getMessage()); + throw new ESQueryException("Count query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "docCount")); } @Nonnull @@ -132,26 +138,33 @@ private SearchResult executeAndExtract( int from, int size) { long id = System.currentTimeMillis(); - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "executeAndExtract_search").time()) { - log.debug("Executing request {}: {}", id, searchRequest); - final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); - // extract results, validated against document model as well - return transformIndexIntoEntityName( - opContext.getSearchContext().getIndexConvention(), - SearchRequestHandler.getBuilder( - opContext.getEntityRegistry(), - entitySpec, - searchConfiguration, - customSearchConfiguration, - queryFilterRewriteChain) - .extractResult(opContext, searchResponse, filter, from, size)); - } catch (Exception e) { - log.error("Search query failed", e); - throw new ESQueryException("Search query failed:", e); - } finally { - log.debug("Returning from request {}.", id); - } + + return opContext.withSpan( + "executeAndExtract_search", + () -> { + try { + log.debug("Executing request {}: {}", id, searchRequest); + final SearchResponse searchResponse = + client.search(searchRequest, RequestOptions.DEFAULT); + // extract results, validated against document model as well + return 
transformIndexIntoEntityName( + opContext.getSearchContext().getIndexConvention(), + SearchRequestHandler.getBuilder( + opContext.getEntityRegistry(), + entitySpec, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) + .extractResult(opContext, searchResponse, filter, from, size)); + } catch (Exception e) { + log.error("Search query failed", e); + throw new ESQueryException("Search query failed:", e); + } finally { + log.debug("Returning from request {}.", id); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "executeAndExtract_search")); } private String transformIndexToken( @@ -234,24 +247,30 @@ private ScrollResult executeAndExtract( @Nullable Filter filter, @Nullable String keepAlive, int size) { - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "executeAndExtract_scroll").time()) { - final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); - // extract results, validated against document model as well - return transformIndexIntoEntityName( - opContext.getSearchContext().getIndexConvention(), - SearchRequestHandler.getBuilder( - opContext.getEntityRegistry(), - entitySpecs, - searchConfiguration, - customSearchConfiguration, - queryFilterRewriteChain) - .extractScrollResult( - opContext, searchResponse, filter, keepAlive, size, supportsPointInTime())); - } catch (Exception e) { - log.error("Search query failed: {}", searchRequest, e); - throw new ESQueryException("Search query failed:", e); - } + return opContext.withSpan( + "executeAndExtract_scroll", + () -> { + try { + final SearchResponse searchResponse = + client.search(searchRequest, RequestOptions.DEFAULT); + // extract results, validated against document model as well + return transformIndexIntoEntityName( + opContext.getSearchContext().getIndexConvention(), + SearchRequestHandler.getBuilder( + opContext.getEntityRegistry(), + entitySpecs, + searchConfiguration, + customSearchConfiguration, + 
queryFilterRewriteChain) + .extractScrollResult( + opContext, searchResponse, filter, keepAlive, size, supportsPointInTime())); + } catch (Exception e) { + log.error("Search query failed: {}", searchRequest, e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "executeAndExtract_scroll")); } /** @@ -279,26 +298,32 @@ public SearchResult search( int size, @Nullable List facets) { final String finalInput = input.isEmpty() ? "*" : input; - Timer.Context searchRequestTimer = MetricUtils.timer(this.getClass(), "searchRequest").time(); List entitySpecs = entityNames.stream() .map(name -> opContext.getEntityRegistry().getEntitySpec(name)) .collect(Collectors.toList()); IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); + // Step 1: construct the query final SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - opContext.getEntityRegistry(), - entitySpecs, - searchConfiguration, - customSearchConfiguration, - queryFilterRewriteChain) - .getSearchRequest( - opContext, finalInput, transformedFilters, sortCriteria, from, size, facets); - searchRequest.indices( - entityNames.stream().map(indexConvention::getEntityIndexName).toArray(String[]::new)); - searchRequestTimer.stop(); + opContext.withSpan( + "searchRequest", + () -> + SearchRequestHandler.getBuilder( + opContext.getEntityRegistry(), + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) + .getSearchRequest( + opContext, finalInput, transformedFilters, sortCriteria, from, size, facets) + .indices( + entityNames.stream() + .map(indexConvention::getEntityIndexName) + .toArray(String[]::new)), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "searchRequest")); if (testLoggingEnabled) { testLog(opContext.getObjectMapper(), searchRequest); @@ -437,15 
+462,21 @@ public Map aggregateByValue( searchRequest.indices(stream.toArray(String[]::new)); } - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "aggregateByValue_search").time()) { - final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); - // extract results, validated against document model as well - return AggregationQueryBuilder.extractAggregationsFromResponse(searchResponse, field); - } catch (Exception e) { - log.error("Aggregation query failed", e); - throw new ESQueryException("Aggregation query failed:", e); - } + return opContext.withSpan( + "aggregateByValue_search", + () -> { + try { + final SearchResponse searchResponse = + client.search(searchRequest, RequestOptions.DEFAULT); + // extract results, validated against document model as well + return AggregationQueryBuilder.extractAggregationsFromResponse(searchResponse, field); + } catch (Exception e) { + log.error("Aggregation query failed", e); + throw new ESQueryException("Aggregation query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "aggregateByValue_search")); } /** @@ -476,33 +507,40 @@ public ScrollResult scroll( IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); String[] indexArray = entities.stream().map(indexConvention::getEntityIndexName).toArray(String[]::new); - Timer.Context scrollRequestTimer = MetricUtils.timer(this.getClass(), "scrollRequest").time(); List entitySpecs = entities.stream() .map(name -> opContext.getEntityRegistry().getEntitySpec(name)) .collect(Collectors.toList()); Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); - // TODO: Align scroll and search using facets - final SearchRequest searchRequest = - getScrollRequest( - opContext, - scrollId, - keepAlive, - indexArray, - size, - transformedFilters, - entitySpecs, - finalInput, - sortCriteria, - null); - // PIT specifies indices in creation so it doesn't 
support specifying indices on the request, so - // we only specify if not using PIT - if (!supportsPointInTime()) { - searchRequest.indices(indexArray); - } - - scrollRequestTimer.stop(); + final SearchRequest searchRequest = + opContext.withSpan( + "scrollRequest", + () -> { + // TODO: Align scroll and search using facets + final SearchRequest req = + getScrollRequest( + opContext, + scrollId, + keepAlive, + indexArray, + size, + transformedFilters, + entitySpecs, + finalInput, + sortCriteria, + null); + + // PIT specifies indices in creation so it doesn't support specifying indices on the + // request, so + // we only specify if not using PIT + if (!supportsPointInTime()) { + req.indices(indexArray); + } + return req; + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "scrollRequest")); if (testLoggingEnabled) { testLog(opContext.getObjectMapper(), searchRequest); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index 8b83439a3008c1..8ff0759b2e41a5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -22,7 +22,7 @@ import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.time.OffsetDateTime; import java.time.format.DateTimeParseException; import java.util.ArrayList; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 01a1e9cb159844..0ecf1a932e4bfd 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -43,7 +43,7 @@ import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 635d4472305c93..c410d2ccefdf24 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -30,6 +30,7 @@ import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; import java.io.UnsupportedEncodingException; @@ -131,13 +132,23 @@ public void handleChangeEvent( Stream.concat(Stream.of(batch), sideEffects).collect(Collectors.toList())) { MetadataChangeLog hookEvent = mclItem.getMetadataChangeLog(); if (UPDATE_CHANGE_TYPES.contains(hookEvent.getChangeType())) { - handleUpdateChangeEvent(opContext, mclItem); + // non-system metadata + handleUpdateChangeEvent(opContext, mclItem, false); + // graph update + updateGraphIndicesService.handleChangeEvent(opContext, event); + // system metadata is last for tracing + 
handleUpdateChangeEvent(opContext, mclItem, true); } else if (hookEvent.getChangeType() == ChangeType.DELETE) { - handleDeleteChangeEvent(opContext, mclItem); + Pair specPair = extractSpecPair(mclItem); + boolean isDeletingKey = isDeletingKey(specPair); + + // non-system metadata + handleNonSystemMetadataDeleteChangeEvent(opContext, specPair, mclItem, isDeletingKey); + // graph update + updateGraphIndicesService.handleChangeEvent(opContext, event); + // system metadata is last for tracing + handleSystemMetadataDeleteChangeEvent(mclItem.getUrn(), specPair, isDeletingKey); } - - // graph update - updateGraphIndicesService.handleChangeEvent(opContext, event); } } catch (IOException e) { throw new RuntimeException(e); @@ -154,7 +165,8 @@ public void handleChangeEvent( * @param event the change event to be processed. */ private void handleUpdateChangeEvent( - @Nonnull OperationContext opContext, @Nonnull final MCLItem event) throws IOException { + @Nonnull OperationContext opContext, @Nonnull final MCLItem event, boolean forSystemMetadata) + throws IOException { final EntitySpec entitySpec = event.getEntitySpec(); final AspectSpec aspectSpec = event.getAspectSpec(); @@ -163,32 +175,34 @@ private void handleUpdateChangeEvent( RecordTemplate aspect = event.getRecordTemplate(); RecordTemplate previousAspect = event.getPreviousRecordTemplate(); - // Step 0. If the aspect is timeseries, add to its timeseries index. - if (aspectSpec.isTimeseries()) { - updateTimeseriesFields( - opContext, - urn.getEntityType(), - event.getAspectName(), - urn, - aspect, - aspectSpec, - event.getSystemMetadata()); - } else { + if (!forSystemMetadata) { + // Step 0. If the aspect is timeseries, add to its timeseries index. + if (aspectSpec.isTimeseries()) { + updateTimeseriesFields( + opContext, + urn.getEntityType(), + event.getAspectName(), + urn, + aspect, + aspectSpec, + event.getSystemMetadata()); + } + + try { + // Step 1. 
Handle StructuredProperties Index Mapping changes + updateIndexMappings(urn, entitySpec, aspectSpec, aspect, previousAspect); + } catch (Exception e) { + log.error("Issue with updating index mappings for structured property change", e); + } + + // Step 2. For all aspects, attempt to update Search + updateSearchService(opContext, event); + } else if (forSystemMetadata && !aspectSpec.isTimeseries()) { // Inject into the System Metadata Index when an aspect is non-timeseries only. // TODO: Verify whether timeseries aspects can be dropped into System Metadata as well // without impacting rollbacks. updateSystemMetadata(event.getSystemMetadata(), urn, aspectSpec, aspect); } - - try { - // Step 1. Handle StructuredProperties Index Mapping changes - updateIndexMappings(urn, entitySpec, aspectSpec, aspect, previousAspect); - } catch (Exception e) { - log.error("Issue with updating index mappings for structured property change", e); - } - - // Step 2. For all aspects, attempt to update Search - updateSearchService(opContext, event); } public void updateIndexMappings( @@ -232,6 +246,25 @@ public void updateIndexMappings( } } + private static Pair extractSpecPair(@Nonnull final MCLItem event) { + final EntitySpec entitySpec = event.getEntitySpec(); + final Urn urn = event.getUrn(); + + AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); + if (aspectSpec == null) { + throw new RuntimeException( + String.format( + "Failed to retrieve Aspect Spec for entity with name %s, aspect with name %s. Cannot update indices for MCL.", + urn.getEntityType(), event.getAspectName())); + } + + return Pair.of(entitySpec, aspectSpec); + } + + private static boolean isDeletingKey(Pair specPair) { + return specPair.getSecond().getName().equals(specPair.getFirst().getKeyAspectName()); + } + /** * This very important method processes {@link MetadataChangeLog} deletion events to cleanup the * Metadata Graph when an aspect or entity is removed. 
@@ -242,28 +275,39 @@ public void updateIndexMappings( *

Note that if an entity's key aspect is deleted, the entire entity will be purged from * search, graph, timeseries, etc. * + * @param opContext operation's context + * @param specPair entity & aspect spec * @param event the change event to be processed. + * @param isDeletingKey whether the key aspect is being deleted */ - private void handleDeleteChangeEvent( - @Nonnull OperationContext opContext, @Nonnull final MCLItem event) { - - final EntitySpec entitySpec = event.getEntitySpec(); - final Urn urn = event.getUrn(); + private void handleNonSystemMetadataDeleteChangeEvent( + @Nonnull OperationContext opContext, + Pair specPair, + @Nonnull final MCLItem event, + boolean isDeletingKey) { - AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); - if (aspectSpec == null) { - throw new RuntimeException( - String.format( - "Failed to retrieve Aspect Spec for entity with name %s, aspect with name %s. Cannot update indices for MCL.", - urn.getEntityType(), event.getAspectName())); + if (!specPair.getSecond().isTimeseries()) { + deleteSearchData( + opContext, + event.getUrn(), + specPair.getFirst().getName(), + specPair.getSecond(), + event.getRecordTemplate(), + isDeletingKey); } + } - RecordTemplate aspect = event.getRecordTemplate(); - Boolean isDeletingKey = event.getAspectName().equals(entitySpec.getKeyAspectName()); - - if (!aspectSpec.isTimeseries()) { - deleteSystemMetadata(urn, aspectSpec, isDeletingKey); - deleteSearchData(opContext, urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey); + /** + * Handle the system metadata separately for tracing + * + * @param urn delete urn + * @param specPair entity & aspect spec + * @param isDeletingKey whether the key aspect is being deleted + */ + private void handleSystemMetadataDeleteChangeEvent( + @Nonnull Urn urn, Pair specPair, boolean isDeletingKey) { + if (!specPair.getSecond().isTimeseries()) { + deleteSystemMetadata(urn, specPair.getSecond(), isDeletingKey); } } diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java index a5c2fb04b5ce39..594b1852740c7d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ESSystemMetadataDAO.java @@ -1,8 +1,11 @@ package com.linkedin.metadata.systemmetadata; +import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.FIELD_ASPECT; +import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.FIELD_URN; import static com.linkedin.metadata.systemmetadata.ElasticSearchSystemMetadataService.INDEX_NAME; import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; @@ -103,8 +106,8 @@ public BulkByScrollResponse deleteByUrn(@Nonnull final String urn) { public BulkByScrollResponse deleteByUrnAspect( @Nonnull final String urn, @Nonnull final String aspect) { BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); - finalQuery.must(QueryBuilders.termQuery("urn", urn)); - finalQuery.must(QueryBuilders.termQuery("aspect", aspect)); + finalQuery.filter(QueryBuilders.termQuery("urn", urn)); + finalQuery.filter(QueryBuilders.termQuery("aspect", aspect)); final Optional deleteResponse = bulkProcessor.deleteByQuery(finalQuery, indexConvention.getIndexName(INDEX_NAME)); @@ -121,7 +124,7 @@ public SearchResponse findByParams( BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); for (String key : searchParams.keySet()) { - finalQuery.must(QueryBuilders.termQuery(key, searchParams.get(key))); + finalQuery.filter(QueryBuilders.termQuery(key, 
searchParams.get(key))); } if (!includeSoftDeleted) { @@ -161,7 +164,7 @@ public SearchResponse findByParams( BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); for (String key : searchParams.keySet()) { - finalQuery.must(QueryBuilders.termQuery(key, searchParams.get(key))); + finalQuery.filter(QueryBuilders.termQuery(key, searchParams.get(key))); } if (!includeSoftDeleted) { @@ -186,6 +189,43 @@ public SearchResponse findByParams( return null; } + public SearchResponse scroll( + BoolQueryBuilder queryBuilder, + boolean includeSoftDeleted, + @Nullable String scrollId, + @Nullable String pitId, + @Nullable String keepAlive, + int size) { + SearchRequest searchRequest = new SearchRequest(); + + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + + if (!includeSoftDeleted) { + queryBuilder.mustNot(QueryBuilders.termQuery("removed", "true")); + } + + Object[] sort = null; + if (scrollId != null) { + SearchAfterWrapper searchAfterWrapper = SearchAfterWrapper.fromScrollId(scrollId); + sort = searchAfterWrapper.getSort(); + } + + searchSourceBuilder.query(queryBuilder); + ESUtils.setSearchAfter(searchSourceBuilder, sort, pitId, keepAlive); + searchSourceBuilder.size(size); + searchSourceBuilder.sort(FIELD_URN).sort(FIELD_ASPECT); + + searchRequest.source(searchSourceBuilder); + searchRequest.indices(indexConvention.getIndexName(INDEX_NAME)); + + try { + return client.search(searchRequest, RequestOptions.DEFAULT); + } catch (IOException e) { + log.error("Error while searching by params.", e); + } + return null; + } + public SearchResponse findByRegistry( String registryName, String registryVersion, boolean includeSoftDeleted, int from, int size) { Map params = new HashMap<>(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java index fe79ba75cb1d14..dfef592b7be943 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java @@ -1,9 +1,12 @@ package com.linkedin.metadata.systemmetadata; +import static io.datahubproject.metadata.context.TraceContext.TELEMETRY_TRACE_KEY; + import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.SetMode; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.IngestionRunSummary; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -36,6 +39,7 @@ import lombok.extern.slf4j.Slf4j; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.tasks.GetTaskResponse; +import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.bucket.filter.ParsedFilter; @@ -56,10 +60,10 @@ public class ElasticSearchSystemMetadataService private static final String DOC_DELIMETER = "--"; public static final String INDEX_NAME = "system_metadata_service_v1"; - private static final String FIELD_URN = "urn"; - private static final String FIELD_ASPECT = "aspect"; + public static final String FIELD_URN = "urn"; + public static final String FIELD_ASPECT = "aspect"; private static final String FIELD_RUNID = "runId"; - private static final String FIELD_LAST_UPDATED = "lastUpdated"; + public static final String FIELD_LAST_UPDATED = "lastUpdated"; private static final String FIELD_REGISTRY_NAME = "registryName"; private static final String FIELD_REGISTRY_VERSION = "registryVersion"; private static final Set INDEX_FIELD_SET = @@ -82,6 +86,10 @@ private String toDocument(SystemMetadata systemMetadata, String 
urn, String aspe document.put("registryName", systemMetadata.getRegistryName()); document.put("registryVersion", systemMetadata.getRegistryVersion()); document.put("removed", false); + if (systemMetadata.getProperties() != null + && systemMetadata.getProperties().containsKey(TELEMETRY_TRACE_KEY)) { + document.put(TELEMETRY_TRACE_KEY, systemMetadata.getProperties().get(TELEMETRY_TRACE_KEY)); + } return document.toString(); } @@ -160,31 +168,18 @@ public List findByParams( Map systemMetaParams, boolean includeSoftDeleted, int from, int size) { SearchResponse searchResponse = _esDAO.findByParams(systemMetaParams, includeSoftDeleted, from, size); - if (searchResponse != null) { - SearchHits hits = searchResponse.getHits(); - List summaries = - Arrays.stream(hits.getHits()) - .map( - hit -> { - Map values = hit.getSourceAsMap(); - AspectRowSummary summary = new AspectRowSummary(); - summary.setRunId((String) values.get(FIELD_RUNID)); - summary.setAspectName((String) values.get(FIELD_ASPECT)); - summary.setUrn((String) values.get(FIELD_URN)); - Object timestamp = values.get(FIELD_LAST_UPDATED); - if (timestamp instanceof Long) { - summary.setTimestamp((Long) timestamp); - } else if (timestamp instanceof Integer) { - summary.setTimestamp(Long.valueOf((Integer) timestamp)); - } - summary.setKeyAspect(((String) values.get(FIELD_ASPECT)).endsWith("Key")); - return summary; - }) - .collect(Collectors.toList()); - return summaries; - } else { - return Collections.emptyList(); - } + return toAspectRowSummary(searchResponse); + } + + @Override + public List findAspectsByUrn( + @Nonnull Urn urn, @Nonnull List aspects, boolean includeSoftDeleted) { + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + boolQueryBuilder.filter(QueryBuilders.termQuery(FIELD_URN, urn.toString())); + boolQueryBuilder.filter(QueryBuilders.termsQuery(FIELD_ASPECT, aspects)); + SearchResponse searchResponse = + _esDAO.scroll(boolQueryBuilder, includeSoftDeleted, null, null, null, 
aspects.size()); + return toAspectRowSummary(searchResponse); } @Override @@ -254,4 +249,32 @@ public void clear() { _esBulkProcessor.deleteByQuery( QueryBuilders.matchAllQuery(), true, _indexConvention.getIndexName(INDEX_NAME)); } + + private static List toAspectRowSummary(SearchResponse searchResponse) { + if (searchResponse != null) { + SearchHits hits = searchResponse.getHits(); + return Arrays.stream(hits.getHits()) + .map( + hit -> { + Map values = hit.getSourceAsMap(); + AspectRowSummary summary = new AspectRowSummary(); + summary.setRunId((String) values.get(FIELD_RUNID)); + summary.setAspectName((String) values.get(FIELD_ASPECT)); + summary.setUrn((String) values.get(FIELD_URN)); + Object timestamp = values.get(FIELD_LAST_UPDATED); + if (timestamp instanceof Long) { + summary.setTimestamp((Long) timestamp); + } else if (timestamp instanceof Integer) { + summary.setTimestamp(Long.valueOf((Integer) timestamp)); + } + summary.setKeyAspect(((String) values.get(FIELD_ASPECT)).endsWith("Key")); + summary.setTelemetryTraceId( + (String) values.get(TELEMETRY_TRACE_KEY), SetMode.IGNORE_NULL); + return summary; + }) + .collect(Collectors.toList()); + } else { + return Collections.emptyList(); + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataMappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataMappingsBuilder.java index 6623580548706b..9b8c50a81f8510 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataMappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataMappingsBuilder.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.systemmetadata; +import static io.datahubproject.metadata.context.TraceContext.TELEMETRY_TRACE_KEY; + import com.google.common.collect.ImmutableMap; import java.util.HashMap; import java.util.Map; @@ -19,6 +21,7 @@ public static Map getMappings() { 
mappings.put("registryVersion", getMappingsForKeyword()); mappings.put("registryName", getMappingsForKeyword()); mappings.put("removed", getMappingsForRemoved()); + mappings.put(TELEMETRY_TRACE_KEY, getMappingsForKeyword()); return ImmutableMap.of("properties", mappings); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index 4d940c229dc9af..0862077f4d1fa7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -3,7 +3,6 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; -import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; import com.datahub.util.exception.ESQueryException; import com.fasterxml.jackson.core.JsonProcessingException; @@ -46,7 +45,6 @@ import com.linkedin.timeseries.TimeseriesIndexSizeResult; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.datahubproject.metadata.context.SearchContext; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -404,19 +402,24 @@ public List getAspectValues( searchRequest.indices(indexName); log.debug("Search request is: " + searchRequest); - SearchHits hits; - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "searchAspectValues_search").time()) { - final SearchResponse searchResponse = - searchClient.search(searchRequest, RequestOptions.DEFAULT); - hits = searchResponse.getHits(); - } catch (Exception e) { - log.error("Search query failed:", e); - throw new ESQueryException("Search query failed:", e); - } - return Arrays.stream(hits.getHits()) - 
.map(ElasticSearchTimeseriesAspectService::parseDocument) - .collect(Collectors.toList()); + return opContext.withSpan( + "searchAspectValues_search", + () -> { + SearchHits hits; + try { + final SearchResponse searchResponse = + searchClient.search(searchRequest, RequestOptions.DEFAULT); + hits = searchResponse.getHits(); + } catch (Exception e) { + log.error("Search query failed:", e); + throw new ESQueryException("Search query failed:", e); + } + return Arrays.stream(hits.getHits()) + .map(ElasticSearchTimeseriesAspectService::parseDocument) + .collect(Collectors.toList()); + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "searchAspectValues_search")); } @Nonnull @@ -681,13 +684,7 @@ public TimeseriesScrollResult scrollAspects( SearchResponse response = executeScrollSearchQuery( - opContext.getSearchContext(), - entityName, - aspectName, - filterQueryBuilder, - sortCriteria, - scrollId, - count); + opContext, entityName, aspectName, filterQueryBuilder, sortCriteria, scrollId, count); int totalCount = (int) response.getHits().getTotalHits().value; List> resultPairs = @@ -704,7 +701,7 @@ public TimeseriesScrollResult scrollAspects( } private SearchResponse executeScrollSearchQuery( - @Nonnull SearchContext searchContext, + @Nonnull OperationContext opContext, @Nonnull final String entityName, @Nonnull final String aspectName, @Nonnull final QueryBuilder query, @@ -729,14 +726,22 @@ private SearchResponse executeScrollSearchQuery( ESUtils.setSearchAfter(searchSourceBuilder, sort, null, null); searchRequest.indices( - searchContext.getIndexConvention().getTimeseriesAspectIndexName(entityName, aspectName)); - - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), "scrollAspects_search").time()) { - return searchClient.search(searchRequest, RequestOptions.DEFAULT); - } catch (Exception e) { - log.error("Search query failed", e); - throw new ESQueryException("Search query failed:", e); - } + opContext + .getSearchContext() + 
.getIndexConvention() + .getTimeseriesAspectIndexName(entityName, aspectName)); + + return opContext.withSpan( + "scrollAspects_search", + () -> { + try { + return searchClient.search(searchRequest, RequestOptions.DEFAULT); + } catch (Exception e) { + log.error("Search query failed", e); + throw new ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "scrollAspects_search")); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java index 54f97f45219ac5..671b142b13150f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java @@ -41,7 +41,6 @@ import java.time.Instant; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -122,25 +121,22 @@ public static UsageQueryResult query( long took; // 2. Get buckets. - timer = MetricUtils.timer(UsageServiceUtil.class, "getBuckets").time(); UsageAggregationArray buckets = - getBuckets(opContext, timeseriesAspectService, filter, resource, duration); - took = timer.stop(); - log.info( - "Usage stats for resource {} returned {} buckets in {} ms", - resource, - buckets.size(), - TimeUnit.NANOSECONDS.toMillis(took)); + opContext.withSpan( + "getBuckets", + () -> getBuckets(opContext, timeseriesAspectService, filter, resource, duration), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(UsageServiceUtil.class, "getBuckets")); + log.info("Usage stats for resource {} returned {} buckets", resource, buckets.size()); // 3. Get aggregations. 
- timer = MetricUtils.timer(UsageServiceUtil.class, "getAggregations").time(); UsageQueryResultAggregations aggregations = - getAggregations(opContext, timeseriesAspectService, filter); - took = timer.stop(); - log.info( - "Usage stats aggregation for resource {} took {} ms", - resource, - TimeUnit.NANOSECONDS.toMillis(took)); + opContext.withSpan( + "getAggregations", + () -> getAggregations(opContext, timeseriesAspectService, filter), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(UsageServiceUtil.class, "getAggregations")); + log.info("Usage stats aggregation for resource {}", resource); // 4. Compute totalSqlQuery count from the buckets itself. // We want to avoid issuing an additional query with a sum aggregation. diff --git a/metadata-io/src/main/java/com/linkedin/metadata/trace/KafkaTraceReader.java b/metadata-io/src/main/java/com/linkedin/metadata/trace/KafkaTraceReader.java new file mode 100644 index 00000000000000..9ce8a1b91e66c7 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/trace/KafkaTraceReader.java @@ -0,0 +1,460 @@ +package com.linkedin.metadata.trace; + +import static io.datahubproject.metadata.context.TraceContext.TELEMETRY_TRACE_KEY; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.systemmetadata.TraceStorageStatus; +import com.linkedin.metadata.systemmetadata.TraceWriteStatus; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import java.time.Duration; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import 
java.util.function.Supplier; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsResult; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.clients.consumer.OffsetAndTimestamp; +import org.apache.kafka.clients.producer.internals.DefaultPartitioner; +import org.apache.kafka.common.Cluster; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.PartitionInfo; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.protocol.types.SchemaException; + +@Slf4j +@SuperBuilder +public abstract class KafkaTraceReader { + private final AdminClient adminClient; + private final Supplier> consumerSupplier; + private final int pollDurationMs; + private final int pollMaxAttempts; + + @Nonnull private final ExecutorService executorService; + private final long timeoutSeconds; + + private final Cache topicPartitionCache = + Caffeine.newBuilder() + .maximumSize(1_000) // Maximum number of entries + .expireAfterWrite(Duration.ofHours(1)) // expire entries after 1 hour + .build(); + private final Cache offsetCache = + Caffeine.newBuilder() + .maximumSize(100) // unlikely to have more than 100 partitions + .expireAfterWrite(Duration.ofMinutes(5)) // Shorter expiry for offsets + .build(); + + public KafkaTraceReader( + AdminClient adminClient, + Supplier> consumerSupplier, + int pollDurationMillis, + int pollMaxAttempts, + ExecutorService executorService, + long timeoutSeconds) { + this.adminClient = adminClient; + this.consumerSupplier = consumerSupplier; + 
this.pollDurationMs = pollDurationMillis; + this.pollMaxAttempts = pollMaxAttempts; + this.executorService = executorService; + this.timeoutSeconds = timeoutSeconds; + } + + @Nonnull + protected abstract String getTopicName(); + + @Nullable + protected abstract String getConsumerGroupId(); + + public abstract Optional read(@Nullable GenericRecord genericRecord); + + protected abstract Optional, SystemMetadata>> + matchConsumerRecord( + ConsumerRecord consumerRecord, String traceId, String aspectName); + + /** + * Determines the write status of a trace by comparing consumer offset with message offset. + * + * @return PENDING if the message exists but hasn't been consumed yet, UNKNOWN if no consumer + * offset exists, ERROR in other cases + */ + public Map> tracePendingStatuses( + Map> urnAspectPairs, String traceId, Long traceTimestampMillis) { + return tracePendingStatuses(urnAspectPairs, traceId, traceTimestampMillis, false); + } + + public Map> tracePendingStatuses( + Map> urnAspectPairs, + String traceId, + Long traceTimestampMillis, + boolean skipCache) { + + List>>> futures = + urnAspectPairs.entrySet().stream() + .map( + entry -> + CompletableFuture.supplyAsync( + () -> { + try { + Map result = + tracePendingStatuses( + entry.getKey(), + entry.getValue(), + traceId, + traceTimestampMillis, + skipCache); + return Map.entry(entry.getKey(), result); + } catch (Exception e) { + log.error( + "Error processing trace status for URN: {}", entry.getKey(), e); + return Map.entry( + entry.getKey(), Collections.emptyMap()); + } + }, + executorService)) + .collect(Collectors.toList()); + + try { + List>> results = + CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])) + .thenApply( + v -> futures.stream().map(CompletableFuture::join).collect(Collectors.toList())) + .get(timeoutSeconds, TimeUnit.SECONDS); + + return results.stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, Map.Entry::getValue, (existing, replacement) -> existing)); + 
} catch (Exception e) { + log.error("Error processing parallel trace status requests", e); + throw new RuntimeException("Failed to process parallel trace status requests", e); + } + } + + /** + * Find messages in the kafka topic by urn, aspect names, and trace id using the timestamp to seek + * to the expected location. + * + * @return Map of aspect name to matching record pair, containing only the aspects that were found + */ + public Map, SystemMetadata>>> + findMessages( + Map> urnAspectPairs, String traceId, Long traceTimestampMillis) { + + List< + CompletableFuture< + Map.Entry< + Urn, Map, SystemMetadata>>>>> + futures = + urnAspectPairs.entrySet().stream() + .map( + entry -> + CompletableFuture.supplyAsync( + () -> { + try { + Map< + String, + Pair, SystemMetadata>> + result = + findMessages( + entry.getKey(), + entry.getValue(), + traceId, + traceTimestampMillis); + return Map.entry(entry.getKey(), result); + } catch (Exception e) { + log.error("Error processing trace for URN: {}", entry.getKey(), e); + return Map.entry( + entry.getKey(), + Collections + ., + SystemMetadata>> + emptyMap()); + } + }, + executorService)) + .collect(Collectors.toList()); + + try { + List, SystemMetadata>>>> + results = + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])) + .thenApply( + v -> + futures.stream() + .map(CompletableFuture::join) + .collect(Collectors.toList())) + .get(timeoutSeconds, TimeUnit.SECONDS); + + return results.stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, Map.Entry::getValue, (existing, replacement) -> existing)); + } catch (Exception e) { + log.error("Error processing parallel trace requests", e); + throw new RuntimeException("Failed to process parallel trace requests", e); + } + } + + private Map tracePendingStatuses( + Urn urn, + Collection aspectNames, + String traceId, + Long traceTimestampMillis, + boolean skipCache) { + try { + TopicPartition topicPartition = getTopicPartition(urn); + Optional offsetMetadata = 
getOffsetAndMetadata(topicPartition, skipCache); + if (offsetMetadata.isEmpty()) { + log.warn("No consumer offset to compare with."); + return aspectNames.stream() + .collect( + Collectors.toMap( + aspectName -> aspectName, + aspectName -> + TraceStorageStatus.ok( + TraceWriteStatus.UNKNOWN, "Missing consumer offsets."))); + } + + Map, SystemMetadata>> messages = + findMessages(urn, aspectNames, traceId, traceTimestampMillis); + + return aspectNames.stream() + .collect( + Collectors.toMap( + aspectName -> aspectName, + aspectName -> { + Pair, SystemMetadata> message = + messages.get(aspectName); + if (message != null + && offsetMetadata.get().offset() < message.getFirst().offset()) { + return TraceStorageStatus.ok( + TraceWriteStatus.PENDING, "Consumer has not processed offset."); + } + return TraceStorageStatus.fail( + TraceWriteStatus.ERROR, "Consumer has processed past the offset."); + })); + } catch (ExecutionException | InterruptedException e) { + throw new RuntimeException(e); + } + } + + /** + * Get the offset metadata for a specific TopicPartition from the consumer group. This method is + * now the primary interface for offset lookup and uses caching. 
+ */ + private Optional getOffsetAndMetadata( + TopicPartition topicPartition, boolean skipCache) { + if (skipCache) { + offsetCache.invalidate(topicPartition); + } + + return Optional.ofNullable( + offsetCache.get( + topicPartition, + tp -> { + final String consumerGroupId = Objects.requireNonNull(getConsumerGroupId()); + + try { + ListConsumerGroupOffsetsResult offsetsResult = + adminClient.listConsumerGroupOffsets(consumerGroupId); + + if (offsetsResult == null) { + log.error("Failed to get consumer group offsets for group: {}", consumerGroupId); + return null; + } + + Map offsets = + offsetsResult.partitionsToOffsetAndMetadata().get(); + + if (offsets == null) { + log.error("Null offsets returned for consumer group: {}", consumerGroupId); + return null; + } + + OffsetAndMetadata offsetAndMetadata = offsets.get(tp); + if (offsetAndMetadata == null) { + log.warn( + "No committed offset found for Topic: {}, Partition: {}, Group: {}", + tp.topic(), + tp.partition(), + consumerGroupId); + return null; + } + + log.debug( + "Found offset metadata {} for Topic: {}, Partition: {}, Group: {}", + offsetAndMetadata, + tp.topic(), + tp.partition(), + consumerGroupId); + + return offsetAndMetadata; + } catch (SchemaException e) { + log.error("Schema error when fetching consumer group offsets", e); + return null; + } catch (Exception e) { + log.error("Error fetching consumer group offsets", e); + return null; + } + })); + } + + private Map, SystemMetadata>> findMessages( + Urn urn, Collection aspectNames, String traceId, Long traceTimestampMillis) + throws ExecutionException, InterruptedException { + + TopicPartition topicPartition = getTopicPartition(urn); + + try (Consumer consumer = consumerSupplier.get()) { + // Assign the partition we want to read from + consumer.assign(Collections.singleton(topicPartition)); + + // Get offset for timestamp + OffsetAndTimestamp offsetAndTimestamp = + getOffsetByTime(consumer, topicPartition, traceTimestampMillis); + + if 
(offsetAndTimestamp == null) { + log.debug( + "No offset found for timestamp {} in partition {}", + traceTimestampMillis, + topicPartition); + return Collections.emptyMap(); + } + + // Seek to the offset for the timestamp + consumer.seek(topicPartition, offsetAndTimestamp.offset()); + log.debug( + "Seeking to timestamp-based offset {} for partition {}", + offsetAndTimestamp.offset(), + topicPartition); + + // Poll with a maximum number of attempts + int attempts = 0; + long lastProcessedOffset = -1; + Map, SystemMetadata>> results = + new HashMap<>(); + + while (attempts < pollMaxAttempts && results.size() < aspectNames.size()) { + var records = consumer.poll(java.time.Duration.ofMillis(pollDurationMs)); + attempts++; + + if (records.isEmpty()) { + break; + } + + // Check if we're making progress + long currentOffset = consumer.position(topicPartition); + if (currentOffset == lastProcessedOffset) { + break; + } + lastProcessedOffset = currentOffset; + + // Process records for each aspect name we haven't found yet + for (String aspectName : aspectNames) { + if (!results.containsKey(aspectName)) { + var matchingRecord = + records.records(topicPartition).stream() + .filter(record -> record.key().equals(urn.toString())) + .flatMap(record -> matchConsumerRecord(record, traceId, aspectName).stream()) + .findFirst(); + + matchingRecord.ifPresent(pair -> results.put(aspectName, pair)); + } + } + } + + return results; + } + } + + protected static boolean traceIdMatch(@Nullable SystemMetadata systemMetadata, String traceId) { + return systemMetadata != null + && systemMetadata.getProperties() != null + && traceId.equals(systemMetadata.getProperties().get(TELEMETRY_TRACE_KEY)); + } + + private TopicPartition getTopicPartition(Urn urn) { + return topicPartitionCache.get( + urn.toString(), + key -> { + try { + DefaultPartitioner partitioner = new DefaultPartitioner(); + + TopicDescription topicDescription = + adminClient + 
.describeTopics(Collections.singletonList(getTopicName())) + .all() + .get() + .get(getTopicName()); + + if (topicDescription == null) { + throw new IllegalStateException("Topic " + getTopicName() + " not found"); + } + + List partitions = + topicDescription.partitions().stream() + .map( + p -> + new PartitionInfo( + getTopicName(), + p.partition(), + p.leader(), + p.replicas().toArray(new Node[0]), + p.isr().toArray(new Node[0]), + p.replicas().toArray(new Node[0]))) + .collect(Collectors.toList()); + + List nodes = + partitions.stream() + .map(PartitionInfo::leader) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + Cluster cluster = + new Cluster( + null, nodes, partitions, Collections.emptySet(), Collections.emptySet()); + + int partition = + partitioner.partition(getTopicName(), key, key.getBytes(), null, null, cluster); + + return new TopicPartition(getTopicName(), partition); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Failed to get topic partition for " + key, e); + } + }); + } + + private static OffsetAndTimestamp getOffsetByTime( + Consumer consumer, + TopicPartition topicPartition, + Long traceTimestampMillis) { + // If we have a timestamp, first seek to that approximate location + Map timestampsToSearch = + Collections.singletonMap(topicPartition, traceTimestampMillis); + + return consumer.offsetsForTimes(timestampsToSearch).get(topicPartition); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/trace/MCLTraceReader.java b/metadata-io/src/main/java/com/linkedin/metadata/trace/MCLTraceReader.java new file mode 100644 index 00000000000000..98da8f87d3b5ad --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/trace/MCLTraceReader.java @@ -0,0 +1,43 @@ +package com.linkedin.metadata.trace; + +import com.linkedin.metadata.EventUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import 
java.io.IOException; +import java.util.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.experimental.SuperBuilder; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; + +@Getter +@SuperBuilder +public class MCLTraceReader extends KafkaTraceReader { + @Nonnull private final String topicName; + @Nullable private final String consumerGroupId; + + @Override + public Optional read(@Nullable GenericRecord genericRecord) { + try { + return Optional.ofNullable( + genericRecord == null ? null : EventUtils.avroToPegasusMCL(genericRecord)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + protected Optional, SystemMetadata>> + matchConsumerRecord( + ConsumerRecord consumerRecord, String traceId, String aspectName) { + return read(consumerRecord.value()) + .filter( + event -> + traceIdMatch(event.getSystemMetadata(), traceId) + && aspectName.equals(event.getAspectName())) + .map(event -> Pair.of(consumerRecord, event.getSystemMetadata())); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/trace/MCPFailedTraceReader.java b/metadata-io/src/main/java/com/linkedin/metadata/trace/MCPFailedTraceReader.java new file mode 100644 index 00000000000000..bb3520d6630d84 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/trace/MCPFailedTraceReader.java @@ -0,0 +1,45 @@ +package com.linkedin.metadata.trace; + +import com.linkedin.metadata.EventUtils; +import com.linkedin.mxe.FailedMetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import java.io.IOException; +import java.util.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.experimental.SuperBuilder; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; + +@Getter +@SuperBuilder +public 
class MCPFailedTraceReader extends KafkaTraceReader { + @Nonnull private final String topicName; + @Nullable private final String consumerGroupId; + + @Override + public Optional read(@Nullable GenericRecord genericRecord) { + try { + return Optional.ofNullable( + genericRecord == null ? null : EventUtils.avroToPegasusFailedMCP(genericRecord)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + protected Optional, SystemMetadata>> + matchConsumerRecord( + ConsumerRecord consumerRecord, String traceId, String aspectName) { + return read(consumerRecord.value()) + .filter( + event -> + traceIdMatch(event.getMetadataChangeProposal().getSystemMetadata(), traceId) + && aspectName.equals(event.getMetadataChangeProposal().getAspectName())) + .map( + event -> + Pair.of(consumerRecord, event.getMetadataChangeProposal().getSystemMetadata())); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/trace/MCPTraceReader.java b/metadata-io/src/main/java/com/linkedin/metadata/trace/MCPTraceReader.java new file mode 100644 index 00000000000000..99781e80416de9 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/trace/MCPTraceReader.java @@ -0,0 +1,43 @@ +package com.linkedin.metadata.trace; + +import com.linkedin.metadata.EventUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import java.io.IOException; +import java.util.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.experimental.SuperBuilder; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; + +@Getter +@SuperBuilder +public class MCPTraceReader extends KafkaTraceReader { + @Nonnull private final String topicName; + @Nullable private final String consumerGroupId; + + @Override + public Optional read(@Nullable GenericRecord genericRecord) { + try { + return 
Optional.ofNullable( + genericRecord == null ? null : EventUtils.avroToPegasusMCP(genericRecord)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + protected Optional, SystemMetadata>> + matchConsumerRecord( + ConsumerRecord consumerRecord, String traceId, String aspectName) { + return read(consumerRecord.value()) + .filter( + event -> + traceIdMatch(event.getSystemMetadata(), traceId) + && aspectName.equals(event.getAspectName())) + .map(event -> Pair.of(consumerRecord, event.getSystemMetadata())); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/trace/TraceServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/trace/TraceServiceImpl.java new file mode 100644 index 00000000000000..51e30bd6f6658d --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/trace/TraceServiceImpl.java @@ -0,0 +1,484 @@ +package com.linkedin.metadata.trace; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.run.AspectRowSummary; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.systemmetadata.TraceService; +import com.linkedin.metadata.systemmetadata.TraceStatus; +import com.linkedin.metadata.systemmetadata.TraceStorageStatus; +import com.linkedin.metadata.systemmetadata.TraceWriteStatus; +import com.linkedin.mxe.FailedMetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.TraceContext; +import io.datahubproject.metadata.context.TraceIdGenerator; +import 
io.datahubproject.metadata.exception.TraceException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; + +@Builder +@Slf4j +public class TraceServiceImpl implements TraceService { + private final EntityRegistry entityRegistry; + private final SystemMetadataService systemMetadataService; + private final EntityService entityService; + private final MCPTraceReader mcpTraceReader; + private final MCPFailedTraceReader mcpFailedTraceReader; + private final MCLTraceReader mclVersionedTraceReader; + private final MCLTraceReader mclTimeseriesTraceReader; + + public TraceServiceImpl( + EntityRegistry entityRegistry, + SystemMetadataService systemMetadataService, + EntityService entityService, + MCPTraceReader mcpTraceReader, + MCPFailedTraceReader mcpFailedTraceReader, + MCLTraceReader mclVersionedTraceReader, + MCLTraceReader mclTimeseriesTraceReader) { + this.entityRegistry = entityRegistry; + this.systemMetadataService = systemMetadataService; + this.entityService = entityService; + this.mcpTraceReader = mcpTraceReader; + this.mcpFailedTraceReader = mcpFailedTraceReader; + this.mclVersionedTraceReader = mclVersionedTraceReader; + this.mclTimeseriesTraceReader = mclTimeseriesTraceReader; + } + + @Nonnull + @Override + public Map> trace( + @Nonnull OperationContext opContext, + @Nonnull String traceId, + @Nonnull Map> aspectNames, + boolean onlyIncludeErrors, + boolean detailed, + boolean skipCache) { + + long traceTimestampMillis = 
TraceIdGenerator.getTimestampMillis(traceId); + + // Get primary status for all URNs + Map> primaryStatuses = + tracePrimaryInParallel( + opContext, traceId, traceTimestampMillis, aspectNames, detailed, skipCache); + + // Get search status for all URNs using primary results + Map> searchStatuses = + traceSearchInParallel( + opContext, traceId, traceTimestampMillis, aspectNames, primaryStatuses, skipCache); + + // Merge and filter results for each URN + Map> mergedResults = + aspectNames.keySet().stream() + .collect( + Collectors.toMap( + urn -> urn, + urn -> + mergeStatus( + primaryStatuses.getOrDefault(urn, new LinkedHashMap<>()), + searchStatuses.getOrDefault(urn, new LinkedHashMap<>()), + onlyIncludeErrors))); + + // Remove URNs with empty aspect maps (when filtering for errors) + return mergedResults.entrySet().stream() + .filter(entry -> !entry.getValue().isEmpty()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + private Map> tracePrimaryInParallel( + @Nonnull OperationContext opContext, + @Nonnull String traceId, + long traceTimestampMillis, + @Nonnull Map> aspectNames, + boolean detailed, + boolean skipCache) { + + // Group aspects by whether they are timeseries + Map> timeseriesResults = new HashMap<>(); + Map> nonTimeseriesAspects = new HashMap<>(); + + for (Map.Entry> entry : aspectNames.entrySet()) { + Urn urn = entry.getKey(); + EntitySpec entitySpec = entityRegistry.getEntitySpec(urn.getEntityType()); + + Map timeseriesStatuses = new LinkedHashMap<>(); + Set remainingAspects = new HashSet<>(); + + for (String aspectName : entry.getValue()) { + if (entitySpec.getAspectSpec(aspectName).isTimeseries()) { + timeseriesStatuses.put(aspectName, TraceStorageStatus.NO_OP); + } else { + remainingAspects.add(aspectName); + } + } + + if (!timeseriesStatuses.isEmpty()) { + timeseriesResults.put(urn, timeseriesStatuses); + } + if (!remainingAspects.isEmpty()) { + nonTimeseriesAspects.put(urn, remainingAspects); + } + } + + // Process 
non-timeseries aspects using SQL + Map> sqlResults = new HashMap<>(); + if (!nonTimeseriesAspects.isEmpty()) { + try { + Map responses = + entityService.getEntitiesV2( + opContext, + nonTimeseriesAspects.keySet().iterator().next().getEntityType(), + nonTimeseriesAspects.keySet(), + nonTimeseriesAspects.values().stream() + .flatMap(Collection::stream) + .collect(Collectors.toSet()), + false); + + for (Map.Entry entry : responses.entrySet()) { + Map aspectStatuses = new LinkedHashMap<>(); + for (Map.Entry aspectEntry : + entry.getValue().getAspects().entrySet()) { + long createdOnMillis = aspectEntry.getValue().getCreated().getTime(); + SystemMetadata systemMetadata = aspectEntry.getValue().getSystemMetadata(); + String systemTraceId = extractTraceId(systemMetadata); + Optional aspectLastUpdated = extractLastUpdated(systemMetadata); + String aspectName = aspectEntry.getKey(); + + if (traceId.equals(systemTraceId)) { + aspectStatuses.put(aspectName, TraceStorageStatus.ok(TraceWriteStatus.ACTIVE_STATE)); + } else if (traceTimestampMillis <= extractTimestamp(systemTraceId, createdOnMillis)) { + aspectStatuses.put( + aspectName, TraceStorageStatus.ok(TraceWriteStatus.HISTORIC_STATE)); + } else if (createdOnMillis < traceTimestampMillis + && traceTimestampMillis < aspectLastUpdated.orElse(traceTimestampMillis)) { + aspectStatuses.put(aspectName, TraceStorageStatus.ok(TraceWriteStatus.NO_OP)); + } + } + sqlResults.put(entry.getKey(), aspectStatuses); + } + } catch (Exception e) { + log.error("Error getting entities", e); + } + } + + // Account for sql results + Map> remainingAspects = new HashMap<>(); + for (Map.Entry> entry : nonTimeseriesAspects.entrySet()) { + Set foundAspects = + sqlResults.getOrDefault(entry.getKey(), Collections.emptyMap()).keySet(); + Set remaining = new HashSet<>(entry.getValue()); + remaining.removeAll(foundAspects); + if (!remaining.isEmpty()) { + remainingAspects.put(entry.getKey(), new ArrayList<>(remaining)); + } + } + + // Get remaining 
aspects from Kafka + Map> kafkaResults = + mcpTraceReader.tracePendingStatuses( + remainingAspects, traceId, traceTimestampMillis, skipCache); + + // Merge all results + Map> finalResults = new HashMap<>(); + for (Urn urn : aspectNames.keySet()) { + LinkedHashMap merged = new LinkedHashMap<>(); + merged.putAll(timeseriesResults.getOrDefault(urn, Collections.emptyMap())); + merged.putAll(sqlResults.getOrDefault(urn, Collections.emptyMap())); + merged.putAll(kafkaResults.getOrDefault(urn, Collections.emptyMap())); + finalResults.put(urn, merged); + } + + if (detailed) { + handleFailedMCP(opContext, finalResults, traceId, traceTimestampMillis); + } + + return finalResults; + } + + private Optional extractLastUpdated(@Nullable SystemMetadata systemMetadata) { + return Optional.ofNullable(systemMetadata) + .flatMap(sysMeta -> Optional.ofNullable(sysMeta.getLastObserved())); + } + + private void handleFailedMCP( + @Nonnull OperationContext opContext, + Map> finalResults, + @Nonnull String traceId, + long traceTimestampMillis) { + // Create a map of URNs and aspects that need to be checked in the failed topic + Map> aspectsToCheck = new HashMap<>(); + + // Filter for aspects with ERROR, NO_OP, or UNKNOWN status that might be in the failed topic + for (Map.Entry> entry : + finalResults.entrySet()) { + Urn urn = entry.getKey(); + EntitySpec entitySpec = entityRegistry.getEntitySpec(urn.getEntityType()); + + /* + * ERROR - to fetch exception + * NO_OP - to validate there wasn't a failure during an expected NO_OP + * UNKNOWN - ambiguous case resolution + */ + List aspectsToVerify = + entry.getValue().entrySet().stream() + .filter(aspect -> !entitySpec.getAspectSpec(aspect.getKey()).isTimeseries()) + .filter( + aspect -> + Set.of( + TraceWriteStatus.ERROR, + TraceWriteStatus.NO_OP, + TraceWriteStatus.UNKNOWN) + .contains(aspect.getValue().getWriteStatus())) + .map(Map.Entry::getKey) + .collect(Collectors.toList()); + + if (!aspectsToVerify.isEmpty()) { + 
aspectsToCheck.put(entry.getKey(), aspectsToVerify); + } + } + + // If there are no aspects to check, return early + if (aspectsToCheck.isEmpty()) { + return; + } + + try { + // Find messages in the failed topic for these URNs and aspects + Map, SystemMetadata>>> + failedMessages = + mcpFailedTraceReader.findMessages(aspectsToCheck, traceId, traceTimestampMillis); + + // Update the status for any aspects found in the failed topic + for (Map.Entry, SystemMetadata>>> + entry : failedMessages.entrySet()) { + Urn urn = entry.getKey(); + LinkedHashMap urnStatuses = finalResults.get(urn); + + if (urnStatuses != null) { + for (Map.Entry, SystemMetadata>> + aspectEntry : entry.getValue().entrySet()) { + String aspectName = aspectEntry.getKey(); + + // If we found the message in the failed topic, update its status (possible transition + // from UNKNOWN) + TraceStorageStatus.TraceStorageStatusBuilder builder = + TraceStorageStatus.builder().writeStatus(TraceWriteStatus.ERROR); + + // Populate the exception if possible + mcpFailedTraceReader + .read(aspectEntry.getValue().getFirst().value()) + .ifPresent( + failedMCP -> + builder.writeExceptions(extractTraceExceptions(opContext, failedMCP))); + + urnStatuses.put(aspectName, builder.build()); + } + } + } + } catch (Exception e) { + log.error("Error processing failed MCP messages", e); + } + } + + private Map> traceSearchInParallel( + @Nonnull OperationContext opContext, + @Nonnull String traceId, + long traceTimestampMillis, + @Nonnull Map> aspectNames, + @Nonnull Map> primaryStatuses, + boolean skipCache) { + + Map> aspectsToResolve = new HashMap<>(); + Map> finalResults = new HashMap<>(); + + // 1. 
Consider status of primary storage write + for (Map.Entry> entry : aspectNames.entrySet()) { + Urn urn = entry.getKey(); + EntitySpec entitySpec = entityRegistry.getEntitySpec(urn.getEntityType()); + LinkedHashMap finalResponse = new LinkedHashMap<>(); + List remaining = new ArrayList<>(); + + Map primaryStatus = + primaryStatuses.getOrDefault(urn, new LinkedHashMap<>()); + + for (String aspectName : entry.getValue()) { + TraceWriteStatus status = primaryStatus.get(aspectName).getWriteStatus(); + if (status == TraceWriteStatus.PENDING) { + finalResponse.put( + aspectName, + TraceStorageStatus.ok(TraceWriteStatus.PENDING, "Pending primary storage write.")); + } else if (status == TraceWriteStatus.NO_OP) { + if (entitySpec.getAspectSpec(aspectName).isTimeseries()) { + finalResponse.put( + aspectName, TraceStorageStatus.ok(TraceWriteStatus.TRACE_NOT_IMPLEMENTED)); + } else { + finalResponse.put(aspectName, TraceStorageStatus.NO_OP); + } + } else if (status == TraceWriteStatus.ERROR) { + finalResponse.put( + aspectName, + TraceStorageStatus.fail(TraceWriteStatus.ERROR, "Primary storage write failed.")); + } else if (status == TraceWriteStatus.TRACE_NOT_IMPLEMENTED + || status == TraceWriteStatus.UNKNOWN) { + finalResponse.put( + aspectName, + TraceStorageStatus.ok( + TraceWriteStatus.UNKNOWN, "Primary storage write indeterminate.")); + } else { + remaining.add(aspectName); + } + } + + if (!remaining.isEmpty()) { + aspectsToResolve.put(urn, remaining); + } + if (!finalResponse.isEmpty()) { + finalResults.put(urn, finalResponse); + } + } + + // 2. 
Check implied search write using system metadata + if (!aspectsToResolve.isEmpty()) { + // Get system metadata & group by URN + Map> summariesByUrn = + aspectsToResolve.entrySet().stream() + .flatMap( + entry -> + systemMetadataService + .findAspectsByUrn(entry.getKey(), entry.getValue(), true) + .stream()) + .collect(Collectors.groupingBy(summary -> UrnUtils.getUrn(summary.getUrn()))); + + // Process each URN's summaries + for (Map.Entry> entry : aspectsToResolve.entrySet()) { + Urn urn = entry.getKey(); + List remaining = new ArrayList<>(entry.getValue()); + LinkedHashMap response = + finalResults.computeIfAbsent(urn, k -> new LinkedHashMap<>()); + + for (AspectRowSummary summary : summariesByUrn.getOrDefault(urn, Collections.emptyList())) { + if (traceId.equals(summary.getTelemetryTraceId())) { + response.put( + summary.getAspectName(), TraceStorageStatus.ok(TraceWriteStatus.ACTIVE_STATE)); + remaining.remove(summary.getAspectName()); + } else if (summary.hasTimestamp() + && summary.getTimestamp() > 0 + && traceTimestampMillis <= summary.getTimestamp()) { + response.put( + summary.getAspectName(), TraceStorageStatus.ok(TraceWriteStatus.HISTORIC_STATE)); + remaining.remove(summary.getAspectName()); + } + } + + // update remaining + aspectsToResolve.put(urn, remaining); + } + + // Get remaining from Kafka + Map> kafkaResults = + mcpTraceReader.tracePendingStatuses( + aspectsToResolve, traceId, traceTimestampMillis, skipCache); + + // Merge Kafka results + kafkaResults.forEach( + (urn, statuses) -> + finalResults.computeIfAbsent(urn, k -> new LinkedHashMap<>()).putAll(statuses)); + } + + return finalResults; + } + + private static Map mergeStatus( + LinkedHashMap primaryAspectStatus, + LinkedHashMap searchAspectStatus, + boolean onlyIncludeErrors) { + + return primaryAspectStatus.entrySet().stream() + .map( + storageEntry -> { + String aspectName = storageEntry.getKey(); + TraceStorageStatus primaryStatus = storageEntry.getValue(); + TraceStorageStatus searchStatus 
= searchAspectStatus.get(aspectName); + TraceStatus traceStatus = + TraceStatus.builder() + .primaryStorage(primaryStatus) + .searchStorage(searchStatus) + .success(isSuccess(primaryStatus, searchStatus)) + .build(); + + // Only include this aspect if we're not filtering for errors + // or if either storage has an ERROR status + if (!onlyIncludeErrors + || TraceWriteStatus.ERROR.equals(primaryStatus.getWriteStatus()) + || TraceWriteStatus.ERROR.equals(searchStatus.getWriteStatus())) { + return Map.entry(aspectName, traceStatus); + } + return null; + }) + .filter(Objects::nonNull) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (existing, replacement) -> existing, + LinkedHashMap::new)); + } + + private static boolean isSuccess( + TraceStorageStatus primaryStatus, TraceStorageStatus searchStatus) { + return !TraceWriteStatus.ERROR.equals(primaryStatus.getWriteStatus()) + && !TraceWriteStatus.ERROR.equals(searchStatus.getWriteStatus()); + } + + @Nullable + private static String extractTraceId(@Nullable SystemMetadata systemMetadata) { + if (systemMetadata != null && systemMetadata.getProperties() != null) { + return systemMetadata.getProperties().get(TraceContext.TELEMETRY_TRACE_KEY); + } + return null; + } + + private static long extractTimestamp(@Nullable String traceId, long createOnMillis) { + return Optional.ofNullable(traceId) + .map(TraceIdGenerator::getTimestampMillis) + .orElse(createOnMillis); + } + + private List extractTraceExceptions( + @Nonnull OperationContext opContext, FailedMetadataChangeProposal fmcp) { + if (!fmcp.getError().isEmpty()) { + try { + if (fmcp.getError().startsWith("[") && fmcp.getError().endsWith("]")) { + return opContext.getObjectMapper().readValue(fmcp.getError(), new TypeReference<>() {}); + } + } catch (Exception e) { + log.warn("Failed to deserialize: {}", fmcp.getError()); + } + return List.of(new TraceException(fmcp.getError())); + } + return List.of(new TraceException("Unable to extract trace 
exception")); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 6eda210baf7d4a..dabdf493fa1f99 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -46,6 +46,7 @@ import com.linkedin.metadata.aspect.CorpUserAspect; import com.linkedin.metadata.aspect.CorpUserAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.PatchOperationType; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -67,6 +68,7 @@ import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; @@ -83,6 +85,8 @@ import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.StatusCode; import jakarta.annotation.Nonnull; import java.sql.Timestamp; import java.time.Instant; @@ -94,10 +98,12 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.Future; import java.util.stream.Collectors; import java.util.stream.Stream; import org.junit.Assert; import org.mockito.ArgumentCaptor; +import org.mockito.MockedStatic; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -190,7 +196,8 @@ public void testIngestGetEntity() throws Exception { ArgumentCaptor mclCaptor = 
ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); MetadataChangeLog mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "corpuser"); assertNull(mcl.getPreviousAspectValue()); @@ -236,7 +243,8 @@ public void testAddKey() throws Exception { ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); MetadataChangeLog mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "corpuser"); assertNull(mcl.getPreviousAspectValue()); @@ -324,7 +332,11 @@ public void testIngestGetEntities() throws Exception { ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn1), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + any(OperationContext.class), + Mockito.eq(entityUrn1), + Mockito.any(), + mclCaptor.capture()); MetadataChangeLog mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "corpuser"); assertNull(mcl.getPreviousAspectValue()); @@ -332,7 +344,11 @@ public void testIngestGetEntities() throws Exception { assertEquals(mcl.getChangeType(), ChangeType.UPSERT); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn2), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + any(OperationContext.class), + Mockito.eq(entityUrn2), + Mockito.any(), + mclCaptor.capture()); mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "corpuser"); assertNull(mcl.getPreviousAspectValue()); @@ -405,10 +421,12 @@ 
public void testIngestGetEntitiesV2() throws Exception { DataTemplateUtil.areEqual(expectedKey2, new CorpUserKey(envelopedKey2.getValue().data()))); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn1), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn1), Mockito.any(), Mockito.any()); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn2), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn2), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -476,10 +494,12 @@ public void testIngestGetEntitiesVersionedV2() throws Exception { DataTemplateUtil.areEqual(expectedKey2, new CorpUserKey(envelopedKey2.getValue().data()))); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn1), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn1), Mockito.any(), Mockito.any()); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn2), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn2), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -510,7 +530,8 @@ public void testIngestAspectsGetLatestAspects() throws Exception { assertTrue(DataTemplateUtil.areEqual(writeAspect2, latestAspects.get(aspectName2))); verify(_mockProducer, times(3)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -566,7 +587,10 @@ public void testReingestAspectsGetLatestAspects() throws Exception { verify(_mockProducer, times(1)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(initialChangeLog)); + 
any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.any(), + Mockito.eq(initialChangeLog)); // Mockito detects the previous invocation and throws an error in verifying the second call // unless invocations are cleared @@ -576,7 +600,8 @@ public void testReingestAspectsGetLatestAspects() throws Exception { opContext, entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); verify(_mockProducer, times(0)) - .produceMetadataChangeLog(Mockito.any(), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.any(), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -645,7 +670,10 @@ public void testReingestLineageAspect() throws Exception { verify(_mockProducer, times(1)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(initialChangeLog)); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.any(), + Mockito.eq(initialChangeLog)); // Mockito detects the previous invocation and throws an error in verifying the second call // unless invocations are cleared @@ -656,7 +684,10 @@ public void testReingestLineageAspect() throws Exception { verify(_mockProducer, times(1)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(restateChangeLog)); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.any(), + Mockito.eq(restateChangeLog)); verifyNoMoreInteractions(_mockProducer); } @@ -718,7 +749,10 @@ public void testReingestLineageProposal() throws Exception { verify(_mockProducer, times(1)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(initialChangeLog)); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.any(), + Mockito.eq(initialChangeLog)); // Mockito detects the previous invocation and throws an error in verifying the second call // unless invocations are cleared @@ -728,7 +762,10 @@ public void testReingestLineageProposal() throws Exception { verify(_mockProducer, times(1)) 
.produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(restateChangeLog)); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.any(), + Mockito.eq(restateChangeLog)); verifyNoMoreInteractions(_mockProducer); } @@ -773,9 +810,15 @@ public void testAsyncProposalVersioned() throws Exception { gmce.setAspect(genericAspect); _entityServiceImpl.ingestProposal(opContext, gmce, TEST_AUDIT_STAMP, true); verify(_mockProducer, times(0)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); + + ChangeItemImpl item = + ChangeItemImpl.builder().build(gmce, TEST_AUDIT_STAMP, opContext.getAspectRetriever()); + verify(_mockProducer, times(1)) - .produceMetadataChangeProposal(Mockito.eq(entityUrn), Mockito.eq(gmce)); + .produceMetadataChangeProposal( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq(item)); } @Test @@ -798,7 +841,8 @@ public void testAsyncProposalTimeseries() throws Exception { gmce.setAspect(genericAspect); _entityServiceImpl.ingestProposal(opContext, gmce, TEST_AUDIT_STAMP, true); verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); verify(_mockProducer, times(0)) .produceMetadataChangeProposal(Mockito.eq(entityUrn), Mockito.eq(gmce)); } @@ -823,7 +867,10 @@ public void testUpdateGetAspect() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writeAspect, readAspect1)); verify(_mockProducer, times(1)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any()); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.eq(corpUserInfoSpec), + Mockito.any()); // Ingest CorpUserInfo Aspect #2 writeAspect.setEmail("newemail@test.com"); @@ -836,10 
+883,14 @@ public void testUpdateGetAspect() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writeAspect, readAspect2)); verify(_mockProducer, times(2)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any()); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.eq(corpUserInfoSpec), + Mockito.any()); verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -877,7 +928,10 @@ public void testGetAspectAtVersion() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writtenVersionedAspect1, readAspect1)); verify(_mockProducer, times(1)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any()); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.eq(corpUserInfoSpec), + Mockito.any()); readAspect1 = _entityServiceImpl.getVersionedAspect(opContext, entityUrn, aspectName, -1); assertTrue(DataTemplateUtil.areEqual(writtenVersionedAspect1, readAspect1)); @@ -896,7 +950,10 @@ public void testGetAspectAtVersion() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writtenVersionedAspect2, readAspectVersion2)); verify(_mockProducer, times(2)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any()); + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.eq(corpUserInfoSpec), + Mockito.any()); readAspect1 = _entityServiceImpl.getVersionedAspect(opContext, entityUrn, aspectName, -1); assertFalse(DataTemplateUtil.areEqual(writtenVersionedAspect1, readAspect1)); @@ -904,6 +961,7 @@ public void testGetAspectAtVersion() throws AssertionError { // check key aspect verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -1228,6 +1286,7 @@ public void testIngestGetLatestAspect() throws AssertionError { ArgumentCaptor mclCaptor = 
ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -1243,6 +1302,7 @@ public void testIngestGetLatestAspect() throws AssertionError { verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -1291,7 +1351,8 @@ public void testIngestGetLatestAspect() throws AssertionError { EntityApiUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "corpuser"); assertNotNull(mcl.getPreviousAspectValue()); @@ -1376,6 +1437,7 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { verify(_mockProducer, times(2)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -1386,6 +1448,7 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -1435,6 +1498,7 @@ public void testIngestSameAspect() throws AssertionError { verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -1446,6 +1510,7 @@ public void testIngestSameAspect() throws AssertionError { ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(entityUrn), Mockito.eq( opContext @@ -1506,7 +1571,8 @@ public void testIngestSameAspect() throws AssertionError { 
EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata3)); verify(_mockProducer, times(0)) - .produceMetadataChangeLog(Mockito.any(), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + any(OperationContext.class), Mockito.any(), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -1781,7 +1847,11 @@ public void testRestoreIndices() throws Exception { ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + any(OperationContext.class), + Mockito.eq(entityUrn), + Mockito.any(), + mclCaptor.capture()); MetadataChangeLog mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "dataset"); assertNull(mcl.getPreviousAspectValue()); @@ -1919,7 +1989,10 @@ public void testUIPreProcessedProposal() throws Exception { ArgumentCaptor aspectSpecCaptor = ArgumentCaptor.forClass(AspectSpec.class); verify(_mockProducer, times(4)) .produceMetadataChangeLog( - Mockito.eq(entityUrn), aspectSpecCaptor.capture(), captor.capture()); + any(OperationContext.class), + Mockito.eq(entityUrn), + aspectSpecCaptor.capture(), + captor.capture()); assertEquals(UI_SOURCE, captor.getValue().getSystemMetadata().getProperties().get(APP_SOURCE)); assertEquals( aspectSpecCaptor.getAllValues().stream() @@ -1963,6 +2036,7 @@ public void testStructuredPropertyIngestProposal() throws Exception { ArgumentCaptor captor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) .produceMetadataChangeLog( + any(OperationContext.class), Mockito.eq(firstPropertyUrn), Mockito.eq(structuredPropertiesDefinitionAspect), captor.capture()); @@ -2050,6 +2124,7 @@ public void testStructuredPropertyIngestProposal() throws Exception { ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) .produceMetadataChangeLog( + 
any(OperationContext.class), Mockito.eq(secondPropertyUrn), Mockito.eq(structuredPropertiesDefinitionAspect), secondCaptor.capture()); @@ -2775,6 +2850,73 @@ public void testDeleteUrnWithRunIdFilterNonMatchVersionGap() throws Exception { assertEquals(result.getAspectName(), aspectName); } + @Test + public void testFailedAspectValidation() throws Exception { + try (MockedStatic mockedStatic = Mockito.mockStatic(Span.class)) { + Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:testFailedAspectValidation"); + + // Create an aspect that will trigger validation failure + CorpUserInfo invalidAspect = new CorpUserInfo().setActive(true); + + String aspectName = AspectGenerationUtils.getAspectName(invalidAspect); + + // Mock the span recording behavior + Span mockSpan = Mockito.mock(Span.class); + when(mockSpan.recordException(any(IllegalStateException.class))).thenReturn(mockSpan); + when(mockSpan.setStatus(any(StatusCode.class), anyString())).thenReturn(mockSpan); + when(mockSpan.setAttribute(anyString(), anyString())).thenReturn(mockSpan); + + // Configure the static Span.current() to return our mock + mockedStatic.when(Span::current).thenReturn(mockSpan); + + // Create test items that will fail validation + List items = + List.of( + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(aspectName) + .recordTemplate(invalidAspect) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) + .auditStamp(TEST_AUDIT_STAMP) + // Set invalid version to trigger validation failure + .headers(Map.of("If-Version-Match", "-10000")) + .build(opContext.getAspectRetriever())); + + // Create a mock Future that completes successfully + @SuppressWarnings("unchecked") + Future mockFuture = Mockito.mock(Future.class); + when(mockFuture.get()).thenReturn(null); + + // Make producer return our mock Future + when(_mockProducer.produceFailedMetadataChangeProposalAsync( + any(OperationContext.class), any(MCPItem.class), any(Set.class))) + .thenReturn(mockFuture); + + // Execute the 
ingest which should trigger validation failures + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext()) + .items(items) + .build(), + true, + true); + + // Verify span error recording + verify(mockSpan).recordException(any(IllegalStateException.class)); + verify(mockSpan).setStatus(StatusCode.ERROR, "Batch contains failed aspect validations."); + verify(mockSpan).setAttribute(MetricUtils.ERROR_TYPE, IllegalStateException.class.getName()); + + // Verify failed MCP production + verify(_mockProducer, times(1)) + .produceFailedMetadataChangeProposalAsync( + eq(opContext), any(MCPItem.class), any(Set.class)); + + // Verify Future.get() was called + verify(mockFuture, times(1)).get(); + } + } + @Nonnull protected com.linkedin.entity.Entity createCorpUserEntity(Urn entityUrn, String email) throws Exception { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java index d47652e2dbca55..f2d3446392bf10 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphServiceTest.java @@ -56,7 +56,8 @@ public void init() { _serverBuilder.newServer(); _driver = GraphDatabase.driver(_serverBuilder.boltURI()); _client = - new Neo4jGraphService(new LineageRegistry(SnapshotEntityRegistry.getInstance()), _driver); + new Neo4jGraphService( + operationContext, new LineageRegistry(SnapshotEntityRegistry.getInstance()), _driver); _client.clear(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java index 9a21c5337db864..b4d52aceeaa17f 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java @@ -2,12 +2,18 @@ import static org.mockito.Mockito.*; +import com.datahub.util.exception.ESQueryException; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.NumericNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.config.TimeseriesAspectServiceConfig; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders; @@ -17,6 +23,7 @@ import io.datahubproject.test.metadata.context.TestOperationContexts; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.http.HttpEntity; @@ -25,6 +32,7 @@ import org.opensearch.client.RestClient; import org.opensearch.client.RestHighLevelClient; import org.testng.Assert; +import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; /** @@ -33,29 +41,34 @@ */ public class TimeseriesAspectServiceUnitTest { - private final RestHighLevelClient _searchClient = mock(RestHighLevelClient.class); - private final IndexConvention _indexConvention = mock(IndexConvention.class); - private final 
TimeseriesAspectIndexBuilders _timeseriesAspectIndexBuilders = + private final RestHighLevelClient searchClient = mock(RestHighLevelClient.class); + private final IndexConvention indexConvention = mock(IndexConvention.class); + private final TimeseriesAspectIndexBuilders timeseriesAspectIndexBuilders = mock(TimeseriesAspectIndexBuilders.class); - private final ESBulkProcessor _bulkProcessor = mock(ESBulkProcessor.class); - private final RestClient _restClient = mock(RestClient.class); + private final ESBulkProcessor bulkProcessor = mock(ESBulkProcessor.class); + private final RestClient restClient = mock(RestClient.class); private final TimeseriesAspectService _timeseriesAspectService = new ElasticSearchTimeseriesAspectService( - _searchClient, - _timeseriesAspectIndexBuilders, - _bulkProcessor, + searchClient, + timeseriesAspectIndexBuilders, + bulkProcessor, 0, QueryFilterRewriteChain.EMPTY, TimeseriesAspectServiceConfig.builder().build()); private final OperationContext opContext = - TestOperationContexts.systemContextNoSearchAuthorization(_indexConvention); + TestOperationContexts.systemContextNoSearchAuthorization(indexConvention); private static final String INDEX_PATTERN = "indexPattern"; + @BeforeMethod + public void resetMocks() { + reset(searchClient, indexConvention, timeseriesAspectIndexBuilders, bulkProcessor, restClient); + } + @Test public void testGetIndicesIntegerWrap() throws IOException { - when(_indexConvention.getAllTimeseriesAspectIndicesPattern()).thenReturn(INDEX_PATTERN); - when(_searchClient.getLowLevelClient()).thenReturn(_restClient); + when(indexConvention.getAllTimeseriesAspectIndicesPattern()).thenReturn(INDEX_PATTERN); + when(searchClient.getLowLevelClient()).thenReturn(restClient); ObjectNode jsonNode = JsonNodeFactory.instance.objectNode(); ObjectNode indicesNode = JsonNodeFactory.instance.objectNode(); ObjectNode indexNode = JsonNodeFactory.instance.objectNode(); @@ -73,10 +86,66 @@ public void testGetIndicesIntegerWrap() throws 
IOException { when(response.getEntity()).thenReturn(responseEntity); when(responseEntity.getContent()) .thenReturn(IOUtils.toInputStream(jsonNode.toString(), StandardCharsets.UTF_8)); - when(_restClient.performRequest(any(Request.class))).thenReturn(response); + when(restClient.performRequest(any(Request.class))).thenReturn(response); List results = _timeseriesAspectService.getIndexSizes(opContext); Assert.assertEquals(results.get(0).getSizeInMb(), 8078.398031); } + + @Test + public void testSearchQueryFailure() throws IOException { + // setup mock + when(indexConvention.getTimeseriesAspectIndexName(eq("dataset"), eq("testAspect"))) + .thenReturn("dataset_testAspect_index_v1"); + + // Setup search request that will fail + when(searchClient.search(any(), any())).thenThrow(new IOException("Search failed")); + + Filter filter = QueryUtils.newFilter("field", "value"); + + // Verify that ESQueryException is thrown with correct message + try { + _timeseriesAspectService.getAspectValues( + opContext, + UrnUtils.getUrn("urn:li:dataset:123"), + "dataset", + "testAspect", + null, + null, + 10, + filter, + null); + Assert.fail("Expected ESQueryException to be thrown"); + } catch (ESQueryException e) { + Assert.assertEquals(e.getMessage(), "Search query failed:"); + Assert.assertTrue(e.getCause() instanceof IOException); + Assert.assertEquals(e.getCause().getMessage(), "Search failed"); + } + } + + @Test + public void testScrollSearchQueryFailure() throws IOException { + // setup mock + when(indexConvention.getTimeseriesAspectIndexName(eq("dataset"), eq("testAspect"))) + .thenReturn("dataset_testAspect_index_v1"); + + // Setup search request that will fail + when(searchClient.search(any(), any())).thenThrow(new IOException("Scroll search failed")); + + Filter filter = QueryUtils.newFilter("field", "value"); + List sortCriteria = + Arrays.asList(new SortCriterion().setField("timestamp").setOrder(SortOrder.DESCENDING)); + + // Verify that ESQueryException is thrown with correct 
message + try { + _timeseriesAspectService.scrollAspects( + opContext, "dataset", "testAspect", filter, sortCriteria, null, 10, null, null); + Assert.fail("Expected ESQueryException to be thrown"); + } catch (ESQueryException e) { + Assert.assertEquals(e.getMessage(), "Search query failed:"); + Assert.assertTrue(e.getCause() instanceof IOException); + Assert.assertEquals(e.getCause().getMessage(), "Scroll search failed"); + } + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/trace/BaseKafkaTraceReaderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/trace/BaseKafkaTraceReaderTest.java new file mode 100644 index 00000000000000..c5217475362295 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/trace/BaseKafkaTraceReaderTest.java @@ -0,0 +1,239 @@ +package com.linkedin.metadata.trace; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyCollection; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringMap; +import com.linkedin.metadata.systemmetadata.TraceStorageStatus; +import com.linkedin.metadata.systemmetadata.TraceWriteStatus; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.TraceContext; +import java.io.IOException; +import java.time.Duration; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import javax.annotation.Nullable; +import org.apache.avro.generic.GenericRecord; +import 
org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.DescribeTopicsResult; +import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsResult; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.clients.consumer.OffsetAndTimestamp; +import org.apache.kafka.common.KafkaFuture; +import org.apache.kafka.common.Node; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.TopicPartitionInfo; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public abstract class BaseKafkaTraceReaderTest { + protected static final String TOPIC_NAME = "test-topic"; + protected static final String CONSUMER_GROUP = "test-group"; + protected static final String TRACE_ID = "test-trace-id"; + protected static final String ASPECT_NAME = "status"; + protected static final Urn TEST_URN = UrnUtils.getUrn("urn:li:container:123"); + + @Mock protected AdminClient adminClient; + @Mock protected Consumer consumer; + protected ExecutorService executorService; + protected KafkaTraceReader traceReader; + + abstract KafkaTraceReader buildTraceReader(); + + abstract M buildMessage(@Nullable SystemMetadata systemMetadata); + + abstract GenericRecord toGenericRecord(M message) throws IOException; + + abstract M fromGenericRecord(GenericRecord genericRecord) throws IOException; + + @BeforeMethod(alwaysRun = true) + public void setup() { + MockitoAnnotations.openMocks(this); + executorService = Executors.newSingleThreadExecutor(); + traceReader = buildTraceReader(); + setupDefaultMocks(); + } + + protected void setupDefaultMocks() { + // Mock topic description + Node mockNode = new Node(0, 
"localhost", 9092); + TopicPartitionInfo partitionInfo = + new TopicPartitionInfo( + 0, mockNode, Collections.singletonList(mockNode), Collections.singletonList(mockNode)); + TopicDescription topicDescription = + new TopicDescription(TOPIC_NAME, false, Collections.singletonList(partitionInfo)); + + DescribeTopicsResult mockDescribeTopicsResult = mock(DescribeTopicsResult.class); + when(mockDescribeTopicsResult.all()) + .thenReturn( + KafkaFuture.completedFuture(Collections.singletonMap(TOPIC_NAME, topicDescription))); + when(adminClient.describeTopics(anyCollection())).thenReturn(mockDescribeTopicsResult); + + // Mock consumer group offset lookup + ListConsumerGroupOffsetsResult mockOffsetResult = mock(ListConsumerGroupOffsetsResult.class); + when(adminClient.listConsumerGroupOffsets(CONSUMER_GROUP)).thenReturn(mockOffsetResult); + when(mockOffsetResult.partitionsToOffsetAndMetadata()) + .thenReturn( + KafkaFuture.completedFuture( + Collections.singletonMap( + new TopicPartition(TOPIC_NAME, 0), new OffsetAndMetadata(100L)))); + + // Mock consumer behavior + when(consumer.poll(any(Duration.class))).thenReturn(mock(ConsumerRecords.class)); + } + + @Test + public void testRead_WithValidGenericRecord() throws Exception { + // Arrange + M expectedMessage = buildMessage(null); + GenericRecord genericRecord = toGenericRecord(expectedMessage); + + // Act + Optional result = traceReader.read(genericRecord); + + // Assert + assertTrue(result.isPresent()); + assertEquals(result.get(), expectedMessage); + } + + @Test + public void testRead_WithNullGenericRecord() { + Optional result = traceReader.read(null); + assertFalse(result.isPresent()); + } + + @Test + public void testMatchConsumerRecord_WithMatchingTraceAndAspect() throws IOException { + // Arrange + ConsumerRecord mockConsumerRecord = mock(ConsumerRecord.class); + + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TRACE_ID); + 
systemMetadata.setProperties(new StringMap(properties)); + + GenericRecord genericRecord = toGenericRecord(buildMessage(systemMetadata)); + when(mockConsumerRecord.value()).thenReturn(genericRecord); + + // Act + Optional, SystemMetadata>> result = + traceReader.matchConsumerRecord(mockConsumerRecord, TRACE_ID, ASPECT_NAME); + + // Assert + assertTrue(result.isPresent()); + assertEquals(result.get().getFirst(), mockConsumerRecord); + assertEquals(result.get().getSecond(), systemMetadata); + } + + @Test + public void testTracePendingStatuses() throws IOException { + // Arrange + List aspectNames = Collections.singletonList(ASPECT_NAME); + Map> urnAspectPairs = Collections.singletonMap(TEST_URN, aspectNames); + long timestamp = System.currentTimeMillis(); + + // Mock topic partition + TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, 0); + + // Mock consumer group offset lookup (lower offset) + OffsetAndMetadata offsetAndMetadata = new OffsetAndMetadata(100L); + ListConsumerGroupOffsetsResult mockOffsetResult = mock(ListConsumerGroupOffsetsResult.class); + when(adminClient.listConsumerGroupOffsets(CONSUMER_GROUP)).thenReturn(mockOffsetResult); + when(mockOffsetResult.partitionsToOffsetAndMetadata()) + .thenReturn( + KafkaFuture.completedFuture( + Collections.singletonMap(topicPartition, offsetAndMetadata))); + + // Mock offset lookup by timestamp + when(consumer.offsetsForTimes(any())) + .thenReturn( + Collections.singletonMap(topicPartition, new OffsetAndTimestamp(150L, timestamp))); + + // Create system metadata with trace ID + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TRACE_ID); + systemMetadata.setProperties(new StringMap(properties)); + + // Build message with metadata + M message = buildMessage(systemMetadata); + GenericRecord genericRecord = toGenericRecord(message); + + // Mock consumer record fetch with higher offset than consumer offset + 
ConsumerRecord mockRecord = + new ConsumerRecord<>(TOPIC_NAME, 0, 150L, TEST_URN.toString(), genericRecord); + ConsumerRecords mockRecords = mock(ConsumerRecords.class); + when(mockRecords.isEmpty()).thenReturn(false); + when(mockRecords.records(any(TopicPartition.class))) + .thenReturn(Collections.singletonList(mockRecord)); + when(consumer.poll(any(Duration.class))).thenReturn(mockRecords); + + // Act + Map> result = + traceReader.tracePendingStatuses(urnAspectPairs, TRACE_ID, timestamp); + + // Assert + assertTrue(result.containsKey(TEST_URN)); + assertTrue(result.get(TEST_URN).containsKey(ASPECT_NAME)); + assertEquals(result.get(TEST_URN).get(ASPECT_NAME).getWriteStatus(), TraceWriteStatus.PENDING); + } + + @Test + public void testFindMessages() throws Exception { + // Arrange + List aspectNames = Collections.singletonList(ASPECT_NAME); + Map> urnAspectPairs = Collections.singletonMap(TEST_URN, aspectNames); + long timestamp = System.currentTimeMillis(); + + // Mock topic partition assignment and offsets + TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, 0); + OffsetAndTimestamp offsetAndTimestamp = new OffsetAndTimestamp(100L, timestamp); + when(consumer.offsetsForTimes(any())) + .thenReturn(Collections.singletonMap(topicPartition, offsetAndTimestamp)); + + // Mock system metadata + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TRACE_ID); + systemMetadata.setProperties(new StringMap(properties)); + M message = buildMessage(systemMetadata); + + // Mock consumer record fetch + ConsumerRecord mockRecord = + new ConsumerRecord<>(TOPIC_NAME, 0, 100L, TEST_URN.toString(), toGenericRecord(message)); + ConsumerRecords mockRecords = mock(ConsumerRecords.class); + when(mockRecords.records(any(TopicPartition.class))) + .thenReturn(Collections.singletonList(mockRecord)); + when(consumer.poll(any(Duration.class))).thenReturn(mockRecords); + + // Act + Map, 
SystemMetadata>>> result = + traceReader.findMessages(urnAspectPairs, TRACE_ID, timestamp); + + // Assert + assertTrue(result.containsKey(TEST_URN)); + assertTrue(result.get(TEST_URN).containsKey(ASPECT_NAME)); + assertEquals(result.get(TEST_URN).get(ASPECT_NAME).getFirst(), mockRecord); + assertEquals(result.get(TEST_URN).get(ASPECT_NAME).getSecond(), systemMetadata); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/trace/MCLTraceReaderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/trace/MCLTraceReaderTest.java new file mode 100644 index 00000000000000..9a4afd74917d4b --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/trace/MCLTraceReaderTest.java @@ -0,0 +1,93 @@ +package com.linkedin.metadata.trace; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.linkedin.data.template.SetMode; +import com.linkedin.data.template.StringMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.EventUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.TraceContext; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.jetbrains.annotations.Nullable; +import org.testng.annotations.Test; + +public class MCLTraceReaderTest extends BaseKafkaTraceReaderTest { + @Override + KafkaTraceReader buildTraceReader() { + return MCLTraceReader.builder() + .adminClient(adminClient) + .consumerSupplier(() -> consumer) + .pollDurationMs(100) + .pollMaxAttempts(3) + .executorService(executorService) + .timeoutSeconds(5) + .topicName(TOPIC_NAME) + .consumerGroupId(CONSUMER_GROUP) + .build(); + } + + @Override 
+ MetadataChangeLog buildMessage(@Nullable SystemMetadata systemMetadata) { + return new MetadataChangeLog() + .setAspectName(ASPECT_NAME) + .setEntityType(TEST_URN.getEntityType()) + .setChangeType(ChangeType.UPSERT) + .setEntityUrn(TEST_URN) + .setSystemMetadata(systemMetadata, SetMode.IGNORE_NULL); + } + + @Override + GenericRecord toGenericRecord(MetadataChangeLog message) throws IOException { + return EventUtils.pegasusToAvroMCL(message); + } + + @Override + MetadataChangeLog fromGenericRecord(GenericRecord genericRecord) throws IOException { + return EventUtils.avroToPegasusMCL(genericRecord); + } + + @Test + public void testMCLRead() throws Exception { + MetadataChangeLog expectedMCL = buildMessage(null); + + GenericRecord genericRecord = toGenericRecord(expectedMCL); + + Optional result = traceReader.read(genericRecord); + + assertTrue(result.isPresent()); + assertEquals(result.get().getAspectName(), ASPECT_NAME); + } + + @Test + public void testMCLMatchConsumerRecord() throws Exception { + ConsumerRecord mockConsumerRecord = mock(ConsumerRecord.class); + + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TRACE_ID); + systemMetadata.setProperties(new StringMap(properties)); + + MetadataChangeLog mcl = buildMessage(systemMetadata); + + GenericRecord genericRecord = toGenericRecord(mcl); + when(mockConsumerRecord.value()).thenReturn(genericRecord); + + Optional, SystemMetadata>> result = + traceReader.matchConsumerRecord(mockConsumerRecord, TRACE_ID, ASPECT_NAME); + + assertTrue(result.isPresent()); + assertEquals(result.get().getFirst(), mockConsumerRecord); + assertEquals(result.get().getSecond(), systemMetadata); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/trace/MCPFailedTraceReaderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/trace/MCPFailedTraceReaderTest.java new file mode 100644 index 00000000000000..8d8fedfc98c2b3 --- 
/dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/trace/MCPFailedTraceReaderTest.java @@ -0,0 +1,98 @@ +package com.linkedin.metadata.trace; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.linkedin.data.template.SetMode; +import com.linkedin.data.template.StringMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.EventUtils; +import com.linkedin.mxe.FailedMetadataChangeProposal; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.TraceContext; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.jetbrains.annotations.Nullable; +import org.testng.annotations.Test; + +public class MCPFailedTraceReaderTest + extends BaseKafkaTraceReaderTest { + @Override + KafkaTraceReader buildTraceReader() { + return MCPFailedTraceReader.builder() + .adminClient(adminClient) + .consumerSupplier(() -> consumer) + .pollDurationMs(100) + .pollMaxAttempts(3) + .executorService(executorService) + .timeoutSeconds(5) + .topicName(TOPIC_NAME) + .consumerGroupId(CONSUMER_GROUP) + .build(); + } + + @Override + FailedMetadataChangeProposal buildMessage(@Nullable SystemMetadata systemMetadata) { + return new FailedMetadataChangeProposal() + .setError("Test failure error") + .setMetadataChangeProposal( + new MetadataChangeProposal() + .setAspectName(ASPECT_NAME) + .setEntityType(TEST_URN.getEntityType()) + .setChangeType(ChangeType.UPSERT) + .setEntityUrn(TEST_URN) + .setSystemMetadata(systemMetadata, SetMode.IGNORE_NULL)); + } + + @Override + GenericRecord toGenericRecord(FailedMetadataChangeProposal message) throws IOException { + 
return EventUtils.pegasusToAvroFailedMCP(message); + } + + @Override + FailedMetadataChangeProposal fromGenericRecord(GenericRecord genericRecord) throws IOException { + return EventUtils.avroToPegasusFailedMCP(genericRecord); + } + + @Test + public void testFailedMCPRead() throws Exception { + FailedMetadataChangeProposal expectedMCP = buildMessage(null); + + GenericRecord genericRecord = toGenericRecord(expectedMCP); + + Optional result = traceReader.read(genericRecord); + + assertTrue(result.isPresent()); + assertEquals(result.get().getMetadataChangeProposal().getAspectName(), ASPECT_NAME); + } + + @Test + public void testFailedMCPMatchConsumerRecord() throws Exception { + ConsumerRecord mockConsumerRecord = mock(ConsumerRecord.class); + + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TRACE_ID); + systemMetadata.setProperties(new StringMap(properties)); + + FailedMetadataChangeProposal fmcp = buildMessage(systemMetadata); + + GenericRecord genericRecord = toGenericRecord(fmcp); + when(mockConsumerRecord.value()).thenReturn(genericRecord); + + Optional, SystemMetadata>> result = + traceReader.matchConsumerRecord(mockConsumerRecord, TRACE_ID, ASPECT_NAME); + + assertTrue(result.isPresent()); + assertEquals(result.get().getFirst(), mockConsumerRecord); + assertEquals(result.get().getSecond(), systemMetadata); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/trace/MCPTraceReaderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/trace/MCPTraceReaderTest.java new file mode 100644 index 00000000000000..e2aa9730267ddf --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/trace/MCPTraceReaderTest.java @@ -0,0 +1,93 @@ +package com.linkedin.metadata.trace; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + 
+import com.linkedin.data.template.SetMode; +import com.linkedin.data.template.StringMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.EventUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.TraceContext; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.jetbrains.annotations.Nullable; +import org.testng.annotations.Test; + +public final class MCPTraceReaderTest extends BaseKafkaTraceReaderTest { + @Override + KafkaTraceReader buildTraceReader() { + return MCPTraceReader.builder() + .adminClient(adminClient) + .consumerSupplier(() -> consumer) + .pollDurationMs(100) + .pollMaxAttempts(3) + .executorService(executorService) + .timeoutSeconds(5) + .topicName(TOPIC_NAME) + .consumerGroupId(CONSUMER_GROUP) + .build(); + } + + @Override + MetadataChangeProposal buildMessage(@Nullable SystemMetadata systemMetadata) { + return new MetadataChangeProposal() + .setAspectName(ASPECT_NAME) + .setEntityType(TEST_URN.getEntityType()) + .setChangeType(ChangeType.UPSERT) + .setEntityUrn(TEST_URN) + .setSystemMetadata(systemMetadata, SetMode.IGNORE_NULL); + } + + @Override + GenericRecord toGenericRecord(MetadataChangeProposal message) throws IOException { + return EventUtils.pegasusToAvroMCP(message); + } + + @Override + MetadataChangeProposal fromGenericRecord(GenericRecord genericRecord) throws IOException { + return EventUtils.avroToPegasusMCP(genericRecord); + } + + @Test + public void testMCPRead() throws Exception { + MetadataChangeProposal expectedMCP = buildMessage(null); + + GenericRecord genericRecord = toGenericRecord(expectedMCP); + + Optional result = traceReader.read(genericRecord); + + assertTrue(result.isPresent()); + 
assertEquals(result.get().getAspectName(), ASPECT_NAME); + } + + @Test + public void testMCPMatchConsumerRecord() throws Exception { + ConsumerRecord mockConsumerRecord = mock(ConsumerRecord.class); + + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TRACE_ID); + systemMetadata.setProperties(new StringMap(properties)); + + MetadataChangeProposal mcp = buildMessage(systemMetadata); + + GenericRecord genericRecord = toGenericRecord(mcp); + when(mockConsumerRecord.value()).thenReturn(genericRecord); + + Optional, SystemMetadata>> result = + traceReader.matchConsumerRecord(mockConsumerRecord, TRACE_ID, ASPECT_NAME); + + assertTrue(result.isPresent()); + assertEquals(result.get().getFirst(), mockConsumerRecord); + assertEquals(result.get().getSecond(), systemMetadata); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/trace/TraceServiceImplTest.java b/metadata-io/src/test/java/com/linkedin/metadata/trace/TraceServiceImplTest.java new file mode 100644 index 00000000000000..abe65d48b3410a --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/trace/TraceServiceImplTest.java @@ -0,0 +1,350 @@ +package com.linkedin.metadata.trace; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anySet; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import 
com.linkedin.data.template.StringMap; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.EventUtils; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.run.AspectRowSummary; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.systemmetadata.TraceStatus; +import com.linkedin.metadata.systemmetadata.TraceStorageStatus; +import com.linkedin.metadata.systemmetadata.TraceWriteStatus; +import com.linkedin.mxe.FailedMetadataChangeProposal; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.TraceContext; +import io.datahubproject.metadata.context.TraceIdGenerator; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.time.Instant; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class TraceServiceImplTest { + private static final String TEST_TRACE_ID_FUTURE = + TraceContext.TRACE_ID_GENERATOR.generateTraceId(Instant.now().toEpochMilli() + 1000); + private static final String TEST_TRACE_ID = TraceContext.TRACE_ID_GENERATOR.generateTraceId(); + protected static final String ASPECT_NAME = "status"; + protected static final String TIMESERIES_ASPECT_NAME = "datasetProfile"; + protected static final Urn TEST_URN = + 
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:kafka,PageViewEvent,PROD)"); + + @Mock private SystemMetadataService systemMetadataService; + @Mock private EntityService entityService; + @Mock private MCPTraceReader mcpTraceReader; + @Mock private MCPFailedTraceReader mcpFailedTraceReader; + @Mock private MCLTraceReader mclVersionedTraceReader; + @Mock private MCLTraceReader mclTimeseriesTraceReader; + + private TraceServiceImpl traceService; + private static final OperationContext operationContext = + TestOperationContexts.systemContextNoSearchAuthorization(); + + @BeforeMethod + public void setup() throws Exception { + MockitoAnnotations.openMocks(this); + + traceService = + TraceServiceImpl.builder() + .entityRegistry(operationContext.getEntityRegistry()) + .systemMetadataService(systemMetadataService) + .entityService(entityService) + .mcpTraceReader(mcpTraceReader) + .mcpFailedTraceReader(mcpFailedTraceReader) + .mclVersionedTraceReader(mclVersionedTraceReader) + .mclTimeseriesTraceReader(mclTimeseriesTraceReader) + .build(); + } + + @Test + public void testTraceWithActiveState() throws Exception { + // Arrange + Map> aspectNames = + Collections.singletonMap(TEST_URN, Collections.singletonList(ASPECT_NAME)); + + // Mock entityService response for primary storage + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TEST_TRACE_ID); + systemMetadata.setProperties(new StringMap(properties)); + + EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setCreated(new AuditStamp().setTime(Instant.now().toEpochMilli())); + envelopedAspect.setSystemMetadata(systemMetadata); + + EntityResponse entityResponse = new EntityResponse(); + entityResponse.setAspects( + new EnvelopedAspectMap(Collections.singletonMap(ASPECT_NAME, envelopedAspect))); + entityResponse.setEntityName(TEST_URN.getEntityType()); + entityResponse.setUrn(TEST_URN); + + 
when(entityService.getEntitiesV2(any(), anyString(), anySet(), anySet(), anyBoolean())) + .thenReturn(Collections.singletonMap(TEST_URN, entityResponse)); + + // Mock search storage response + AspectRowSummary summary = mock(AspectRowSummary.class); + when(summary.getUrn()).thenReturn(TEST_URN.toString()); + when(summary.getAspectName()).thenReturn(ASPECT_NAME); + when(summary.getTelemetryTraceId()).thenReturn(TEST_TRACE_ID); + when(systemMetadataService.findAspectsByUrn(eq(TEST_URN), anyList(), eq(true))) + .thenReturn(Collections.singletonList(summary)); + + // Act + Map> result = + traceService.trace(operationContext, TEST_TRACE_ID, aspectNames, false, false); + + // Assert + assertNotNull(result); + assertTrue(result.containsKey(TEST_URN)); + Map urnStatus = result.get(TEST_URN); + assertTrue(urnStatus.containsKey(ASPECT_NAME)); + + TraceStatus status = urnStatus.get(ASPECT_NAME); + assertEquals(status.getPrimaryStorage().getWriteStatus(), TraceWriteStatus.ACTIVE_STATE); + assertEquals(status.getSearchStorage().getWriteStatus(), TraceWriteStatus.ACTIVE_STATE); + assertTrue(status.isSuccess()); + } + + @Test + public void testTraceWithPendingStatus() throws Exception { + // Arrange + Map> aspectNames = + Collections.singletonMap(TEST_URN, Collections.singletonList(ASPECT_NAME)); + + // Mock empty entity response (not in SQL) + when(entityService.getEntitiesV2(any(), anyString(), anySet(), anySet(), anyBoolean())) + .thenReturn(Collections.emptyMap()); + + // Mock pending status from Kafka + Map pendingStatus = new LinkedHashMap<>(); + pendingStatus.put( + ASPECT_NAME, + TraceStorageStatus.ok(TraceWriteStatus.PENDING, "Consumer has not processed offset.")); + + when(mcpTraceReader.tracePendingStatuses(any(), eq(TEST_TRACE_ID), any(), anyBoolean())) + .thenReturn(Collections.singletonMap(TEST_URN, pendingStatus)); + + // Act + Map> result = + traceService.trace(operationContext, TEST_TRACE_ID, aspectNames, false, false); + + // Assert + assertNotNull(result); + 
assertTrue(result.containsKey(TEST_URN)); + Map urnStatus = result.get(TEST_URN); + assertTrue(urnStatus.containsKey(ASPECT_NAME)); + + TraceStatus status = urnStatus.get(ASPECT_NAME); + assertEquals(status.getPrimaryStorage().getWriteStatus(), TraceWriteStatus.PENDING); + assertTrue(status.isSuccess()); + } + + @Test + public void testTraceWithErrorStatus() throws Exception { + // Arrange + Map> aspectNames = + Collections.singletonMap(TEST_URN, Collections.singletonList(ASPECT_NAME)); + + // Mock empty entity response + when(entityService.getEntitiesV2(any(), anyString(), anySet(), anySet(), anyBoolean())) + .thenReturn(Collections.emptyMap()); + + // Mock error status from Kafka + Map errorStatus = new LinkedHashMap<>(); + errorStatus.put( + ASPECT_NAME, TraceStorageStatus.fail(TraceWriteStatus.ERROR, "Failed to process message.")); + + when(mcpTraceReader.tracePendingStatuses(any(), eq(TEST_TRACE_ID), any(), anyBoolean())) + .thenReturn(Collections.singletonMap(TEST_URN, errorStatus)); + + // Act + Map> result = + traceService.trace(operationContext, TEST_TRACE_ID, aspectNames, true, true); + + // Assert + assertNotNull(result); + assertTrue(result.containsKey(TEST_URN)); + Map urnStatus = result.get(TEST_URN); + assertTrue(urnStatus.containsKey(ASPECT_NAME)); + + TraceStatus status = urnStatus.get(ASPECT_NAME); + assertEquals(status.getPrimaryStorage().getWriteStatus(), TraceWriteStatus.ERROR); + assertFalse(status.isSuccess()); + } + + @Test + public void testTraceWithTimeseriesAspect() throws Exception { + // Arrange + Map> aspectNames = + Collections.singletonMap(TEST_URN, Collections.singletonList(TIMESERIES_ASPECT_NAME)); + + // Act + Map> result = + traceService.trace(operationContext, TEST_TRACE_ID, aspectNames, false, false); + + // Assert + assertNotNull(result); + assertTrue(result.containsKey(TEST_URN)); + Map urnStatus = result.get(TEST_URN); + assertTrue(urnStatus.containsKey(TIMESERIES_ASPECT_NAME)); + + TraceStatus status = 
urnStatus.get(TIMESERIES_ASPECT_NAME); + assertEquals(status.getPrimaryStorage().getWriteStatus(), TraceWriteStatus.NO_OP); + assertEquals( + status.getSearchStorage().getWriteStatus(), TraceWriteStatus.TRACE_NOT_IMPLEMENTED); + assertTrue(status.isSuccess()); + } + + @Test + public void testTraceWithHistoricState() throws Exception { + // Arrange + Map> aspectNames = + Collections.singletonMap(TEST_URN, Collections.singletonList(ASPECT_NAME)); + + // Mock primary storage with historic state + SystemMetadata systemMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TEST_TRACE_ID_FUTURE); + systemMetadata.setProperties(new StringMap(properties)); + + EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setCreated( + new AuditStamp() + .setTime( + TraceIdGenerator.getTimestampMillis(TEST_TRACE_ID_FUTURE))); // Future timestamp + envelopedAspect.setSystemMetadata(systemMetadata); + + EntityResponse entityResponse = new EntityResponse(); + entityResponse.setAspects( + new EnvelopedAspectMap(Collections.singletonMap(ASPECT_NAME, envelopedAspect))); + entityResponse.setEntityName(TEST_URN.getEntityType()); + entityResponse.setUrn(TEST_URN); + + when(entityService.getEntitiesV2(any(), anyString(), anySet(), anySet(), anyBoolean())) + .thenReturn(Collections.singletonMap(TEST_URN, entityResponse)); + + // Mock search storage with historic state + AspectRowSummary summary = mock(AspectRowSummary.class); + when(summary.getUrn()).thenReturn(TEST_URN.toString()); + when(summary.getAspectName()).thenReturn(ASPECT_NAME); + when(summary.hasTimestamp()).thenReturn(true); + when(summary.getTimestamp()) + .thenReturn(TraceIdGenerator.getTimestampMillis(TEST_TRACE_ID_FUTURE)); // Future timestamp + when(summary.getTelemetryTraceId()).thenReturn(TEST_TRACE_ID_FUTURE); + + when(systemMetadataService.findAspectsByUrn(eq(TEST_URN), anyList(), eq(true))) + 
.thenReturn(Collections.singletonList(summary)); + + // Act + Map> result = + traceService.trace(operationContext, TEST_TRACE_ID, aspectNames, false, false); + + // Assert + assertNotNull(result); + assertTrue(result.containsKey(TEST_URN)); + Map urnStatus = result.get(TEST_URN); + assertTrue(urnStatus.containsKey(ASPECT_NAME)); + + TraceStatus status = urnStatus.get(ASPECT_NAME); + assertEquals(status.getPrimaryStorage().getWriteStatus(), TraceWriteStatus.HISTORIC_STATE); + assertEquals(status.getSearchStorage().getWriteStatus(), TraceWriteStatus.HISTORIC_STATE); + assertTrue(status.isSuccess()); + } + + @Test + public void testTraceWithFailedMessage() throws Exception { + // Arrange + Map> aspectNames = + Collections.singletonMap(TEST_URN, Collections.singletonList(ASPECT_NAME)); + + // Mock primary storage with ERROR status + Map errorStatus = new LinkedHashMap<>(); + errorStatus.put(ASPECT_NAME, TraceStorageStatus.fail(TraceWriteStatus.ERROR, "Initial error")); + + when(mcpTraceReader.tracePendingStatuses(any(), eq(TEST_TRACE_ID), any(), anyBoolean())) + .thenReturn(Collections.singletonMap(TEST_URN, errorStatus)); + + // Mock the failed message in MCPFailedTraceReader + SystemMetadata failedMetadata = new SystemMetadata(); + Map properties = new HashMap<>(); + properties.put(TraceContext.TELEMETRY_TRACE_KEY, TEST_TRACE_ID); + failedMetadata.setProperties(new StringMap(properties)); + + FailedMetadataChangeProposal failedMCP = + new FailedMetadataChangeProposal() + .setError( + "[{\"message\":\"Processing failed: Test error message\",\"exceptionClass\":\"java.lang.IllegalArgumentException\"}]") + .setMetadataChangeProposal( + new MetadataChangeProposal() + .setEntityUrn(TEST_URN) + .setChangeType(ChangeType.UPSERT) + .setAspectName(ASPECT_NAME) + .setEntityType(TEST_URN.getEntityType()) + .setSystemMetadata(failedMetadata)); + + GenericRecord genericRecord = EventUtils.pegasusToAvroFailedMCP(failedMCP); + ConsumerRecord failedRecord = 
mock(ConsumerRecord.class); + when(failedRecord.value()).thenReturn(genericRecord); + + Map, SystemMetadata>> failedMessages = + Collections.singletonMap(ASPECT_NAME, Pair.of(failedRecord, failedMetadata)); + + when(mcpFailedTraceReader.findMessages(any(), eq(TEST_TRACE_ID), any())) + .thenReturn(Collections.singletonMap(TEST_URN, failedMessages)); + + // Mock failed record read with error message + when(mcpFailedTraceReader.read(eq(genericRecord))).thenReturn(Optional.of(failedMCP)); + + // Act + Map> result = + traceService.trace(operationContext, TEST_TRACE_ID, aspectNames, true, true); + + // Assert + assertNotNull(result); + assertTrue(result.containsKey(TEST_URN)); + Map urnStatus = result.get(TEST_URN); + assertTrue(urnStatus.containsKey(ASPECT_NAME)); + + TraceStatus status = urnStatus.get(ASPECT_NAME); + assertEquals(status.getPrimaryStorage().getWriteStatus(), TraceWriteStatus.ERROR); + assertNotNull(status.getPrimaryStorage().getWriteExceptions()); + assertEquals(status.getPrimaryStorage().getWriteExceptions().size(), 1); + assertEquals( + status.getPrimaryStorage().getWriteExceptions().get(0).getMessage(), + "Processing failed: Test error message"); + assertEquals( + status.getPrimaryStorage().getWriteExceptions().get(0).getExceptionClass(), + "java.lang.IllegalArgumentException"); + assertFalse(status.isSuccess()); + } +} diff --git a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MAEOpenTelemetryConfig.java b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MAEOpenTelemetryConfig.java new file mode 100644 index 00000000000000..e1761755fd3adc --- /dev/null +++ b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MAEOpenTelemetryConfig.java @@ -0,0 +1,21 @@ +package com.linkedin.metadata.kafka; + +import com.linkedin.gms.factory.system_telemetry.OpenTelemetryBaseFactory; +import io.datahubproject.metadata.context.TraceContext; +import org.springframework.context.annotation.Bean; 
+import org.springframework.context.annotation.Configuration; + +@Configuration +public class MAEOpenTelemetryConfig extends OpenTelemetryBaseFactory { + + @Override + protected String getApplicationComponent() { + return "datahub-mae-consumer"; + } + + @Bean + @Override + protected TraceContext traceContext() { + return super.traceContext(); + } +} diff --git a/metadata-jobs/mae-consumer/build.gradle b/metadata-jobs/mae-consumer/build.gradle index b4990e289b10df..08c712f6167b40 100644 --- a/metadata-jobs/mae-consumer/build.gradle +++ b/metadata-jobs/mae-consumer/build.gradle @@ -33,6 +33,7 @@ dependencies { implementation externalDependency.springKafka implementation externalDependency.annotationApi + implementation externalDependency.opentelemetrySdkTrace implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java index d699f0bff68019..29a3ffa0a340d6 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/DataHubUsageEventsProcessor.java @@ -2,7 +2,6 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.linkedin.events.metadata.ChangeType; import com.linkedin.gms.factory.kafka.SimpleKafkaConsumerFactory; import com.linkedin.metadata.kafka.config.DataHubUsageEventsProcessorCondition; @@ -12,11 +11,13 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.Topics; +import io.datahubproject.metadata.context.OperationContext; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.Optional; import 
lombok.extern.slf4j.Slf4j; import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Conditional; import org.springframework.context.annotation.Import; import org.springframework.kafka.annotation.EnableKafka; @@ -33,6 +34,7 @@ public class DataHubUsageEventsProcessor { private final ElasticsearchConnector elasticSearchConnector; private final DataHubUsageEventTransformer dataHubUsageEventTransformer; private final String indexName; + private final OperationContext systemOperationContext; private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); @@ -40,10 +42,12 @@ public class DataHubUsageEventsProcessor { public DataHubUsageEventsProcessor( ElasticsearchConnector elasticSearchConnector, DataHubUsageEventTransformer dataHubUsageEventTransformer, - IndexConvention indexConvention) { + IndexConvention indexConvention, + @Qualifier("systemOperationContext") OperationContext systemOperationContext) { this.elasticSearchConnector = elasticSearchConnector; this.dataHubUsageEventTransformer = dataHubUsageEventTransformer; this.indexName = indexConvention.getIndexName("datahub_usage_event"); + this.systemOperationContext = systemOperationContext; } @KafkaListener( @@ -52,31 +56,36 @@ public DataHubUsageEventsProcessor( containerFactory = "simpleKafkaConsumer", autoStartup = "false") public void consume(final ConsumerRecord consumerRecord) { - try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { - kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); - final String record = consumerRecord.value(); + systemOperationContext.withSpan( + "consume", + () -> { + kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); + final String record = consumerRecord.value(); - log.info( - "Got DHUE event key: {}, topic: {}, partition: {}, offset: {}, 
value size: {}, timestamp: {}", - consumerRecord.key(), - consumerRecord.topic(), - consumerRecord.partition(), - consumerRecord.offset(), - consumerRecord.serializedValueSize(), - consumerRecord.timestamp()); + log.info( + "Got DHUE event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", + consumerRecord.key(), + consumerRecord.topic(), + consumerRecord.partition(), + consumerRecord.offset(), + consumerRecord.serializedValueSize(), + consumerRecord.timestamp()); - Optional eventDocument = - dataHubUsageEventTransformer.transformDataHubUsageEvent(record); - if (eventDocument.isEmpty()) { - log.warn("Failed to apply usage events transform to record: {}", record); - return; - } - JsonElasticEvent elasticEvent = new JsonElasticEvent(eventDocument.get().getDocument()); - elasticEvent.setId(generateDocumentId(eventDocument.get().getId(), consumerRecord.offset())); - elasticEvent.setIndex(indexName); - elasticEvent.setActionType(ChangeType.CREATE); - elasticSearchConnector.feedElasticEvent(elasticEvent); - } + Optional eventDocument = + dataHubUsageEventTransformer.transformDataHubUsageEvent(record); + if (eventDocument.isEmpty()) { + log.warn("Failed to apply usage events transform to record: {}", record); + return; + } + JsonElasticEvent elasticEvent = new JsonElasticEvent(eventDocument.get().getDocument()); + elasticEvent.setId( + generateDocumentId(eventDocument.get().getId(), consumerRecord.offset())); + elasticEvent.setIndex(indexName); + elasticEvent.setActionType(ChangeType.CREATE); + elasticSearchConnector.feedElasticEvent(elasticEvent); + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "consume")); } /** diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java index a2d59023ba5ce2..ec33ea12209310 100644 --- 
a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/MCLKafkaListener.java @@ -7,7 +7,6 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.linkedin.common.urn.Urn; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.EventUtils; @@ -15,6 +14,8 @@ import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.MetadataChangeLog; import io.datahubproject.metadata.context.OperationContext; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.StatusCode; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -33,11 +34,13 @@ public class MCLKafkaListener { private final String consumerGroupId; private final List hooks; + private final OperationContext systemOperationContext; public MCLKafkaListener( OperationContext systemOperationContext, String consumerGroup, List hooks) { + this.systemOperationContext = systemOperationContext; this.consumerGroupId = consumerGroup; this.hooks = hooks; this.hooks.forEach(hook -> hook.init(systemOperationContext)); @@ -49,7 +52,7 @@ public MCLKafkaListener( } public void consume(final ConsumerRecord consumerRecord) { - try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { + try { kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); final GenericRecord record = consumerRecord.value(); log.debug( @@ -85,38 +88,61 @@ public void consume(final ConsumerRecord consumerRecord) MDC.put( MDC_CHANGE_TYPE, Optional.ofNullable(changeType).map(ChangeType::toString).orElse("")); - log.info( - "Invoking MCL hooks for consumer: {} urn: {}, aspect name: {}, entity type: {}, change type: {}", - consumerGroupId, - entityUrn, - aspectName, - entityType, - changeType); + systemOperationContext.withQueueSpan( + 
"consume", + event.getSystemMetadata(), + consumerRecord.topic(), + () -> { + log.info( + "Invoking MCL hooks for consumer: {} urn: {}, aspect name: {}, entity type: {}, change type: {}", + consumerGroupId, + entityUrn, + aspectName, + entityType, + changeType); - // Here - plug in additional "custom processor hooks" - for (MetadataChangeLogHook hook : this.hooks) { - log.debug( - "Invoking MCL hook {} for urn: {}", - hook.getClass().getSimpleName(), - event.getEntityUrn()); - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), hook.getClass().getSimpleName() + "_latency") - .time()) { - hook.invoke(event); - } catch (Exception e) { - // Just skip this hook and continue. - Note that this represents "at most once"// - // processing. - MetricUtils.counter(this.getClass(), hook.getClass().getSimpleName() + "_failure").inc(); - log.error( - "Failed to execute MCL hook with name {}", hook.getClass().getCanonicalName(), e); - } - } - // TODO: Manually commit kafka offsets after full processing. - MetricUtils.counter(this.getClass(), consumerGroupId + "_consumed_mcl_count").inc(); - log.info( - "Successfully completed MCL hooks for consumer: {} urn: {}", - consumerGroupId, - event.getEntityUrn()); + // Here - plug in additional "custom processor hooks" + for (MetadataChangeLogHook hook : this.hooks) { + systemOperationContext.withSpan( + hook.getClass().getSimpleName(), + () -> { + log.debug( + "Invoking MCL hook {} for urn: {}", + hook.getClass().getSimpleName(), + event.getEntityUrn()); + try { + hook.invoke(event); + } catch (Exception e) { + // Just skip this hook and continue. - Note that this represents "at most + // once"// + // processing. 
+ MetricUtils.counter( + this.getClass(), hook.getClass().getSimpleName() + "_failure") + .inc(); + log.error( + "Failed to execute MCL hook with name {}", + hook.getClass().getCanonicalName(), + e); + + Span currentSpan = Span.current(); + currentSpan.recordException(e); + currentSpan.setStatus(StatusCode.ERROR, e.getMessage()); + currentSpan.setAttribute(MetricUtils.ERROR_TYPE, e.getClass().getName()); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), hook.getClass().getSimpleName() + "_latency")); + } + + // TODO: Manually commit kafka offsets after full processing. + MetricUtils.counter(this.getClass(), consumerGroupId + "_consumed_mcl_count").inc(); + log.info( + "Successfully completed MCL hooks for consumer: {} urn: {}", + consumerGroupId, + event.getEntityUrn()); + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "consume")); } finally { MDC.clear(); } diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index c92749385145de..017570cfcf7afb 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -96,6 +96,7 @@ public OperationContext operationContext( indexConvention, TestOperationContexts.emptyActiveUsersRetrieverContext(() -> entityRegistry), mock(ValidationContext.class), + null, true); } diff --git a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MCEOpenTelemetryConfig.java b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MCEOpenTelemetryConfig.java new file mode 100644 index 00000000000000..9ae6aa5f50c1f4 --- /dev/null +++ 
b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MCEOpenTelemetryConfig.java @@ -0,0 +1,21 @@ +package com.linkedin.metadata.kafka; + +import com.linkedin.gms.factory.system_telemetry.OpenTelemetryBaseFactory; +import io.datahubproject.metadata.context.TraceContext; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class MCEOpenTelemetryConfig extends OpenTelemetryBaseFactory { + + @Override + protected String getApplicationComponent() { + return "datahub-mce-consumer"; + } + + @Bean + @Override + protected TraceContext traceContext() { + return super.traceContext(); + } +} diff --git a/metadata-jobs/mce-consumer/build.gradle b/metadata-jobs/mce-consumer/build.gradle index 2da3957c4bb218..21951106ca6b24 100644 --- a/metadata-jobs/mce-consumer/build.gradle +++ b/metadata-jobs/mce-consumer/build.gradle @@ -30,6 +30,7 @@ dependencies { implementation externalDependency.protobuf implementation externalDependency.springKafka + implementation externalDependency.opentelemetrySdkTrace implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java index 2152ed15cf0e93..b5d721589aeef3 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeEventsProcessor.java @@ -4,7 +4,6 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.linkedin.entity.Entity; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.gms.factory.entityclient.RestliEntityClientFactory; @@ -66,35 +65,39 @@ public 
class MetadataChangeEventsProcessor { autoStartup = "false") @Deprecated public void consume(final ConsumerRecord consumerRecord) { - try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { - kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); - final GenericRecord record = consumerRecord.value(); + systemOperationContext.withSpan( + "consume", + () -> { + kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); + final GenericRecord record = consumerRecord.value(); - log.info( - "Got MCE event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", - consumerRecord.key(), - consumerRecord.topic(), - consumerRecord.partition(), - consumerRecord.offset(), - consumerRecord.serializedValueSize(), - consumerRecord.timestamp()); + log.info( + "Got MCE event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", + consumerRecord.key(), + consumerRecord.topic(), + consumerRecord.partition(), + consumerRecord.offset(), + consumerRecord.serializedValueSize(), + consumerRecord.timestamp()); - log.debug("Record {}", record); + log.debug("Record {}", record); - MetadataChangeEvent event = new MetadataChangeEvent(); + MetadataChangeEvent event = new MetadataChangeEvent(); - try { - event = EventUtils.avroToPegasusMCE(record); - log.debug("MetadataChangeEvent {}", event); - if (event.hasProposedSnapshot()) { - processProposedSnapshot(event); - } - } catch (Throwable throwable) { - log.error("MCE Processor Error", throwable); - log.error("Message: {}", record); - sendFailedMCE(event, throwable); - } - } + try { + event = EventUtils.avroToPegasusMCE(record); + log.debug("MetadataChangeEvent {}", event); + if (event.hasProposedSnapshot()) { + processProposedSnapshot(event); + } + } catch (Throwable throwable) { + log.error("MCE Processor Error", throwable); + log.error("Message: {}", record); + sendFailedMCE(event, throwable); + } + }, + 
MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "consume")); } private void sendFailedMCE(@Nonnull MetadataChangeEvent event, @Nonnull Throwable throwable) { diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java index d854a5517793ff..98195f57526824 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java @@ -5,10 +5,10 @@ import static com.linkedin.metadata.Constants.MDC_ENTITY_TYPE; import static com.linkedin.metadata.Constants.MDC_ENTITY_URN; import static com.linkedin.metadata.config.kafka.KafkaConfiguration.MCP_EVENT_CONSUMER_NAME; +import static com.linkedin.mxe.ConsumerGroups.MCP_CONSUMER_GROUP_ID_VALUE; import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.linkedin.common.urn.Urn; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.events.metadata.ChangeType; @@ -16,20 +16,23 @@ import com.linkedin.gms.factory.entityclient.RestliEntityClientFactory; import com.linkedin.metadata.EventUtils; import com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.kafka.config.MetadataChangeProposalProcessorCondition; import com.linkedin.metadata.kafka.util.KafkaListenerUtil; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.Topics; import io.datahubproject.metadata.context.OperationContext; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.StatusCode; +import java.io.IOException; +import java.util.List; import java.util.Optional; import 
javax.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.producer.Producer; import org.slf4j.MDC; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; @@ -47,12 +50,9 @@ @EnableKafka @RequiredArgsConstructor public class MetadataChangeProposalsProcessor { - private static final String CONSUMER_GROUP_ID_VALUE = - "${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}"; - private final OperationContext systemOperationContext; private final SystemEntityClient entityClient; - private final Producer kafkaProducer; + private final EventProducer kafkaProducer; @Qualifier("kafkaThrottle") private final ThrottleSensor kafkaThrottle; @@ -69,7 +69,7 @@ public class MetadataChangeProposalsProcessor { + "}") private String fmcpTopicName; - @Value(CONSUMER_GROUP_ID_VALUE) + @Value(MCP_CONSUMER_GROUP_ID_VALUE) private String mceConsumerGroupId; @PostConstruct @@ -78,12 +78,12 @@ public void registerConsumerThrottle() { } @KafkaListener( - id = CONSUMER_GROUP_ID_VALUE, + id = MCP_CONSUMER_GROUP_ID_VALUE, topics = "${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}", containerFactory = MCP_EVENT_CONSUMER_NAME, autoStartup = "false") public void consume(final ConsumerRecord consumerRecord) { - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "consume").time()) { + try { kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); final GenericRecord record = consumerRecord.value(); @@ -100,29 +100,50 @@ public void consume(final ConsumerRecord consumerRecord) log.debug("Record {}", record); } - MetadataChangeProposal event = new MetadataChangeProposal(); + final MetadataChangeProposal event; try { 
event = EventUtils.avroToPegasusMCP(record); - Urn entityUrn = event.getEntityUrn(); - String aspectName = event.hasAspectName() ? event.getAspectName() : null; - String entityType = event.hasEntityType() ? event.getEntityType() : null; - ChangeType changeType = event.hasChangeType() ? event.getChangeType() : null; - MDC.put(MDC_ENTITY_URN, Optional.ofNullable(entityUrn).map(Urn::toString).orElse("")); - MDC.put(MDC_ASPECT_NAME, aspectName); - MDC.put(MDC_ENTITY_TYPE, entityType); - MDC.put( - MDC_CHANGE_TYPE, Optional.ofNullable(changeType).map(ChangeType::toString).orElse("")); - - if (log.isDebugEnabled()) { - log.debug("MetadataChangeProposal {}", event); - } - String urn = entityClient.ingestProposal(systemOperationContext, event, false); - log.info("Successfully processed MCP event urn: {}", urn); - } catch (Throwable throwable) { - log.error("MCP Processor Error", throwable); - log.error("Message: {}", record); - KafkaListenerUtil.sendFailedMCP(event, throwable, fmcpTopicName, kafkaProducer); + systemOperationContext.withQueueSpan( + "consume", + event.getSystemMetadata(), + consumerRecord.topic(), + () -> { + try { + Urn entityUrn = event.getEntityUrn(); + String aspectName = event.hasAspectName() ? event.getAspectName() : null; + String entityType = event.hasEntityType() ? event.getEntityType() : null; + ChangeType changeType = event.hasChangeType() ? 
event.getChangeType() : null; + MDC.put( + MDC_ENTITY_URN, Optional.ofNullable(entityUrn).map(Urn::toString).orElse("")); + MDC.put(MDC_ASPECT_NAME, aspectName); + MDC.put(MDC_ENTITY_TYPE, entityType); + MDC.put( + MDC_CHANGE_TYPE, + Optional.ofNullable(changeType).map(ChangeType::toString).orElse("")); + + if (log.isDebugEnabled()) { + log.debug("MetadataChangeProposal {}", event); + } + String urn = entityClient.ingestProposal(systemOperationContext, event, false); + log.info("Successfully processed MCP event urn: {}", urn); + } catch (Throwable throwable) { + log.error("MCP Processor Error", throwable); + log.error("Message: {}", record); + Span currentSpan = Span.current(); + currentSpan.recordException(throwable); + currentSpan.setStatus(StatusCode.ERROR, throwable.getMessage()); + currentSpan.setAttribute(MetricUtils.ERROR_TYPE, throwable.getClass().getName()); + + kafkaProducer.produceFailedMetadataChangeProposal( + systemOperationContext, List.of(event), throwable); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "consume")); + } catch (IOException e) { + log.error( + "Unrecoverable message deserialization error. 
Cannot forward to failure topic.", e); } } finally { MDC.clear(); diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java index 5ee9cd6ba94d2f..6f9798aa1da2c4 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/batch/BatchMetadataChangeProposalsProcessor.java @@ -1,28 +1,34 @@ package com.linkedin.metadata.kafka.batch; +import static com.linkedin.metadata.config.kafka.KafkaConfiguration.MCP_EVENT_CONSUMER_NAME; +import static com.linkedin.metadata.utils.metrics.MetricUtils.BATCH_SIZE_ATTR; +import static com.linkedin.mxe.ConsumerGroups.MCP_CONSUMER_GROUP_ID_VALUE; + import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityclient.RestliEntityClientFactory; import com.linkedin.metadata.EventUtils; import com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.kafka.config.batch.BatchMetadataChangeProposalProcessorCondition; import com.linkedin.metadata.kafka.util.KafkaListenerUtil; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; import com.linkedin.mxe.Topics; import io.datahubproject.metadata.context.OperationContext; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.StatusCode; +import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.annotation.PostConstruct; import 
lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.producer.Producer; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Conditional; @@ -39,12 +45,9 @@ @EnableKafka @RequiredArgsConstructor public class BatchMetadataChangeProposalsProcessor { - private static final String CONSUMER_GROUP_ID_VALUE = - "${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}"; - private final OperationContext systemOperationContext; private final SystemEntityClient entityClient; - private final Producer kafkaProducer; + private final EventProducer kafkaProducer; @Qualifier("kafkaThrottle") private final ThrottleSensor kafkaThrottle; @@ -61,7 +64,7 @@ public class BatchMetadataChangeProposalsProcessor { + "}") private String fmcpTopicName; - @Value(CONSUMER_GROUP_ID_VALUE) + @Value(MCP_CONSUMER_GROUP_ID_VALUE) private String mceConsumerGroupId; @PostConstruct @@ -70,48 +73,68 @@ public void registerConsumerThrottle() { } @KafkaListener( - id = CONSUMER_GROUP_ID_VALUE, + id = MCP_CONSUMER_GROUP_ID_VALUE, topics = "${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}", - containerFactory = "kafkaEventConsumer", + containerFactory = MCP_EVENT_CONSUMER_NAME, batch = "true", autoStartup = "false") public void consume(final List> consumerRecords) { - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "consume").time()) { - List metadataChangeProposals = - new ArrayList<>(consumerRecords.size()); - for (ConsumerRecord consumerRecord : consumerRecords) { - kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); - final GenericRecord record = consumerRecord.value(); - - log.info( - "Got MCP event 
key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", - consumerRecord.key(), - consumerRecord.topic(), - consumerRecord.partition(), - consumerRecord.offset(), - consumerRecord.serializedValueSize(), - consumerRecord.timestamp()); - - MetadataChangeProposal event = new MetadataChangeProposal(); - try { - event = EventUtils.avroToPegasusMCP(record); - } catch (Throwable throwable) { - log.error("MCP Processor Error", throwable); - log.error("Message: {}", record); - KafkaListenerUtil.sendFailedMCP(event, throwable, fmcpTopicName, kafkaProducer); - } - metadataChangeProposals.add(event); + List metadataChangeProposals = new ArrayList<>(consumerRecords.size()); + String topicName = null; + + for (ConsumerRecord consumerRecord : consumerRecords) { + kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); + final GenericRecord record = consumerRecord.value(); + + log.info( + "Got MCP event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", + consumerRecord.key(), + consumerRecord.topic(), + consumerRecord.partition(), + consumerRecord.offset(), + consumerRecord.serializedValueSize(), + consumerRecord.timestamp()); + + if (topicName == null) { + topicName = consumerRecord.topic(); } + final MetadataChangeProposal event; try { - List urns = - entityClient.batchIngestProposals( - systemOperationContext, metadataChangeProposals, false); - log.info("Successfully processed MCP event urns: {}", urns); - } catch (Exception e) { - // Java client should never throw this - log.error("Exception in batch ingest", e); + event = EventUtils.avroToPegasusMCP(record); + metadataChangeProposals.add(event); + } catch (IOException e) { + log.error( + "Unrecoverable message deserialization error. 
Cannot forward to failure topic.", e); } } + + List systemMetadataList = + metadataChangeProposals.stream().map(MetadataChangeProposal::getSystemMetadata).toList(); + systemOperationContext.withQueueSpan( + "consume", + systemMetadataList, + topicName, + () -> { + try { + List urns = + entityClient.batchIngestProposals( + systemOperationContext, metadataChangeProposals, false); + log.info("Successfully processed MCP event urns: {}", urns); + } catch (Throwable throwable) { + log.error("MCP Processor Error", throwable); + Span currentSpan = Span.current(); + currentSpan.recordException(throwable); + currentSpan.setStatus(StatusCode.ERROR, throwable.getMessage()); + currentSpan.setAttribute(MetricUtils.ERROR_TYPE, throwable.getClass().getName()); + + kafkaProducer.produceFailedMetadataChangeProposal( + systemOperationContext, metadataChangeProposals, throwable); + } + }, + BATCH_SIZE_ATTR, + String.valueOf(metadataChangeProposals.size()), + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "consume")); } } diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java index 874a45c995e911..c9dcb55d8e3d33 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/util/KafkaListenerUtil.java @@ -1,20 +1,10 @@ package com.linkedin.metadata.kafka.util; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.metadata.EventUtils; import com.linkedin.metadata.dao.throttle.ThrottleControl; import com.linkedin.metadata.dao.throttle.ThrottleSensor; -import com.linkedin.mxe.FailedMetadataChangeProposal; -import com.linkedin.mxe.MetadataChangeProposal; -import java.io.IOException; import java.util.Optional; -import javax.annotation.Nonnull; import 
lombok.extern.slf4j.Slf4j; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.commons.lang.exception.ExceptionUtils; -import org.apache.kafka.clients.producer.Producer; -import org.apache.kafka.clients.producer.ProducerRecord; import org.springframework.kafka.config.KafkaListenerEndpointRegistry; import org.springframework.kafka.listener.MessageListenerContainer; @@ -61,36 +51,4 @@ public static void registerThrottle( log.info("MCE Consumer Throttle Disabled"); } } - - public static void sendFailedMCP( - @Nonnull MetadataChangeProposal event, - @Nonnull Throwable throwable, - String fmcpTopicName, - Producer kafkaProducer) { - final FailedMetadataChangeProposal failedMetadataChangeProposal = - createFailedMCPEvent(event, throwable); - try { - final GenericRecord genericFailedMCERecord = - EventUtils.pegasusToAvroFailedMCP(failedMetadataChangeProposal); - log.debug("Sending FailedMessages to topic - {}", fmcpTopicName); - log.info( - "Error while processing FMCP: FailedMetadataChangeProposal - {}", - failedMetadataChangeProposal); - kafkaProducer.send(new ProducerRecord<>(fmcpTopicName, genericFailedMCERecord)); - } catch (IOException e) { - log.error( - "Error while sending FailedMetadataChangeProposal: Exception - {}, FailedMetadataChangeProposal - {}", - e.getStackTrace(), - failedMetadataChangeProposal); - } - } - - @Nonnull - public static FailedMetadataChangeProposal createFailedMCPEvent( - @Nonnull MetadataChangeProposal event, @Nonnull Throwable throwable) { - final FailedMetadataChangeProposal fmcp = new FailedMetadataChangeProposal(); - fmcp.setError(ExceptionUtils.getStackTrace(throwable)); - fmcp.setMetadataChangeProposal(event); - return fmcp; - } } diff --git a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java index 2befeccb951a38..dadede7fe20bf5 100644 --- 
a/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java +++ b/metadata-jobs/pe-consumer/src/main/java/com/datahub/event/PlatformEventProcessor.java @@ -4,7 +4,6 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.datahub.event.hook.PlatformEventHook; import com.linkedin.metadata.EventUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -61,53 +60,66 @@ public PlatformEventProcessor( containerFactory = PE_EVENT_CONSUMER_NAME, autoStartup = "false") public void consume(final ConsumerRecord consumerRecord) { - try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) { - log.debug("Consuming a Platform Event"); + systemOperationContext.withSpan( + "consume", + () -> { + log.debug("Consuming a Platform Event"); - kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); - final GenericRecord record = consumerRecord.value(); - log.info( - "Got PE event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", - consumerRecord.key(), - consumerRecord.topic(), - consumerRecord.partition(), - consumerRecord.offset(), - consumerRecord.serializedValueSize(), - consumerRecord.timestamp()); - MetricUtils.counter(this.getClass(), "received_pe_count").inc(); + kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp()); + final GenericRecord record = consumerRecord.value(); + log.info( + "Got PE event key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}", + consumerRecord.key(), + consumerRecord.topic(), + consumerRecord.partition(), + consumerRecord.offset(), + consumerRecord.serializedValueSize(), + consumerRecord.timestamp()); + MetricUtils.counter(this.getClass(), "received_pe_count").inc(); - PlatformEvent event; - try { - event = EventUtils.avroToPegasusPE(record); - log.debug("Successfully converted Avro PE to Pegasus PE. 
name: {}", event.getName()); - } catch (Exception e) { - MetricUtils.counter(this.getClass(), "avro_to_pegasus_conversion_failure").inc(); - log.error("Error deserializing message due to: ", e); - log.error("Message: {}", record.toString()); - return; - } + PlatformEvent event; + try { + event = EventUtils.avroToPegasusPE(record); + log.debug("Successfully converted Avro PE to Pegasus PE. name: {}", event.getName()); + } catch (Exception e) { + MetricUtils.counter(this.getClass(), "avro_to_pegasus_conversion_failure").inc(); + log.error("Error deserializing message due to: ", e); + log.error("Message: {}", record.toString()); + return; + } - log.info("Invoking PE hooks for event name {}", event.getName()); + log.info("Invoking PE hooks for event name {}", event.getName()); - for (PlatformEventHook hook : this.hooks) { - log.info( - "Invoking PE hook {} for event name {}", - hook.getClass().getSimpleName(), - event.getName()); - try (Timer.Context ignored = - MetricUtils.timer(this.getClass(), hook.getClass().getSimpleName() + "_latency") - .time()) { - hook.invoke(systemOperationContext, event); - } catch (Exception e) { - // Just skip this hook and continue. - MetricUtils.counter(this.getClass(), hook.getClass().getSimpleName() + "_failure").inc(); - log.error( - "Failed to execute PE hook with name {}", hook.getClass().getCanonicalName(), e); - } - } - MetricUtils.counter(this.getClass(), "consumed_pe_count").inc(); - log.info("Successfully completed PE hooks for event with name {}", event.getName()); - } + for (PlatformEventHook hook : this.hooks) { + log.info( + "Invoking PE hook {} for event name {}", + hook.getClass().getSimpleName(), + event.getName()); + + systemOperationContext.withSpan( + hook.getClass().getSimpleName(), + () -> { + try { + hook.invoke(systemOperationContext, event); + } catch (Exception e) { + // Just skip this hook and continue. 
+ MetricUtils.counter( + this.getClass(), hook.getClass().getSimpleName() + "_failure") + .inc(); + log.error( + "Failed to execute PE hook with name {}", + hook.getClass().getCanonicalName(), + e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), hook.getClass().getSimpleName() + "_latency")); + } + MetricUtils.counter(this.getClass(), "consumed_pe_count").inc(); + log.info("Successfully completed PE hooks for event with name {}", event.getName()); + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "consume")); } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/run/AspectRowSummary.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/run/AspectRowSummary.pdl index 7ebc97e73877b2..ad7015afdf827d 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/run/AspectRowSummary.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/run/AspectRowSummary.pdl @@ -11,4 +11,5 @@ record AspectRowSummary { version: long keyAspect: boolean aspect: optional Aspect + telemetryTraceId: optional string } diff --git a/metadata-operation-context/build.gradle b/metadata-operation-context/build.gradle index 71b61528d187bd..a02d47790f2ed6 100644 --- a/metadata-operation-context/build.gradle +++ b/metadata-operation-context/build.gradle @@ -10,9 +10,11 @@ dependencies { implementation externalDependency.slf4jApi implementation externalDependency.servletApi implementation spec.product.pegasus.restliServer + implementation externalDependency.opentelemetryApi + implementation externalDependency.opentelemetrySdkTrace compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok testImplementation externalDependency.testng - testImplementation externalDependency.mockito + testImplementation externalDependency.mockitoInline } \ No newline at end of file diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java 
b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 30255f7ebcac36..dd8769ba427325 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -5,6 +5,7 @@ import com.datahub.authorization.AuthorizationSession; import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; @@ -16,17 +17,23 @@ import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import com.linkedin.mxe.SystemMetadata; import io.datahubproject.metadata.exception.ActorAccessException; import io.datahubproject.metadata.exception.OperationContextException; +import io.datahubproject.metadata.exception.TraceException; import java.util.Collection; +import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; import lombok.Getter; +import lombok.extern.slf4j.Slf4j; /** * These contexts define a read/write context which allows more flexibility when reading and writing @@ -41,6 +48,7 @@ */ @Builder(toBuilder = true) @Getter +@Slf4j public class OperationContext implements AuthorizationSession { /** @@ -153,6 +161,7 @@ public static OperationContext asSystem( @Nullable IndexConvention indexConvention, @Nullable RetrieverContext retrieverContext, @Nonnull ValidationContext validationContext, + @Nullable TraceContext 
traceContext, boolean enforceExistenceEnabled) { return asSystem( config, @@ -163,6 +172,7 @@ public static OperationContext asSystem( retrieverContext, validationContext, ObjectMapperContext.DEFAULT, + traceContext, enforceExistenceEnabled); } @@ -175,6 +185,7 @@ public static OperationContext asSystem( @Nullable RetrieverContext retrieverContext, @Nonnull ValidationContext validationContext, @Nonnull ObjectMapperContext objectMapperContext, + @Nullable TraceContext traceContext, boolean enforceExistenceEnabled) { ActorContext systemActorContext = @@ -202,6 +213,7 @@ public static OperationContext asSystem( .retrieverContext(retrieverContext) .objectMapperContext(objectMapperContext) .validationContext(validationContext) + .traceContext(traceContext) .build(systemAuthentication, false); } catch (OperationContextException e) { throw new RuntimeException(e); @@ -219,6 +231,7 @@ public static OperationContext asSystem( @Nonnull private final RetrieverContext retrieverContext; @Nonnull private final ObjectMapperContext objectMapperContext; @Nonnull private final ValidationContext validationContext; + @Nullable private final TraceContext traceContext; public OperationContext withSearchFlags( @Nonnull Function flagDefaults) { @@ -343,6 +356,84 @@ public AuthorizationResult authorize( return authorizationContext.authorize(getSessionActorContext(), privilege, resourceSpec); } + @Nullable + public SystemMetadata withTraceId(@Nullable SystemMetadata systemMetadata) { + if (systemMetadata != null && traceContext != null) { + return traceContext.withTraceId(systemMetadata); + } + return systemMetadata; + } + + public SystemMetadata withProducerTrace( + String operationName, @Nullable SystemMetadata systemMetadata, String topicName) { + if (systemMetadata != null && traceContext != null) { + return traceContext.withProducerTrace(operationName, systemMetadata, topicName); + } + return systemMetadata; + } + + /** + * Generic method to capture spans + * + * @param name name of 
the span + * @param operation the actual logic + * @param attributes additional attributes + * @return the output from the logic + * @param generic + */ + public T withSpan(String name, Supplier operation, String... attributes) { + if (traceContext != null) { + return traceContext.withSpan(name, operation, attributes); + } else { + return operation.get(); + } + } + + public void withSpan(String name, Runnable operation, String... attributes) { + if (traceContext != null) { + traceContext.withSpan(name, operation, attributes); + } else { + operation.run(); + } + } + + public void withQueueSpan( + String name, + SystemMetadata systemMetadata, + String topicName, + Runnable operation, + String... attributes) { + if (systemMetadata != null) { + withQueueSpan(name, List.of(systemMetadata), topicName, operation, attributes); + } else { + operation.run(); + } + } + + public void withQueueSpan( + String name, + List systemMetadata, + String topicName, + Runnable operation, + String... attributes) { + if (traceContext != null) { + traceContext.withQueueSpan(name, systemMetadata, topicName, operation, attributes); + } else { + operation.run(); + } + } + + public String traceException(Set throwables) { + try { + return getObjectMapper() + .writeValueAsString( + throwables.stream().map(TraceException::new).collect(Collectors.toList())); + } catch (JsonProcessingException e) { + log.error("Error creating trace.", e); + } + return throwables.stream().map(Throwable::getMessage).collect(Collectors.joining("\n")); + } + /** * Return a unique id for this context. Typically useful for building cache keys. We combine the * different context components to create a single string representation of the hashcode across @@ -371,6 +462,7 @@ public String getGlobalContextId() { .add(getRequestContext() == null ? EmptyContext.EMPTY : getRequestContext()) .add(getRetrieverContext()) .add(getObjectMapperContext()) + .add(getTraceContext() == null ? 
EmptyContext.EMPTY : getTraceContext()) .build() .stream() .map(ContextInterface::getCacheKeyComponent) @@ -513,7 +605,8 @@ public OperationContext build(@Nonnull ActorContext sessionActor, boolean skipCa this.requestContext, this.retrieverContext, this.objectMapperContext != null ? this.objectMapperContext : ObjectMapperContext.DEFAULT, - this.validationContext); + this.validationContext, + this.traceContext); } private OperationContext build() { diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java index 779c418a56142f..022a75945b5049 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java @@ -2,6 +2,7 @@ import com.google.common.net.HttpHeaders; import com.linkedin.restli.server.ResourceContext; +import io.opentelemetry.api.trace.Span; import jakarta.servlet.http.HttpServletRequest; import java.util.Arrays; import java.util.Collection; @@ -59,6 +60,13 @@ public Optional getCacheKeyComponent() { public static class RequestContextBuilder { private RequestContext build() { + + // Add context for tracing + Span.current() + .setAttribute("user.id", this.actorUrn) + .setAttribute("request.api", this.requestAPI.toString()) + .setAttribute("request.id", this.requestID); + return new RequestContext( this.actorUrn, this.sourceIP, this.requestAPI, this.requestID, this.userAgent); } diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceContext.java new file mode 100644 index 00000000000000..d881020aed0528 --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceContext.java @@ -0,0 +1,414 @@ 
+package io.datahubproject.metadata.context; + +import static com.linkedin.metadata.utils.metrics.MetricUtils.BATCH_SIZE_ATTR; +import static com.linkedin.metadata.utils.metrics.MetricUtils.QUEUE_DURATION_MS_ATTR; +import static com.linkedin.metadata.utils.metrics.MetricUtils.QUEUE_ENQUEUED_AT_ATTR; + +import com.linkedin.data.template.StringMap; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.metadata.utils.metrics.MetricUtils; +import com.linkedin.mxe.SystemMetadata; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanBuilder; +import io.opentelemetry.api.trace.SpanContext; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.TraceFlags; +import io.opentelemetry.api.trace.TraceState; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.context.Context; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SpanExporter; +import jakarta.servlet.http.Cookie; +import jakarta.servlet.http.HttpServletRequest; +import java.time.Instant; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Getter +@Builder +public class TraceContext implements ContextInterface { + // trace logging + public static final String TRACE_HEADER = "X-Enable-Trace-Log"; + public static final String TRACE_COOKIE = "enable-trace-log"; + // system metadata properties + public static final String TELEMETRY_TRACE_KEY = "telemetryTraceId"; + public static final String TELEMETRY_QUEUE_SPAN_KEY = "telemetryQueueSpanId"; + public static final String 
TELEMETRY_LOG_KEY = "telemetryLog"; + public static final String TELEMETRY_ENQUEUED_AT = "telemetryEnqueuedAt"; + + public static final TraceIdGenerator TRACE_ID_GENERATOR = new TraceIdGenerator(); + public static final SpanExporter LOG_SPAN_EXPORTER = new ConditionalLogSpanExporter(); + + private static final ThreadLocal logTracingEnabled = new ThreadLocal<>(); + + public static boolean isLogTracingEnabled() { + Boolean enabled = logTracingEnabled.get(); + return enabled != null && enabled; + } + + public static void clear() { + logTracingEnabled.remove(); + } + + public static void enableLogTracing(HttpServletRequest request) { + // Check header + String headerValue = request.getHeader(TRACE_HEADER); + if ("true".equalsIgnoreCase(headerValue)) { + logTracingEnabled.set(true); + return; + } + + // Check cookie + Cookie[] cookies = request.getCookies(); + if (cookies != null) { + for (Cookie cookie : cookies) { + if (TRACE_COOKIE.equals(cookie.getName()) && "true".equalsIgnoreCase(cookie.getValue())) { + logTracingEnabled.set(true); + return; + } + } + } + + logTracingEnabled.set(false); + } + + @Getter @Nonnull private final Tracer tracer; + + @Override + public Optional getCacheKeyComponent() { + return Optional.empty(); + } + + /** + * Generic method to capture spans + * + * @param name name of the span + * @param operation the actual logic + * @param attributes additional attributes + * @return the output from the logic + * @param generic + */ + public T withSpan(String name, Supplier operation, String... 
attributes) { + Span span = tracer.spanBuilder(name).startSpan(); + try (var scope = span.makeCurrent()) { + for (int i = 0; i < attributes.length; i += 2) { + span.setAttribute(attributes[i], attributes[i + 1]); + } + return operation.get(); + } catch (Exception e) { + span.setStatus(StatusCode.ERROR, e.getMessage()); + span.recordException(e); + span.setAttribute(MetricUtils.ERROR_TYPE, e.getClass().getName()); + throw e; + } finally { + span.end(); + } + } + + protected void withSpan(String name, Runnable operation, String... attributes) { + Span span = tracer.spanBuilder(name).startSpan(); + try (var scope = span.makeCurrent()) { + for (int i = 0; i < attributes.length; i += 2) { + span.setAttribute(attributes[i], attributes[i + 1]); + } + operation.run(); + } catch (Exception e) { + span.setStatus(StatusCode.ERROR, e.getMessage()); + span.recordException(e); + span.setAttribute(MetricUtils.ERROR_TYPE, e.getClass().getName()); + throw e; + } finally { + span.end(); + } + } + + /** + * Handle multiple messages with different trace ids processed from a queue + * + * @param name name of the processing of the queue + * @param batchSystemMetadata batch of system metadata + * @param operation actual processing logic + * @param attributes span attributes + */ + protected void withQueueSpan( + String name, + List batchSystemMetadata, + String topicName, + Runnable operation, + String... 
attributes) { + + List tracingEnabledSystemMetadata = + batchSystemMetadata.stream() + .filter( + sysMeta -> + Objects.nonNull(sysMeta) + && sysMeta.getProperties() != null + && sysMeta.getProperties().get(TELEMETRY_TRACE_KEY) != null + && sysMeta.getProperties().get(TELEMETRY_QUEUE_SPAN_KEY) != null) + .collect(Collectors.toList()); + + // resume log tracing + logTracingEnabled.set( + tracingEnabledSystemMetadata.stream() + .anyMatch( + sysMeta -> + Boolean.parseBoolean( + sysMeta.getProperties().getOrDefault(TELEMETRY_LOG_KEY, "false")))); + + // Create the span builder, close queue span and add links + SpanBuilder spanBuilder = + tracer.spanBuilder(name).setAttribute(BATCH_SIZE_ATTR, batchSystemMetadata.size()); + + List originalSpanContexts = + tracingEnabledSystemMetadata.stream() + .map(sysMeta -> closeQueueSpan(name, sysMeta, topicName)) + .filter(Objects::nonNull) + .distinct() + .collect(Collectors.toList()); + + Span span; + if (originalSpanContexts.size() == 1) { + // set parent if there is only a single original trace in the batch + spanBuilder.setParent(Context.current().with(Span.wrap(originalSpanContexts.get(0)))); + span = spanBuilder.startSpan(); + } else { + // otherwise link the current trace to all original traces + originalSpanContexts.forEach(spanBuilder::addLink); + span = spanBuilder.startSpan(); + + // log linked traces + if (isLogTracingEnabled()) { + log.info( + "Trace: {}, Linked Traces: {}", + span.getSpanContext().getTraceId(), + originalSpanContexts.stream().map(SpanContext::getTraceId)); + } + } + + try (var scope = span.makeCurrent()) { + // Set additional attributes + for (int i = 0; i < attributes.length; i += 2) { + span.setAttribute(attributes[i], attributes[i + 1]); + } + + operation.run(); + } catch (Exception e) { + span.setStatus(StatusCode.ERROR, e.getMessage()); + span.recordException(e); + span.setAttribute(MetricUtils.ERROR_TYPE, e.getClass().getName()); + throw e; + } finally { + span.end(); + } + } + + public 
SystemMetadata withTraceId(@Nonnull SystemMetadata systemMetadata) { + if (systemMetadata.getProperties() == null + || !systemMetadata.getProperties().containsKey(TELEMETRY_TRACE_KEY)) { + SpanContext currentSpanContext = Span.current().getSpanContext(); + + if (currentSpanContext.isValid()) { + SystemMetadata copy = GenericRecordUtils.copy(systemMetadata, SystemMetadata.class); + + if (!copy.hasProperties() || copy.getProperties() == null) { + copy.setProperties(new StringMap()); + } + + copy.getProperties().putAll(Map.of(TELEMETRY_TRACE_KEY, currentSpanContext.getTraceId())); + + return copy; + } + } + + return systemMetadata; + } + + /** Method to capture the current trace and span ids in systemMetadata */ + public SystemMetadata withProducerTrace( + String operationName, @Nonnull SystemMetadata systemMetadata, String topicName) { + SpanContext currentSpanContext = Span.current().getSpanContext(); + + if (currentSpanContext.isValid()) { + SystemMetadata copy = GenericRecordUtils.copy(systemMetadata, SystemMetadata.class); + + if (!copy.hasProperties() || copy.getProperties() == null) { + copy.setProperties(new StringMap()); + } + + // Create the queue span that will be closed by consumer + Span queueSpan = + tracer + .spanBuilder(operationName) + .setParent(Context.current()) + .setSpanKind(SpanKind.PRODUCER) + .setAttribute(MetricUtils.MESSAGING_SYSTEM, "kafka") + .setAttribute(MetricUtils.MESSAGING_DESTINATION, topicName) + .setAttribute(MetricUtils.MESSAGING_DESTINATION_KIND, "topic") + .setAttribute(MetricUtils.MESSAGING_OPERATION, "publish") + .startSpan(); + + long enqueuedAt = Instant.now().toEpochMilli(); + if (!copy.getProperties().containsKey(TELEMETRY_TRACE_KEY)) { + copy.getProperties() + .putAll( + Map.of( + TELEMETRY_TRACE_KEY, currentSpanContext.getTraceId(), + TELEMETRY_QUEUE_SPAN_KEY, queueSpan.getSpanContext().getSpanId())); + } + + copy.getProperties() + .putAll( + Map.of( + TELEMETRY_LOG_KEY, String.valueOf(isLogTracingEnabled()), + 
TELEMETRY_ENQUEUED_AT, String.valueOf(enqueuedAt))); + + // It will be mirrored by consumer with enqueued time + queueSpan.setAttribute(QUEUE_ENQUEUED_AT_ATTR, enqueuedAt).end(); + + return copy; + } + + return systemMetadata; + } + + /** + * When processing from queue - create new span with stored parent context + * + * @param systemMetadata systemMetadata with trace/span ids to restore + */ + @Nullable + private Span queueConsumerTrace( + String operationName, @Nonnull SystemMetadata systemMetadata, String topicName) { + + SpanContext queueSpanContext = closeQueueSpan(operationName, systemMetadata, topicName); + + if (queueSpanContext != null) { + // Create the processing span with the queue span as parent + return tracer + .spanBuilder(operationName) + .setParent( + Context.current() + .with(Span.wrap(queueSpanContext))) // Use queue span context as parent + .startSpan(); + } + + return null; + } + + @Nullable + private SpanContext closeQueueSpan( + String operationName, SystemMetadata metadata, String topicName) { + if (metadata != null && metadata.getProperties() != null) { + // resume log tracing + logTracingEnabled.set( + Boolean.parseBoolean(metadata.getProperties().getOrDefault(TELEMETRY_LOG_KEY, "false"))); + + String traceId = metadata.getProperties().get(TELEMETRY_TRACE_KEY); + String queueSpanId = metadata.getProperties().get(TELEMETRY_QUEUE_SPAN_KEY); + + if (traceId != null && queueSpanId != null) { + + SpanContext queueSpanContext = + SpanContext.createFromRemoteParent( + traceId, queueSpanId, TraceFlags.getSampled(), TraceState.getDefault()); + + // Get the span and end it with duration + SpanBuilder queueSpanBuilder = + tracer + .spanBuilder(operationName) + .setParent(Context.current().with(Span.wrap(queueSpanContext))) + .setSpanKind(SpanKind.CONSUMER); + + Span queueSpan = + queueSpanBuilder + .startSpan() + .setAttribute(MetricUtils.MESSAGING_SYSTEM, "kafka") + .setAttribute(MetricUtils.MESSAGING_DESTINATION, topicName) + 
.setAttribute(MetricUtils.MESSAGING_DESTINATION_KIND, "topic") + .setAttribute(MetricUtils.MESSAGING_OPERATION, "receive"); + + // calculate duration + if (metadata.getProperties().containsKey(TELEMETRY_ENQUEUED_AT)) { + long enqueuedAt = Long.parseLong(metadata.getProperties().get(TELEMETRY_ENQUEUED_AT)); + long queueTimeMillis = Instant.now().toEpochMilli() - enqueuedAt; + queueSpan + .setAttribute(QUEUE_ENQUEUED_AT_ATTR, enqueuedAt) + .setAttribute(QUEUE_DURATION_MS_ATTR, queueTimeMillis); + } + + queueSpan.end(); + + return queueSpanContext; + } + } + + return null; + } + + private static class ConditionalLogSpanExporter implements SpanExporter { + + @Override + public CompletableResultCode export(Collection spans) { + if (isLogTracingEnabled()) { + spans.forEach( + span -> { + log.info( + "Trace: {}, SpanId: {}, ParentId: {}, Name: {}, Duration: {} ms", + span.getTraceId(), + span.getSpanId(), + span.getParentSpanId(), + span.getName(), + String.format( + "%.2f", (span.getEndEpochNanos() - span.getStartEpochNanos()) / 1_000_000.0)); + + if (!span.getAttributes().isEmpty()) { + log.info("Trace: {}, Attributes: {}", span.getTraceId(), span.getAttributes()); + } + + if (!span.getEvents().isEmpty()) { + log.info("Trace: {}, Events: {}", span.getTraceId(), span.getEvents()); + } + + // Add logging for links + if (!span.getLinks().isEmpty()) { + span.getLinks() + .forEach( + link -> { + log.info( + "Trace: {}, Linked TraceId: {}, Linked SpanId: {}, Link Attributes: {}", + span.getTraceId(), + link.getSpanContext().getTraceId(), + link.getSpanContext().getSpanId(), + link.getAttributes()); + }); + } + }); + } + + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode shutdown() { + return CompletableResultCode.ofSuccess(); + } + } +} diff --git 
a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceIdGenerator.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceIdGenerator.java new file mode 100644 index 00000000000000..9011a0e28d38a6 --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/TraceIdGenerator.java @@ -0,0 +1,48 @@ +package io.datahubproject.metadata.context; + +import com.google.common.annotations.VisibleForTesting; +import io.opentelemetry.sdk.trace.IdGenerator; +import java.time.Instant; +import java.util.concurrent.ThreadLocalRandom; + +public class TraceIdGenerator implements IdGenerator { + private final IdGenerator defaultGenerator; + + public TraceIdGenerator() { + this.defaultGenerator = IdGenerator.random(); + } + + @VisibleForTesting + public String generateTraceId(long epochMillis) { + // First 8 bytes (16 hex chars) as timestamp in micros + long timestampMicros = epochMillis * 1000; + // Last 8 bytes as random to ensure uniqueness + long randomBits = ThreadLocalRandom.current().nextLong(); + + return String.format("%016x%016x", timestampMicros, randomBits); + } + + @Override + public String generateTraceId() { + return generateTraceId(Instant.now().toEpochMilli()); + } + + @Override + public String generateSpanId() { + // Use default random generation for span IDs + return defaultGenerator.generateSpanId(); + } + + // Utility method to extract timestamp + private static long getTimestampMicros(String traceId) { + if (traceId == null || traceId.length() < 16) { + throw new IllegalArgumentException("Invalid trace ID format"); + } + return Long.parseUnsignedLong(traceId.substring(0, 16), 16); + } + + // Convert to milliseconds for easier comparison + public static long getTimestampMillis(String traceId) { + return getTimestampMicros(traceId) / 1000; + } +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/TraceException.java 
b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/TraceException.java new file mode 100644 index 00000000000000..85b32d6ca06d16 --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/exception/TraceException.java @@ -0,0 +1,40 @@ +package io.datahubproject.metadata.exception; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import java.util.Arrays; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Value; +import lombok.extern.jackson.Jacksonized; + +@Value +@Builder +@Jacksonized +@AllArgsConstructor +@JsonInclude(Include.NON_NULL) +public class TraceException { + String message; + String exceptionClass; + String[] stackTrace; + TraceException cause; + + public TraceException(Throwable throwable) { + this.message = throwable.getMessage(); + this.exceptionClass = throwable.getClass().getName(); + this.stackTrace = + Arrays.stream(throwable.getStackTrace()) + .map(StackTraceElement::toString) + .toArray(String[]::new); + + // Handle nested cause + this.cause = throwable.getCause() != null ? 
new TraceException(throwable.getCause()) : null; + } + + public TraceException(String message) { + this.message = message; + this.exceptionClass = null; + this.stackTrace = null; + this.cause = null; + } +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 92d62d42295b92..7e9f081a11e383 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -22,11 +22,13 @@ import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import io.datahubproject.metadata.context.ObjectMapperContext; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.OperationContextConfig; import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.ServicesRegistryContext; +import io.datahubproject.metadata.context.TraceContext; import io.datahubproject.metadata.context.ValidationContext; import java.util.Map; import java.util.Optional; @@ -193,6 +195,8 @@ public static OperationContext systemContextNoSearchAuthorization( retrieverContextSupplier, indexConventionSupplier, null, + null, + null, null); } @@ -210,7 +214,26 @@ public static OperationContext systemContextNoSearchAuthorization( retrieverContextSupplier, indexConventionSupplier, null, - environmentContextSupplier); + environmentContextSupplier, + null, + null); + } + + public static OperationContext systemContextTraceNoSearchAuthorization( + @Nullable Supplier objectMapperContextSupplier, + @Nullable Supplier 
traceContextSupplier) { + + return systemContext( + null, + null, + null, + null, + null, + null, + null, + null, + objectMapperContextSupplier, + traceContextSupplier); } public static OperationContext systemContext( @@ -222,6 +245,30 @@ public static OperationContext systemContext( @Nullable Supplier indexConventionSupplier, @Nullable Consumer postConstruct, @Nullable Supplier environmentContextSupplier) { + return systemContext( + configSupplier, + systemAuthSupplier, + servicesRegistrySupplier, + entityRegistrySupplier, + retrieverContextSupplier, + indexConventionSupplier, + postConstruct, + environmentContextSupplier, + null, + null); + } + + public static OperationContext systemContext( + @Nullable Supplier configSupplier, + @Nullable Supplier systemAuthSupplier, + @Nullable Supplier servicesRegistrySupplier, + @Nullable Supplier entityRegistrySupplier, + @Nullable Supplier retrieverContextSupplier, + @Nullable Supplier indexConventionSupplier, + @Nullable Consumer postConstruct, + @Nullable Supplier environmentContextSupplier, + @Nullable Supplier objectMapperContextSupplier, + @Nullable Supplier traceContextSupplier) { OperationContextConfig config = Optional.ofNullable(configSupplier).map(Supplier::get).orElse(DEFAULT_OPCONTEXT_CONFIG); @@ -252,6 +299,13 @@ public static OperationContext systemContext( .map(Supplier::get) .orElse(defaultValidationContext); + ObjectMapperContext objectMapperContext = + objectMapperContextSupplier == null + ? ObjectMapperContext.DEFAULT + : objectMapperContextSupplier.get(); + + TraceContext traceContext = traceContextSupplier != null ? 
traceContextSupplier.get() : null; + OperationContext operationContext = OperationContext.asSystem( config, @@ -261,6 +315,8 @@ public static OperationContext systemContext( indexConvention, retrieverContext, validationContext, + objectMapperContext, + traceContext, true); if (postConstruct != null) { diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java index a2575c1c562209..df6bcaa848c5f0 100644 --- a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/OperationContextTest.java @@ -1,17 +1,44 @@ package io.datahubproject.metadata.context; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; +import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.SystemMetadata; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.function.Supplier; +import org.testng.annotations.BeforeMethod; import 
org.testng.annotations.Test; public class OperationContextTest { + private TraceContext mockTraceContext; + private SystemMetadata mockSystemMetadata; + private ObjectMapper mockObjectMapper; + + @BeforeMethod + public void setUp() { + mockTraceContext = mock(TraceContext.class); + mockSystemMetadata = mock(SystemMetadata.class); + mockObjectMapper = mock(ObjectMapper.class); + } @Test public void testSystemPrivilegeEscalation() { @@ -28,6 +55,7 @@ public void testSystemPrivilegeEscalation() { null, TestOperationContexts.emptyActiveUsersRetrieverContext(null), mock(ValidationContext.class), + null, true); OperationContext opContext = @@ -69,4 +97,159 @@ public void testSystemPrivilegeEscalation() { assertEquals(opContextNoSystem.getSystemActorContext().getAuthentication(), systemAuth); assertEquals(opContextNoSystem.getSessionAuthentication(), userAuth); } + + @Test + public void testWithTraceId_WithTraceContextAndSystemMetadata() { + when(mockTraceContext.withTraceId(mockSystemMetadata)).thenReturn(mockSystemMetadata); + + SystemMetadata result = buildTraceMock().withTraceId(mockSystemMetadata); + + verify(mockTraceContext).withTraceId(mockSystemMetadata); + assertEquals(result, mockSystemMetadata); + } + + @Test + public void testWithTraceId_NullSystemMetadata() { + SystemMetadata result = buildTraceMock().withTraceId(null); + + verifyNoInteractions(mockTraceContext); + assertNull(result); + } + + @Test + public void testWithTraceId_NullTraceContext() { + OperationContext operationContext = buildTraceMock(() -> null); + + SystemMetadata result = operationContext.withTraceId(mockSystemMetadata); + + assertEquals(result, mockSystemMetadata); + } + + @Test + public void testWithSpan_WithTraceContext() { + String spanName = "testSpan"; + String[] attributes = {"attr1", "attr2"}; + final boolean[] operationExecuted = {false}; + Supplier operation = + () -> { + operationExecuted[0] = true; + return "result"; + }; + + // Capture the supplier passed to withSpan to verify 
it's executed + doAnswer( + invocation -> { + Supplier capturedSupplier = invocation.getArgument(1); + return capturedSupplier.get(); + }) + .when(mockTraceContext) + .withSpan(eq(spanName), any(Supplier.class), eq(attributes)); + + String result = buildTraceMock().withSpan(spanName, operation, attributes); + + verify(mockTraceContext).withSpan(eq(spanName), any(Supplier.class), eq(attributes)); + assertTrue(operationExecuted[0], "The operation supplier should have been executed"); + assertEquals(result, "result", "The result should match the operation's return value"); + } + + @Test + public void testWithSpan_NullTraceContext() { + OperationContext operationContext = buildTraceMock(() -> null); + + String spanName = "testSpan"; + String[] attributes = {"attr1", "attr2"}; + Supplier operation = () -> "result"; + + String result = operationContext.withSpan(spanName, operation, attributes); + + assertEquals(result, "result"); + } + + @Test + public void testWithSpan_RunnableWithTraceContext() { + String spanName = "testSpan"; + String[] attributes = {"attr1", "attr2"}; + Runnable operation = mock(Runnable.class); + + buildTraceMock().withSpan(spanName, operation, attributes); + + verify(mockTraceContext).withSpan(eq(spanName), eq(operation), eq(attributes)); + verifyNoMoreInteractions(operation); + } + + @Test + public void testWithQueueSpan_SingleSystemMetadata() { + String spanName = "testQueueSpan"; + String topicName = "testTopic"; + String[] attributes = {"attr1", "attr2"}; + Runnable operation = mock(Runnable.class); + + buildTraceMock().withQueueSpan(spanName, mockSystemMetadata, topicName, operation, attributes); + + verify(mockTraceContext) + .withQueueSpan( + eq(spanName), + eq(List.of(mockSystemMetadata)), + eq(topicName), + eq(operation), + eq(attributes)); + } + + @Test + public void testWithQueueSpan_MultipleSystemMetadata() { + String spanName = "testQueueSpan"; + String topicName = "testTopic"; + String[] attributes = {"attr1", "attr2"}; + Runnable 
operation = mock(Runnable.class); + List systemMetadataList = Arrays.asList(mockSystemMetadata, mockSystemMetadata); + + buildTraceMock().withQueueSpan(spanName, systemMetadataList, topicName, operation, attributes); + + verify(mockTraceContext) + .withQueueSpan( + eq(spanName), eq(systemMetadataList), eq(topicName), eq(operation), eq(attributes)); + } + + @Test + public void testTraceException() throws Exception { + Set throwables = new HashSet<>(); + throwables.add(new RuntimeException("test exception 1")); + throwables.add(new IllegalArgumentException("test exception 2")); + + String expectedJson = "[{\"message\":\"test exception 1\"},{\"message\":\"test exception 2\"}]"; + when(mockObjectMapper.writeValueAsString(any())).thenReturn(expectedJson); + + String result = buildTraceMock().traceException(throwables); + + verify(mockObjectMapper).writeValueAsString(any()); + assertEquals(result, expectedJson); + } + + @Test + public void testTraceException_JsonProcessingError() throws Exception { + Set throwables = new HashSet<>(); + RuntimeException ex1 = new RuntimeException("test exception 1"); + IllegalArgumentException ex2 = new IllegalArgumentException("test exception 2"); + throwables.add(ex1); + throwables.add(ex2); + + when(mockObjectMapper.writeValueAsString(any())) + .thenThrow(new com.fasterxml.jackson.core.JsonProcessingException("") {}); + + String result = buildTraceMock().traceException(throwables); + + verify(mockObjectMapper).writeValueAsString(any()); + assertTrue(result.contains("test exception 1")); + assertTrue(result.contains("test exception 2")); + } + + private OperationContext buildTraceMock() { + return buildTraceMock(null); + } + + private OperationContext buildTraceMock(Supplier traceContextSupplier) { + return TestOperationContexts.systemContextTraceNoSearchAuthorization( + () -> ObjectMapperContext.builder().objectMapper(mockObjectMapper).build(), + traceContextSupplier == null ? 
() -> mockTraceContext : traceContextSupplier); + } } diff --git a/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/TraceContextTest.java b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/TraceContextTest.java new file mode 100644 index 00000000000000..8adf540c570b17 --- /dev/null +++ b/metadata-operation-context/src/test/java/io/datahubproject/metadata/context/TraceContextTest.java @@ -0,0 +1,293 @@ +package io.datahubproject.metadata.context; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; + +import com.linkedin.data.template.StringMap; +import com.linkedin.mxe.SystemMetadata; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanBuilder; +import io.opentelemetry.api.trace.SpanContext; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.context.Context; +import io.opentelemetry.context.Scope; +import jakarta.servlet.http.Cookie; +import jakarta.servlet.http.HttpServletRequest; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class TraceContextTest { + @Mock private Tracer tracer; + @Mock private HttpServletRequest request; + @Mock private Span span; + @Mock private 
SpanContext spanContext; + + private TraceContext traceContext; + + @BeforeMethod + public void setup() { + MockitoAnnotations.openMocks(this); + traceContext = TraceContext.builder().tracer(tracer).build(); + + // Clear any existing thread local state + TraceContext.clear(); + } + + @Test + public void testEnableLogTracingWithHeader() { + when(request.getHeader(TraceContext.TRACE_HEADER)).thenReturn("true"); + TraceContext.enableLogTracing(request); + assertTrue(TraceContext.isLogTracingEnabled()); + } + + @Test + public void testEnableLogTracingWithCookie() { + when(request.getHeader(TraceContext.TRACE_HEADER)).thenReturn(null); + Cookie cookie = new Cookie(TraceContext.TRACE_COOKIE, "true"); + when(request.getCookies()).thenReturn(new Cookie[] {cookie}); + TraceContext.enableLogTracing(request); + assertTrue(TraceContext.isLogTracingEnabled()); + } + + @Test + public void testEnableLogTracingDisabled() { + when(request.getHeader(TraceContext.TRACE_HEADER)).thenReturn("false"); + when(request.getCookies()).thenReturn(null); + TraceContext.enableLogTracing(request); + assertFalse(TraceContext.isLogTracingEnabled()); + } + + @Test + public void testWithTraceIdValidSpanContext() { + SystemMetadata systemMetadata = new SystemMetadata(); + when(span.getSpanContext()).thenReturn(spanContext); + when(spanContext.isValid()).thenReturn(true); + when(spanContext.getTraceId()).thenReturn("test-trace-id"); + + try (var mockedStatic = mockStatic(Span.class)) { + mockedStatic.when(Span::current).thenReturn(span); + SystemMetadata result = traceContext.withTraceId(systemMetadata); + assertNotNull(result.getProperties()); + assertEquals(result.getProperties().get(TraceContext.TELEMETRY_TRACE_KEY), "test-trace-id"); + } + } + + @Test + public void testWithTraceIdInvalidSpanContext() { + SystemMetadata systemMetadata = new SystemMetadata(); + when(span.getSpanContext()).thenReturn(spanContext); + when(spanContext.isValid()).thenReturn(false); + + try (var mockedStatic = 
mockStatic(Span.class)) { + mockedStatic.when(Span::current).thenReturn(span); + SystemMetadata result = traceContext.withTraceId(systemMetadata); + assertSame(result, systemMetadata); + } + } + + @Test + public void testWithQueueSpanBatch() { + // Setup + List batchMetadata = new ArrayList<>(); + SystemMetadata metadata1 = new SystemMetadata(); + metadata1.setProperties(new StringMap()); + metadata1.getProperties().put(TraceContext.TELEMETRY_TRACE_KEY, "trace-1"); + metadata1.getProperties().put(TraceContext.TELEMETRY_QUEUE_SPAN_KEY, "span-1"); + metadata1.getProperties().put(TraceContext.TELEMETRY_LOG_KEY, "true"); + metadata1 + .getProperties() + .put(TraceContext.TELEMETRY_ENQUEUED_AT, String.valueOf(System.currentTimeMillis())); + + SystemMetadata metadata2 = new SystemMetadata(); + metadata2.setProperties(new StringMap()); + metadata2.getProperties().put(TraceContext.TELEMETRY_TRACE_KEY, "trace-2"); + metadata2.getProperties().put(TraceContext.TELEMETRY_QUEUE_SPAN_KEY, "span-2"); + metadata2.getProperties().put(TraceContext.TELEMETRY_LOG_KEY, "false"); + metadata2 + .getProperties() + .put(TraceContext.TELEMETRY_ENQUEUED_AT, String.valueOf(System.currentTimeMillis())); + + batchMetadata.add(metadata1); + batchMetadata.add(metadata2); + + // Mock span builder chain for both consumer and processing spans + io.opentelemetry.api.trace.SpanBuilder mockSpanBuilder = + mock(io.opentelemetry.api.trace.SpanBuilder.class); + when(mockSpanBuilder.setParent(any(Context.class))).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setSpanKind(any())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setAttribute(anyString(), anyString())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setAttribute(anyString(), anyLong())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.addLink(any())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.startSpan()).thenReturn(span); + + when(tracer.spanBuilder(anyString())).thenReturn(mockSpanBuilder); + 
when(span.setAttribute(anyString(), anyString())).thenReturn(span); + when(span.setAttribute(anyString(), anyLong())).thenReturn(span); + when(span.getSpanContext()).thenReturn(spanContext); + + // Execute & Verify - mainly checking that no exceptions are thrown + traceContext.withQueueSpan( + "test-operation", + batchMetadata, + "test-topic", + () -> { + // Do nothing + }); + } + + @Test + public void testWithSpanSupplier() { + SpanBuilder mockSpanBuilder = mock(SpanBuilder.class); + when(mockSpanBuilder.setAttribute(anyString(), anyString())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.startSpan()).thenReturn(span); + when(tracer.spanBuilder(anyString())).thenReturn(mockSpanBuilder); + + when(span.setAttribute(anyString(), anyString())).thenReturn(span); + when(span.setStatus(any())).thenReturn(span); + when(span.makeCurrent()).thenReturn(mock(Scope.class)); + + // Execute + String result = traceContext.withSpan("test-operation", () -> "test-result", "attr1", "value1"); + + // Verify + assertEquals(result, "test-result"); + verify(mockSpanBuilder).startSpan(); + verify(span).end(); + } + + @Test(expectedExceptions = RuntimeException.class) + public void testWithSpanSupplierException() { + io.opentelemetry.api.trace.SpanBuilder mockSpanBuilder = + mock(io.opentelemetry.api.trace.SpanBuilder.class); + when(mockSpanBuilder.setAttribute(anyString(), anyString())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.startSpan()).thenReturn(span); + when(tracer.spanBuilder(anyString())).thenReturn(mockSpanBuilder); + + when(span.setAttribute(anyString(), anyString())).thenReturn(span); + when(span.setStatus(any(), anyString())).thenReturn(span); + when(span.recordException(any(RuntimeException.class))).thenReturn(span); + when(span.makeCurrent()).thenReturn(mock(Scope.class)); + + try { + traceContext.withSpan( + "test-operation", + () -> { + throw new RuntimeException("test-exception"); + }, + "attr1", + "value1"); + } finally { + 
verify(mockSpanBuilder).startSpan(); + verify(span).setStatus(StatusCode.ERROR, "test-exception"); + verify(span).recordException(any(RuntimeException.class)); + verify(span).end(); + } + } + + @Test + public void testWithSpanRunnable() { + SpanBuilder mockSpanBuilder = mock(SpanBuilder.class); + when(mockSpanBuilder.setAttribute(anyString(), anyString())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.startSpan()).thenReturn(span); + when(tracer.spanBuilder(anyString())).thenReturn(mockSpanBuilder); + + when(span.setAttribute(anyString(), anyString())).thenReturn(span); + when(span.setStatus(any())).thenReturn(span); + when(span.makeCurrent()).thenReturn(mock(Scope.class)); + + AtomicBoolean executed = new AtomicBoolean(false); + + traceContext.withSpan("test-operation", () -> executed.set(true), "attr1", "value1"); + + assertTrue(executed.get()); + verify(mockSpanBuilder).startSpan(); + verify(span).end(); + } + + @Test + public void testWithSingleQueueSpan() { + SystemMetadata metadata = new SystemMetadata(); + metadata.setProperties(new StringMap()); + metadata.getProperties().put(TraceContext.TELEMETRY_TRACE_KEY, "trace-1"); + metadata.getProperties().put(TraceContext.TELEMETRY_QUEUE_SPAN_KEY, "span-1"); + metadata.getProperties().put(TraceContext.TELEMETRY_LOG_KEY, "true"); + metadata + .getProperties() + .put(TraceContext.TELEMETRY_ENQUEUED_AT, String.valueOf(System.currentTimeMillis())); + + io.opentelemetry.api.trace.SpanBuilder mockSpanBuilder = + mock(io.opentelemetry.api.trace.SpanBuilder.class); + when(mockSpanBuilder.setParent(any(Context.class))).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setSpanKind(any())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setAttribute(anyString(), anyString())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setAttribute(anyString(), anyLong())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.startSpan()).thenReturn(span); + + 
when(tracer.spanBuilder(anyString())).thenReturn(mockSpanBuilder); + when(span.setAttribute(anyString(), anyString())).thenReturn(span); + when(span.setAttribute(anyString(), anyLong())).thenReturn(span); + when(span.makeCurrent()).thenReturn(mock(Scope.class)); + + AtomicBoolean executed = new AtomicBoolean(false); + + traceContext.withQueueSpan( + "test-operation", List.of(metadata), "test-topic", () -> executed.set(true)); + + assertTrue(executed.get()); + verify(mockSpanBuilder, atLeast(1)).startSpan(); + verify(span, atLeast(1)).end(); + } + + @Test + public void testWithProducerTrace() { + SystemMetadata systemMetadata = new SystemMetadata(); + + io.opentelemetry.api.trace.SpanBuilder mockSpanBuilder = + mock(io.opentelemetry.api.trace.SpanBuilder.class); + when(mockSpanBuilder.setParent(any(Context.class))).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setSpanKind(any())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setAttribute(anyString(), anyString())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.setAttribute(anyString(), anyLong())).thenReturn(mockSpanBuilder); + when(mockSpanBuilder.startSpan()).thenReturn(span); + + when(tracer.spanBuilder(anyString())).thenReturn(mockSpanBuilder); + when(span.setAttribute(anyString(), anyString())).thenReturn(span); + when(span.setAttribute(anyString(), anyLong())).thenReturn(span); + when(span.getSpanContext()).thenReturn(spanContext); + when(spanContext.getSpanId()).thenReturn("test-span-id"); + when(spanContext.getTraceId()).thenReturn("test-trace-id"); + when(spanContext.isValid()).thenReturn(true); + + try (var mockedStatic = mockStatic(Span.class)) { + mockedStatic.when(Span::current).thenReturn(span); + + SystemMetadata result = + traceContext.withProducerTrace("test-operation", systemMetadata, "test-topic"); + + assertNotNull(result.getProperties()); + assertTrue(result.getProperties().containsKey(TraceContext.TELEMETRY_TRACE_KEY)); + 
assertTrue(result.getProperties().containsKey(TraceContext.TELEMETRY_QUEUE_SPAN_KEY)); + assertTrue(result.getProperties().containsKey(TraceContext.TELEMETRY_LOG_KEY)); + assertTrue(result.getProperties().containsKey(TraceContext.TELEMETRY_ENQUEUED_AT)); + verify(mockSpanBuilder).startSpan(); + verify(span).end(); + } + } +} diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index ce9c636be16ac7..74232efc84d9c6 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -321,6 +321,7 @@ public void setupTest() throws Exception { mock(IndexConvention.class), mock(RetrieverContext.class), mock(ValidationContext.class), + null, true); _dataHubAuthorizer = diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 04f7409d5c39a3..59c57d575d801d 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -1,3 +1,6 @@ +# Name of the data hub component or container (used for tracing) +spring.application.name: ${APPLICATION_NAME:datahub-gms} + # The base URL where DataHub is accessible to users. 
baseUrl: ${DATAHUB_BASE_URL:http://localhost:9002} diff --git a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle index 501c46d64d6f9c..3fbbcebdae7b04 100644 --- a/metadata-service/factories/build.gradle +++ b/metadata-service/factories/build.gradle @@ -51,6 +51,11 @@ dependencies { implementation externalDependency.jline implementation externalDependency.commonsIo + implementation externalDependency.opentelemetryApi + implementation externalDependency.opentelemetrySdk + implementation externalDependency.opentelemetrySdkTrace + implementation externalDependency.opentelemetryAutoConfig + testImplementation externalDependency.springBootTest testImplementation externalDependency.mockito testImplementation externalDependency.testng diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java index 136c31aa0693da..0de389834927c7 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java @@ -4,6 +4,7 @@ import com.linkedin.metadata.graph.neo4j.Neo4jGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.LineageRegistry; +import io.datahubproject.metadata.context.OperationContext; import javax.annotation.Nonnull; import org.neo4j.driver.Driver; import org.neo4j.driver.SessionConfig; @@ -28,9 +29,14 @@ public class Neo4jGraphServiceFactory { @Bean(name = "graphService") @Nonnull - protected GraphService getInstance(final EntityRegistry entityRegistry) { + protected GraphService getInstance( + @Qualifier("systemOperationContext") OperationContext systemOperationContext, + final EntityRegistry entityRegistry) { LineageRegistry lineageRegistry = new 
LineageRegistry(entityRegistry); return new Neo4jGraphService( - lineageRegistry, neo4jDriver, SessionConfig.forDatabase(neo4jDatabase)); + systemOperationContext, + lineageRegistry, + neo4jDriver, + SessionConfig.forDatabase(neo4jDatabase)); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java index 78107cc0ecc900..f5e26714a7f6aa 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java @@ -16,6 +16,7 @@ import io.datahubproject.metadata.context.OperationContextConfig; import io.datahubproject.metadata.context.RetrieverContext; import io.datahubproject.metadata.context.ServicesRegistryContext; +import io.datahubproject.metadata.context.TraceContext; import io.datahubproject.metadata.context.ValidationContext; import io.datahubproject.metadata.services.RestrictedService; import javax.annotation.Nonnull; @@ -46,7 +47,8 @@ protected OperationContext javaSystemOperationContext( @Qualifier("baseElasticSearchComponents") BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components, @Nonnull final ConfigurationProvider configurationProvider, - @Qualifier("systemEntityClient") @Nonnull final SystemEntityClient systemEntityClient) { + @Qualifier("systemEntityClient") @Nonnull final SystemEntityClient systemEntityClient, + @Nonnull final TraceContext traceContext) { EntityServiceAspectRetriever entityServiceAspectRetriever = EntityServiceAspectRetriever.builder() @@ -80,6 +82,7 @@ protected OperationContext javaSystemOperationContext( .alternateValidation( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) .build(), + traceContext, 
configurationProvider.getAuthentication().isEnforceExistenceEnabled()); entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); @@ -109,7 +112,8 @@ protected OperationContext restliSystemOperationContext( @Nonnull final SearchService searchService, @Qualifier("baseElasticSearchComponents") BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components, - @Nonnull final ConfigurationProvider configurationProvider) { + @Nonnull final ConfigurationProvider configurationProvider, + @Nonnull final TraceContext traceContext) { EntityClientAspectRetriever entityClientAspectRetriever = EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build(); @@ -136,6 +140,7 @@ protected OperationContext restliSystemOperationContext( .alternateValidation( configurationProvider.getFeatureFlags().isAlternateMCPValidation()) .build(), + traceContext, configurationProvider.getAuthentication().isEnforceExistenceEnabled()); entityClientAspectRetriever.setSystemOperationContext(systemOperationContext); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/common/AdminClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/common/AdminClientFactory.java new file mode 100644 index 00000000000000..53b3a86011e7e0 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/common/AdminClientFactory.java @@ -0,0 +1,30 @@ +package com.linkedin.gms.factory.kafka.common; + +import com.linkedin.metadata.config.kafka.KafkaConfiguration; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.KafkaAdminClient; +import org.springframework.boot.autoconfigure.kafka.KafkaProperties; + +public class AdminClientFactory { + public static AdminClient buildKafkaAdminClient( + KafkaConfiguration 
kafkaConfiguration, + final KafkaProperties kafkaProperties, + String clientId) { + Map adminProperties = new HashMap<>(kafkaProperties.buildAdminProperties(null)); + adminProperties.put(AdminClientConfig.CLIENT_ID_CONFIG, clientId); + + // KAFKA_BOOTSTRAP_SERVER has precedence over SPRING_KAFKA_BOOTSTRAP_SERVERS + if (kafkaConfiguration.getBootstrapServers() != null + && !kafkaConfiguration.getBootstrapServers().isEmpty()) { + adminProperties.put( + AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, + Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); + } // else we rely on KafkaProperties which defaults to localhost:9092 or environment variables + + return KafkaAdminClient.create(adminProperties); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java index e2cdca8a065c03..f6d9fcefe46461 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java @@ -1,5 +1,7 @@ package com.linkedin.gms.factory.kafka.throttle; +import static com.linkedin.gms.factory.kafka.common.AdminClientFactory.buildKafkaAdminClient; + import com.datahub.metadata.dao.throttle.KafkaThrottleSensor; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.config.MetadataChangeProposalConfig; @@ -8,13 +10,7 @@ import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.Topics; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; import lombok.extern.slf4j.Slf4j; -import org.apache.kafka.clients.admin.AdminClient; -import org.apache.kafka.clients.admin.AdminClientConfig; -import 
org.apache.kafka.clients.admin.KafkaAdminClient; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.kafka.KafkaProperties; @@ -47,7 +43,7 @@ public ThrottleSensor kafkaThrottle( if (mcpConfig.getThrottle().getUpdateIntervalMs() > 0) { return KafkaThrottleSensor.builder() .entityRegistry(entityRegistry) - .kafkaAdmin(kafkaAdmin(kafkaConfiguration, kafkaProperties)) + .kafkaAdmin(buildKafkaAdminClient(kafkaConfiguration, kafkaProperties, "throttle-sensor")) .config(mcpConfig.getThrottle()) .mclConsumerGroupId(maeConsumerGroupId) .timeseriesTopicName(timeseriesTopicName) @@ -58,19 +54,4 @@ public ThrottleSensor kafkaThrottle( return new NoOpSensor(); } } - - private static AdminClient kafkaAdmin( - KafkaConfiguration kafkaConfiguration, final KafkaProperties kafkaProperties) { - Map adminProperties = new HashMap<>(kafkaProperties.buildAdminProperties(null)); - - // KAFKA_BOOTSTRAP_SERVER has precedence over SPRING_KAFKA_BOOTSTRAP_SERVERS - if (kafkaConfiguration.getBootstrapServers() != null - && !kafkaConfiguration.getBootstrapServers().isEmpty()) { - adminProperties.put( - AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, - Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); - } // else we rely on KafkaProperties which defaults to localhost:9092 or environment variables - - return KafkaAdminClient.create(adminProperties); - } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/trace/KafkaTraceReaderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/trace/KafkaTraceReaderFactory.java new file mode 100644 index 00000000000000..6ba7dedce8ff50 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/trace/KafkaTraceReaderFactory.java @@ -0,0 +1,196 @@ +package com.linkedin.gms.factory.kafka.trace; + +import static 
com.linkedin.gms.factory.kafka.common.AdminClientFactory.buildKafkaAdminClient; +import static com.linkedin.mxe.ConsumerGroups.MCP_CONSUMER_GROUP_ID_VALUE; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.trace.MCLTraceReader; +import com.linkedin.metadata.trace.MCPFailedTraceReader; +import com.linkedin.metadata.trace.MCPTraceReader; +import com.linkedin.mxe.Topics; +import jakarta.annotation.PreDestroy; +import java.util.Properties; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.kafka.KafkaProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.kafka.core.DefaultKafkaConsumerFactory; + +@Configuration +public class KafkaTraceReaderFactory { + private static final Properties TRACE_CONSUMER_PROPERTIES = new Properties(); + + static { + TRACE_CONSUMER_PROPERTIES.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); + } + + @Value("${trace.pollMaxAttempts:5}") + private int pollMaxAttempts; + + @Value("${trace.pollDurationMs:1000}") + private int pollDurationMs; + + @Value(MCP_CONSUMER_GROUP_ID_VALUE) + private String mceConsumerGroupId; + + @Value("${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}") + private String mcpTopicName; + + @Value( + "${FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + + Topics.FAILED_METADATA_CHANGE_PROPOSAL + + "}") + private String mcpFailedTopicName; + + 
@Value("${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}") + private String maeConsumerGroupId; + + @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}") + private String mclVersionedTopicName; + + @Value( + "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}") + private String mclTimeseriesTopicName; + + @Value("${trace.executor.thread-pool-size:10}") + private int threadPoolSize; + + @Value("${trace.executor.shutdown-timeout-seconds:60}") + private int shutdownTimeoutSeconds; + + @Value("${trace.timeout-seconds:30}") + private long traceTimeoutSeconds; + + @Bean("traceAdminClient") + public AdminClient traceAdminClient( + @Qualifier("configurationProvider") ConfigurationProvider provider, + final KafkaProperties kafkaProperties) { + return buildKafkaAdminClient(provider.getKafka(), kafkaProperties, "trace-reader"); + } + + private ExecutorService traceExecutorService; + + @Bean("traceExecutorService") + public ExecutorService traceExecutorService() { + traceExecutorService = Executors.newFixedThreadPool(threadPoolSize); + return traceExecutorService; + } + + @Bean("mcpTraceReader") + public MCPTraceReader mcpTraceReader( + @Qualifier("traceAdminClient") AdminClient adminClient, + @Qualifier("kafkaConsumerFactory") + DefaultKafkaConsumerFactory kafkaConsumerFactory, + @Qualifier("traceExecutorService") ExecutorService traceExecutorService) { + return MCPTraceReader.builder() + .adminClient(adminClient) + .topicName(mcpTopicName) + .consumerGroupId(mceConsumerGroupId) + .consumerSupplier( + () -> createConsumerWithUniqueId(kafkaConsumerFactory, "trace-reader-mcp")) + .pollDurationMs(pollDurationMs) + .pollMaxAttempts(pollMaxAttempts) + .timeoutSeconds(traceTimeoutSeconds) + .executorService(traceExecutorService) + .build(); + } + + @Bean("mcpFailedTraceReader") + public MCPFailedTraceReader mcpFailedTraceReader( + @Qualifier("traceAdminClient") 
AdminClient adminClient, + @Qualifier("kafkaConsumerFactory") + DefaultKafkaConsumerFactory kafkaConsumerFactory, + @Qualifier("traceExecutorService") ExecutorService traceExecutorService) { + return MCPFailedTraceReader.builder() + .adminClient(adminClient) + .topicName(mcpFailedTopicName) + .consumerSupplier( + () -> createConsumerWithUniqueId(kafkaConsumerFactory, "trace-reader-mcp-failed")) + .pollDurationMs(pollDurationMs) + .pollMaxAttempts(pollMaxAttempts) + .timeoutSeconds(traceTimeoutSeconds) + .executorService(traceExecutorService) + .build(); + } + + @Bean("mclVersionedTraceReader") + public MCLTraceReader mclVersionedTraceReader( + @Qualifier("traceAdminClient") AdminClient adminClient, + @Qualifier("kafkaConsumerFactory") + DefaultKafkaConsumerFactory kafkaConsumerFactory, + @Qualifier("traceExecutorService") ExecutorService traceExecutorService) { + return MCLTraceReader.builder() + .adminClient(adminClient) + .topicName(mclVersionedTopicName) + .consumerGroupId(maeConsumerGroupId) + .consumerSupplier( + () -> createConsumerWithUniqueId(kafkaConsumerFactory, "trace-reader-mcl-versioned")) + .pollDurationMs(pollDurationMs) + .pollMaxAttempts(pollMaxAttempts) + .timeoutSeconds(traceTimeoutSeconds) + .executorService(traceExecutorService) + .build(); + } + + @Bean("mclTimeseriesTraceReader") + public MCLTraceReader mclTimeseriesTraceReader( + @Qualifier("traceAdminClient") AdminClient adminClient, + @Qualifier("kafkaConsumerFactory") + DefaultKafkaConsumerFactory kafkaConsumerFactory, + @Qualifier("traceExecutorService") ExecutorService traceExecutorService) { + return MCLTraceReader.builder() + .adminClient(adminClient) + .topicName(mclTimeseriesTopicName) + .consumerGroupId(maeConsumerGroupId) + .consumerSupplier( + () -> createConsumerWithUniqueId(kafkaConsumerFactory, "trace-reader-mcl-timeseries")) + .pollDurationMs(pollDurationMs) + .pollMaxAttempts(pollMaxAttempts) + .timeoutSeconds(traceTimeoutSeconds) + .executorService(traceExecutorService) + 
.build(); + } + + private Consumer createConsumerWithUniqueId( + DefaultKafkaConsumerFactory kafkaConsumerFactory, + String baseClientId) { + Properties consumerProps = new Properties(); + consumerProps.putAll(TRACE_CONSUMER_PROPERTIES); + // Add a unique suffix to the client.id + consumerProps.put( + ConsumerConfig.CLIENT_ID_CONFIG, + baseClientId + "-" + Thread.currentThread().getId() + "-" + System.nanoTime()); + + return kafkaConsumerFactory.createConsumer( + baseClientId, // groupId prefix + null, // groupId suffix (using default) + null, // assignor + consumerProps); + } + + @PreDestroy + public void shutdown() { + if (traceExecutorService != null) { + traceExecutorService.shutdown(); + try { + if (!traceExecutorService.awaitTermination(shutdownTimeoutSeconds, TimeUnit.SECONDS)) { + traceExecutorService.shutdownNow(); + if (!traceExecutorService.awaitTermination(shutdownTimeoutSeconds, TimeUnit.SECONDS)) { + System.err.println("ExecutorService did not terminate"); + } + } + } catch (InterruptedException e) { + traceExecutorService.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/system_telemetry/OpenTelemetryBaseFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/system_telemetry/OpenTelemetryBaseFactory.java new file mode 100644 index 00000000000000..c6b3219a623e9c --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/system_telemetry/OpenTelemetryBaseFactory.java @@ -0,0 +1,81 @@ +package com.linkedin.gms.factory.system_telemetry; + +import com.linkedin.metadata.utils.metrics.MetricSpanExporter; +import io.datahubproject.metadata.context.TraceContext; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.trace.Tracer; +import 
io.opentelemetry.api.trace.propagation.W3CTraceContextPropagator; +import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk; +import io.opentelemetry.sdk.resources.Resource; +import io.opentelemetry.sdk.trace.export.BatchSpanProcessor; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import java.util.HashMap; +import java.util.Map; + +/** Common System OpenTelemetry */ +public abstract class OpenTelemetryBaseFactory { + private static final AttributeKey SERVICE_NAME = AttributeKey.stringKey("service.name"); + + protected abstract String getApplicationComponent(); + + protected TraceContext traceContext() { + return TraceContext.builder().tracer(tracer(openTelemetry())).build(); + } + + private Tracer tracer(OpenTelemetry openTelemetry) { + return openTelemetry.getTracer(getApplicationComponent()); + } + + private OpenTelemetry openTelemetry() { + return AutoConfiguredOpenTelemetrySdk.builder() + .addPropertiesCustomizer( + (configProperties) -> { + Map props = new HashMap<>(); + // override exporters to "none" if not specified + Map.of( + "OTEL_METRICS_EXPORTER", "otel.metrics.exporter", + "OTEL_TRACES_EXPORTER", "otel.traces.exporter", + "OTEL_LOGS_EXPORTER", "otel.logs.exporter") + .forEach( + (envVar, propKey) -> { + String value = System.getenv(envVar); + if (value == null || value.trim().isEmpty()) { + props.put(propKey, "none"); + } + }); + + return props; + }) + .addTracerProviderCustomizer( + (sdkTracerProviderBuilder, configProperties) -> + sdkTracerProviderBuilder + .addSpanProcessor(SimpleSpanProcessor.create(TraceContext.LOG_SPAN_EXPORTER)) + .addSpanProcessor(BatchSpanProcessor.builder(new MetricSpanExporter()).build()) + .setIdGenerator(TraceContext.TRACE_ID_GENERATOR) + .setResource( + Resource.getDefault() + .merge( + Resource.create( + Attributes.of(SERVICE_NAME, getApplicationComponent()))))) + .addPropagatorCustomizer( + (existingPropagator, configProperties) -> { + // If OTEL_PROPAGATORS is not set or doesn't 
include tracecontext, + // return W3C propagator, otherwise keep existing + String propagators = configProperties.getString("OTEL_PROPAGATORS"); + return (propagators == null || !propagators.contains("tracecontext")) + ? W3CTraceContextPropagator.getInstance() + : existingPropagator; + }) + .addMetricExporterCustomizer( + (metricExporter, configProperties) -> { + String metricsExporter = configProperties.getString("OTEL_METRICS_EXPORTER"); + return (metricsExporter == null || metricsExporter.trim().isEmpty()) + ? null // Return null to disable the exporter + : metricExporter; + }) + .build() + .getOpenTelemetrySdk(); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/trace/TraceServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/trace/TraceServiceFactory.java new file mode 100644 index 00000000000000..aadce86036f6b2 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/trace/TraceServiceFactory.java @@ -0,0 +1,37 @@ +package com.linkedin.gms.factory.trace; + +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.systemmetadata.TraceService; +import com.linkedin.metadata.trace.MCLTraceReader; +import com.linkedin.metadata.trace.MCPFailedTraceReader; +import com.linkedin.metadata.trace.MCPTraceReader; +import com.linkedin.metadata.trace.TraceServiceImpl; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class TraceServiceFactory { + + @Bean + public TraceService traceService( + @Qualifier("entityRegistry") EntityRegistry entityRegistry, + @Qualifier("entityService") EntityService entityService, + @Qualifier("systemMetadataService") SystemMetadataService 
systemMetadataService, + @Qualifier("mcpTraceReader") MCPTraceReader mcpTraceReader, + @Qualifier("mcpFailedTraceReader") MCPFailedTraceReader mcpFailedTraceReader, + @Qualifier("mclVersionedTraceReader") MCLTraceReader mclVersionedTraceReader, + @Qualifier("mclTimeseriesTraceReader") MCLTraceReader mclTimeseriesTraceReader) { + return TraceServiceImpl.builder() + .entityRegistry(entityRegistry) + .entityService(entityService) + .systemMetadataService(systemMetadataService) + .mcpTraceReader(mcpTraceReader) + .mcpFailedTraceReader(mcpFailedTraceReader) + .mclVersionedTraceReader(mclVersionedTraceReader) + .mclTimeseriesTraceReader(mclTimeseriesTraceReader) + .build(); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java index 50be0149ce2d4e..a3f3f9edc24ec2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/DataHubUpgradeKafkaListener.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.boot.kafka; -import com.codahale.metrics.Timer; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.EventUtils; import com.linkedin.metadata.boot.dependencies.BootstrapDependency; @@ -8,6 +7,7 @@ import com.linkedin.metadata.version.GitVersion; import com.linkedin.mxe.DataHubUpgradeHistoryEvent; import com.linkedin.mxe.Topics; +import io.datahubproject.metadata.context.OperationContext; import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; @@ -42,13 +42,13 @@ public class DataHubUpgradeKafkaListener implements ConsumerSeekAware, Bootstrap public static final String TOPIC_NAME = "${DATAHUB_UPGRADE_HISTORY_TOPIC_NAME:" + Topics.DATAHUB_UPGRADE_HISTORY_TOPIC_NAME + "}"; - 
private final DefaultKafkaConsumerFactory _defaultKafkaConsumerFactory; + private final DefaultKafkaConsumerFactory defaultKafkaConsumerFactory; @Value("#{systemEnvironment['DATAHUB_REVISION'] ?: '0'}") private String revision; - private final GitVersion _gitVersion; - private final ConfigurationProvider _configurationProvider; + private final GitVersion gitVersion; + private final ConfigurationProvider configurationProvider; @Value(CONSUMER_GROUP) private String consumerGroup; @@ -56,6 +56,8 @@ public class DataHubUpgradeKafkaListener implements ConsumerSeekAware, Bootstrap @Value(TOPIC_NAME) private String topicName; + private final OperationContext systemOperationContext; + private static final AtomicBoolean IS_UPDATED = new AtomicBoolean(false); public DataHubUpgradeKafkaListener( @@ -63,11 +65,13 @@ public DataHubUpgradeKafkaListener( @Qualifier("duheKafkaConsumerFactory") DefaultKafkaConsumerFactory defaultKafkaConsumerFactory, GitVersion gitVersion, - ConfigurationProvider configurationProvider) { + ConfigurationProvider configurationProvider, + @Qualifier("systemOperationContext") OperationContext operationContext) { this.registry = registry; - this._defaultKafkaConsumerFactory = defaultKafkaConsumerFactory; - this._gitVersion = gitVersion; - this._configurationProvider = configurationProvider; + this.defaultKafkaConsumerFactory = defaultKafkaConsumerFactory; + this.gitVersion = gitVersion; + this.configurationProvider = configurationProvider; + this.systemOperationContext = operationContext; } // Constructs a consumer to read determine final offset to assign, prevents re-reading whole topic @@ -76,7 +80,7 @@ public DataHubUpgradeKafkaListener( public void onPartitionsAssigned( Map assignments, ConsumerSeekCallback callback) { try (Consumer kafkaConsumer = - _defaultKafkaConsumerFactory.createConsumer(consumerGroup, SUFFIX)) { + defaultKafkaConsumerFactory.createConsumer(consumerGroup, SUFFIX)) { final Map offsetMap = 
kafkaConsumer.endOffsets(assignments.keySet()); assignments.entrySet().stream() .filter(entry -> topicName.equals(entry.getKey().topic())) @@ -100,44 +104,49 @@ public void onPartitionsAssigned( concurrency = "1", autoStartup = "false") public void checkSystemVersion(final ConsumerRecord consumerRecord) { - try (Timer.Context i = MetricUtils.timer(this.getClass(), "checkSystemVersion").time()) { - final GenericRecord record = consumerRecord.value(); - final String expectedVersion = String.format("%s-%s", _gitVersion.getVersion(), revision); - - DataHubUpgradeHistoryEvent event; - try { - event = EventUtils.avroToPegasusDUHE(record); - log.info("Latest system update version: {}", event.getVersion()); - if (expectedVersion.equals(event.getVersion())) { - IS_UPDATED.getAndSet(true); - } else if (!_configurationProvider.getSystemUpdate().isWaitForSystemUpdate()) { - log.warn("Wait for system update is disabled. Proceeding with startup."); - IS_UPDATED.getAndSet(true); - } else { - log.warn( - "System version is not up to date: {}. Waiting for datahub-upgrade to complete...", - expectedVersion); - } - } catch (Exception e) { - MetricUtils.counter(this.getClass(), "avro_to_pegasus_conversion_failure").inc(); - log.error("Error deserializing message due to: ", e); - log.error("Message: {}", record.toString()); - return; - } - } + systemOperationContext.withSpan( + "checkSystemVersion", + () -> { + final GenericRecord record = consumerRecord.value(); + final String expectedVersion = String.format("%s-%s", gitVersion.getVersion(), revision); + + DataHubUpgradeHistoryEvent event; + try { + event = EventUtils.avroToPegasusDUHE(record); + log.info("Latest system update version: {}", event.getVersion()); + if (expectedVersion.equals(event.getVersion())) { + IS_UPDATED.getAndSet(true); + } else if (!configurationProvider.getSystemUpdate().isWaitForSystemUpdate()) { + log.warn("Wait for system update is disabled. 
Proceeding with startup."); + IS_UPDATED.getAndSet(true); + } else { + log.warn( + "System version is not up to date: {}. Waiting for datahub-upgrade to complete...", + expectedVersion); + } + + } catch (Exception e) { + MetricUtils.counter(this.getClass(), "avro_to_pegasus_conversion_failure").inc(); + log.error("Error deserializing message due to: ", e); + log.error("Message: {}", record.toString()); + return; + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "checkSystemVersion")); } public void waitForUpdate() { - if (!_configurationProvider.getSystemUpdate().isWaitForSystemUpdate()) { + if (!configurationProvider.getSystemUpdate().isWaitForSystemUpdate()) { log.warn("Wait for system update is disabled. Proceeding with startup."); IS_UPDATED.getAndSet(true); } - int maxBackOffs = Integer.parseInt(_configurationProvider.getSystemUpdate().getMaxBackOffs()); + int maxBackOffs = Integer.parseInt(configurationProvider.getSystemUpdate().getMaxBackOffs()); long initialBackOffMs = - Long.parseLong(_configurationProvider.getSystemUpdate().getInitialBackOffMs()); + Long.parseLong(configurationProvider.getSystemUpdate().getInitialBackOffMs()); int backOffFactor = - Integer.parseInt(_configurationProvider.getSystemUpdate().getBackOffFactor()); + Integer.parseInt(configurationProvider.getSystemUpdate().getBackOffFactor()); long backOffMs = initialBackOffMs; for (int i = 0; i < maxBackOffs; i++) { diff --git a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java index 615ef985ca05d7..ebd622a1f0cce1 100644 --- a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java +++ 
b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java @@ -20,11 +20,13 @@ import org.mockito.Mockito; import org.opensearch.action.search.SearchResponse; import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Primary; @TestConfiguration public class OpenAPIAnalyticsTestConfiguration { + @MockBean TracingInterceptor tracingInterceptor; @Bean(name = "systemOperationContext") public OperationContext systemOperationContext() { diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java index 075501c1a10711..305a91072c683d 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java @@ -28,6 +28,7 @@ import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeline.TimelineService; import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.TraceContext; import io.datahubproject.openapi.dto.UrnResponseMap; import io.datahubproject.openapi.generated.EntityResponse; import io.datahubproject.openapi.v1.entities.EntitiesController; @@ -47,6 +48,13 @@ @TestConfiguration public class OpenAPIEntityTestConfiguration { + @MockBean TraceContext traceContext; + + @Bean + public TracingInterceptor tracingInterceptor(final TraceContext traceContext) { + return new TracingInterceptor(traceContext); + } + @Bean public ObjectMapper objectMapper() { return new 
ObjectMapper(new YAMLFactory()); diff --git a/metadata-service/openapi-servlet/build.gradle b/metadata-service/openapi-servlet/build.gradle index 77679790f25dea..59dbd2408ccdd9 100644 --- a/metadata-service/openapi-servlet/build.gradle +++ b/metadata-service/openapi-servlet/build.gradle @@ -36,6 +36,10 @@ dependencies { annotationProcessor externalDependency.lombok + implementation externalDependency.opentelemetryApi + implementation externalDependency.opentelemetrySdk + implementation externalDependency.opentelemetrySdkTrace + testImplementation externalDependency.springBootTest testImplementation project(':mock-entity-registry') testImplementation externalDependency.springBoot diff --git a/metadata-service/openapi-servlet/models/build.gradle b/metadata-service/openapi-servlet/models/build.gradle index d75e656e5ecd6c..dbc51ca17e3388 100644 --- a/metadata-service/openapi-servlet/models/build.gradle +++ b/metadata-service/openapi-servlet/models/build.gradle @@ -6,6 +6,7 @@ dependencies { implementation project(':entity-registry') implementation project(':metadata-operation-context') implementation project(':metadata-auth:auth-api') + implementation project(':metadata-service:services') implementation externalDependency.jacksonDataBind implementation externalDependency.httpClient diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceRequestV1.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceRequestV1.java new file mode 100644 index 00000000000000..8fe811f23d958b --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceRequestV1.java @@ -0,0 +1,17 @@ +package io.datahubproject.openapi.v1.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.linkedin.common.urn.Urn; +import java.util.LinkedHashMap; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.Builder; 
+import lombok.Data; +import lombok.EqualsAndHashCode; + +@EqualsAndHashCode(callSuper = true) +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@AllArgsConstructor +public class TraceRequestV1 extends LinkedHashMap> {} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceResponseV1.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceResponseV1.java new file mode 100644 index 00000000000000..5fc721c2812d41 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v1/models/TraceResponseV1.java @@ -0,0 +1,22 @@ +package io.datahubproject.openapi.v1.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.systemmetadata.TraceStatus; +import java.util.LinkedHashMap; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; + +@EqualsAndHashCode(callSuper = true) +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@AllArgsConstructor +public class TraceResponseV1 extends LinkedHashMap> { + public TraceResponseV1(Map> m) { + super(m); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java index c6d5f1452fea91..fb2dba8103d59e 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java @@ -16,10 +16,12 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import org.springdoc.core.models.GroupedOpenApi; +import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.core.annotation.Order; import org.springframework.web.servlet.config.annotation.EnableWebMvc; +import org.springframework.web.servlet.config.annotation.InterceptorRegistry; import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry; import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; @@ -39,6 +41,8 @@ public class SpringWebConfig implements WebMvcConfigurer { private static final Set OPENLINEAGE_PACKAGES = Set.of("io.datahubproject.openapi.openlineage"); + @Autowired private TracingInterceptor tracingInterceptor; + @Bean public GroupedOpenApi v3OpenApiGroup( final EntityRegistry entityRegistry, final ConfigurationProvider configurationProvider) { @@ -132,4 +136,9 @@ private Map concat(Supplier> a, Supplier> b) { (v1, v2) -> v2, LinkedHashMap::new)); } + + @Override + public void addInterceptors(InterceptorRegistry registry) { + registry.addInterceptor(tracingInterceptor).addPathPatterns("/**"); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/TracingInterceptor.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/TracingInterceptor.java new file mode 100644 index 00000000000000..0ca913b7c65cfc --- /dev/null +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/TracingInterceptor.java @@ -0,0 +1,93 @@ +package io.datahubproject.openapi.config; + +import io.datahubproject.metadata.context.TraceContext; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanContext; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.context.Context; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import javax.annotation.Nullable; +import org.slf4j.MDC; 
+import org.springframework.stereotype.Component; +import org.springframework.web.servlet.HandlerInterceptor; + +@Component +public class TracingInterceptor implements HandlerInterceptor { + @Nullable private final Tracer tracer; + + public TracingInterceptor(final TraceContext traceContext) { + this.tracer = traceContext.getTracer(); + } + + @Override + public boolean preHandle( + HttpServletRequest request, HttpServletResponse response, Object handler) { + + if (tracer != null) { + String spanName = request.getMethod() + " " + request.getRequestURI(); + Span span = + tracer + .spanBuilder(spanName) + .setAttribute("http.method", request.getMethod()) + .setAttribute("http.url", request.getRequestURI()) + .setParent(Context.root()) + .startSpan(); + + request.setAttribute("span", span); + span.makeCurrent(); + + TraceContext.enableLogTracing(request); + + if (span.getSpanContext().isValid()) { + SpanContext spanContext = span.getSpanContext(); + String traceId = spanContext.getTraceId(); + String spanId = spanContext.getSpanId(); + + // W3C Trace Context format + String flags = spanContext.getTraceFlags().isSampled() ? 
"01" : "00"; + response.setHeader("traceparent", String.format("00-%s-%s-%s", traceId, spanId, flags)); + + if (TraceContext.isLogTracingEnabled()) { + // Add trace context to MDC for logging + MDC.put("telemetryId", String.format("[%s-%s] ", traceId, spanId)); + } + } + } + + return true; + } + + @Override + public void afterCompletion( + HttpServletRequest request, HttpServletResponse response, Object handler, Exception ex) { + + if (tracer != null) { + Span span = (Span) request.getAttribute("span"); + if (span != null) { + try { + span.setAttribute("http.status_code", response.getStatus()); + + if (ex != null) { + span.setStatus(StatusCode.ERROR); + span.recordException(ex); + } else { + if (response.getStatus() >= 400) { + span.setStatus(StatusCode.ERROR); + } else { + span.setStatus(StatusCode.OK); + } + } + } finally { + span.end(); + } + } + + if (TraceContext.isLogTracingEnabled()) { + TraceContext.clear(); + MDC.clear(); + } + } + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 592d7bba4211fe..d3c67355d0f85d 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -55,6 +55,7 @@ import io.datahubproject.openapi.models.GenericAspect; import io.datahubproject.openapi.models.GenericEntity; import io.datahubproject.openapi.models.GenericEntityScrollResult; +import io.datahubproject.openapi.util.RequestInputUtil; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import jakarta.servlet.http.HttpServletRequest; @@ -128,7 +129,8 @@ protected List buildEntityList( throws URISyntaxException { LinkedHashMap> aspectSpecMap = - 
resolveAspectSpecs( + RequestInputUtil.resolveAspectSpecs( + entityRegistry, urns.stream() .map( urn -> @@ -398,8 +400,11 @@ public ResponseEntity getAspect( buildEntityVersionedAspectList( opContext, List.of(urn), - resolveAspectSpecs( - new LinkedHashMap<>(Map.of(urn, Map.of(aspectName, version))), 0L, true), + RequestInputUtil.resolveAspectSpecs( + entityRegistry, + new LinkedHashMap<>(Map.of(urn, Map.of(aspectName, version))), + 0L, + true), withSystemMetadata, true); } @@ -634,7 +639,7 @@ public ResponseEntity createAspect( authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); } - AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName).get(); + AspectSpec aspectSpec = RequestInputUtil.lookupAspectSpec(entitySpec, aspectName).get(); ChangeMCP upsert = toUpsertItem( opContext.getRetrieverContext().getAspectRetriever(), @@ -713,7 +718,7 @@ public ResponseEntity patchAspect( authentication.getActor().toUrnStr() + " is unauthorized to " + UPDATE + " entities."); } - AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName).get(); + AspectSpec aspectSpec = RequestInputUtil.lookupAspectSpec(entitySpec, aspectName).get(); RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectSpec.getName(), 0); GenericPatchTemplate genericPatchTemplate = @@ -761,69 +766,6 @@ protected Boolean exists( opContext, urn, aspect, includeSoftDelete != null ? 
includeSoftDelete : false); } - /** - * Given a map with aspect names from the API, normalized them into actual aspect names (casing - * fixes) - * - * @param requestedAspectNames requested aspects - * @param map values - * @param expandEmpty whether to expand empty aspect names to all aspect names - * @return updated map - */ - protected LinkedHashMap> resolveAspectSpecs( - LinkedHashMap> requestedAspectNames, - @Nonnull T defaultValue, - boolean expandEmpty) { - return requestedAspectNames.entrySet().stream() - .map( - entry -> { - final Urn urn = entry.getKey(); - if (expandEmpty && (entry.getValue().isEmpty() || entry.getValue().containsKey(""))) { - // All aspects specified - Set allNames = - new HashSet<>( - entityRegistry.getEntitySpec(urn.getEntityType()).getAspectSpecs()); - return Map.entry( - urn, - allNames.stream() - .map( - aspectName -> - Map.entry( - aspectName, entry.getValue().getOrDefault("", defaultValue))) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); - } else if (!entry.getValue().keySet().isEmpty()) { - final Map normalizedNames = - entry.getValue().keySet().stream() - .map( - requestAspectName -> - Map.entry( - requestAspectName, lookupAspectSpec(urn, requestAspectName))) - .filter(aspectSpecEntry -> aspectSpecEntry.getValue().isPresent()) - .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().get())); - return Map.entry( - urn, - entry.getValue().entrySet().stream() - .filter(reqEntry -> normalizedNames.containsKey(reqEntry.getKey())) - .map( - reqEntry -> - Map.entry( - normalizedNames.get(reqEntry.getKey()), reqEntry.getValue())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); - } else { - return (Map.Entry>) null; - } - }) - .filter(Objects::nonNull) - .collect( - Collectors.toMap( - Map.Entry::getKey, - Map.Entry::getValue, - (a, b) -> { - throw new IllegalStateException("Duplicate key"); - }, - LinkedHashMap::new)); - } - protected static LinkedHashMap> aspectSpecsToAspectNames( 
LinkedHashMap> urnAspectSpecsMap, boolean timeseries) { return urnAspectSpecsMap.entrySet().stream() @@ -859,7 +801,8 @@ protected Map> toAspectMap( } protected Optional lookupAspectSpec(Urn urn, String aspectName) { - return lookupAspectSpec(entityRegistry.getEntitySpec(urn.getEntityType()), aspectName); + return RequestInputUtil.lookupAspectSpec( + entityRegistry.getEntitySpec(urn.getEntityType()), aspectName); } protected RecordTemplate toRecordTemplate( @@ -902,23 +845,6 @@ protected ChangeMCP toUpsertItem( aspectRetriever); } - /** - * Case-insensitive fallback - * - * @return - */ - protected static Optional lookupAspectSpec(EntitySpec entitySpec, String aspectName) { - if (entitySpec == null) { - return Optional.empty(); - } - - return entitySpec.getAspectSpec(aspectName) != null - ? Optional.of(entitySpec.getAspectSpec(aspectName)) - : entitySpec.getAspectSpecs().stream() - .filter(aspec -> aspec.getName().toLowerCase().equals(aspectName)) - .findFirst(); - } - protected static Urn validatedUrn(String urn) throws InvalidUrnException { try { return Urn.createFromString(urn); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/v1/TraceController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/v1/TraceController.java new file mode 100644 index 00000000000000..d7e7b2dce1189a --- /dev/null +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/v1/TraceController.java @@ -0,0 +1,149 @@ +package io.datahubproject.openapi.operations.v1; + +import static com.linkedin.metadata.authorization.ApiOperation.READ; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthUtil; +import com.datahub.authorization.AuthorizerChain; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.systemmetadata.TraceService; +import 
io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.util.RequestInputUtil; +import io.datahubproject.openapi.v1.models.TraceRequestV1; +import io.datahubproject.openapi.v1.models.TraceResponseV1; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.media.Content; +import io.swagger.v3.oas.annotations.media.ExampleObject; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/openapi/v1/trace") +@Slf4j +@Tag(name = "Tracing", description = "An API for tracing async operations.") +public class TraceController { + private final TraceService traceService; + private final AuthorizerChain authorizerChain; + private final OperationContext systemOperationContext; + + public TraceController( + TraceService traceService, + OperationContext systemOperationContext, + AuthorizerChain authorizerChain) { + this.traceService = traceService; + this.systemOperationContext = systemOperationContext; + this.authorizerChain = authorizerChain; + } + + @Tag(name = "Async Write Tracing") + @PostMapping(path = "/write/{traceId}", produces = MediaType.APPLICATION_JSON_VALUE) + 
@Operation( + summary = "Trace an async write to the underlying storage.", + requestBody = + @io.swagger.v3.oas.annotations.parameters.RequestBody( + required = true, + content = + @Content( + mediaType = MediaType.APPLICATION_JSON_VALUE, + examples = { + @ExampleObject( + name = "Default", + value = + """ + { + "urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)": ["status", "datasetProperties"], + "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)": ["datasetProperties"] + } + """) + }))) + public ResponseEntity getTrace( + HttpServletRequest request, + @PathVariable("traceId") String traceId, + @RequestParam(value = "onlyIncludeErrors", defaultValue = "true") boolean onlyIncludeErrors, + @RequestParam(value = "detailed", defaultValue = "false") boolean detailed, + @RequestParam(value = "skipCache", defaultValue = "false") boolean skipCache, + @RequestBody @Nonnull TraceRequestV1 traceRequestV1) { + Authentication authentication = AuthenticationContext.getAuthentication(); + String actorUrnStr = authentication.getActor().toUrnStr(); + + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi(actorUrnStr, request, "getTrace", List.of()), + authorizerChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, READ, traceRequestV1.keySet())) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + + " is unauthorized to " + + READ + + " as least one of the requested URNs."); + } + + LinkedHashMap> normalizedInput = + traceRequestV1.entrySet().stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, + e -> + RequestInputUtil.resolveAspectNames( + opContext.getEntityRegistry(), e.getKey(), e.getValue(), true), + (v1, v2) -> v1, + LinkedHashMap::new)); + + return ResponseEntity.ok( + new TraceResponseV1( + traceService.trace( + opContext, + extractTraceId(traceId), + normalizedInput, + 
onlyIncludeErrors, + detailed, + skipCache))); + } + + private static String extractTraceId(String input) { + if (input == null || input.trim().isEmpty()) { + return null; + } + + // Clean the input + input = input.trim(); + + // Case 1: If it's a full traceparent header (containing hyphens) + if (input.contains("-")) { + String[] parts = input.split("-"); + if (parts.length >= 2) { + // The trace ID is the second part (index 1) + return parts[1]; + } + return null; + } + + // Case 2: If it's just the trace ID (32 hex characters) + if (input.length() == 32 && input.matches("[0-9a-fA-F]+")) { + return input; + } + + return null; + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index ca425810c87a09..72c39c792b355e 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -5,7 +5,6 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -480,7 +479,6 @@ public static List> ingestBatchProposal( boolean async) { // TODO: Use the actor present in the IC. 
- Timer.Context context = MetricUtils.timer("postEntity").time(); final com.linkedin.common.AuditStamp auditStamp = new com.linkedin.common.AuditStamp() .setTime(System.currentTimeMillis()) @@ -519,7 +517,6 @@ public static List> ingestBatchProposal( } else { MetricUtils.counter(MetricRegistry.name("postEntity", "success")).inc(); } - context.stop(); } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/RequestInputUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/RequestInputUtil.java new file mode 100644 index 00000000000000..fe1f217c0d8448 --- /dev/null +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/RequestInputUtil.java @@ -0,0 +1,136 @@ +package io.datahubproject.openapi.util; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + +public class RequestInputUtil { + private RequestInputUtil() {} + + public static List resolveAspectNames( + EntityRegistry entityRegistry, Urn urn, List inputAspectNames, boolean expandEmpty) { + return resolveAspectSpecs(entityRegistry, urn, inputAspectNames, expandEmpty).stream() + .map(AspectSpec::getName) + .toList(); + } + + /** + * For a given urn and list of aspect names, resolve AspectSpecs + * + * @param entityRegistry + * @param urn + * @param inputAspectNames + * @param expandEmpty if empty return all AspectSpecs + * @return + */ + public static List resolveAspectSpecs( + EntityRegistry entityRegistry, Urn urn, List inputAspectNames, boolean expandEmpty) { + LinkedHashMap 
intermediateReq = + inputAspectNames.stream() + .map(name -> Map.entry(name, 0L)) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (existing, replacement) -> existing, + LinkedHashMap::new)); + Map> intermediate = + resolveAspectSpecs( + entityRegistry, new LinkedHashMap<>(Map.of(urn, intermediateReq)), 0L, expandEmpty); + return new ArrayList<>(intermediate.getOrDefault(urn, Map.of()).keySet()); + } + + /** + * Given a map with aspect names from the API, normalized them into actual aspect names (casing + * fixes) + * + * @param requestedAspectNames requested aspects + * @param map values + * @param expandEmpty whether to expand empty aspect names to all aspect names + * @return updated map + */ + public static LinkedHashMap> resolveAspectSpecs( + EntityRegistry entityRegistry, + LinkedHashMap> requestedAspectNames, + @Nonnull T defaultValue, + boolean expandEmpty) { + return requestedAspectNames.entrySet().stream() + .map( + entry -> { + final Urn urn = entry.getKey(); + if (expandEmpty && (entry.getValue().isEmpty() || entry.getValue().containsKey(""))) { + // All aspects specified + Set allNames = + new HashSet<>( + entityRegistry.getEntitySpec(urn.getEntityType()).getAspectSpecs()); + return Map.entry( + urn, + allNames.stream() + .map( + aspectName -> + Map.entry( + aspectName, entry.getValue().getOrDefault("", defaultValue))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + } else if (!entry.getValue().keySet().isEmpty()) { + final Map normalizedNames = + entry.getValue().keySet().stream() + .map( + requestAspectName -> + Map.entry( + requestAspectName, + lookupAspectSpec(entityRegistry, urn, requestAspectName))) + .filter(aspectSpecEntry -> aspectSpecEntry.getValue().isPresent()) + .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().get())); + return Map.entry( + urn, + entry.getValue().entrySet().stream() + .filter(reqEntry -> normalizedNames.containsKey(reqEntry.getKey())) + .map( + reqEntry 
-> + Map.entry( + normalizedNames.get(reqEntry.getKey()), reqEntry.getValue())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + } else { + return (Map.Entry>) null; + } + }) + .filter(Objects::nonNull) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (a, b) -> { + throw new IllegalStateException("Duplicate key"); + }, + LinkedHashMap::new)); + } + + private static Optional lookupAspectSpec( + EntityRegistry entityRegistry, Urn urn, String aspectName) { + return lookupAspectSpec(entityRegistry.getEntitySpec(urn.getEntityType()), aspectName); + } + + /** Case-insensitive fallback */ + public static Optional lookupAspectSpec(EntitySpec entitySpec, String aspectName) { + if (entitySpec == null) { + return Optional.empty(); + } + + return entitySpec.getAspectSpec(aspectName) != null + ? Optional.of(entitySpec.getAspectSpec(aspectName)) + : entitySpec.getAspectSpecs().stream() + .filter(aspec -> aspec.getName().toLowerCase().equals(aspectName)) + .findFirst(); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java index dd359cbc464319..7b1a4ec49f77c4 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java @@ -7,7 +7,6 @@ import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.AuthUtil; @@ -104,7 +103,7 @@ public ResponseEntity getEntities( @RequestParam(name = "aspectNames", required = false) @Nullable String[] aspectNames) { - 
Timer.Context context = MetricUtils.timer("getEntities").time(); + final Set entityUrns = Arrays.stream(urns) // Have to decode here because of frontend routing, does No-op for already unencoded @@ -167,7 +166,6 @@ public ResponseEntity getEntities( } else { MetricUtils.counter(MetricRegistry.name("getEntities", "success")).inc(); } - context.stop(); } } @@ -263,7 +261,7 @@ public ResponseEntity> deleteEntities( boolean soft, @RequestParam(required = false, name = "async") Boolean async) { Throwable exceptionally = null; - try (Timer.Context context = MetricUtils.timer("deleteEntities").time()) { + try { Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java index f778bf54aaeac7..3dcce394e35e98 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java @@ -5,7 +5,6 @@ import static com.linkedin.metadata.search.utils.QueryUtils.*; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.AuthUtil; @@ -157,7 +156,7 @@ public ResponseEntity getRelationships( @RequestParam(name = "count", defaultValue = "200") @Nullable Integer count) { - Timer.Context context = MetricUtils.timer("getRelationships").time(); + // Have to decode here because of frontend routing, does No-op for already unencoded through // direct API access final Urn entityUrn = UrnUtils.getUrn(URLDecoder.decode(urn, 
Charset.forName("UTF-8"))); @@ -201,7 +200,6 @@ public ResponseEntity getRelationships( } else { MetricUtils.counter(MetricRegistry.name("getRelationships", "success")).inc(); } - context.stop(); } } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index 573feec64d2283..9bf7851653d920 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -709,6 +709,17 @@ protected ChangeMCP toUpsertItem( changeType = ChangeType.UPSERT; } + SystemMetadata systemMetadata = null; + if (jsonNode.has("systemMetadata")) { + systemMetadata = + EntityApiUtils.parseSystemMetadata( + objectMapper.writeValueAsString(jsonNode.get("systemMetadata"))); + } + Map headers = null; + if (jsonNode.has("headers")) { + headers = objectMapper.convertValue(jsonNode.get("headers"), new TypeReference<>() {}); + } + return ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectSpec.getName()) @@ -719,6 +730,8 @@ protected ChangeMCP toUpsertItem( ByteString.copyString(aspectJson, StandardCharsets.UTF_8), GenericRecordUtils.JSON, aspectSpec)) + .systemMetadata(systemMetadata) + .headers(headers) .build(aspectRetriever); } } diff --git a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java index 8b530b218532d0..a5cff75c0c91a2 100644 --- a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java @@ -14,6 +14,7 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import 
com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.IngestAspectsResult; import com.linkedin.metadata.entity.TransactionContext; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.event.EventProducer; @@ -76,7 +77,7 @@ public void setup() .thenAnswer( i -> List.of( - ((Function>) i.getArgument(0)) + ((Function) i.getArgument(0)) .apply(TransactionContext.empty(Mockito.mock(Transaction.class), 0)))); EventProducer mockEntityEventProducer = Mockito.mock(EventProducer.class); diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/operations/v1/TraceControllerTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/operations/v1/TraceControllerTest.java new file mode 100644 index 00000000000000..ca26ba42832009 --- /dev/null +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/operations/v1/TraceControllerTest.java @@ -0,0 +1,275 @@ +package io.datahubproject.openapi.operations.v1; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; +import static org.testng.Assert.assertNotNull; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.AuthorizerChain; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.systemmetadata.TraceService; +import 
com.linkedin.metadata.systemmetadata.TraceStatus; +import com.linkedin.metadata.systemmetadata.TraceStorageStatus; +import com.linkedin.metadata.systemmetadata.TraceWriteStatus; +import io.datahubproject.metadata.context.ObjectMapperContext; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.TraceContext; +import io.datahubproject.openapi.config.TracingInterceptor; +import io.datahubproject.openapi.v1.models.TraceRequestV1; +import io.datahubproject.openapi.v1.models.TraceResponseV1; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import java.util.Map; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureWebMvc; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.http.MediaType; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.test.web.servlet.request.MockMvcRequestBuilders; +import org.springframework.test.web.servlet.result.MockMvcResultMatchers; +import org.testng.annotations.Test; + +@SpringBootTest(classes = TraceControllerTest.TestConfig.class) +@AutoConfigureWebMvc +@AutoConfigureMockMvc +public class TraceControllerTest extends AbstractTestNGSpringContextTests { + public 
static final TraceStatus OK = + TraceStatus.builder() + .success(true) + .primaryStorage(TraceStorageStatus.ok(TraceWriteStatus.ACTIVE_STATE)) + .searchStorage(TraceStorageStatus.ok(TraceWriteStatus.ACTIVE_STATE)) + .build(); + + @Autowired private TraceController traceController; + + @Autowired private MockMvc mockMvc; + + @Autowired private TraceService mockTraceService; + + @Test + public void initTest() { + assertNotNull(traceController); + } + + @Test + public void testGetTrace() throws Exception { + // Test URNs + Urn TEST_URN_1 = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)"); + Urn TEST_URN_2 = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)"); + + // Mock trace service response + TraceResponseV1 mockTraceResult = + new TraceResponseV1( + Map.of( + TEST_URN_1, Map.of("status", OK, "datasetProperties", OK), + TEST_URN_2, Map.of("datasetProperties", OK))); + + when(mockTraceService.trace( + any(OperationContext.class), eq("trace123"), any(), eq(false), eq(false), eq(false))) + .thenReturn(mockTraceResult); + + // Create test request body + TraceRequestV1 requestBody = new TraceRequestV1(); + requestBody.put(TEST_URN_1, List.of("status", "datasetProperties")); + requestBody.put(TEST_URN_2, List.of("datasetProperties")); + + // Test the trace endpoint + mockMvc + .perform( + MockMvcRequestBuilders.post("/openapi/v1/trace/write/test-trace123-id") + .param("onlyIncludeErrors", "false") + .content(new ObjectMapper().writeValueAsString(requestBody)) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().isOk()) + .andDo( + result -> { + String responseContent = result.getResponse().getContentAsString(); + System.out.println("Response content: " + responseContent); + }) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].datasetProperties.success") + .value(true)) + .andExpect( + 
MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)'].datasetProperties.success") + .value(true)) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].status.success") + .value(true)) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].datasetProperties.primaryStorage.writeStatus") + .value("ACTIVE_STATE")) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].datasetProperties.searchStorage.writeStatus") + .value("ACTIVE_STATE")); + } + + @Test + public void testGetTraceWithCustomParameters() throws Exception { + // Test URN + Urn TEST_URN = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)"); + + // Mock trace service response + TraceResponseV1 mockTraceResult = + new TraceResponseV1( + Map.of( + TEST_URN, + Map.of( + "status", + TraceStatus.builder() + .success(false) + .primaryStorage( + TraceStorageStatus.fail(TraceWriteStatus.ERROR, "Mock test error")) + .searchStorage( + TraceStorageStatus.fail( + TraceWriteStatus.ERROR, "Failed to write to primary storage")) + .build(), + "datasetProfile", + TraceStatus.builder() + .success(true) + .primaryStorage(TraceStorageStatus.ok(TraceWriteStatus.NO_OP)) + .searchStorage( + TraceStorageStatus.ok(TraceWriteStatus.TRACE_NOT_IMPLEMENTED)) + .build()))); + + when(mockTraceService.trace( + any(OperationContext.class), + eq("trace123"), + any(), + eq(false), // onlyIncludeErrors = false + eq(true), // detailed = true + eq(true) // skipCache = true + )) + .thenReturn(mockTraceResult); + + // Create test request body + TraceRequestV1 requestBody = new TraceRequestV1(); + requestBody.put(TEST_URN, List.of("status", "datasetProfile")); + + // Test the trace endpoint with custom parameters + mockMvc + .perform( + 
MockMvcRequestBuilders.post("/openapi/v1/trace/write/test-trace123-id") + .param("onlyIncludeErrors", "false") + .param("detailed", "true") + .param("skipCache", "true") + .content(new ObjectMapper().writeValueAsString(requestBody)) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().isOk()) + .andDo( + result -> { + String responseContent = result.getResponse().getContentAsString(); + System.out.println("Response content: " + responseContent); + }) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].status.success") + .value(false)) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].datasetProfile.success") + .value(true)) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].status.primaryStorage.writeStatus") + .value("ERROR")) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].status.primaryStorage.writeMessage") + .value("Mock test error")) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].status.searchStorage.writeStatus") + .value("ERROR")) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].status.searchStorage.writeMessage") + .value("Failed to write to primary storage")) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].datasetProfile.primaryStorage.writeStatus") + .value("NO_OP")) + .andExpect( + MockMvcResultMatchers.jsonPath( + "$['urn:li:dataset:(urn:li:dataPlatform:hdfs,/path/to/data,PROD)'].datasetProfile.searchStorage.writeStatus") + .value("TRACE_NOT_IMPLEMENTED")); + } + + @SpringBootConfiguration + @Import({TraceControllerTestConfig.class, 
TracingInterceptor.class}) + @ComponentScan(basePackages = {"io.datahubproject.openapi.operations.v1"}) + static class TestConfig {} + + @TestConfiguration + public static class TraceControllerTestConfig { + @MockBean public TraceService traceService; + + @Bean + public ObjectMapper objectMapper() { + return new ObjectMapper(); + } + + @Bean(name = "systemOperationContext") + public OperationContext systemOperationContext(ObjectMapper objectMapper) { + TraceContext traceContext = mock(TraceContext.class); + return TestOperationContexts.systemContextTraceNoSearchAuthorization( + () -> ObjectMapperContext.builder().objectMapper(objectMapper).build(), + () -> traceContext); + } + + @Bean + public EntityRegistry entityRegistry( + @Qualifier("systemOperationContext") OperationContext systemOperationContext) { + return systemOperationContext.getEntityRegistry(); + } + + @Bean + @Primary + public TraceContext traceContext( + @Qualifier("systemOperationContext") OperationContext systemOperationContext) { + return systemOperationContext.getTraceContext(); + } + + @Bean + @Primary + public AuthorizerChain authorizerChain() { + AuthorizerChain authorizerChain = mock(AuthorizerChain.class); + + Authentication authentication = mock(Authentication.class); + when(authentication.getActor()).thenReturn(new Actor(ActorType.USER, "datahub")); + when(authorizerChain.authorize(any())) + .thenReturn(new AuthorizationResult(null, AuthorizationResult.Type.ALLOW, "")); + AuthenticationContext.setAuthentication(authentication); + + return authorizerChain; + } + } +} diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 5080f6a12cdd28..ced9fea1ae83ee 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ 
b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -20,6 +20,7 @@ import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; import static org.testng.Assert.assertNotNull; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNull; import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; @@ -55,9 +56,12 @@ import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.mxe.GenericAspect; +import com.linkedin.mxe.SystemMetadata; import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.TraceContext; import io.datahubproject.metadata.context.ValidationContext; import io.datahubproject.openapi.config.SpringWebConfig; +import io.datahubproject.openapi.config.TracingInterceptor; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.test.metadata.context.TestOperationContexts; import jakarta.servlet.ServletException; @@ -87,6 +91,7 @@ @ComponentScan(basePackages = {"io.datahubproject.openapi.v3.controller"}) @Import({ SpringWebConfig.class, + TracingInterceptor.class, EntityControllerTest.EntityControllerTestConfig.class, EntityVersioningServiceFactory.class }) @@ -398,6 +403,7 @@ public static class EntityControllerTestConfig { @MockBean public EntityServiceImpl entityService; @MockBean public SearchService searchService; @MockBean public TimeseriesAspectService timeseriesAspectService; + @MockBean public TraceContext traceContext; @Bean public ObjectMapper objectMapper() { @@ -647,4 +653,80 @@ public void testInvalidVersionSetUrn() throws Exception { .accept(MediaType.APPLICATION_JSON)) .andExpect(status().is4xxClientError()); } + + @Test + public void testSystemMetadataAndHeadersParsing() throws Exception { + // Test JSON with both systemMetadata and headers + final 
String testBodyWithMetadataAndHeaders = + "[\n" + + " {\n" + + " \"urn\": \"urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)\",\n" + + " \"status\": {\n" + + " \"value\": {\n" + + " \"removed\": false\n" + + " },\n" + + " \"systemMetadata\": {\n" + + " \"lastObserved\": 1234567890,\n" + + " \"runId\": \"test-run-id\"\n" + + " },\n" + + " \"headers\": {\n" + + " \"X-Custom-Header\": \"test-value\",\n" + + " \"X-Another-Header\": \"another-value\"\n" + + " }\n" + + " }\n" + + " }\n" + + "]"; + + // Test JSON without systemMetadata and headers + final String testBodyWithoutMetadataAndHeaders = + "[\n" + + " {\n" + + " \"urn\": \"urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)\",\n" + + " \"status\": {\n" + + " \"value\": {\n" + + " \"removed\": false\n" + + " }\n" + + " }\n" + + " }\n" + + "]"; + + // Test with metadata and headers + AspectsBatch batchWithMetadata = + entityController.toMCPBatch( + opContext, + testBodyWithMetadataAndHeaders, + opContext.getSessionActorContext().getAuthentication().getActor()); + + // Verify systemMetadata is correctly parsed + SystemMetadata systemMetadata = + batchWithMetadata.getMCPItems().get(0).getMetadataChangeProposal().getSystemMetadata(); + assertNotNull(systemMetadata); + assertEquals(1234567890L, systemMetadata.getLastObserved().longValue()); + assertEquals("test-run-id", systemMetadata.getRunId()); + + // Verify headers are correctly parsed + Map headers = + batchWithMetadata.getMCPItems().get(0).getMetadataChangeProposal().getHeaders(); + assertNotNull(headers); + assertEquals("test-value", headers.get("X-Custom-Header")); + assertEquals("another-value", headers.get("X-Another-Header")); + + // Test without metadata and headers + AspectsBatch batchWithoutMetadata = + entityController.toMCPBatch( + opContext, + testBodyWithoutMetadataAndHeaders, + opContext.getSessionActorContext().getAuthentication().getActor()); + + // Verify systemMetadata has lastObserved even when not in input + SystemMetadata 
metadataWithoutInput = + batchWithoutMetadata.getMCPItems().get(0).getMetadataChangeProposal().getSystemMetadata(); + assertNotNull(metadataWithoutInput); + assertNotNull(metadataWithoutInput.getLastObserved()); + assertEquals( + metadataWithoutInput.getRunId(), "no-run-id-provided"); // Should be null since not provided + + // Verify headers are null when not present + assertNull(batchWithoutMetadata.getMCPItems().get(0).getMetadataChangeProposal().getHeaders()); + } } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index af11532ccf4ece..3a5e907c1c0ea7 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -3830,18 +3830,18 @@ "type" : "record", "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", - "doc" : "Properties associated with a ML Model\r", + "doc" : "Properties associated with a ML Model", "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { "type" : "record", "name" : "MLModelLineageInfo", - "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups", "fields" : [ { "name" : "trainingJobs", "type" : { "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.", "optional" : true, "Relationship" : { "/*" : { @@ -3856,7 +3856,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "doc" : "List of jobs or process instances (if any) that use the model or group.", "optional" : true, "Relationship" : { "/*" : { @@ -3871,7 +3871,7 @@ "fields" : [ { "name" : "name", "type" : "string", - "doc" : "Display name of the MLModel\r", + "doc" : "Display name of the MLModel", "optional" : true, "Searchable" : { "boostScore" : 10.0, @@ -3882,7 +3882,7 @@ }, { "name" : "description", "type" : "string", - "doc" : "Documentation of the MLModel\r", + "doc" : "Documentation of the MLModel", "optional" : true, "Searchable" : { "fieldType" : "TEXT", @@ -3891,28 +3891,28 @@ }, { "name" : "date", "type" : "com.linkedin.common.Time", - "doc" : "Date when the MLModel was developed\r", + "doc" : "Date when the MLModel was developed", "optional" : true, "deprecated" : true }, { "name" : "created", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Audit stamp containing who created this and when\r", + "doc" : "Audit stamp containing who created this and when", "optional" : true }, { "name" : "lastModified", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Date when the MLModel was last modified\r", + "doc" : "Date when the MLModel was last modified", "optional" : true }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", - "doc" : "Version of the MLModel\r", + "doc" : "Version of the MLModel", "optional" : true }, { "name" : "type", "type" : "string", - "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r", + "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc", 
"optional" : true, "Searchable" : { "fieldType" : "TEXT_PARTIAL" @@ -3928,7 +3928,7 @@ "ref" : [ "string", "int", "float", "double", "boolean" ] } }, - "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r", + "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams", "optional" : true }, { "name" : "hyperParams", @@ -3963,7 +3963,7 @@ } } }, - "doc" : "Hyperparameters of the MLModel\r", + "doc" : "Hyperparameters of the MLModel", "optional" : true }, { "name" : "trainingMetrics", @@ -3998,7 +3998,7 @@ } } }, - "doc" : "Metrics of the MLModel used in training\r", + "doc" : "Metrics of the MLModel used in training", "optional" : true }, { "name" : "onlineMetrics", @@ -4006,7 +4006,7 @@ "type" : "array", "items" : "MLMetric" }, - "doc" : "Metrics of the MLModel used in production\r", + "doc" : "Metrics of the MLModel used in production", "optional" : true }, { "name" : "mlFeatures", @@ -4014,7 +4014,7 @@ "type" : "array", "items" : "com.linkedin.common.MLFeatureUrn" }, - "doc" : "List of features used for MLModel training\r", + "doc" : "List of features used for MLModel training", "optional" : true, "Relationship" : { "/*" : { @@ -4029,7 +4029,7 @@ "type" : "array", "items" : "string" }, - "doc" : "Tags for the MLModel\r", + "doc" : "Tags for the MLModel", "default" : [ ] }, { "name" : "deployments", @@ -4037,7 +4037,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Deployments for the MLModel\r", + "doc" : "Deployments for the MLModel", "optional" : true, "Relationship" : { "/*" : { @@ -4051,7 +4051,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Groups the model belongs to\r", + "doc" : "Groups the model belongs to", "optional" : true, "Relationship" : { "/*" : { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index f58d83dd1e5cb7..bde79e4d475cc2 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -3988,18 +3988,18 @@ "type" : "record", "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", - "doc" : "Properties associated with a ML Model\r", + "doc" : "Properties associated with a ML Model", "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { "type" : "record", "name" : "MLModelLineageInfo", - "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups", "fields" : [ { "name" : "trainingJobs", "type" : { "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.", "optional" : true, "Relationship" : { "/*" : { @@ -4014,7 +4014,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "doc" : "List of jobs or process instances (if any) that use the model or group.", "optional" : true, "Relationship" : { "/*" : { @@ -4029,7 +4029,7 @@ "fields" : [ { "name" : "name", "type" : "string", - "doc" : "Display name of the MLModel\r", + "doc" : "Display name of the MLModel", "optional" : true, "Searchable" : { "boostScore" : 10.0, @@ -4040,7 +4040,7 @@ }, { "name" : "description", "type" : "string", - "doc" : "Documentation of the MLModel\r", + "doc" : "Documentation of the MLModel", "optional" : true, "Searchable" : { "fieldType" : "TEXT", @@ -4049,28 +4049,28 @@ }, { "name" : "date", "type" : "com.linkedin.common.Time", - "doc" : "Date when the MLModel was developed\r", + "doc" : "Date when the MLModel was developed", "optional" : true, "deprecated" : true }, { "name" : "created", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Audit stamp containing who created this and when\r", + "doc" : "Audit stamp containing who created this and when", "optional" : true }, { "name" : "lastModified", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Date when the MLModel was last modified\r", + "doc" : "Date when the MLModel was last modified", "optional" : true }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", - "doc" : "Version of the MLModel\r", + "doc" : "Version of the MLModel", "optional" : true }, { "name" : "type", "type" : "string", - "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r", + "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc", 
"optional" : true, "Searchable" : { "fieldType" : "TEXT_PARTIAL" @@ -4086,7 +4086,7 @@ "ref" : [ "string", "int", "float", "double", "boolean" ] } }, - "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r", + "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams", "optional" : true }, { "name" : "hyperParams", @@ -4121,7 +4121,7 @@ } } }, - "doc" : "Hyperparameters of the MLModel\r", + "doc" : "Hyperparameters of the MLModel", "optional" : true }, { "name" : "trainingMetrics", @@ -4156,7 +4156,7 @@ } } }, - "doc" : "Metrics of the MLModel used in training\r", + "doc" : "Metrics of the MLModel used in training", "optional" : true }, { "name" : "onlineMetrics", @@ -4164,7 +4164,7 @@ "type" : "array", "items" : "MLMetric" }, - "doc" : "Metrics of the MLModel used in production\r", + "doc" : "Metrics of the MLModel used in production", "optional" : true }, { "name" : "mlFeatures", @@ -4172,7 +4172,7 @@ "type" : "array", "items" : "com.linkedin.common.MLFeatureUrn" }, - "doc" : "List of features used for MLModel training\r", + "doc" : "List of features used for MLModel training", "optional" : true, "Relationship" : { "/*" : { @@ -4187,7 +4187,7 @@ "type" : "array", "items" : "string" }, - "doc" : "Tags for the MLModel\r", + "doc" : "Tags for the MLModel", "default" : [ ] }, { "name" : "deployments", @@ -4195,7 +4195,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Deployments for the MLModel\r", + "doc" : "Deployments for the MLModel", "optional" : true, "Relationship" : { "/*" : { @@ -4209,7 +4209,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Groups the model belongs to\r", + "doc" : "Groups the model belongs to", "optional" : true, "Relationship" : { "/*" : { @@ -5012,12 +5012,12 @@ "type" : "record", "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", - "doc" : "Properties associated with an ML Model 
Group\r", + "doc" : "Properties associated with an ML Model Group", "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : "string", - "doc" : "Display name of the MLModelGroup\r", + "doc" : "Display name of the MLModelGroup", "optional" : true, "Searchable" : { "boostScore" : 10.0, @@ -5028,7 +5028,7 @@ }, { "name" : "description", "type" : "string", - "doc" : "Documentation of the MLModelGroup\r", + "doc" : "Documentation of the MLModelGroup", "optional" : true, "Searchable" : { "fieldType" : "TEXT", @@ -5037,23 +5037,23 @@ }, { "name" : "createdAt", "type" : "com.linkedin.common.Time", - "doc" : "Date when the MLModelGroup was developed\r", + "doc" : "Date when the MLModelGroup was developed", "optional" : true, "deprecated" : true }, { "name" : "created", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Time and Actor who created the MLModelGroup\r", + "doc" : "Time and Actor who created the MLModelGroup", "optional" : true }, { "name" : "lastModified", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Date when the MLModelGroup was last modified\r", + "doc" : "Date when the MLModelGroup was last modified", "optional" : true }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", - "doc" : "Version of the MLModelGroup\r", + "doc" : "Version of the MLModelGroup", "optional" : true } ], "Aspect" : { @@ -6291,6 +6291,10 @@ "name" : "aspect", "type" : "com.linkedin.entity.Aspect", "optional" : true + }, { + "name" : "telemetryTraceId", + "type" : "string", + "optional" : true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 61c31f93987b88..a252d5c73591d4 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -3554,18 +3554,18 @@ "type" : "record", "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", - "doc" : "Properties associated with a ML Model\r", + "doc" : "Properties associated with a ML Model", "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { "type" : "record", "name" : "MLModelLineageInfo", - "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups", "fields" : [ { "name" : "trainingJobs", "type" : { "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.", "optional" : true, "Relationship" : { "/*" : { @@ -3580,7 +3580,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "doc" : "List of jobs or process instances (if any) that use the model or group.", "optional" : true, "Relationship" : { "/*" : { @@ -3595,7 +3595,7 @@ "fields" : [ { "name" : "name", "type" : "string", - "doc" : "Display name of the MLModel\r", + "doc" : "Display name of the MLModel", "optional" : true, "Searchable" : { "boostScore" : 10.0, @@ -3606,7 +3606,7 @@ }, { "name" : "description", "type" : "string", - "doc" : "Documentation of the MLModel\r", + "doc" : "Documentation of the MLModel", "optional" : true, "Searchable" : { "fieldType" : "TEXT", @@ -3615,28 +3615,28 @@ }, { "name" : "date", "type" : "com.linkedin.common.Time", - "doc" : "Date when the MLModel was developed\r", + "doc" : "Date when the MLModel was developed", "optional" : true, "deprecated" : true }, { "name" : "created", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Audit stamp containing who created this and when\r", + "doc" : "Audit stamp containing who created this and when", "optional" : true }, { "name" : "lastModified", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Date when the MLModel was last modified\r", + "doc" : "Date when the MLModel was last modified", "optional" : true }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", - "doc" : "Version of the MLModel\r", + "doc" : "Version of the MLModel", "optional" : true }, { "name" : "type", "type" : "string", - "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r", + "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc", 
"optional" : true, "Searchable" : { "fieldType" : "TEXT_PARTIAL" @@ -3652,7 +3652,7 @@ "ref" : [ "string", "int", "float", "double", "boolean" ] } }, - "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r", + "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams", "optional" : true }, { "name" : "hyperParams", @@ -3687,7 +3687,7 @@ } } }, - "doc" : "Hyperparameters of the MLModel\r", + "doc" : "Hyperparameters of the MLModel", "optional" : true }, { "name" : "trainingMetrics", @@ -3722,7 +3722,7 @@ } } }, - "doc" : "Metrics of the MLModel used in training\r", + "doc" : "Metrics of the MLModel used in training", "optional" : true }, { "name" : "onlineMetrics", @@ -3730,7 +3730,7 @@ "type" : "array", "items" : "MLMetric" }, - "doc" : "Metrics of the MLModel used in production\r", + "doc" : "Metrics of the MLModel used in production", "optional" : true }, { "name" : "mlFeatures", @@ -3738,7 +3738,7 @@ "type" : "array", "items" : "com.linkedin.common.MLFeatureUrn" }, - "doc" : "List of features used for MLModel training\r", + "doc" : "List of features used for MLModel training", "optional" : true, "Relationship" : { "/*" : { @@ -3753,7 +3753,7 @@ "type" : "array", "items" : "string" }, - "doc" : "Tags for the MLModel\r", + "doc" : "Tags for the MLModel", "default" : [ ] }, { "name" : "deployments", @@ -3761,7 +3761,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Deployments for the MLModel\r", + "doc" : "Deployments for the MLModel", "optional" : true, "Relationship" : { "/*" : { @@ -3775,7 +3775,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Groups the model belongs to\r", + "doc" : "Groups the model belongs to", "optional" : true, "Relationship" : { "/*" : { @@ -3952,6 +3952,10 @@ "name" : "aspect", "type" : "com.linkedin.entity.Aspect", "optional" : true + }, { + "name" : "telemetryTraceId", + "type" : "string", + "optional" : 
true } ] }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index 75793be7331da4..29d72cd00e9c99 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -3548,18 +3548,18 @@ "type" : "record", "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", - "doc" : "Properties associated with a ML Model\r", + "doc" : "Properties associated with a ML Model", "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { "type" : "record", "name" : "MLModelLineageInfo", - "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups", "fields" : [ { "name" : "trainingJobs", "type" : { "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.", "optional" : true, "Relationship" : { "/*" : { @@ -3574,7 +3574,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "doc" : "List of jobs or process instances (if any) that use the model or group.", "optional" : true, "Relationship" : { "/*" : { @@ -3589,7 +3589,7 @@ "fields" : [ { "name" : "name", "type" : "string", - "doc" : "Display name of the MLModel\r", + "doc" : "Display name of the MLModel", "optional" : true, "Searchable" : { "boostScore" : 10.0, @@ -3600,7 +3600,7 @@ }, { "name" : "description", "type" : "string", - "doc" : "Documentation of the MLModel\r", + "doc" : "Documentation of the MLModel", "optional" : true, "Searchable" : { "fieldType" : "TEXT", @@ -3609,28 +3609,28 @@ }, { "name" : "date", "type" : "com.linkedin.common.Time", - "doc" : "Date when the MLModel was developed\r", + "doc" : "Date when the MLModel was developed", "optional" : true, "deprecated" : true }, { "name" : "created", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Audit stamp containing who created this and when\r", + "doc" : "Audit stamp containing who created this and when", "optional" : true }, { "name" : "lastModified", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Date when the MLModel was last modified\r", + "doc" : "Date when the MLModel was last modified", "optional" : true }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", - "doc" : "Version of the MLModel\r", + "doc" : "Version of the MLModel", "optional" : true }, { "name" : "type", "type" : "string", - "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r", + "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc", 
"optional" : true, "Searchable" : { "fieldType" : "TEXT_PARTIAL" @@ -3646,7 +3646,7 @@ "ref" : [ "string", "int", "float", "double", "boolean" ] } }, - "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r", + "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams", "optional" : true }, { "name" : "hyperParams", @@ -3681,7 +3681,7 @@ } } }, - "doc" : "Hyperparameters of the MLModel\r", + "doc" : "Hyperparameters of the MLModel", "optional" : true }, { "name" : "trainingMetrics", @@ -3716,7 +3716,7 @@ } } }, - "doc" : "Metrics of the MLModel used in training\r", + "doc" : "Metrics of the MLModel used in training", "optional" : true }, { "name" : "onlineMetrics", @@ -3724,7 +3724,7 @@ "type" : "array", "items" : "MLMetric" }, - "doc" : "Metrics of the MLModel used in production\r", + "doc" : "Metrics of the MLModel used in production", "optional" : true }, { "name" : "mlFeatures", @@ -3732,7 +3732,7 @@ "type" : "array", "items" : "com.linkedin.common.MLFeatureUrn" }, - "doc" : "List of features used for MLModel training\r", + "doc" : "List of features used for MLModel training", "optional" : true, "Relationship" : { "/*" : { @@ -3747,7 +3747,7 @@ "type" : "array", "items" : "string" }, - "doc" : "Tags for the MLModel\r", + "doc" : "Tags for the MLModel", "default" : [ ] }, { "name" : "deployments", @@ -3755,7 +3755,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Deployments for the MLModel\r", + "doc" : "Deployments for the MLModel", "optional" : true, "Relationship" : { "/*" : { @@ -3769,7 +3769,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Groups the model belongs to\r", + "doc" : "Groups the model belongs to", "optional" : true, "Relationship" : { "/*" : { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 58ba2ad05dfe74..b4ede3617cacfb 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -3982,18 +3982,18 @@ "type" : "record", "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", - "doc" : "Properties associated with a ML Model\r", + "doc" : "Properties associated with a ML Model", "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { "type" : "record", "name" : "MLModelLineageInfo", - "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups", "fields" : [ { "name" : "trainingJobs", "type" : { "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.", "optional" : true, "Relationship" : { "/*" : { @@ -4008,7 +4008,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "doc" : "List of jobs or process instances (if any) that use the model or group.", "optional" : true, "Relationship" : { "/*" : { @@ -4023,7 +4023,7 @@ "fields" : [ { "name" : "name", "type" : "string", - "doc" : "Display name of the MLModel\r", + "doc" : "Display name of the MLModel", "optional" : true, "Searchable" : { "boostScore" : 10.0, @@ -4034,7 +4034,7 @@ }, { "name" : "description", "type" : "string", - "doc" : "Documentation of the MLModel\r", + "doc" : "Documentation of the MLModel", "optional" : true, "Searchable" : { "fieldType" : "TEXT", @@ -4043,28 +4043,28 @@ }, { "name" : "date", "type" : "com.linkedin.common.Time", - "doc" : "Date when the MLModel was developed\r", + "doc" : "Date when the MLModel was developed", "optional" : true, "deprecated" : true }, { "name" : "created", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Audit stamp containing who created this and when\r", + "doc" : "Audit stamp containing who created this and when", "optional" : true }, { "name" : "lastModified", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Date when the MLModel was last modified\r", + "doc" : "Date when the MLModel was last modified", "optional" : true }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", - "doc" : "Version of the MLModel\r", + "doc" : "Version of the MLModel", "optional" : true }, { "name" : "type", "type" : "string", - "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r", + "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc", 
"optional" : true, "Searchable" : { "fieldType" : "TEXT_PARTIAL" @@ -4080,7 +4080,7 @@ "ref" : [ "string", "int", "float", "double", "boolean" ] } }, - "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r", + "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams", "optional" : true }, { "name" : "hyperParams", @@ -4115,7 +4115,7 @@ } } }, - "doc" : "Hyperparameters of the MLModel\r", + "doc" : "Hyperparameters of the MLModel", "optional" : true }, { "name" : "trainingMetrics", @@ -4150,7 +4150,7 @@ } } }, - "doc" : "Metrics of the MLModel used in training\r", + "doc" : "Metrics of the MLModel used in training", "optional" : true }, { "name" : "onlineMetrics", @@ -4158,7 +4158,7 @@ "type" : "array", "items" : "MLMetric" }, - "doc" : "Metrics of the MLModel used in production\r", + "doc" : "Metrics of the MLModel used in production", "optional" : true }, { "name" : "mlFeatures", @@ -4166,7 +4166,7 @@ "type" : "array", "items" : "com.linkedin.common.MLFeatureUrn" }, - "doc" : "List of features used for MLModel training\r", + "doc" : "List of features used for MLModel training", "optional" : true, "Relationship" : { "/*" : { @@ -4181,7 +4181,7 @@ "type" : "array", "items" : "string" }, - "doc" : "Tags for the MLModel\r", + "doc" : "Tags for the MLModel", "default" : [ ] }, { "name" : "deployments", @@ -4189,7 +4189,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Deployments for the MLModel\r", + "doc" : "Deployments for the MLModel", "optional" : true, "Relationship" : { "/*" : { @@ -4203,7 +4203,7 @@ "type" : "array", "items" : "com.linkedin.common.Urn" }, - "doc" : "Groups the model belongs to\r", + "doc" : "Groups the model belongs to", "optional" : true, "Relationship" : { "/*" : { @@ -5006,12 +5006,12 @@ "type" : "record", "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", - "doc" : "Properties associated with an ML Model 
Group\r", + "doc" : "Properties associated with an ML Model Group", "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : "string", - "doc" : "Display name of the MLModelGroup\r", + "doc" : "Display name of the MLModelGroup", "optional" : true, "Searchable" : { "boostScore" : 10.0, @@ -5022,7 +5022,7 @@ }, { "name" : "description", "type" : "string", - "doc" : "Documentation of the MLModelGroup\r", + "doc" : "Documentation of the MLModelGroup", "optional" : true, "Searchable" : { "fieldType" : "TEXT", @@ -5031,23 +5031,23 @@ }, { "name" : "createdAt", "type" : "com.linkedin.common.Time", - "doc" : "Date when the MLModelGroup was developed\r", + "doc" : "Date when the MLModelGroup was developed", "optional" : true, "deprecated" : true }, { "name" : "created", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Time and Actor who created the MLModelGroup\r", + "doc" : "Time and Actor who created the MLModelGroup", "optional" : true }, { "name" : "lastModified", "type" : "com.linkedin.common.TimeStamp", - "doc" : "Date when the MLModelGroup was last modified\r", + "doc" : "Date when the MLModelGroup was last modified", "optional" : true }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", - "doc" : "Version of the MLModelGroup\r", + "doc" : "Version of the MLModelGroup", "optional" : true } ], "Aspect" : { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 30b187da00e91a..ecb37a8c80bb29 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -51,7 +51,7 @@ import com.linkedin.util.Pair; import 
io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.time.Clock; @@ -145,7 +145,7 @@ public Task get( throws URISyntaxException { log.info("GET ASPECT urn: {} aspect: {} version: {}", urnStr, aspectName, version); final Urn urn = Urn.createFromString(urnStr); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { Authentication auth = AuthenticationContext.getAuthentication(); @@ -195,7 +195,7 @@ public Task getTimeseriesAspectValues( endTimeMillis, limit); final Urn urn = Urn.createFromString(urnStr); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { Authentication auth = AuthenticationContext.getAuthentication(); @@ -305,7 +305,7 @@ private Task ingestProposals( final AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); - return RestliUtils.toTask(() -> { + return RestliUtils.toTask(systemOperationContext, () -> { log.debug("Proposals: {}", metadataChangeProposals); try { final AspectsBatch batch = AspectsBatchImpl.builder() @@ -342,7 +342,7 @@ private Task ingestProposals( public Task getCount( @ActionParam(PARAM_ASPECT) @Nonnull String aspectName, @ActionParam(PARAM_URN_LIKE) @Optional @Nullable String urnLike) { - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { Authentication authentication = AuthenticationContext.getAuthentication(); @@ -374,7 +374,7 @@ public Task restoreIndices( @ActionParam("limit") @Optional @Nullable Integer limit, @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { - return RestliUtils.toTask( + return 
RestliUtils.toTask(systemOperationContext, () -> { Authentication authentication = AuthenticationContext.getAuthentication(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index ebbfc6bb6c2983..3539a19ffd4702 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -36,7 +36,7 @@ import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -108,7 +108,7 @@ public Task rollback( "Both Safe & hardDelete flags were defined, honouring safe flag as hardDelete is deprecated"); } try { - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { try { @@ -136,7 +136,7 @@ public Task list( @ActionParam("includeSoft") @Optional @Nullable Boolean includeSoft) { log.info("LIST RUNS offset: {} size: {}", pageOffset, pageSize); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { List summaries = systemMetadataService.listRuns( @@ -160,7 +160,7 @@ public Task describe( @ActionParam("includeAspect") @Optional @Nullable Boolean includeAspect) { log.info("DESCRIBE RUN runId: {}, start: {}, count: {}", runId, start, count); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { Authentication auth = 
AuthenticationContext.getAuthentication(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 0c374c29cf958a..d05bf4a4598473 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -84,7 +84,7 @@ import com.linkedin.restli.server.annotations.RestMethod; import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate; import com.linkedin.timeseries.DeleteAspectValuesResult; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.time.Clock; import java.util.ArrayList; @@ -207,7 +207,7 @@ public Task get( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity " + urn); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final Set projectedAspects = aspectNames == null @@ -248,7 +248,7 @@ public Task> batchGet( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entities: " + urnStrs); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final Set projectedAspects = aspectNames == null @@ -298,7 +298,7 @@ public Task ingest( // variables referenced in lambdas are required to be final final SystemMetadata finalSystemMetadata = systemMetadata; - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { entityService.ingestEntity(opContext, entity, auditStamp, finalSystemMetadata); return null; @@ -355,7 +355,7 @@ public Task batchIngest( .map(SystemMetadataUtils::generateSystemMetadataIfEmpty) .collect(Collectors.toList()); - return RestliUtils.toTask( + return 
RestliUtils.toTask(systemOperationContext, () -> { entityService.ingestEntities(opContext, Arrays.asList(entities), auditStamp, finalSystemMetadataList); @@ -396,7 +396,7 @@ public Task search( log.info("GET SEARCH RESULTS for {} with query {}", entityName, input); // TODO - change it to use _searchService once we are confident on it's latency - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final SearchResult result; // This API is not used by the frontend for search bars so we default to structured @@ -509,7 +509,7 @@ public Task scrollAcrossEntities( input, scrollId); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { ScrollResult result = searchService.scrollAcrossEntities( opContext, @@ -576,7 +576,7 @@ public Task searchAcrossLineage( direction, entityList, input); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> validateLineageSearchResult(opContext, lineageSearchService.searchAcrossLineage( opContext, urn, @@ -639,7 +639,7 @@ public Task scrollAcrossLineage( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> validateLineageScrollResult(opContext, lineageSearchService.scrollAcrossLineage( @@ -686,7 +686,7 @@ public Task list( final Filter finalFilter = validateAndConvert(filter); log.info("GET LIST RESULTS for {} with filter {}", entityName, finalFilter); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { SearchResult result = entitySearchService.filter(opContext, entityName, finalFilter, sortCriterionList, start, count); if (!AuthUtil.isAPIAuthorizedResult( @@ -725,7 +725,7 @@ public Task autocomplete( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { AutoCompleteResult result = 
entitySearchService.autoComplete(opContext, entityName, query, field, filter, limit); if (!isAPIAuthorizedResult( @@ -763,7 +763,7 @@ public Task browse( } log.info("GET BROWSE RESULTS for {} at path {}", entityName, path); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { BrowseResult result = entitySearchService.browse(opContext, entityName, path, filter, start, limit); if (!isAPIAuthorizedResult( @@ -799,7 +799,7 @@ public Task getBrowsePaths( } log.info("GET BROWSE PATHS for {}", urn); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> new StringArray(entitySearchService.getBrowsePaths(opContext, urnToEntityName(urn), urn)), MetricRegistry.name(this.getClass(), "getBrowsePaths")); } @@ -839,7 +839,7 @@ public Task deleteEntities( ComparableVersion finalRegistryVersion = registryVersion; String finalRegistryName1 = registryName; ComparableVersion finalRegistryVersion1 = registryVersion; - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { RollbackResponse response = new RollbackResponse(); List aspectRowsToDelete = @@ -921,7 +921,7 @@ public Task deleteEntity( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity: " + urnStr); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { // Find the timeseries aspects to delete. If aspectName is null, delete all. 
List timeseriesAspectNames = @@ -1041,7 +1041,7 @@ public Task deleteReferencesTo( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity " + urnStr); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> deleteEntityService.deleteReferencesTo(opContext, urn, dryRun), MetricRegistry.name(this.getClass(), "deleteReferences")); } @@ -1137,7 +1137,7 @@ public Task listUrns( } log.info("LIST URNS for {} with start {} and count {}", entityName, start, count); - return RestliUtils.toTask(() -> { + return RestliUtils.toTask(systemOperationContext, () -> { ListUrnsResult result = entityService.listUrns(opContext, entityName, start, count); if (!isAPIAuthorizedEntityUrns( opContext, @@ -1178,7 +1178,7 @@ public Task applyRetention( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to apply retention."); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> entityService.batchApplyRetention(opContext, start, count, attemptWithVersion, aspectName, urn), ACTION_APPLY_RETENTION); } @@ -1208,7 +1208,7 @@ public Task filter( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("FILTER RESULTS for {} with filter {}", entityName, filter); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { SearchResult result = entitySearchService.filter(opContext.withSearchFlags(flags -> flags.setFulltext(true)), entityName, filter, sortCriterionList, start, count); @@ -1245,7 +1245,7 @@ public Task exists(@ActionParam(PARAM_URN) @Nonnull String urnStr, @Act log.info("EXISTS for {}", urnStr); final boolean includeRemoved = includeSoftDelete == null || includeSoftDelete; - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> entityService.exists(opContext, urn, includeRemoved), MetricRegistry.name(this.getClass(), "exists")); } } diff --git 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 896d81d3cbecc3..6e05ce2ac82768 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -27,7 +27,7 @@ import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Arrays; import java.util.Collections; @@ -83,7 +83,7 @@ public Task get( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity " + urn); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final String entityName = urnToEntityName(urn); final Set projectedAspects = @@ -133,7 +133,7 @@ public Task> batchGet( return Task.value(Collections.emptyMap()); } final String entityName = urnToEntityName(urns.iterator().next()); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final Set projectedAspects = aspectNames == null diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java index 73b2d1a6c5cb87..1ce250300745fd 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java +++ 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java @@ -29,7 +29,7 @@ import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; @@ -96,7 +96,7 @@ public Task> batchGetVersioned( if (versionedUrnStrs.size() <= 0) { return Task.value(Collections.emptyMap()); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final Set projectedAspects = aspectNames == null diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java index 16d5868443955a..fe3defe9658ca0 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java @@ -40,7 +40,7 @@ import com.linkedin.restli.server.resources.SimpleResourceTemplate; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Arrays; import java.util.List; @@ -133,7 +133,7 @@ public Task get( } RelationshipDirection direction = RelationshipDirection.valueOf(rawDirection); final List relationshipTypes = Arrays.asList(relationshipTypesParam); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> 
{ final RelatedEntitiesResult relatedEntitiesResult = getRelatedEntities(rawUrn, relationshipTypes, direction, start, count); @@ -210,7 +210,7 @@ public Task getLineage( throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity lineage: " + urnStr); } - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> _graphService.getLineage(systemOperationContext, urn, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java index 705089baed8f5e..efb20bdee44099 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java @@ -33,7 +33,7 @@ import com.linkedin.timeseries.TimeseriesIndicesSizesResult; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; @@ -104,7 +104,7 @@ public Task restoreIndices( @ActionParam("limit") @Optional @Nullable Integer limit, @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> Utils.restoreIndices(systemOperationContext, getContext(), aspectName, urn, urnLike, start, batchSize, limit, gePitEpochMs, lePitEpochMs, _authorizer, _entityService), MetricRegistry.name(this.getClass(), "restoreIndices")); @@ -129,7 +129,7 @@ public Task getTaskStatus( 
@ActionParam(PARAM_NODE_ID) @Optional String nodeId, @ActionParam(PARAM_TASK_ID) @Optional("0") long taskId, @ActionParam(PARAM_TASK) @Optional String task) { - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final Authentication auth = AuthenticationContext.getAuthentication(); @@ -192,7 +192,7 @@ public Task getTaskStatus( @Nonnull @WithSpan public Task getIndexSizes() { - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final Authentication auth = AuthenticationContext.getAuthentication(); @@ -317,7 +317,7 @@ public Task truncateTimeseriesAspect( @ActionParam(PARAM_TIMEOUT_SECONDS) @Optional @Nullable Long timeoutSeconds, @ActionParam(PARAM_FORCE_DELETE_BY_QUERY) @Optional @Nullable Boolean forceDeleteByQuery, @ActionParam(PARAM_FORCE_REINDEX) @Optional @Nullable Boolean forceReindex) { - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> executeTruncateTimeseriesAspect( entityType, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java index 4fea3b0a1aca68..7054da41173e59 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java @@ -23,7 +23,7 @@ import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import javax.annotation.Nonnull; import javax.inject.Inject; import javax.inject.Named; diff --git 
a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java index a2092405da3ff6..eec83b37e07d96 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -1,13 +1,13 @@ package com.linkedin.metadata.resources.restli; import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; import com.linkedin.metadata.dao.throttle.APIThrottleException; import com.linkedin.metadata.restli.NonExceptionHttpErrorResponse; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; +import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.exception.ActorAccessException; import java.util.Optional; import java.util.function.Supplier; @@ -54,20 +54,20 @@ public static Task toTask(@Nonnull Supplier supplier) { } @Nonnull - public static Task toTask(@Nonnull Supplier supplier, String metricName) { - Timer.Context context = MetricUtils.timer(metricName).time(); - // Stop timer on success and failure - return toTask(supplier) - .transform( - orig -> { - context.stop(); - if (orig.isFailed()) { - MetricUtils.counter(MetricRegistry.name(metricName, "failed")).inc(); - } else { - MetricUtils.counter(MetricRegistry.name(metricName, "success")).inc(); - } - return orig; - }); + public static Task toTask(@Nonnull OperationContext opContext, @Nonnull Supplier supplier, String metricName) { + return opContext.withSpan(metricName, () -> { + // Stop timer on success and failure + return toTask(supplier) + .transform( + orig -> { + if (orig.isFailed()) { + 
MetricUtils.counter(MetricRegistry.name(metricName, "failed")).inc(); + } else { + MetricUtils.counter(MetricRegistry.name(metricName, "success")).inc(); + } + return orig; + }); + }, MetricUtils.DROPWIZARD_METRIC, "true"); } /** diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index 426eff20c9c6eb..cc5c1b1059b1f7 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -44,7 +44,7 @@ import com.linkedin.usage.UserUsageCounts; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.Arrays; import java.util.Map; import java.util.Set; @@ -100,7 +100,7 @@ public class UsageStats extends SimpleResourceTemplate { @WithSpan public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregation[] buckets) { log.info("Ingesting {} usage stats aggregations", buckets.length); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { final Authentication auth = AuthenticationContext.getAuthentication(); @@ -141,7 +141,7 @@ public Task query( log.info( "Querying usage stats for resource: {}, duration: {}, start time: {}, end time: {}, max buckets: {}", resource, duration, startTime, endTime, maxBuckets); - return RestliUtils.toTask( + return RestliUtils.toTask(systemOperationContext, () -> { Urn resourceUrn = UrnUtils.getUrn(resource); @@ -186,7 +186,7 @@ public Task queryRange( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to query usage."); } - return RestliUtils.toTask( + return 
RestliUtils.toTask(systemOperationContext, () -> UsageServiceUtil.queryRange(opContext, _timeseriesAspectService, resource, duration, range), MetricRegistry.name(this.getClass(), "queryRange")); } diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index 037b5b81fd4df0..265b8a35e840c8 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; +import com.linkedin.metadata.entity.IngestAspectsResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.event.EventProducer; @@ -88,7 +89,8 @@ public void testAsyncDefaultAspects() throws URISyntaxException { Actor actor = new Actor(ActorType.USER, "user"); when(mockAuthentication.getActor()).thenReturn(actor); aspectResource.ingestProposal(mcp, "true"); - verify(producer, times(1)).produceMetadataChangeProposal(urn, mcp); + verify(producer, times(1)).produceMetadataChangeProposal(any(OperationContext.class), eq(urn), + argThat(arg -> arg.getMetadataChangeProposal().equals(mcp))); verifyNoMoreInteractions(producer); verifyNoMoreInteractions(aspectDao); @@ -101,42 +103,43 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) .build(opContext.getAspectRetriever()); - when(aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) - .thenReturn( - List.of(List.of( - UpdateAspectResult.builder() - .urn(urn) - 
.newValue(new DatasetProperties().setName("name1")) - .auditStamp(new AuditStamp()) - .request(req) - .build(), - UpdateAspectResult.builder() - .urn(urn) - .newValue(new DatasetProperties().setName("name2")) - .auditStamp(new AuditStamp()) - .request(req) - .build(), - UpdateAspectResult.builder() - .urn(urn) - .newValue(new DatasetProperties().setName("name3")) - .auditStamp(new AuditStamp()) - .request(req) - .build(), - UpdateAspectResult.builder() - .urn(urn) - .newValue(new DatasetProperties().setName("name4")) - .auditStamp(new AuditStamp()) - .request(req) - .build(), - UpdateAspectResult.builder() - .urn(urn) - .newValue(new DatasetProperties().setName("name5")) - .auditStamp(new AuditStamp()) - .request(req) - .build()))); + IngestAspectsResult txResult = IngestAspectsResult.builder() + .updateAspectResults(List.of( + UpdateAspectResult.builder() + .urn(urn) + .newValue(new DatasetProperties().setName("name1")) + .auditStamp(new AuditStamp()) + .request(req) + .build(), + UpdateAspectResult.builder() + .urn(urn) + .newValue(new DatasetProperties().setName("name2")) + .auditStamp(new AuditStamp()) + .request(req) + .build(), + UpdateAspectResult.builder() + .urn(urn) + .newValue(new DatasetProperties().setName("name3")) + .auditStamp(new AuditStamp()) + .request(req) + .build(), + UpdateAspectResult.builder() + .urn(urn) + .newValue(new DatasetProperties().setName("name4")) + .auditStamp(new AuditStamp()) + .request(req) + .build(), + UpdateAspectResult.builder() + .urn(urn) + .newValue(new DatasetProperties().setName("name5")) + .auditStamp(new AuditStamp()) + .request(req) + .build())) + .build(); + when(aspectDao.runInTransactionWithRetry(any(), any(), anyInt())).thenReturn(List.of(txResult)); aspectResource.ingestProposal(mcp, "false"); verify(producer, times(5)) - .produceMetadataChangeLog(eq(urn), any(AspectSpec.class), any(MetadataChangeLog.class)); + .produceMetadataChangeLog(any(OperationContext.class), eq(urn), any(AspectSpec.class), 
any(MetadataChangeLog.class)); verifyNoMoreInteractions(producer); } @@ -160,7 +163,7 @@ public void testNoValidateAsync() throws URISyntaxException { Actor actor = new Actor(ActorType.USER, "user"); when(mockAuthentication.getActor()).thenReturn(actor); aspectResource.ingestProposal(mcp, "true"); - verify(producer, times(1)).produceMetadataChangeProposal(urn, mcp); + verify(producer, times(1)).produceMetadataChangeProposal(any(OperationContext.class), eq(urn), argThat(arg -> arg.getMetadataChangeProposal().equals(mcp))); verifyNoMoreInteractions(producer); verifyNoMoreInteractions(aspectDao); reset(producer, aspectDao); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestAspectsResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestAspectsResult.java new file mode 100644 index 00000000000000..d9b7091ac44b0f --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestAspectsResult.java @@ -0,0 +1,62 @@ +package com.linkedin.metadata.entity; + +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.util.Pair; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import lombok.Builder; +import lombok.Value; + +@Builder(toBuilder = true) +@Value +public class IngestAspectsResult { + public static final IngestAspectsResult EMPTY = IngestAspectsResult.builder().build(); + + List updateAspectResults; + List>> failedUpdateAspectResults; + + public static IngestAspectsResult combine(IngestAspectsResult first, IngestAspectsResult second) { + if (first == null) { + return second != null ? 
second : IngestAspectsResult.builder().build(); + } + if (second == null) { + return first; + } + + List combinedResults = + Stream.concat( + first.getUpdateAspectResults().stream(), second.getUpdateAspectResults().stream()) + .collect(Collectors.toList()); + + List>> combinedFailedResults = + Stream.concat( + first.getFailedUpdateAspectResults().stream(), + second.getFailedUpdateAspectResults().stream()) + .collect(Collectors.toList()); + + return IngestAspectsResult.builder() + .updateAspectResults(combinedResults) + .failedUpdateAspectResults(combinedFailedResults) + .build(); + } + + public static class IngestAspectsResultBuilder { + public IngestAspectsResult build() { + if (this.failedUpdateAspectResults == null) { + this.failedUpdateAspectResults = Collections.emptyList(); + } + if (this.updateAspectResults == null) { + this.updateAspectResults = Collections.emptyList(); + } + + return new IngestAspectsResult( + this.updateAspectResults.stream().filter(Objects::nonNull).collect(Collectors.toList()), + this.failedUpdateAspectResults); + } + } +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java deleted file mode 100644 index 1ef818559faaec..00000000000000 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.linkedin.metadata.entity; - -import com.linkedin.common.urn.Urn; -import lombok.Value; - -@Value -public class IngestProposalResult { - Urn urn; - boolean didUpdate; - boolean queued; -} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java index c554f2b919b063..8b0540a6e2d949 100644 --- 
a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java @@ -5,7 +5,7 @@ import com.linkedin.metadata.recommendation.ranker.RecommendationModuleRanker; import com.linkedin.metadata.utils.ConcurrencyUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.List; import java.util.Map; import java.util.Optional; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java index c1593088a2dd71..03dbe1af149fd0 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java @@ -17,7 +17,7 @@ import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.utils.QueryUtils; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Collections; import java.util.Comparator; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java index ca3d43762e0738..0d9b3ced8d6f33 100644 --- 
a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.recommendation.candidatesource; -import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants; @@ -85,21 +84,28 @@ public List getRecommendations( @Nullable Filter filter) { SearchRequest searchRequest = buildSearchRequest(opContext.getSessionActorContext().getActorUrn()); - try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getRecentlySearched").time()) { - final SearchResponse searchResponse = - _searchClient.search(searchRequest, RequestOptions.DEFAULT); - // extract results - ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); - return parsedTerms.getBuckets().stream() - .map(bucket -> buildContent(bucket.getKeyAsString())) - .filter(Optional::isPresent) - .map(Optional::get) - .limit(MAX_CONTENT) - .collect(Collectors.toList()); - } catch (Exception e) { - log.error("Search query to get most recently viewed entities failed", e); - throw new ESQueryException("Search query failed:", e); - } + + return opContext.withSpan( + "getRecentlySearched", + () -> { + try { + final SearchResponse searchResponse = + _searchClient.search(searchRequest, RequestOptions.DEFAULT); + // extract results + ParsedTerms parsedTerms = searchResponse.getAggregations().get(ENTITY_AGG_NAME); + return parsedTerms.getBuckets().stream() + .map(bucket -> buildContent(bucket.getKeyAsString())) + .filter(Optional::isPresent) + .map(Optional::get) + .limit(MAX_CONTENT) + .collect(Collectors.toList()); + } catch (Exception e) { + log.error("Search query to get most recently viewed entities failed", e); + throw new 
ESQueryException("Search query failed:", e); + } + }, + MetricUtils.DROPWIZARD_NAME, + MetricUtils.name(this.getClass(), "getRecentlySearched")); } private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java index ddf203067f455e..5175177906e260 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java @@ -7,7 +7,7 @@ import com.linkedin.metadata.recommendation.RecommendationRenderType; import com.linkedin.metadata.recommendation.RecommendationRequestContext; import io.datahubproject.metadata.context.OperationContext; -import io.opentelemetry.extension.annotations.WithSpan; +import io.opentelemetry.instrumentation.annotations.WithSpan; import java.util.List; import java.util.Optional; import javax.annotation.Nonnull; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java index 3880ad1d8da119..6d55ab1252b33d 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.systemmetadata; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.IngestionRunSummary; import com.linkedin.mxe.SystemMetadata; @@ -40,6 +41,9 @@ List findByRegistry( List listRuns( Integer pageOffset, Integer 
pageSize, boolean includeSoftDeleted); + List findAspectsByUrn( + @Nonnull Urn urn, @Nonnull List aspects, boolean includeSoftDeleted); + default void configure() {} void clear(); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceService.java new file mode 100644 index 00000000000000..1b6f32da1162fb --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceService.java @@ -0,0 +1,38 @@ +package com.linkedin.metadata.systemmetadata; + +import com.linkedin.common.urn.Urn; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.Map; +import javax.annotation.Nonnull; + +public interface TraceService { + + @Nonnull + Map> trace( + @Nonnull OperationContext opContext, + @Nonnull String traceId, + @Nonnull Map> aspects, + boolean onlyIncludeErrors, + boolean detailed, + boolean skipCache); + + @Nonnull + default Map> trace( + @Nonnull OperationContext opContext, + @Nonnull String traceId, + @Nonnull Map> aspects, + boolean onlyIncludeErrors, + boolean detailed) { + return trace(opContext, traceId, aspects, onlyIncludeErrors, detailed, false); + } + + @Nonnull + default Map> traceDetailed( + @Nonnull OperationContext opContext, + @Nonnull String traceId, + @Nonnull Map> aspects, + boolean skipCache) { + return trace(opContext, traceId, aspects, false, true, skipCache); + } +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStatus.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStatus.java new file mode 100644 index 00000000000000..35bdecee459e1e --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStatus.java @@ -0,0 +1,16 @@ +package com.linkedin.metadata.systemmetadata; + +import 
com.fasterxml.jackson.annotation.JsonInclude; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; + +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@AllArgsConstructor +public class TraceStatus { + private boolean success; + private TraceStorageStatus primaryStorage; + private TraceStorageStatus searchStorage; +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStorageStatus.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStorageStatus.java new file mode 100644 index 00000000000000..0def8785a6b823 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceStorageStatus.java @@ -0,0 +1,52 @@ +package com.linkedin.metadata.systemmetadata; + +import com.fasterxml.jackson.annotation.JsonInclude; +import io.datahubproject.metadata.exception.TraceException; +import java.util.List; +import javax.annotation.Nullable; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; + +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@AllArgsConstructor +public class TraceStorageStatus { + public static final TraceStorageStatus NO_OP = TraceStorageStatus.ok(TraceWriteStatus.NO_OP); + + public static TraceStorageStatus ok(TraceWriteStatus writeStatus) { + return TraceStorageStatus.builder().writeStatus(writeStatus).build(); + } + + public static TraceStorageStatus ok(TraceWriteStatus writeStatus, @Nullable String message) { + TraceStorageStatus.TraceStorageStatusBuilder builder = + TraceStorageStatus.builder().writeStatus(writeStatus); + if (message != null) { + builder.writeMessage(message); + } + return builder.build(); + } + + public static TraceStorageStatus fail(TraceWriteStatus writeStatus, @Nullable Throwable t) { + TraceStorageStatus.TraceStorageStatusBuilder builder = + TraceStorageStatus.builder().writeStatus(writeStatus); + if (t != null) { + 
builder.writeExceptions(List.of(new TraceException(t))); + } + return builder.build(); + } + + public static TraceStorageStatus fail(TraceWriteStatus writeStatus, @Nullable String message) { + TraceStorageStatus.TraceStorageStatusBuilder builder = + TraceStorageStatus.builder().writeStatus(writeStatus); + if (message != null) { + builder.writeMessage(message); + } + return builder.build(); + } + + private TraceWriteStatus writeStatus; + private String writeMessage; + @Nullable private List writeExceptions; +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceWriteStatus.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceWriteStatus.java new file mode 100644 index 00000000000000..78bccd6bf1ccf7 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/TraceWriteStatus.java @@ -0,0 +1,18 @@ +package com.linkedin.metadata.systemmetadata; + +public enum TraceWriteStatus { + // error occurred during processing + ERROR, + // write is queued + PENDING, + // write is the active value in the datastore + ACTIVE_STATE, + // write has been overwritten with a newer value. + HISTORIC_STATE, + // write is not required + NO_OP, + // Unknown status due to the fact that tracing is lost or potentially well outside the expected + // tracing range (i.e. 
last year) + UNKNOWN, + TRACE_NOT_IMPLEMENTED +} diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java index 3f3ed9f75cceb3..face0cc859536d 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java @@ -52,7 +52,9 @@ "com.linkedin.gms.factory.change", "com.datahub.event.hook", "com.linkedin.gms.factory.notifications", - "com.linkedin.gms.factory.telemetry" + "com.linkedin.gms.factory.telemetry", + "com.linkedin.gms.factory.trace", + "com.linkedin.gms.factory.kafka.trace", }) @Configuration @PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/factory/config/GMSOpenTelemetryConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/factory/config/GMSOpenTelemetryConfig.java new file mode 100644 index 00000000000000..b0eab226619709 --- /dev/null +++ b/metadata-service/war/src/main/java/com/linkedin/gms/factory/config/GMSOpenTelemetryConfig.java @@ -0,0 +1,21 @@ +package com.linkedin.gms.factory.config; + +import com.linkedin.gms.factory.system_telemetry.OpenTelemetryBaseFactory; +import io.datahubproject.metadata.context.TraceContext; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class GMSOpenTelemetryConfig extends OpenTelemetryBaseFactory { + + @Override + protected String getApplicationComponent() { + return "datahub-gms"; + } + + @Bean + @Override + protected TraceContext traceContext() { + return super.traceContext(); + } +} diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle index 4b24eeac50b0b7..410641ef678e58 100644 --- a/metadata-utils/build.gradle +++ b/metadata-utils/build.gradle @@ -10,6 
+10,7 @@ dependencies { implementation externalDependency.commonsLang api externalDependency.dropwizardMetricsCore implementation externalDependency.dropwizardMetricsJmx + implementation externalDependency.opentelemetrySdk api externalDependency.elasticSearchRest implementation externalDependency.httpClient api externalDependency.neo4jJavaDriver diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricSpanExporter.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricSpanExporter.java new file mode 100644 index 00000000000000..df2d164edc3c50 --- /dev/null +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricSpanExporter.java @@ -0,0 +1,58 @@ +package com.linkedin.metadata.utils.metrics; + +import static com.linkedin.metadata.utils.metrics.MetricUtils.DROPWIZARD_METRIC; +import static com.linkedin.metadata.utils.metrics.MetricUtils.DROPWIZARD_NAME; + +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SpanExporter; +import java.util.Collection; +import java.util.concurrent.TimeUnit; + +/** Created to forward opentelemetry spans to dropwizard for backwards compatibility */ +public class MetricSpanExporter implements SpanExporter { + private static final AttributeKey DROPWIZARD_ATTR_KEY = + AttributeKey.stringKey(DROPWIZARD_METRIC); + private static final AttributeKey DROPWIZARD_NAME_ATTR_KEY = + AttributeKey.stringKey(DROPWIZARD_NAME); + + @Override + public CompletableResultCode export(Collection spans) { + spans.stream().filter(this::shouldRecordMetric).forEach(this::recordSpanMetric); + + return CompletableResultCode.ofSuccess(); + } + + private boolean shouldRecordMetric(SpanData span) { + // Check for the recordMetric attribute + return 
Boolean.parseBoolean(span.getAttributes().get(DROPWIZARD_ATTR_KEY)) + || span.getAttributes().get(DROPWIZARD_NAME_ATTR_KEY) != null; + } + + private void recordSpanMetric(SpanData span) { + // Calculate duration in nanoseconds + long durationNanos = span.getEndEpochNanos() - span.getStartEpochNanos(); + String dropWizardName = span.getAttributes().get(DROPWIZARD_NAME_ATTR_KEY); + String dropWizardMetricName = + dropWizardName == null + ? MetricRegistry.name(span.getName()) + : MetricRegistry.name(dropWizardName); + + // Update timer with the span duration + Timer timer = MetricUtils.get().timer(dropWizardMetricName); + timer.update(durationNanos, TimeUnit.NANOSECONDS); + } + + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode shutdown() { + return CompletableResultCode.ofSuccess(); + } +} diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java index 3a47c11f8d7489..963015c111f600 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/MetricUtils.java @@ -4,10 +4,21 @@ import com.codahale.metrics.Gauge; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.SharedMetricRegistries; -import com.codahale.metrics.Timer; import com.codahale.metrics.jmx.JmxReporter; public class MetricUtils { + public static final String DROPWIZARD_METRIC = "dwizMetric"; + public static final String DROPWIZARD_NAME = "dwizName"; + public static final String CACHE_HIT_ATTR = "cache.hit"; + public static final String BATCH_SIZE_ATTR = "batch.size"; + public static final String QUEUE_ENQUEUED_AT_ATTR = "queue.enqueued_at"; + public static final String QUEUE_DURATION_MS_ATTR = "queue.duration_ms"; + public static final String MESSAGING_SYSTEM = 
"messaging.system"; + public static final String MESSAGING_DESTINATION = "messaging.destination"; + public static final String MESSAGING_DESTINATION_KIND = "messaging.destination_kind"; + public static final String MESSAGING_OPERATION = "messaging.operation"; + public static final String ERROR_TYPE = "error.type"; + private MetricUtils() {} public static final String DELIMITER = "_"; @@ -41,12 +52,12 @@ public static Counter counter(String metricName) { return REGISTRY.counter(MetricRegistry.name(metricName)); } - public static Timer timer(Class klass, String metricName) { - return REGISTRY.timer(MetricRegistry.name(klass, metricName)); + public static String name(String name, String... names) { + return MetricRegistry.name(name, names); } - public static Timer timer(String metricName) { - return REGISTRY.timer(MetricRegistry.name(metricName)); + public static String name(Class clazz, String... names) { + return MetricRegistry.name(clazz.getName(), names); } public static > T gauge( diff --git a/smoke-test/requirements.txt b/smoke-test/requirements.txt index fadc3dbec1f2b5..f1fbdac68f067a 100644 --- a/smoke-test/requirements.txt +++ b/smoke-test/requirements.txt @@ -17,4 +17,5 @@ types-PyYAML # https://github.com/docker/docker-py/issues/3256 requests<=2.31.0 # Missing numpy requirement in 8.0.0 -deepdiff!=8.0.0 \ No newline at end of file +deepdiff!=8.0.0 +opensearch-py==2.6.0 \ No newline at end of file diff --git a/smoke-test/tests/trace/__init__.py b/smoke-test/tests/trace/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/trace/test_api_trace.py b/smoke-test/tests/trace/test_api_trace.py new file mode 100644 index 00000000000000..4f8671decf8ae6 --- /dev/null +++ b/smoke-test/tests/trace/test_api_trace.py @@ -0,0 +1,486 @@ +import time + +import pytest +from opensearchpy import OpenSearch + +from tests.utils import delete_urns, wait_for_writes_to_sync + +es = OpenSearch(["http://localhost:9200"]) + + 
+generated_urns = { + "apiTraceHappyPath": "urn:li:dataset:(urn:li:dataPlatform:test,apiTraceHappyPath,PROD)", + "apiTraceMCPFail": "urn:li:dataset:(urn:li:dataPlatform:test,apiTraceMCPFail,PROD)", + "apiTraceDroppedElasticsearch": "urn:li:dataset:(urn:li:dataPlatform:test,apiTraceDroppedElasticsearch,PROD)", + "apiTraceOverwritten": "urn:li:dataset:(urn:li:dataPlatform:test,apiTraceOverwritten,PROD)", + "apiTraceTimeseries": "urn:li:dataset:(urn:li:dataPlatform:test,apiTraceTimeseries,PROD)", + "apiTraceNoop": "urn:li:dataset:(urn:li:dataPlatform:test,apiTraceNoop,PROD)", + "apiTraceNoopWithFMCP": "urn:li:dataset:(urn:li:dataPlatform:test,apiTraceNoopWithFMCP,PROD)", +} + + +@pytest.fixture(scope="module", autouse=True) +def test_setup(graph_client): + """Fixture to clean-up urns before and after a test is run""" + print("removing previous test data") + delete_urns(graph_client, list(generated_urns.values())) + wait_for_writes_to_sync() + yield + print("removing generated test data") + delete_urns(graph_client, list(generated_urns.values())) + wait_for_writes_to_sync() + + +def test_successful_async_write(auth_session): + urn = generated_urns["apiTraceHappyPath"] + aspect_name = "status" + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[{"urn": urn, aspect_name: {"value": {"removed": False}}}], + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": {"writeStatus": "ACTIVE_STATE"}, + } + } + } + + +def test_mcp_fail_aspect_async_write(auth_session): + urn 
= generated_urns["apiTraceMCPFail"] + aspect_name = "glossaryTerms" + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset/{urn}/{aspect_name}", + params={"async": "true", "systemMetadata": "true"}, + json={ + "value": { + "terms": [{"urn": "urn:li:glossaryTerm:someTerm"}], + "auditStamp": {"time": 0, "actor": "urn:li:corpuser:datahub"}, + }, + "headers": {"If-Version-Match": "-10000"}, + }, + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true", "skipCache": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json()[urn][aspect_name]["success"] is False + assert resp.json()[urn][aspect_name]["primaryStorage"]["writeStatus"] == "ERROR" + assert ( + resp.json()[urn][aspect_name]["primaryStorage"]["writeExceptions"][0]["message"] + == "Expected version -10000, actual version -1" + ) + assert resp.json()[urn][aspect_name]["searchStorage"] == { + "writeStatus": "ERROR", + "writeMessage": "Primary storage write failed.", + } + + +def test_overwritten_async_write(auth_session): + urn = generated_urns["apiTraceOverwritten"] + aspect_name = "datasetProperties" + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[ + { + "urn": urn, + aspect_name: { + "value": {"name": "original", "customProperties": {}, "tags": []} + }, + } + ], + ) + + original_trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{original_trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + 
aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": {"writeStatus": "ACTIVE_STATE"}, + } + } + } + + # Perform 2nd write + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[ + { + "urn": urn, + aspect_name: { + "value": {"name": "updated", "customProperties": {}, "tags": []} + }, + } + ], + ) + + second_trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{second_trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true", "skipCache": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": {"writeStatus": "ACTIVE_STATE"}, + } + } + } + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{original_trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "HISTORIC_STATE"}, + "searchStorage": {"writeStatus": "HISTORIC_STATE"}, + } + } + } + + +def test_missing_elasticsearch_async_write(auth_session): + urn = generated_urns["apiTraceDroppedElasticsearch"] + aspect_name = "status" + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[{"urn": urn, aspect_name: {"value": {"removed": False}}}], + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + 
params={"onlyIncludeErrors": "false", "detailed": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": {"writeStatus": "ACTIVE_STATE"}, + } + } + } + + # Simulate overwrite + delete_elasticsearch_trace(trace_id) + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true", "skipCache": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": {"writeStatus": "HISTORIC_STATE"}, + } + } + } + + # Simulate dropped write + delete_elasticsearch_system_metadata(urn) + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true", "skipCache": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": False, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": { + "writeStatus": "ERROR", + "writeMessage": "Consumer has processed past the offset.", + }, + } + } + } + + +def test_timeseries_async_write(auth_session): + urn = generated_urns["apiTraceTimeseries"] + aspect_name = "datasetProfile" + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[ + { + "urn": urn, + aspect_name: { + "value": { + "timestampMillis": time.time_ns() // 1_000_000, + "messageId": "test timeseries", + "rowCount": 1, + } + }, + } + ], + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": 
"false", "detailed": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "NO_OP"}, + "searchStorage": {"writeStatus": "TRACE_NOT_IMPLEMENTED"}, + } + } + } + + +def test_noop_async_write(auth_session): + urn = generated_urns["apiTraceNoop"] + aspect_name = "status" + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[{"urn": urn, aspect_name: {"value": {"removed": False}}}], + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": {"writeStatus": "ACTIVE_STATE"}, + } + } + } + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[{"urn": urn, aspect_name: {"value": {"removed": False}}}], + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true", "skipCache": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "NO_OP"}, + "searchStorage": {"writeStatus": "NO_OP"}, + } + } + } + + +def test_noop_with_fmcp_async_write(auth_session): + urn = generated_urns["apiTraceNoopWithFMCP"] + aspect_name = "status" + + resp = auth_session.post( + 
f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[{"urn": urn, aspect_name: {"value": {"removed": False}}}], + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json() == { + urn: { + aspect_name: { + "success": True, + "primaryStorage": {"writeStatus": "ACTIVE_STATE"}, + "searchStorage": {"writeStatus": "ACTIVE_STATE"}, + } + } + } + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v3/entity/dataset", + params={"async": "true", "systemMetadata": "true"}, + json=[ + { + "urn": urn, + aspect_name: { + "value": {"removed": False}, + "headers": {"If-Version-Match": "-10000"}, + }, + } + ], + ) + + trace_id = compare_trace_header_system_metadata( + resp, resp.json()[0][aspect_name]["systemMetadata"] + ) + wait_for_writes_to_sync() + + resp = auth_session.post( + f"{auth_session.gms_url()}/openapi/v1/trace/write/{trace_id}", + params={"onlyIncludeErrors": "false", "detailed": "true", "skipCache": "true"}, + json={urn: [aspect_name]}, + ) + assert resp.json()[urn][aspect_name]["success"] is False + assert resp.json()[urn][aspect_name]["primaryStorage"]["writeStatus"] == "ERROR" + assert ( + resp.json()[urn][aspect_name]["primaryStorage"]["writeExceptions"][0]["message"] + == "Expected version -10000, actual version 1" + ) + assert resp.json()[urn][aspect_name]["searchStorage"] == { + "writeStatus": "ERROR", + "writeMessage": "Primary storage write failed.", + } + + +def compare_trace_header_system_metadata(resp, system_metadata): + header_trace_id = extract_trace_header(resp) + system_metadata_trace_id = extract_trace_system_metadata(system_metadata) + assert header_trace_id.startswith("00-" + 
system_metadata_trace_id) + return system_metadata_trace_id + + +def extract_trace_header(resp): + assert "traceparent" in resp.headers + return resp.headers["traceparent"] + + +def extract_trace_system_metadata(system_metadata): + assert "properties" in system_metadata + assert "telemetryTraceId" in system_metadata["properties"] + return system_metadata["properties"]["telemetryTraceId"] + + +def delete_elasticsearch_trace(trace_id, timeout=10, refresh_interval=1): + field_name = "telemetryTraceId" + index_name = "system_metadata_service_v1" + + update_body = { + "query": {"term": {field_name: trace_id}}, + "script": {"source": f"ctx._source.remove('{field_name}')"}, + } + + response = es.update_by_query( + index=index_name, + body=update_body, + conflicts="proceed", + timeout=timeout, + wait_for_completion=True, + ) + + if response.get("failures"): + raise Exception( + f"Update by query operation had failures: {response['failures']}" + ) + + time.sleep(refresh_interval) + + +def delete_elasticsearch_system_metadata(urn, timeout=10, refresh_interval=1): + index_name = "system_metadata_service_v1" + + update_body = {"query": {"term": {"urn": urn}}} + + response = es.delete_by_query( + index=index_name, + body=update_body, + conflicts="proceed", + timeout=timeout, + wait_for_completion=True, + ) + + if response.get("failures"): + raise Exception( + f"Update by query operation had failures: {response['failures']}" + ) + + time.sleep(refresh_interval) From d0af9d59aab7cc7bee6774bd29d529bd009f1521 Mon Sep 17 00:00:00 2001 From: Maggie Hays Date: Wed, 29 Jan 2025 15:15:26 -0600 Subject: [PATCH 180/249] add datahub-v1.0-rc issue template --- .github/ISSUE_TEMPLATE/--bug-report.md | 2 +- .../datahub-v1-0-rc-bug-report.md | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 .github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md diff --git a/.github/ISSUE_TEMPLATE/--bug-report.md b/.github/ISSUE_TEMPLATE/--bug-report.md index 
8ef9b46b049a41..ed965c9c529385 100644 --- a/.github/ISSUE_TEMPLATE/--bug-report.md +++ b/.github/ISSUE_TEMPLATE/--bug-report.md @@ -29,4 +29,4 @@ If applicable, add screenshots to help explain your problem. - Version [e.g. 22] **Additional context** -Add any other context about the problem here. \ No newline at end of file +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md b/.github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md new file mode 100644 index 00000000000000..73da33b272e6f3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md @@ -0,0 +1,32 @@ +--- +name: DataHub v1.0-rc Bug Report +about: Report issues found in DataHub v1.0 Release Candidates +title: "[v1.0-rc/bug] Description of Bug" +labels: bug, datahub-v1.0-rc +assignees: chriscollins3456, david-leifker, maggiehays + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots and/or Screen Recordings** +If applicable, add screenshots and/or screen recordings to help explain the issue. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. 
From a35bfddba27e3eafbfba4a2db176c5625e97c2c4 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 29 Jan 2025 15:16:49 -0600 Subject: [PATCH 181/249] fix(nocode): fix no-code upgrade (#12494) --- datahub-upgrade/build.gradle | 12 +++++++++++ .../upgrade/config/OpenTelemetryConfig.java | 21 +++++++++++++++++++ .../upgrade/UpgradeCliApplicationTest.java | 8 +++++++ 3 files changed, 41 insertions(+) create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index a3b2e9ad6b3e22..05c7d0c7e3f7d0 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -152,6 +152,18 @@ task runRestoreIndicesUrn(type: Exec) { bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100", "-a", "urnBasedPagination=true" } +task runNoCode(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the NoCodeDataMigration process locally." 
+ environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + commandLine "java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n", + "-jar", + "-Dkafka.schemaRegistry.url=http://localhost:8080/schema-registry/api", + "-Dserver.port=8083", + bootJar.getArchiveFile().get(), "-u", "NoCodeDataMigration", "-a", "batchSize=100", "-a", "dbType=MYSQL" +} + docker { name "${docker_registry}/${docker_repo}:v${version}" version "v${version}" diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java new file mode 100644 index 00000000000000..3891c60dbfbd39 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/OpenTelemetryConfig.java @@ -0,0 +1,21 @@ +package com.linkedin.datahub.upgrade.config; + +import com.linkedin.gms.factory.system_telemetry.OpenTelemetryBaseFactory; +import io.datahubproject.metadata.context.TraceContext; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class OpenTelemetryConfig extends OpenTelemetryBaseFactory { + + @Override + protected String getApplicationComponent() { + return "datahub-upgrade"; + } + + @Bean + @Override + protected TraceContext traceContext() { + return super.traceContext(); + } +} diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java index 8b6899b4c78866..77aa6ecf8757cd 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.dao.throttle.NoOpSensor; import 
com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import io.datahubproject.metadata.context.TraceContext; import javax.inject.Named; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; @@ -38,6 +39,8 @@ public class UpgradeCliApplicationTest extends AbstractTestNGSpringContextTests @Autowired private ThrottleSensor kafkaThrottle; + @Autowired private TraceContext traceContext; + @Test public void testRestoreIndicesInit() { /* @@ -62,4 +65,9 @@ public void testNoThrottle() { assertEquals( new NoOpSensor(), kafkaThrottle, "No kafka throttle controls expected in datahub-upgrade"); } + + @Test + public void testTraceContext() { + assertNotNull(traceContext); + } } From 280e82a11d87aac867cab5a0fa9e14cf0ed5da0e Mon Sep 17 00:00:00 2001 From: Maggie Hays Date: Wed, 29 Jan 2025 15:21:45 -0600 Subject: [PATCH 182/249] Updated DataHub v1.0-rc template --- .github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md b/.github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md index 73da33b272e6f3..c9943509fe6206 100644 --- a/.github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md +++ b/.github/ISSUE_TEMPLATE/datahub-v1-0-rc-bug-report.md @@ -23,10 +23,10 @@ A clear and concise description of what you expected to happen. **Screenshots and/or Screen Recordings** If applicable, add screenshots and/or screen recordings to help explain the issue. -**Desktop (please complete the following information):** +**System details (please complete the following information):** + - DataHub Version Tag [e.g. v1.0-rc1] - OS: [e.g. iOS] - Browser [e.g. chrome, safari] - - Version [e.g. 22] **Additional context** Add any other context about the problem here. 
From c2a43589c79fe71f7651627ce21b5912093a00f1 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Wed, 29 Jan 2025 20:42:01 -0500 Subject: [PATCH 183/249] Bring Alchemy UI to OSS (#12481) --- .../linkedin/datahub/graphql/Constants.java | 1 + .../datahub/graphql/GmsGraphQLEngine.java | 75 +- .../datahub/graphql/GmsGraphQLEngineArgs.java | 2 + .../authorization/AuthorizationUtils.java | 16 + .../resolvers/config/AppConfigResolver.java | 21 +- .../UpdateDeprecationResolver.java | 8 + .../entity/EntityPrivilegesResolver.java | 34 + .../resolvers/incident/IncidentUtils.java | 25 + .../BatchUpdateDeprecationResolver.java | 9 +- .../UpdateDisplayPropertiesResolver.java | 106 +++ .../mutate/UpdateUserSettingResolver.java | 24 +- .../mutate/util/DeprecationUtils.java | 40 +- .../resolvers/mutate/util/OwnerUtils.java | 11 + .../resolvers/post/ListPostsResolver.java | 32 +- .../resolvers/query/ListQueriesResolver.java | 23 +- .../graphql/resolvers/search/SearchUtils.java | 90 ++ .../siblings/SiblingsSearchResolver.java | 66 ++ .../timeline/GetTimelineResolver.java | 80 ++ .../common/mappers/DeprecationMapper.java | 3 + .../mappers/DisplayPropertiesMapper.java | 47 + .../common/mappers/UrnToEntityMapper.java | 12 + .../types/container/ContainerType.java | 3 +- .../container/mappers/ContainerMapper.java | 9 + .../corpuser/mappers/CorpUserMapper.java | 1 + .../graphql/types/domain/DomainMapper.java | 10 + .../glossary/mappers/GlossaryNodeMapper.java | 7 + .../graphql/types/mappers/MapperUtils.java | 3 +- .../graphql/types/post/PostMapper.java | 5 + .../datahub/graphql/types/post/PostType.java | 80 ++ .../graphql/types/query/QueryMapper.java | 24 +- .../types/schemafield/SchemaFieldMapper.java | 13 + .../types/schemafield/SchemaFieldType.java | 6 +- .../timeline/mappers/ChangeEventMapper.java | 47 + .../mappers/ChangeTransactionMapper.java | 37 + .../src/main/resources/app.graphql | 72 ++ .../src/main/resources/auth.graphql | 80 ++ .../src/main/resources/entity.graphql | 188 +++- 
.../src/main/resources/query.graphql | 6 + .../src/main/resources/search.graphql | 7 +- .../src/main/resources/timeline.graphql | 112 ++- .../BatchUpdateDeprecationResolverTest.java | 15 +- .../UpdateDeprecationResolverTest.java | 142 ++- .../query/ListQueriesResolverTest.java | 9 +- datahub-web-react/.eslintrc.js | 8 + datahub-web-react/package.json | 11 + datahub-web-react/src/App.tsx | 1 + datahub-web-react/src/AppV2.less | 57 ++ datahub-web-react/src/Mocks.tsx | 381 +++++++- .../components/Avatar/types.ts | 2 +- .../components/Avatar/utils.ts | 1 + .../AvatarStack/AvatarStack.stories.tsx | 76 ++ .../components/AvatarStack/AvatarStack.tsx | 23 + .../components/AvatarStack/components.ts | 14 + .../components/AvatarStack/types.ts | 11 + .../components/Bar/Bar.stories.tsx | 92 ++ .../alchemy-components/components/Bar/Bar.tsx | 25 + .../components/Bar/components.ts | 15 + .../components/Bar/constant.ts | 5 + .../components/Bar/index.ts | 1 + .../components/Bar/types.ts | 5 + .../components/BarChart/BarChart.stories.tsx | 45 +- .../components/BarChart/BarChart.tsx | 124 ++- .../components/BarChart/components.tsx | 9 +- .../hooks/useAdaptYAccessorToZeroValues.ts | 18 + .../hooks/useAdaptYScaleToZeroValues.ts | 23 + .../BarChart/hooks/useMaxDataValue.ts | 6 + .../BarChart/hooks/useMergedProps.ts | 8 + .../components/BarChart/types.ts | 38 +- .../components/BarChart/utils.ts | 3 +- .../components/Button/Button.stories.tsx | 13 + .../components/Button/Button.tsx | 5 +- .../components/Button/types.ts | 3 +- .../CalendarChart/CalendarChart.stories.tsx | 82 ++ .../CalendarChart/CalendarChart.tsx | 80 ++ .../_tests_/getColorAccessors.test.ts | 82 ++ .../_tests_/prepareCalendarDate.test.ts | 109 +++ .../components/CalendarChart/components.tsx | 30 + .../components/CalendarChart/constants.ts | 1 + .../components/CalendarChart/index.ts | 2 + .../private/components/AxisBottomMonths.tsx | 34 + .../private/components/AxisLeftWeekdays.tsx | 35 + .../private/components/Calendar.tsx 
| 13 + .../private/components/CalendarContainer.tsx | 31 + .../CalendarChart/private/components/Day.tsx | 40 + .../private/components/Month.tsx | 28 + .../private/components/TickLabel.tsx | 13 + .../CalendarChart/private/components/Week.tsx | 21 + .../CalendarChart/private/constants.ts | 4 + .../CalendarChart/private/context.tsx | 109 +++ .../components/CalendarChart/types.ts | 81 ++ .../components/CalendarChart/utils.ts | 208 +++++ .../components/Card/Card.stories.tsx | 23 +- .../components/Card/Card.tsx | 61 +- .../components/Card/components.ts | 47 +- .../components/Card/types.ts | 3 + .../components/Checkbox/Checkbox.tsx | 9 +- .../components/Checkbox/components.ts | 2 +- .../components/Checkbox/types.ts | 2 +- .../DatePicker/DatePicker.stories.tsx | 61 ++ .../components/DatePicker/DatePicker.tsx | 54 ++ .../components/DatePicker/components.tsx | 32 + .../components/DatePicker/constants.ts | 4 + .../DatePicker/hooks/useVariantProps.ts | 15 + .../components/DatePicker/index.ts | 3 + .../components/DatePicker/styles.css | 15 + .../components/DatePicker/types.ts | 31 + .../DatePicker/variants/common/props.tsx | 7 + .../variants/dateSwitcher/components.tsx | 122 +++ .../variants/dateSwitcher/props.tsx | 14 + .../DatePicker/variants/dateSwitcher/types.ts | 4 + .../components/DatePicker/variants/index.ts | 4 + .../components/Drawer/Drawer.stories.tsx | 97 ++ .../components/Drawer/Drawer.tsx | 53 ++ .../components/Drawer/components.tsx | 19 + .../components/Drawer/constants.ts | 3 + .../components/Drawer/index.ts | 1 + .../components/Drawer/types.ts | 8 + .../GraphCard/GraphCard.stories.tsx | 91 ++ .../components/GraphCard/GraphCard.tsx | 90 ++ .../components/GraphCard/MoreInfoModal.tsx | 55 ++ .../components/GraphCard/components.tsx | 46 + .../components/GraphCard/index.ts | 1 + .../components/GraphCard/types.ts | 12 + .../IconLabel/IconLabel.stories.tsx | 95 ++ .../components/IconLabel/IconLabel.tsx | 33 + .../components/IconLabel/component.ts | 21 + 
.../components/IconLabel/index.ts | 1 + .../components/IconLabel/types.ts | 13 + .../components/IconLabel/utils.ts | 10 + .../IncidentPriorityLabel.stories.tsx | 85 ++ .../IncidentPriorityLabel.tsx | 43 + .../IncidentPriorityLabel/constant.ts | 7 + .../components/IncidentPriorityLabel/index.ts | 1 + .../components/IncidentPriorityLabel/types.ts | 5 + .../LineChart/LineChart.stories.tsx | 37 +- .../components/LineChart/LineChart.tsx | 180 ++-- .../components/LineChart/components.tsx | 28 + .../components/LineChart/constants.ts | 5 + .../components/LineChart/types.ts | 29 +- .../components/LineChart/utils.ts | 4 + .../components/Loader/Loader.tsx | 26 + .../components/Loader/Loading.stories.tsx | 82 ++ .../components/Loader/components.tsx | 34 + .../components/Loader/constants.ts | 16 + .../components/Loader/index.ts | 1 + .../components/Loader/types.ts | 11 + .../components/Pills/Pill.tsx | 12 +- .../components/Pills/components.ts | 2 + .../components/Pills/types.ts | 2 + .../components/SearchBar/SearchBar.tsx | 2 +- .../components/Select/BasicSelect.tsx | 98 +- .../components/Select/Nested/NestedOption.tsx | 49 +- .../components/Select/Nested/NestedSelect.tsx | 55 +- .../components/Select/Select.stories.tsx | 55 ++ .../components/Select/Select.tsx | 2 + .../components/Select/SimpleSelect.tsx | 121 ++- .../components/Select/components.ts | 117 ++- .../SelectLabelRenderer.tsx | 35 + .../variants/MultiSelectDefault.tsx | 37 + .../variants/MultiSelectLabeled.tsx | 14 + .../variants/SingleSelectDefault.tsx | 34 + .../variants/SingleSelectLabeled.tsx | 33 + .../components/Select/types.ts | 23 +- .../components/Select/utils.ts | 63 +- .../SelectItemCheckboxGroup.tsx | 49 + .../SelectItemsPopover/SelectItemPopover.tsx | 62 ++ .../SelectItems.stories.tsx | 160 ++++ .../SelectItemsPopover/SelectItems.tsx | 217 +++++ .../SelectItemsPopover/__mock.data.ts | 359 ++++++++ .../components/SelectItemsPopover/hooks.tsx | 168 ++++ .../components/SelectItemsPopover/index.ts | 1 + 
.../select-items-popover.less | 3 + .../components/SelectItemsPopover/types.ts | 0 .../components/Table/Table.stories.tsx | 209 ++++- .../components/Table/Table.tsx | 129 ++- .../components/Table/components.ts | 76 +- .../components/Table/types.ts | 23 + .../components/Table/utils.ts | 8 +- .../components/Timeline/Timeline.stories.tsx | 69 ++ .../components/Timeline/Timeline.tsx | 20 + .../components/Timeline/components.tsx | 14 + .../components/Timeline/index.ts | 1 + .../components/Timeline/types.ts | 17 + .../components/Tooltip2/Tooltip2.tsx | 60 ++ .../components/Tooltip2/TooltipHeader.tsx | 77 ++ .../components/Tooltip2/components.ts | 41 + .../components/Tooltip2/index.ts | 1 + .../components/Tooltip2/types.ts | 20 + .../src/alchemy-components/index.ts | 6 + .../alchemy-components/theme/config/types.ts | 2 +- .../theme/foundations/colors.ts | 1 + datahub-web-react/src/app/AppProviders.tsx | 5 +- .../src/app/EntityRegistryProvider.tsx | 6 +- datahub-web-react/src/app/ProtectedRoutes.tsx | 37 +- datahub-web-react/src/app/SearchRoutes.tsx | 74 +- datahub-web-react/src/app/analytics/event.ts | 116 ++- .../components/AnalyticsChart.tsx | 2 +- .../components/AnalyticsPage.tsx | 54 +- .../components/ChartCard.tsx | 4 +- .../src/app/auth/useGetLogoutHandler.ts | 17 + .../src/app/buildEntityRegistryV2.ts | 58 ++ .../src/app/dataviz/ChartCard.tsx | 55 ++ .../src/app/dataviz/ChartLoading.tsx | 3 + datahub-web-react/src/app/dataviz/Legend.tsx | 31 + .../src/app/dataviz/bar/BarChart.tsx | 108 +++ .../app/dataviz/bar/HorizontalBarChart.tsx | 157 ++++ .../dataviz/bar/HorizontalFullBarChart.tsx | 157 ++++ .../src/app/dataviz/candle/CandleStick.tsx | 79 ++ .../src/app/dataviz/components.ts | 24 + .../src/app/dataviz/constants.ts | 5 + datahub-web-react/src/app/dataviz/index.ts | 6 + .../src/app/dataviz/line/SimpleLineChart.tsx | 69 ++ .../src/app/dataviz/pie/PieChart.tsx | 90 ++ .../app/dataviz/pie/usePieDataAnnotation.ts | 36 + .../src/app/dataviz/stat/SingleStat.tsx | 5 + 
datahub-web-react/src/app/dataviz/utils.ts | 130 +++ .../src/app/domainV2/CreateDomainModal.tsx | 231 +++++ .../domainV2/DomainAutocompleteOptions.tsx | 53 ++ .../src/app/domainV2/DomainIcon.tsx | 11 + .../src/app/domainV2/DomainItemMenu.tsx | 65 ++ .../src/app/domainV2/DomainListColumns.tsx | 68 ++ .../src/app/domainV2/DomainRoutes.tsx | 58 ++ .../src/app/domainV2/DomainSearch.tsx | 123 +++ .../app/domainV2/DomainSearchResultItem.tsx | 68 ++ .../src/app/domainV2/DomainsContext.tsx | 17 + .../src/app/domainV2/DomainsList.tsx | 208 +++++ .../app/domainV2/EmptyDomainDescription.tsx | 39 + .../src/app/domainV2/EmptyDomainsSection.tsx | 69 ++ .../src/app/domainV2/ManageDomainsPage.tsx | 44 + .../src/app/domainV2/__tests__/utils.test.ts | 179 ++++ .../nestedDomains/DomainsSidebarHeader.tsx | 56 ++ .../nestedDomains/ManageDomainsPageV2.tsx | 68 ++ .../nestedDomains/ManageDomainsSidebar.tsx | 123 +++ .../domainV2/nestedDomains/RootDomains.tsx | 57 ++ .../domainNavigator/DomainNavigator.tsx | 50 ++ .../domainNavigator/DomainNode.tsx | 234 +++++ .../src/app/domainV2/useListDomains.tsx | 27 + datahub-web-react/src/app/domainV2/utils.ts | 146 +++ datahub-web-react/src/app/entity/Entity.tsx | 17 +- .../src/app/entity/EntityRegistry.tsx | 10 +- .../src/app/entity/dataset/DatasetEntity.tsx | 2 +- .../entity/dataset/profile/OperationsTab.tsx | 5 +- .../dataset/profile/__tests__/Schema.test.tsx | 2 +- .../stats/historical/charts/StatChart.tsx | 2 +- .../domain/preview/DomainEntitiesSnippet.tsx | 12 +- .../app/entity/group/AddGroupMembersModal.tsx | 2 +- .../entity/group/GroupOwnerSideBarSection.tsx | 17 +- .../src/app/entity/query/QueryEntity.tsx | 97 ++ .../src/app/entity/shared/EntityContext.ts | 9 +- .../shared/__tests__/siblingsUtils.test.ts | 15 + .../styled/ExpandedOwner/ExpandedOwner.tsx | 6 +- .../containers/profile/EntityProfile.tsx | 2 +- .../DataProduct/DataProductSection.tsx | 3 +- .../sidebar/Ownership/EditOwnersModal.tsx | 8 +- .../sidebar/OwnershipTypeSection.tsx | 
1 + .../sidebar/SidebarSiblingsSection.tsx | 2 +- .../entity/shared/containers/profile/utils.ts | 9 +- .../entity/shared/embed/EmbeddedProfile.tsx | 2 +- .../entity/shared/entityForm/FormByEntity.tsx | 15 +- .../shared/entityForm/__tests__/Form.test.tsx | 2 +- .../src/app/entity/shared/siblingUtils.ts | 277 +++++- .../Schema/components/PropertyTypeLabel.tsx | 15 +- .../tabs/Dataset/Validations/acrylTypes.tsx | 21 + .../__tests__/DocumentationTab.test.tsx | 2 +- .../Entity/__tests__/DataJobFlowTab.test.tsx | 2 +- .../shared/tabs/Incident/incidentUtils.ts | 1 + .../tabs/Properties/AddPropertyButton.tsx | 16 +- .../src/app/entity/shared/types.ts | 23 +- .../src/app/entity/shared/useEntityState.ts | 7 + .../src/app/entity/shared/utils.ts | 6 +- .../src/app/entityV2/Access/RoleEntity.tsx | 99 +++ .../app/entityV2/Access/RoleEntityProfile.tsx | 75 ++ .../src/app/entityV2/DefaultEntity.tsx | 37 + datahub-web-react/src/app/entityV2/Entity.tsx | 240 +++++ .../src/app/entityV2/EntityPage.tsx | 124 +++ .../src/app/entityV2/EntityRegistry.tsx | 369 ++++++++ .../BusinessAttributeEntity.tsx | 157 ++++ .../businessAttribute/preview/Preview.tsx | 40 + .../preview/_tests_/Preview.test.tsx | 26 + .../BusinessAttributeDataTypeSection.tsx | 98 ++ .../BusinessAttributeRelatedEntity.tsx | 43 + .../src/app/entityV2/chart/ChartEntity.tsx | 391 ++++++++ .../entityV2/chart/preview/ChartPreview.tsx | 137 +++ .../stats/ChartStatsSummarySubHeader.tsx | 26 + .../chart/shared/ChartStatsSummary.tsx | 113 +++ .../chart/summary/ChartFieldsTable.tsx | 121 +++ .../chart/summary/ChartSummaryOverview.tsx | 118 +++ .../chart/summary/ChartSummaryTab.tsx | 67 ++ .../entityV2/chart/summary/EmbedPreview.tsx | 28 + .../chart/summary/FieldTableByTag.tsx | 33 + .../chart/summary/SummaryQuerySection.tsx | 75 ++ .../entityV2/chart/summary/TableauEmbed.tsx | 40 + .../chart/summary/styledComponents.ts | 32 + .../entityV2/chart/summary/useGetTagFields.ts | 16 + .../container/ContainerEntitiesTab.tsx | 25 + 
.../entityV2/container/ContainerEntity.tsx | 279 ++++++ .../container/ContainerSummaryTab.tsx | 15 + .../entityV2/container/preview/Preview.tsx | 110 +++ .../tableau/TableauDataSourcesSection.tsx | 56 ++ .../container/tableau/TableauViewsSection.tsx | 55 ++ .../tableau/TableauWorkbookSummaryTab.tsx | 15 + .../entityV2/dashboard/DashboardEntity.tsx | 404 +++++++++ .../dashboard/preview/DashboardPreview.tsx | 143 +++ .../DashboardStatsSummarySubHeader.tsx | 28 + .../shared/DashboardStatsSummary.tsx | 117 +++ .../summary/DashboardSummaryOverview.tsx | 157 ++++ .../dashboard/summary/DashboardSummaryTab.tsx | 33 + .../app/entityV2/dataFlow/DataFlowEntity.tsx | 263 ++++++ .../app/entityV2/dataFlow/preview/Preview.tsx | 110 +++ .../app/entityV2/dataJob/DataJobEntity.tsx | 329 +++++++ .../app/entityV2/dataJob/preview/Preview.tsx | 122 +++ .../src/app/entityV2/dataJob/tabs/RunsTab.tsx | 168 ++++ .../dataPlatform/DataPlatformEntity.tsx | 77 ++ .../DataPlatformInstanceEntity.tsx | 65 ++ .../DataProcessInstanceEntity.tsx | 265 ++++++ .../dataProcessInstance/preview/Preview.tsx | 103 +++ .../dataProduct/AddOutputPortCard.tsx | 23 + .../entityV2/dataProduct/AssetsSections.tsx | 95 ++ .../dataProduct/DataProductEntitiesTab.tsx | 23 + .../dataProduct/DataProductEntity.tsx | 268 ++++++ .../dataProduct/DataProductSummaryTab.tsx | 15 + .../dataProduct/OutputPortsSection.tsx | 112 +++ .../src/app/entityV2/dataProduct/constants.ts | 1 + .../generateUseListDataProductAssets.ts | 23 + .../generateUseListDataProductAssetsCount.ts | 16 + .../entityV2/dataProduct/preview/Preview.tsx | 70 ++ .../app/entityV2/dataset/DatasetEntity.tsx | 517 +++++++++++ .../app/entityV2/dataset/preview/Preview.tsx | 155 ++++ .../app/entityV2/dataset/profile/Lineage.tsx | 62 ++ .../dataset/profile/OperationsTab.tsx | 247 ++++++ .../dataset/profile/UsageFacepile.tsx | 42 + .../profile/__tests__/Lineage.test.tsx | 22 + .../profile/__tests__/Properties.test.tsx | 24 + .../dataset/profile/__tests__/Schema.test.tsx 
| 398 +++++++++ .../__tests__/SchemaDescriptionField.test.tsx | 66 ++ .../dataset/profile/__tests__/Stats.test.tsx | 155 ++++ .../schema/translateFieldPath.test.tsx | 33 + .../schema/translateFieldPathSegment.test.tsx | 39 + .../profile/__tests__/schema/utils.test.tsx | 49 + .../schema/components/CustomPagination.tsx | 113 +++ .../components/InteriorTitleContent.tsx | 132 +++ .../components/SchemaDescriptionField.tsx | 255 ++++++ .../components/SchemaFilterSelectContent.tsx | 71 ++ .../schema/components/SchemaHeader.tsx | 210 +++++ .../schema/components/SchemaRawView.tsx | 36 + .../profile/schema/components/SchemaRow.tsx | 13 + .../schema/components/SchemaSearchInput.tsx | 175 ++++ .../components/SchemaVersionSummary.tsx | 52 ++ .../components/StructuredPropValues.tsx | 69 ++ .../profile/schema/components/TypeIcon.tsx | 113 +++ .../schema/components/VersionSelector.tsx | 153 ++++ .../dataset/profile/schema/utils/constants.ts | 4 + .../schema/utils/schemaTitleRenderer.tsx | 24 + .../schema/utils/schemaTypeRenderer.tsx | 41 + .../schema/utils/translateFieldPath.tsx | 24 + .../utils/translateFieldPathSegment.tsx | 53 ++ .../dataset/profile/schema/utils/types.ts | 16 + .../dataset/profile/schema/utils/utils.ts | 283 ++++++ .../entityV2/dataset/profile/stats/Stats.tsx | 40 + .../dataset/profile/stats/StatsSection.tsx | 34 + .../stats/historical/HistoricalStatsView.tsx | 230 +++++ .../historical/charts/ProfilingRunsChart.tsx | 91 ++ .../stats/historical/charts/StatChart.tsx | 96 ++ .../stats/snapshot/LatestStatsView.tsx | 36 + .../stats/snapshot/SnapshotStatsView.tsx | 180 ++++ .../stats/DatasetStatsSummarySubHeader.tsx | 48 + .../dataset/profile/stories/documentation.ts | 13 + .../profile/stories/lineageEntities.ts | 101 +++ .../dataset/profile/stories/properties.ts | 10 + .../dataset/profile/stories/sampleSchema.ts | 366 ++++++++ .../entityV2/dataset/profile/stories/stats.ts | 73 ++ .../dataset/shared/DatasetStatsSummary.tsx | 122 +++ 
.../entityV2/dataset/shared/ExpandingStat.tsx | 47 + .../dataset/shared/FormattedBytesStat.tsx | 12 + .../CreateDataProductModal.tsx | 80 ++ .../DataProductBuilderForm.tsx | 52 ++ .../DataProductsTab/DataProductResult.tsx | 91 ++ .../DataProductsTab/DataProductsTab.tsx | 171 ++++ .../DataProductsTab/EditDataProductModal.tsx | 69 ++ .../entityV2/domain/DataProductsTab/types.ts | 4 + .../app/entityV2/domain/DomainEntitiesTab.tsx | 36 + .../src/app/entityV2/domain/DomainEntity.tsx | 236 +++++ .../domain/preview/DomainEntitiesSnippet.tsx | 45 + .../app/entityV2/domain/preview/Preview.tsx | 66 ++ .../domain/summary/ContentSectionLoading.tsx | 24 + .../domain/summary/ContentsSection.tsx | 102 +++ .../domain/summary/DataProductsSection.tsx | 93 ++ .../domain/summary/DocumentationSection.tsx | 98 ++ .../domain/summary/DomainSummaryTab.tsx | 17 + .../entityV2/domain/summary/OwnerDetail.tsx | 56 ++ .../entityV2/domain/summary/OwnersSection.tsx | 89 ++ .../app/entityV2/glossaryNode/ChildrenTab.tsx | 39 + .../glossaryNode/GlossaryNodeEntity.tsx | 202 +++++ .../glossaryNode/_tests_/utils.test.ts | 65 ++ .../entityV2/glossaryNode/preview/Preview.tsx | 45 + .../src/app/entityV2/glossaryNode/utils.ts | 8 + .../GlossaryRelatedAssetsTabHeader.tsx | 54 ++ .../glossaryTerm/GlossaryTermEntity.tsx | 244 +++++ .../glossaryTerm/_tests_/utils.test.ts | 83 ++ .../entityV2/glossaryTerm/preview/Preview.tsx | 57 ++ .../preview/__tests__/Preview.test.tsx | 27 + .../profile/AddRelatedTermsModal.tsx | 218 +++++ .../profile/GlossaryRelatedEntity.tsx | 56 ++ .../profile/GlossaryRelatedTerms.tsx | 70 ++ .../profile/GlossaryRelatedTermsResult.tsx | 114 +++ .../profile/GlossarySidebarAboutSection.tsx | 53 ++ .../profile/GlossaryTermHeader.tsx | 31 + .../glossaryTerm/profile/RelatedTerm.tsx | 83 ++ .../glossaryTerm/profile/SchemaView.tsx | 30 + .../__tests__/GlossaryRelatedTerms.test.tsx | 20 + .../__tests__/GlossaryTermHeader.test.tsx | 31 + .../profile/useRemoveRelatedTerms.tsx | 60 ++ 
.../src/app/entityV2/glossaryTerm/utils.ts | 16 + .../entityV2/group/AddGroupMembersModal.tsx | 213 +++++ .../src/app/entityV2/group/Group.tsx | 79 ++ .../src/app/entityV2/group/GroupAssets.tsx | 28 + .../entityV2/group/GroupBasicInfoSection.tsx | 36 + .../src/app/entityV2/group/GroupEditModal.tsx | 153 ++++ .../entityV2/group/GroupInfoHeaderSection.tsx | 62 ++ .../app/entityV2/group/GroupMemberLink.tsx | 37 + .../src/app/entityV2/group/GroupMembers.tsx | 254 ++++++ .../GroupMembersSidebarSectionContent.tsx | 41 + .../group/GroupOwnerSidebarSectionContent.tsx | 63 ++ .../src/app/entityV2/group/GroupProfile.tsx | 224 +++++ .../entityV2/group/GroupProfileInfoCard.tsx | 144 +++ .../src/app/entityV2/group/GroupSidebar.tsx | 64 ++ .../group/GroupSidebarMembersSection.tsx | 20 + .../group/GroupSidebarOwnersSection.tsx | 41 + .../app/entityV2/group/preview/Preview.tsx | 111 +++ .../entityV2/mlFeature/MLFeatureEntity.tsx | 251 ++++++ .../entityV2/mlFeature/preview/Preview.tsx | 72 ++ .../mlFeatureTable/MLFeatureTableEntity.tsx | 229 +++++ .../mlFeatureTable/preview/Preview.tsx | 63 ++ .../mlFeatureTable/profile/Sources.tsx | 81 ++ .../features/MlFeatureDataTypeIcon.tsx | 95 ++ .../features/MlFeatureTableFeatures.tsx | 23 + .../profile/features/TableOfMlFeatures.tsx | 160 ++++ .../app/entityV2/mlModel/MLModelEntity.tsx | 223 +++++ .../app/entityV2/mlModel/preview/Preview.tsx | 53 ++ .../mlModel/profile/MLModelGroupsTab.tsx | 48 + .../mlModel/profile/MLModelSummary.tsx | 47 + .../mlModel/profile/MlModelFeaturesTab.tsx | 17 + .../mlModelGroup/MLModelGroupEntity.tsx | 210 +++++ .../entityV2/mlModelGroup/preview/Preview.tsx | 51 ++ .../mlModelGroup/profile/ModelGroupModels.tsx | 32 + .../mlPrimaryKey/MLPrimaryKeyEntity.tsx | 235 +++++ .../entityV2/mlPrimaryKey/preview/Preview.tsx | 61 ++ .../entityV2/ownership/ManageOwnership.tsx | 49 + .../ownership/OwnershipBuilderModal.tsx | 225 +++++ .../app/entityV2/ownership/OwnershipList.tsx | 132 +++ 
.../ownership/table/ActionsColumn.tsx | 128 +++ .../ownership/table/DescriptionColumn.tsx | 19 + .../entityV2/ownership/table/NameColumn.tsx | 19 + .../ownership/table/OwnershipTable.tsx | 60 ++ .../src/app/entityV2/ownership/table/types.ts | 14 + .../src/app/entityV2/query/QueryEntity.tsx | 126 +++ .../schemaField/SchemaFieldEntity.tsx | 114 +++ .../schemaField/__tests__/utils.test.ts | 22 + .../entityV2/schemaField/preview/Preview.tsx | 50 ++ .../src/app/entityV2/schemaField/utils.ts | 22 + .../src/app/entityV2/shared/ActorAvatar.tsx | 112 +++ .../CreateGlossaryEntityModal.tsx | 243 +++++ .../EntityDropdown/DeleteEntityMenuAction.tsx | 63 ++ .../EntityDropdown/DomainParentSelect.tsx | 107 +++ .../shared/EntityDropdown/EntityDropdown.tsx | 389 ++++++++ .../EntityDropdown/EntityMenuActions.tsx | 97 ++ .../EntityDropdown/ExternalUrlMenuAction.tsx | 9 + .../EntityDropdown/MoreOptionsMenuAction.tsx | 43 + .../shared/EntityDropdown/MoveDomainModal.tsx | 105 +++ .../EntityDropdown/MoveEntityMenuAction.tsx | 49 + .../MoveGlossaryEntityModal.tsx | 106 +++ .../EntityDropdown/NodeParentSelect.tsx | 86 ++ .../RaiseIncidentMenuAction.tsx | 50 ++ .../UpdateDeprecationMenuAction.tsx | 71 ++ .../EntityDropdown/UpdateDeprecationModal.tsx | 188 ++++ .../__tests__/NodeParentSelect.test.tsx | 24 + .../entityV2/shared/EntityDropdown/index.tsx | 3 + .../EntityDropdown/styledComponents.tsx | 39 + .../shared/EntityDropdown/useDeleteEntity.tsx | 99 +++ .../useDeleteGlossaryEntity.tsx | 55 ++ .../EntityDropdown/useHandleDeleteDomain.ts | 27 + .../useHandleMoveDomainComplete.ts | 36 + .../EntityDropdown/useParentSelector.ts | 79 ++ .../entityV2/shared/EntityDropdown/utils.ts | 61 ++ .../src/app/entityV2/shared/EntityGroups.tsx | 55 ++ .../EntitySearchInput/EntitySearchInput.tsx | 179 ++++ .../EntitySearchInputResult.tsx | 31 + .../entityV2/shared/GlossaryEntityContext.tsx | 46 + .../app/entityV2/shared/PreviewContext.tsx | 9 + .../app/entityV2/shared/SearchCardContext.tsx | 15 + 
.../shared/SidebarStyledComponents.tsx | 400 +++++++++ .../shared/StyledSyntaxHighlighter.tsx | 8 + .../src/app/entityV2/shared/UrlButton.tsx | 37 + .../shared/__tests__/DeprecationPill.test.tsx | 85 ++ .../entityV2/shared/__tests__/utils.test.ts | 173 ++++ .../CreateEntityAnnouncementModal.tsx | 261 ++++++ .../components/ListSearch/AcrylListSearch.tsx | 118 +++ .../components/legacy/DescriptionModal.tsx | 85 ++ .../shared/components/legacy/Properties.tsx | 30 + .../shared/components/styled/AddLinkModal.tsx | 141 +++ .../shared/components/styled/DemoButton.tsx | 22 + .../components/styled/DeprecationIcon.tsx | 224 +++++ .../shared/components/styled/EmptyTab.tsx | 48 + .../components/styled/ExpandedActor.tsx | 50 ++ .../components/styled/ExpandedActorGroup.tsx | 57 ++ .../styled/ExpandedOwner/ExpandedOwner.tsx | 119 +++ .../styled/ExpandedOwner/OwnerContent.tsx | 143 +++ .../shared/components/styled/InfoItem.tsx | 37 + .../styled/MarkAsDeprecatedButton.tsx | 61 ++ .../shared/components/styled/SeeMore.tsx | 12 + .../shared/components/styled/StatsSummary.tsx | 36 + .../components/styled/StripMarkdownText.tsx | 59 ++ .../shared/components/styled/StyledButton.tsx | 17 + .../components/styled/StyledMDEditor.tsx | 37 + .../shared/components/styled/StyledTable.tsx | 65 ++ .../shared/components/styled/StyledTag.tsx | 59 ++ .../shared/components/styled/TabToolbar.tsx | 14 + .../styled/search/DownloadAsCsvModal.tsx | 196 ++++ .../styled/search/EmbeddedListSearch.tsx | 413 +++++++++ .../styled/search/EmbeddedListSearchEmbed.tsx | 116 +++ .../search/EmbeddedListSearchHeader.tsx | 137 +++ .../styled/search/EmbeddedListSearchModal.tsx | 123 +++ .../search/EmbeddedListSearchResults.tsx | 295 ++++++ .../search/EmbeddedListSearchSection.tsx | 176 ++++ .../styled/search/EntitySearchResults.tsx | 194 ++++ .../styled/search/MatchingViewsLabel.tsx | 34 + .../components/styled/search/SearchSelect.tsx | 212 +++++ .../styled/search/SearchSelectActions.tsx | 137 +++ 
.../styled/search/SearchSelectBar.tsx | 133 +++ .../styled/search/SearchSelectModal.tsx | 98 ++ .../styled/search/action/ActionDropdown.tsx | 75 ++ .../search/action/DataProductsDropdown.tsx | 89 ++ .../styled/search/action/DeleteDropdown.tsx | 70 ++ .../search/action/DeprecationDropdown.tsx | 86 ++ .../styled/search/action/DomainsDropdown.tsx | 85 ++ .../search/action/GlossaryTermsDropdown.tsx | 55 ++ .../styled/search/action/OwnersDropdown.tsx | 51 ++ .../styled/search/action/TagsDropdown.tsx | 55 ++ .../styled/search/downloadAsCsvUtil.ts | 103 +++ .../search/navigateToEntitySearchUrl.ts | 46 + .../shared/components/styled/search/types.ts | 60 ++ .../search/useInitializeColumnLineageCards.ts | 26 + .../search/useInitializeSearchResultCards.ts | 35 + .../entityV2/shared/components/subtypes.tsx | 66 ++ .../src/app/entityV2/shared/constants.ts | 231 +++++ .../containers/profile/EntityProfile.tsx | 422 +++++++++ .../profile/__tests__/EntityHeader.test.tsx | 95 ++ .../profile/__tests__/EntityProfile.test.tsx | 474 ++++++++++ .../__tests__/PlatformContent.test.tsx | 134 +++ .../profile/header/DefaultEntityHeader.tsx | 250 ++++++ .../containers/profile/header/EntityCount.tsx | 36 + .../profile/header/EntityHeader.tsx | 79 ++ .../header/EntityHeaderLoadingSection.tsx | 32 + .../profile/header/EntityHealthPopover.tsx | 83 ++ .../profile/header/EntityHealthStatus.tsx | 49 + .../containers/profile/header/EntityName.tsx | 123 +++ .../header/EntityPlatformLoadingSection.tsx | 20 + .../containers/profile/header/EntityTabs.tsx | 127 +++ .../header/GlossaryPreviewCardDecoration.tsx | 36 + .../header/IconPicker/IconColorPicker.tsx | 138 +++ .../profile/header/IconPicker/IconPicker.tsx | 115 +++ .../header/PlatformContent/ContainerIcon.tsx | 33 + .../header/PlatformContent/ContainerLink.tsx | 49 + .../PlatformContent/ParentNodesView.tsx | 96 ++ .../ParentNodesViewForSearchRedesign.tsx | 96 ++ .../PlatformContentContainer.tsx | 50 ++ .../PlatformContent/PlatformContentView.tsx | 
157 ++++ .../PlatformContent/PlatformHeaderIcons.tsx | 53 ++ .../PlatformContent/StackPlatformImages.tsx | 76 ++ .../header/PlatformContent/constants.ts | 0 .../profile/header/PlatformContent/index.ts | 3 + .../header/StructuredPropertyBadge.tsx | 93 ++ .../shared/containers/profile/header/utils.ts | 29 + .../AboutSection/DescriptionSection.tsx | 31 + .../AboutSection/EmptyContentSection.tsx | 45 + .../sidebar/AboutSection/LinksSection.tsx | 36 + .../AboutSection/SidebarAboutSection.tsx | 104 +++ .../sidebar/AboutSection/SourceRefSection.tsx | 37 + .../Header/SidebarChartHeaderSection.tsx | 113 +++ .../profile/sidebar/Chart/Header/utils.ts | 8 + .../Container/ContainerSelectModal.tsx | 190 ++++ .../SidebarContentsLoadingSection.tsx | 9 + .../Container/SidebarContentsSection.tsx | 81 ++ .../profile/sidebar/Container/utils.tsx | 111 +++ .../Header/SidebarDashboardHeaderSection.tsx | 115 +++ .../DataProduct/DataProductSection.tsx | 117 +++ .../DataProduct/SetDataProductModal.tsx | 206 +++++ .../Header/SidebarDatasetHeaderSection.tsx | 158 ++++ .../sidebar/Dataset/StatsSidebarSection.tsx | 134 +++ .../View/SidebarViewDefinitionSection.tsx | 57 ++ .../profile/sidebar/Domain/SetDomainModal.tsx | 210 +++++ .../Domain/SidebarDataProductsSection.tsx | 76 ++ .../sidebar/Domain/SidebarDomainSection.tsx | 125 +++ .../Domain/SidebarEntitiesLoadingSection.tsx | 10 + .../sidebar/Domain/SidebarEntitiesSection.tsx | 91 ++ .../profile/sidebar/Domain/utils.tsx | 137 +++ .../profile/sidebar/EmptySectionText.tsx | 21 + .../profile/sidebar/EntityBackButton.tsx | 25 + .../profile/sidebar/EntityInfo/EntityInfo.tsx | 66 ++ .../profile/sidebar/EntityProfileSidebar.tsx | 176 ++++ .../EntityProfileSidebarSearchHeader.tsx | 54 ++ .../profile/sidebar/EntitySidebar.tsx | 65 ++ .../sidebar/EntitySidebarSectionsTab.tsx | 51 ++ .../profile/sidebar/EntitySidebarTabs.tsx | 115 +++ .../profile/sidebar/ExploreLineageAction.tsx | 62 ++ .../sidebar/FormInfo/CompletedView.tsx | 87 ++ 
.../profile/sidebar/FormInfo/FormInfo.tsx | 49 + .../sidebar/FormInfo/IncompleteView.tsx | 108 +++ .../FormInfo/SidebarFormInfoWrapper.tsx | 62 ++ .../profile/sidebar/FormInfo/components.ts | 104 +++ .../profile/sidebar/HeaderAndTabs.tsx | 68 ++ .../profile/sidebar/LastIngested.tsx | 181 ++++ .../Lineage/SidebarLineageLoadingSection.tsx | 10 + .../sidebar/Lineage/SidebarLineageSection.tsx | 159 ++++ .../profile/sidebar/Lineage/utils.tsx | 126 +++ .../containers/profile/sidebar/LinkButton.tsx | 38 + .../sidebar/Ownership/EditOwnersModal.tsx | 431 +++++++++ .../sidebar/Ownership/LdapFormItem.tsx | 65 ++ .../Ownership/OwnershipTypesSelect.tsx | 43 + .../sidebar/Ownership/ownershipUtils.ts | 46 + .../sidebar/OwnershipTypeSection.tsx | 58 ++ .../Ownership/sidebar/SidebarOwnerSection.tsx | 129 +++ .../sidebar/Platform/SelectPlatformModal.tsx | 187 ++++ .../profile/sidebar/ProfileSidebarResizer.tsx | 56 ++ .../Query/SidebarQueryCreatedAtSection.tsx | 18 + .../Query/SidebarQueryDefinitionSection.tsx | 55 ++ .../Query/SidebarQueryDescriptionSection.tsx | 31 + .../Query/SidebarQueryOperationsSection.tsx | 120 +++ .../Query/SidebarQueryUpdatedAtSection.tsx | 18 + .../SidebarEntityRecommendations.tsx | 62 ++ .../SidebarRecommendationsSection.tsx | 19 + .../profile/sidebar/SectionActionButton.tsx | 53 ++ .../profile/sidebar/SidebarCollapseIcon.tsx | 59 ++ .../sidebar/SidebarCollapsibleHeader.tsx | 102 +++ .../sidebar/SidebarCompactSchemaSection.tsx | 13 + .../profile/sidebar/SidebarEntityHeader.tsx | 106 +++ .../sidebar/SidebarGlossaryTermsSection.tsx | 79 ++ .../profile/sidebar/SidebarHeader.tsx | 30 + .../sidebar/SidebarHeaderSectionColumns.tsx | 68 ++ .../profile/sidebar/SidebarLogicSection.tsx | 184 ++++ .../profile/sidebar/SidebarSection.tsx | 119 +++ .../sidebar/SidebarSiblingsSection.tsx | 114 +++ .../profile/sidebar/SidebarTagsSection.tsx | 83 ++ .../profile/sidebar/TitleAction.tsx | 12 + .../sidebar/__tests__/LastIngested.test.tsx | 41 + 
.../__tests__/SidebarLogicSection.test.tsx | 232 +++++ .../profile/sidebar/shared/EntityProperty.tsx | 31 + .../shared/SidebarPopularityHeaderSection.tsx | 178 ++++ .../shared/SidebarTopUsersHeaderSection.tsx | 18 + .../profile/sidebar/shared/StatusSection.tsx | 163 ++++ .../profile/sidebar/shared/SyncedOrShared.tsx | 90 ++ .../sidebar/shared/SyncedOrSharedTooltip.tsx | 66 ++ .../profile/sidebar/shared/TimeProperty.tsx | 42 + .../sidebar/shared/TopUsersFacepile.tsx | 38 + .../shared/popularity/PopularityIcon.tsx | 24 + .../shared/popularity/PopularityIconBar.tsx | 31 + .../sidebar/shared/styledComponents.ts | 45 + .../profile/sidebar/shared/utils.tsx | 132 +++ .../profile/sidebar/tagRenderer.tsx | 25 + .../profile/useGetDataForProfile.ts | 58 ++ .../shared/containers/profile/utils.tsx | 298 +++++++ .../entityV2/shared/embed/EmbeddedHeader.tsx | 107 +++ .../shared/embed/EmbeddedHealthIcon.tsx | 64 ++ .../entityV2/shared/embed/EmbeddedProfile.tsx | 96 ++ .../embed/UpstreamHealth/ActiveIncidents.tsx | 55 ++ .../UpstreamHealth/FailingAssertions.tsx | 82 ++ .../embed/UpstreamHealth/FailingEntity.tsx | 60 ++ .../embed/UpstreamHealth/FailingInputs.tsx | 111 +++ .../UpstreamHealth/UpstreamEntitiesList.tsx | 93 ++ .../embed/UpstreamHealth/UpstreamHealth.tsx | 220 +++++ .../UpstreamHealth/__tests__/utils.test.tsx | 75 ++ .../shared/embed/UpstreamHealth/utils.ts | 62 ++ .../entityV2/shared/entity/EntityActions.tsx | 336 +++++++ .../shared/entity/NonExistentEntityPage.tsx | 6 + .../entityV2/shared/entityForm/EntityForm.tsx | 15 + .../shared/entityForm/EntityFormModal.tsx | 59 ++ .../shared/externalUrl/ViewInPlatform.tsx | 99 +++ .../shared/links/DataProductMiniPreview.tsx | 80 ++ .../DataProductMiniPreviewAddDataProduct.tsx | 62 ++ .../shared/links/DomainColoredIcon.tsx | 63 ++ .../shared/links/DomainMiniPreview.tsx | 69 ++ .../shared/links/EntityExternalLink.tsx | 14 + .../shared/links/GlossaryTermMiniPreview.tsx | 77 ++ .../links/__tests__/colorUtils.test.tsx | 38 + 
.../entityV2/shared/notes/NotesSection.tsx | 232 +++++ .../sidebarSection/AboutSidebarSection.tsx | 77 ++ .../shared/sidebarSection/ShowMoreSection.tsx | 18 + .../sidebarSection/SidebarNotesSection.tsx | 15 + .../SidebarStructuredProperties.tsx | 158 ++++ .../UserGroupSidebarSection.tsx | 65 ++ .../UserOwnershipSideBarSection.tsx | 59 ++ .../entityV2/shared/stats/PercentileLabel.tsx | 40 + .../stats/__tests__/statsUtils.test.tsx | 36 + .../app/entityV2/shared/stats/statsUtils.ts | 28 + .../shared/summary/HeaderComponents.tsx | 67 ++ .../shared/summary/IconComponents.tsx | 19 + .../shared/summary/ListComponents.tsx | 56 ++ .../shared/summary/SummaryAboutSection.tsx | 94 ++ .../summary/SummaryCreatedBySection.tsx | 60 ++ .../AccessManagement/AccessManagement.tsx | 117 +++ .../AccessManagerDescription.tsx | 38 + .../__tests__/AccessManagement.test.ts | 279 ++++++ .../tabs/Dataset/AccessManagement/utils.tsx | 27 + .../tabs/Dataset/Governance/TestResults.tsx | 30 + .../Dataset/Governance/TestResultsList.tsx | 112 +++ .../Dataset/Governance/TestResultsSummary.tsx | 87 ++ .../tabs/Dataset/Governance/testUtils.tsx | 49 + .../shared/tabs/Dataset/Queries/AddButton.tsx | 45 + .../shared/tabs/Dataset/Queries/CopyQuery.tsx | 28 + .../Dataset/Queries/EmptyQueriesSection.tsx | 110 +++ .../Dataset/Queries/QueriesListSection.tsx | 263 ++++++ .../tabs/Dataset/Queries/QueriesTab.tsx | 207 +++++ .../shared/tabs/Dataset/Queries/Query.tsx | 81 ++ .../tabs/Dataset/Queries/QueryBuilderForm.tsx | 94 ++ .../Dataset/Queries/QueryBuilderModal.tsx | 173 ++++ .../shared/tabs/Dataset/Queries/QueryCard.tsx | 79 ++ .../tabs/Dataset/Queries/QueryCardDetails.tsx | 133 +++ .../Dataset/Queries/QueryCardDetailsMenu.tsx | 65 ++ .../Dataset/Queries/QueryCardEditButton.tsx | 24 + .../tabs/Dataset/Queries/QueryCardHeader.tsx | 41 + .../tabs/Dataset/Queries/QueryCardQuery.tsx | 64 ++ .../Queries/QueryFilters/QueryFilters.tsx | 106 +++ .../Queries/QueryFilters/useColumnsFilter.ts | 83 ++ 
.../Queries/QueryFilters/useUsersFilter.ts | 56 ++ .../tabs/Dataset/Queries/QueryModal.tsx | 107 +++ .../shared/tabs/Dataset/Queries/cacheUtils.ts | 139 +++ .../tabs/Dataset/Queries/queryColumns.tsx | 225 +++++ .../shared/tabs/Dataset/Queries/types.ts | 29 + .../Dataset/Queries/useDownstreamQueries.ts | 79 ++ .../Dataset/Queries/useHighlightedQueries.ts | 50 ++ .../tabs/Dataset/Queries/usePopularQueries.ts | 85 ++ .../Dataset/Queries/useQueryTableColumns.tsx | 228 +++++ .../tabs/Dataset/Queries/useRecentQueries.ts | 53 ++ .../tabs/Dataset/Queries/utils/constants.ts | 20 + .../Dataset/Queries/utils/filterQueries.ts | 36 + .../Dataset/Queries/utils/getCurrentPage.ts | 8 + .../Dataset/Queries/utils/getTopNQueries.ts | 34 + .../tabs/Dataset/Queries/utils/mapQuery.ts | 28 + .../Dataset/Schema/CompactSchemaTable.tsx | 242 +++++ .../tabs/Dataset/Schema/SchemaContext.tsx | 13 + .../shared/tabs/Dataset/Schema/SchemaTab.tsx | 277 ++++++ .../tabs/Dataset/Schema/SchemaTable.tsx | 536 +++++++++++ .../Schema/__tests__/filterSchemaRows.test.ts | 286 ++++++ .../Schema/components/ConstraintLabels.tsx | 45 + .../Dataset/Schema/components/ExpandIcon.tsx | 150 ++++ .../Dataset/Schema/components/MenuColumn.tsx | 78 ++ .../Schema/components/NestedRowIcon.tsx | 44 + .../SchemaFieldDrawer/AboutFieldTab.tsx | 115 +++ .../SchemaFieldDrawer/DrawerFooter.tsx | 90 ++ .../SchemaFieldDrawer/FieldDescription.tsx | 181 ++++ .../SchemaFieldDrawer/FieldDetails.tsx | 169 ++++ .../SchemaFieldDrawer/FieldHeader.tsx | 139 +++ .../SchemaFieldDrawer/FieldPath.tsx | 112 +++ .../SchemaFieldDrawer/FieldPopularity.tsx | 48 + .../SchemaFieldDrawer/FieldTags.tsx | 31 + .../SchemaFieldDrawer/FieldTerms.tsx | 34 + .../SchemaFieldDrawer/FieldTitle.tsx | 26 + .../SchemaFieldDrawer/FieldUsageStats.tsx | 65 ++ .../SchemaFieldDrawer/PopularityBars.tsx | 45 + .../SchemaFieldDrawer/SampleValuesSection.tsx | 29 + .../SchemaFieldDrawer/SchemaFieldDrawer.tsx | 307 +++++++ .../SchemaFieldDrawerTabs.tsx | 76 ++ 
.../SchemaFieldQueriesSidebarTab.tsx | 200 +++++ .../SchemaFieldDrawer/StatsSection.tsx | 39 + .../StatsSidebarColumnTab.tsx | 158 ++++ .../SchemaFieldDrawer/StatsSidebarContent.tsx | 195 ++++ .../SchemaFieldDrawer/StatsSidebarHeader.tsx | 110 +++ .../SchemaFieldDrawer/StatsSidebarView.tsx | 102 +++ .../SchemaFieldDrawer/StatsSummaryRow.tsx | 61 ++ .../SchemaFieldDrawer/TrendDetail.tsx | 83 ++ .../SchemaFieldDrawer/components.ts | 16 + .../Dataset/Schema/components/TypeLabel.tsx | 48 + .../Dataset/Schema/history/ChangeEvent.tsx | 51 ++ .../Schema/history/ChangeTransactionView.tsx | 117 +++ .../Dataset/Schema/history/HistorySidebar.tsx | 139 +++ .../__tests__/changeEventToString.test.ts | 102 +++ .../Schema/history/changeEventToString.ts | 51 ++ .../Dataset/Schema/history/historyUtils.ts | 12 + .../Dataset/Schema/useGetEntitySchema.tsx | 35 + .../Dataset/Schema/useKeyboardControls.ts | 84 ++ .../Dataset/Schema/useSchemaVersioning.tsx | 68 ++ .../useExtractDescriptionInfo.test.ts | 46 + .../useExtractFieldGlossaryTermsInfo.test.ts | 130 +++ .../__tests__/useExtractFieldTagsInfo.test.ts | 117 +++ .../Dataset/Schema/utils/filterSchemaRows.ts | 136 +++ .../utils/getExpandedDrawerFieldPath.ts | 10 + .../utils/getFieldDescriptionDetails.ts | 29 + .../utils/getSchemaFilterTypesFromUrl.ts | 18 + .../Dataset/Schema/utils/queryStringUtils.ts | 11 + .../tabs/Dataset/Schema/utils/statsUtil.ts | 9 + .../utils/updateSchemaFilterQueryString.ts | 32 + .../Schema/utils/useDescriptionRenderer.tsx | 76 ++ .../utils/useExtractFieldDescriptionInfo.ts | 28 + .../utils/useExtractFieldGlossaryTermsInfo.ts | 18 + .../Schema/utils/useExtractFieldTagsInfo.ts | 16 + .../utils/useGetStructuredPropColumns.tsx | 22 + .../utils/useGetTableColumnProperties.ts | 39 + .../Schema/utils/useTagsAndTermsRenderer.tsx | 54 ++ .../useTagsAndTermsRendererFeatureTable.tsx | 42 + .../Schema/utils/useUsageStatsRenderer.tsx | 69 ++ .../shared/tabs/Dataset/Stats/StatsHeader.tsx | 68 ++ 
.../tabs/Dataset/Stats/StatsSection.tsx | 34 + .../shared/tabs/Dataset/Stats/StatsTab.tsx | 107 +++ .../shared/tabs/Dataset/Stats/constants.ts | 1 + .../Stats/historical/HistoricalStats.tsx | 236 +++++ .../Stats/historical/LookbackWindowSelect.tsx | 33 + .../historical/charts/ProfilingRunsChart.tsx | 111 +++ .../Stats/historical/charts/StatChart.tsx | 113 +++ .../historical/shared/PrefixedSelect.tsx | 33 + .../tabs/Dataset/Stats/lookbackWindows.ts | 21 + .../Dataset/Stats/snapshot/ColumnStats.tsx | 175 ++++ .../Dataset/Stats/snapshot/SampleValueTag.tsx | 36 + .../Dataset/Stats/snapshot/TableStats.tsx | 120 +++ .../shared/tabs/Dataset/Stats/viewType.ts | 4 + .../Timeline/SchemaTimelineSection.tsx | 99 +++ .../Validations/AcrylAssertionDetails.tsx | 38 + .../AcrylAssertionDetailsHeader.tsx | 103 +++ .../AcrylAssertionResultsChart.tsx | 62 ++ .../AcrylAssertionResultsChartHeader.tsx | 113 +++ .../AcrylAssertionResultsChartTimeline.tsx | 115 +++ .../Validations/AcrylAssertionsList.tsx | 41 + .../AcrylAssertionsSummaryLoading.tsx | 60 ++ .../Validations/AcrylAssertionsTable.tsx | 216 +++++ .../AcrylAssertionsTableColumns.tsx | 135 +++ .../Validations/AcrylValidationsTab.tsx | 155 ++++ .../Validations/AssertionGroupHeader.tsx | 62 ++ .../AssertionList/AcryAssertionTypeSelect.tsx | 39 + .../AssertionList/AcrylAssertionFilters.tsx | 86 ++ .../AssertionList/AcrylAssertionList.tsx | 105 +++ .../AcrylAssertionListConstants.tsx | 68 ++ .../AcrylAssertionListFilters.tsx | 150 ++++ .../AcrylAssertionListStatusDot.tsx | 51 ++ .../AssertionList/AcrylAssertionListTable.tsx | 152 ++++ .../AcrylAssertionProgressBar.tsx | 44 + .../AcrylAssertionRecommendedFilters.tsx | 88 ++ .../AssertionList/AssertionGroupHeader.tsx | 54 ++ .../AssertionListTitleContainer.tsx | 34 + .../AssertionList/AssertionName.tsx | 123 +++ .../AssertionList/DataContractBadge.tsx | 30 + .../AssertionList/GroupByTable.tsx | 84 ++ .../AssertionList/StyledComponents.tsx | 9 + 
.../Summary/AcrylAssertionSummaryCard.tsx | 155 ++++ .../Summary/AcrylAssertionSummarySection.tsx | 80 ++ .../Summary/AcrylAssertionSummaryTab.tsx | 60 ++ .../Summary/AcrylAssertionsSummaryLoading.tsx | 32 + .../Tags/AcrylAssertionTagColumn.tsx | 223 +++++ .../Validations/AssertionList/constant.ts | 43 + .../Validations/AssertionList/hooks.tsx | 167 ++++ .../Validations/AssertionList/types.ts | 109 +++ .../Validations/AssertionList/utils.tsx | 496 +++++++++++ .../Dataset/Validations/AssertionMenu.tsx | 15 + .../Validations/AssertionPlatformAvatar.tsx | 42 + .../Validations/AssertionResultTimeline.tsx | 133 +++ .../tabs/Dataset/Validations/Assertions.tsx | 82 ++ .../Dataset/Validations/BooleanTimeline.tsx | 110 +++ .../DatasetAssertionDescription.tsx | 408 +++++++++ .../Validations/DatasetAssertionDetails.tsx | 259 ++++++ .../DatasetAssertionLogicModal.tsx | 24 + .../DatasetAssertionResultDetails.tsx | 42 + .../Validations/DatasetAssertionsList.tsx | 191 ++++ .../Validations/DatasetAssertionsSummary.tsx | 105 +++ .../Validations/FieldAssertionDescription.tsx | 75 ++ .../FreshnessAssertionDescription.tsx | 78 ++ .../SchemaAssertionDescription.tsx | 28 + .../Dataset/Validations/SchemaSummary.tsx | 71 ++ .../Validations/SchemaSummaryModal.tsx | 33 + .../Validations/SqlAssertionDescription.tsx | 16 + .../Dataset/Validations/ValidationsTab.tsx | 101 +++ .../VolumeAssertionDescription.tsx | 41 + .../__tests__/useGetValidationsTab.test.ts | 38 + .../tabs/Dataset/Validations/acrylTypes.tsx | 21 + .../tabs/Dataset/Validations/acrylUtils.tsx | 391 ++++++++ .../builder/details/PrimaryButton.tsx | 58 ++ .../Validations/assertion/builder/hooks.ts | 139 +++ .../assertion/profile/AssertionProfile.tsx | 63 ++ .../profile/AssertionProfileDrawer.tsx | 21 + .../profile/AssertionProfileFooter.tsx | 51 ++ .../profile/AssertionProfileHeader.tsx | 89 ++ .../profile/AssertionProfileHeaderLoading.tsx | 29 + .../assertion/profile/AssertionTabs.tsx | 77 ++ 
.../assertion/profile/actions/ActionItem.tsx | 60 ++ .../assertion/profile/actions/Actions.tsx | 41 + .../actions/AssertionListItemActions.tsx | 75 ++ .../profile/actions/ContractAction.tsx | 107 +++ .../profile/actions/CopyLinkAction.tsx | 34 + .../profile/actions/CopyUrnAction.tsx | 43 + .../profile/actions/ExternalUrlAction.tsx | 58 ++ .../profile/actions/styledComponents.tsx | 37 + .../profile/actions/useIsContractsEnabled.tsx | 7 + .../profile/shared/AssertionResultDot.tsx | 28 + .../assertion/profile/shared/CloseButton.tsx | 16 + .../profile/shared/isExternalAssertion.tsx | 15 + .../shared/result/AssertionResultPopover.tsx | 48 + .../result/AssertionResultPopoverContent.tsx | 229 +++++ .../assertion/profile/shared/utils.ts | 11 + .../profile/summary/AssertionDescription.tsx | 34 + .../summary/AssertionSummaryContent.tsx | 33 + .../summary/AssertionSummaryLoading.tsx | 31 + .../summary/AssertionSummarySection.tsx | 30 + .../profile/summary/AssertionSummaryTab.tsx | 22 + .../profile/summary/NoResultsSummary.tsx | 15 + .../profile/summary/result/popoverUtils.ts | 0 .../table/AssertionResultsLoadingItems.tsx | 32 + .../result/table/AssertionResultsTable.tsx | 93 ++ .../table/AssertionResultsTableItem.tsx | 87 ++ .../timeline/AssertionResultsTimeline.tsx | 111 +++ .../timeline/AssertionResultsTimelineViz.tsx | 88 ++ .../timeline/AssertionTimelineSkeleton.tsx | 94 ++ .../ColumnMetricAssertionsResultsGraph.tsx | 0 .../ColumnValueAssertionsResultsGraph.tsx | 0 .../CustomSqlAssertionsResultsGraph.tsx | 0 .../FreshnessAssertionsResultsGraph.tsx | 0 .../summary/result/timeline/TimeSelect.tsx | 43 + .../timeline/VolumeAssertionResultsGraph.tsx | 0 .../StatusOverTimeAssertionResultChart.tsx | 132 +++ .../summary/result/timeline/charts/types.ts | 33 + .../summary/result/timeline/charts/utils.ts | 148 ++++ .../summary/result/timeline/transformers.ts | 128 +++ .../profile/summary/result/timeline/utils.ts | 90 ++ .../schedule/AssertionScheduleSummary.tsx | 125 +++ 
.../AssertionScheduleSummarySection.tsx | 72 ++ .../schedule/ProviderSummarySection.tsx | 40 + .../summary/shared/AssertionResultPill.tsx | 39 + .../summary/shared/DetailedErrorMessage.tsx | 49 + .../shared/DetailedErrorMessageTooltip.tsx | 34 + .../profile/summary/shared/SelectablePill.tsx | 37 + .../profile/summary/shared/assertionUtils.ts | 87 ++ .../profile/summary/shared/constants.ts | 66 ++ .../summary/shared/resultExtractionUtils.ts | 367 ++++++++ .../summary/shared/resultMessageUtils.ts | 112 +++ .../assertion/profile/summary/utils.tsx | 537 +++++++++++ .../Dataset/Validations/assertionUtils.tsx | 194 ++++ .../contract/DataContractAssertionStatus.tsx | 44 + .../contract/DataContractSummary.tsx | 106 +++ .../contract/DataContractSummaryFooter.tsx | 75 ++ .../Validations/contract/DataContractTab.tsx | 115 +++ .../contract/DataQualityContractSummary.tsx | 100 +++ .../contract/FreshnessContractSummary.tsx | 96 ++ .../contract/FreshnessScheduleSummary.tsx | 42 + .../contract/SchemaContractSummary.tsx | 82 ++ .../DataContractAssertionGroupSelect.tsx | 54 ++ .../contract/builder/DataContractBuilder.tsx | 191 ++++ .../builder/DataContractBuilderModal.tsx | 67 ++ .../Validations/contract/builder/types.ts | 37 + .../Validations/contract/builder/utils.ts | 120 +++ .../Dataset/Validations/contract/utils.tsx | 115 +++ .../Validations/fieldDescriptionUtils.ts | 163 ++++ .../Dataset/Validations/shared/constant.tsx | 13 + .../Validations/shared/styledComponents.tsx | 32 + .../Validations/useGetValidationsTab.ts | 35 + .../shared/tabs/Dataset/Validations/utils.tsx | 95 ++ .../tabs/Dataset/View/ViewDefinitionTab.tsx | 116 +++ .../tabs/Documentation/DocumentationTab.tsx | 163 ++++ .../__tests__/DocumentationTab.test.tsx | 99 +++ .../editor/extensions/htmlToMarkdown.test.tsx | 69 ++ .../editor/extensions/markdownToHtml.test.tsx | 21 + .../components/CompactMarkdownViewer.tsx | 162 ++++ .../components/DescriptionEditor.tsx | 209 +++++ .../components/DescriptionEditorToolbar.tsx 
| 60 ++ .../components/DescriptionPreview.tsx | 25 + .../components/DescriptionPreviewModal.tsx | 76 ++ .../components/DescriptionPreviewToolbar.tsx | 18 + .../components/DiscardDescriptionModal.tsx | 29 + .../Documentation/components/LinkList.tsx | 96 ++ .../components/SourceDescription.tsx | 33 + .../components/editor/Editor.tsx | 115 +++ .../components/editor/EditorTheme.tsx | 133 +++ .../components/editor/OnChangeMarkdown.tsx | 24 + .../components/editor/__mocks__/Editor.tsx | 24 + .../editor/__tests__/Editor.test.tsx | 16 + .../editor/extensions/htmlToMarkdown.tsx | 117 +++ .../editor/extensions/markdownToHtml.tsx | 27 + .../mentions/DataHubMentionsExtension.tsx | 138 +++ .../extensions/mentions/MentionsComponent.tsx | 68 ++ .../extensions/mentions/MentionsDropdown.tsx | 114 +++ .../extensions/mentions/MentionsNodeView.tsx | 69 ++ .../extensions/mentions/useDataHubMentions.ts | 63 ++ .../editor/toolbar/AddImageButton.tsx | 56 ++ .../editor/toolbar/AddLinkButton.tsx | 30 + .../editor/toolbar/CodeBlockToolbar.tsx | 85 ++ .../editor/toolbar/CommandButton.tsx | 28 + .../editor/toolbar/FloatingToolbar.tsx | 118 +++ .../components/editor/toolbar/HeadingMenu.tsx | 66 ++ .../components/editor/toolbar/Icons.tsx | 45 + .../components/editor/toolbar/LinkModal.tsx | 82 ++ .../editor/toolbar/TableCellMenu.tsx | 71 ++ .../components/editor/toolbar/Toolbar.tsx | 133 +++ .../Documentation/components/editor/utils.ts | 7 + .../shared/tabs/Documentation/utils.ts | 31 + .../entityV2/shared/tabs/Embed/EmbedTab.tsx | 33 + .../shared/tabs/Entity/ChartDashboardsTab.tsx | 19 + .../tabs/Entity/ColumnTabNameHeader.tsx | 12 + .../shared/tabs/Entity/DashboardChartsTab.tsx | 13 + .../tabs/Entity/DashboardDatasetsTab.tsx | 19 + .../shared/tabs/Entity/DataFlowJobsTab.tsx | 60 ++ .../shared/tabs/Entity/DataJobFlowTab.tsx | 14 + .../shared/tabs/Entity/InputFieldsTab.tsx | 61 ++ .../shared/tabs/Entity/TabNameWithCount.tsx | 29 + .../Entity/__tests__/DataJobFlowTab.test.tsx | 42 + 
.../tabs/Entity/components/EntityList.tsx | 139 +++ .../DynamicPropertiesTab.tsx | 49 + .../Entity/weaklyTypedAspects/DynamicTab.tsx | 50 ++ .../weaklyTypedAspects/DynamicTabularTab.tsx | 29 + .../weaklyTypedAspects/TableValueElement.tsx | 29 + .../shared/tabs/Incident/IncidentTab.tsx | 157 ++++ .../Incident/components/AddIncidentModal.tsx | 202 +++++ .../Incident/components/IncidentListItem.tsx | 330 +++++++ .../Incident/components/IncidentSummary.tsx | 86 ++ .../components/IncidentsLoadingSection.tsx | 43 + .../components/ResolveIncidentModal.tsx | 54 ++ .../shared/tabs/Incident/incidentUtils.ts | 143 +++ .../tabs/Lineage/ColumnLineageSelect.tsx | 113 +++ .../shared/tabs/Lineage/CompactLineageTab.tsx | 253 ++++++ .../shared/tabs/Lineage/ImpactAnalysis.tsx | 86 ++ .../shared/tabs/Lineage/LineageColumnView.tsx | 209 +++++ .../shared/tabs/Lineage/LineageTab.tsx | 96 ++ .../shared/tabs/Lineage/LineageTabContext.tsx | 14 + .../tabs/Lineage/LineageTabTimeSelector.tsx | 48 + .../shared/tabs/Lineage/LineageTable.tsx | 72 ++ ...oadScrollAcrossLineageSearchResultsHook.ts | 66 ++ ...rateUseSearchResultsViaRelationshipHook.ts | 136 +++ .../app/entityV2/shared/tabs/Lineage/hooks.ts | 42 + .../app/entityV2/shared/tabs/Lineage/utils.ts | 6 + .../tabs/ML/MlFeatureFeatureTableTab.tsx | 17 + .../tabs/ML/MlPrimaryKeyFeatureTableTab.tsx | 17 + .../tabs/Properties/CardinalityLabel.tsx | 43 + .../shared/tabs/Properties/NameColumn.tsx | 80 ++ .../shared/tabs/Properties/PropertiesTab.tsx | 140 +++ .../Properties/StructuredPropertyTooltip.tsx | 60 ++ .../Properties/StructuredPropertyValue.tsx | 126 +++ .../shared/tabs/Properties/TabHeader.tsx | 32 + .../shared/tabs/Properties/ValuesColumn.tsx | 26 + .../__tests__/useStructuredProperties.test.ts | 87 ++ .../tabs/Properties/__tests__/utils.test.ts | 86 ++ .../entityV2/shared/tabs/Properties/types.ts | 25 + .../Properties/useStructuredProperties.tsx | 252 ++++++ .../useUpdateExpandedRowsFromFilter.ts | 23 + 
.../entityV2/shared/tabs/Properties/utils.ts | 67 ++ .../shared/tabs/__tests__/utils.test.tsx | 212 +++++ .../src/app/entityV2/shared/types.ts | 93 ++ .../shared/useIsSeparateSiblingsMode.ts | 28 + .../app/entityV2/shared/useQueryParamValue.ts | 9 + .../src/app/entityV2/shared/utils.ts | 296 +++++++ .../StructuredPropertyEntity.tsx | 91 ++ .../app/entityV2/tag/CompactTagProfile.tsx | 70 ++ .../src/app/entityV2/tag/Tag.tsx | 92 ++ .../src/app/entityV2/tag/TagProfile.tsx | 40 + .../tag/__tests__/TagProfile.test.tsx | 58 ++ .../src/app/entityV2/user/User.tsx | 85 ++ .../src/app/entityV2/user/UserAssets.tsx | 28 + .../entityV2/user/UserBasicInfoContainer.tsx | 59 ++ .../entityV2/user/UserEditProfileModal.tsx | 277 ++++++ .../src/app/entityV2/user/UserGroups.tsx | 142 +++ .../src/app/entityV2/user/UserHeader.tsx | 100 +++ .../src/app/entityV2/user/UserProfile.tsx | 229 +++++ .../app/entityV2/user/UserProfileInfoCard.tsx | 104 +++ .../src/app/entityV2/user/UserSidebar.tsx | 74 ++ .../user/__tests__/UserHeader.test.tsx | 24 + .../src/app/entityV2/user/preview/Preview.tsx | 89 ++ .../src/app/entityV2/view/ManageViews.tsx | 50 ++ .../src/app/entityV2/view/ViewTypeLabel.tsx | 56 ++ .../src/app/entityV2/view/ViewsList.tsx | 144 +++ .../src/app/entityV2/view/ViewsTable.tsx | 61 ++ .../app/entityV2/view/__tests__/utils.test.ts | 25 + .../app/entityV2/view/builder/ViewBuilder.tsx | 127 +++ .../entityV2/view/builder/ViewBuilderForm.tsx | 114 +++ .../view/builder/ViewBuilderModal.tsx | 103 +++ .../view/builder/ViewDefinitionBuilder.tsx | 117 +++ .../view/builder/__tests__/utils.test.ts | 23 + .../src/app/entityV2/view/builder/types.ts | 10 + .../src/app/entityV2/view/builder/utils.ts | 130 +++ .../src/app/entityV2/view/cacheUtils.ts | 250 ++++++ .../entityV2/view/menu/ViewDropdownMenu.tsx | 297 +++++++ .../view/menu/item/DeleteViewItem.tsx | 29 + .../entityV2/view/menu/item/EditViewItem.tsx | 29 + .../entityV2/view/menu/item/IconItemTitle.tsx | 40 + 
.../view/menu/item/PreviewViewItem.tsx | 17 + .../menu/item/RemoveGlobalDefaultItem.tsx | 23 + .../view/menu/item/RemoveUserDefaultItem.tsx | 24 + .../view/menu/item/SetGlobalDefaultItem.tsx | 24 + .../view/menu/item/SetUserDefaultItem.tsx | 33 + .../app/entityV2/view/menu/item/ViewItem.tsx | 25 + .../app/entityV2/view/select/ViewOption.tsx | 126 +++ .../entityV2/view/select/ViewOptionName.tsx | 213 +++++ .../view/select/ViewOptionTooltipTitle.tsx | 22 + .../app/entityV2/view/select/ViewSelect.tsx | 372 ++++++++ .../view/select/ViewSelectContent.tsx | 94 ++ .../view/select/ViewSelectDropdown.tsx | 55 ++ .../entityV2/view/select/ViewSelectHeader.tsx | 189 ++++ .../view/select/ViewSelectPopoverContent.tsx | 35 + .../view/select/ViewsTableColumns.tsx | 92 ++ .../view/select/renderSelectedView.tsx | 139 +++ .../view/select/renderViewOptionGroup.tsx | 62 ++ .../src/app/entityV2/view/select/style.css | 10 + .../entityV2/view/select/styledComponents.tsx | 107 +++ .../src/app/entityV2/view/select/utils.ts | 14 + .../entityV2/view/shared/DefaultViewIcon.tsx | 37 + .../view/shared/GlobalDefaultViewIcon.tsx | 13 + .../view/shared/UserDefaultViewIcon.tsx | 13 + .../src/app/entityV2/view/types.ts | 52 ++ .../src/app/entityV2/view/utils.ts | 83 ++ .../GlossaryBrowser/GlossaryBrowser.tsx | 35 +- .../app/glossary/GlossaryBrowser/NodeItem.tsx | 25 +- .../app/glossary/GlossaryBrowser/TermItem.tsx | 16 +- .../src/app/glossary/GlossaryRoutes.tsx | 31 +- .../app/glossaryV2/BusinessGlossaryPage.tsx | 131 +++ .../app/glossaryV2/EmptyGlossarySection.tsx | 79 ++ .../GlossaryBrowser/GlossaryBrowser.tsx | 130 +++ .../glossaryV2/GlossaryBrowser/NodeItem.tsx | 270 ++++++ .../glossaryV2/GlossaryBrowser/TermItem.tsx | 113 +++ .../glossaryV2/GlossaryContentProvider.tsx | 93 ++ .../app/glossaryV2/GlossaryEntitiesList.tsx | 96 ++ .../src/app/glossaryV2/GlossaryEntityItem.tsx | 114 +++ .../src/app/glossaryV2/GlossaryListCard.tsx | 238 +++++ .../src/app/glossaryV2/GlossaryNodeCard.tsx | 207 +++++ 
.../src/app/glossaryV2/GlossaryRoutes.tsx | 73 ++ .../src/app/glossaryV2/GlossarySearch.tsx | 124 +++ .../src/app/glossaryV2/GlossarySidebar.tsx | 96 ++ .../app/glossaryV2/GlossaryStatsProvider.tsx | 91 ++ .../app/glossaryV2/__tests__/utils.test.ts | 55 ++ .../src/app/glossaryV2/cacheUtils.ts | 36 + .../src/app/glossaryV2/colorUtils.ts | 30 + datahub-web-react/src/app/glossaryV2/utils.ts | 27 + .../StructuredProperties.tsx | 4 +- .../src/app/home/HomePagePosts.tsx | 2 + datahub-web-react/src/app/homeV2/HomePage.tsx | 73 ++ .../action/announcement/Announcement.tsx | 55 ++ .../action/announcement/Announcements.tsx | 130 +++ .../useGetUnseenAnnouncements.tsx | 32 + .../src/app/homeV2/action/learn/Resources.tsx | 198 +++++ .../src/app/homeV2/action/nudge/Domains.tsx | 0 .../src/app/homeV2/action/nudge/Glossary.tsx | 0 .../src/app/homeV2/action/nudge/Ingestion.tsx | 0 .../app/homeV2/action/nudge/MetadataTests.tsx | 0 .../app/homeV2/action/nudge/UserProfile.tsx | 0 .../content/HorizontalListSkeletons.tsx | 69 ++ .../app/homeV2/content/recent/EntityCard.tsx | 106 +++ .../homeV2/content/recent/EntityCardList.tsx | 67 ++ .../homeV2/content/recent/RecentActions.tsx | 51 ++ .../content/recent/RecentlyEditedOrViewed.tsx | 20 + .../app/homeV2/content/recent/constants.ts | 19 + .../content/recent/useGetRecentActions.tsx | 46 + .../src/app/homeV2/content/tabs/CenterTab.tsx | 63 ++ .../app/homeV2/content/tabs/CenterTabs.tsx | 93 ++ .../app/homeV2/content/tabs/CountBadge.tsx | 31 + .../content/tabs/activity/ActivityTab.tsx | 5 + .../tabs/activity/cards/AssertionResult.tsx | 0 .../tabs/activity/cards/DocumentationEdit.tsx | 0 .../tabs/activity/cards/DomainCreate.tsx | 0 .../tabs/activity/cards/IncidentStatus.tsx | 0 .../tabs/announcements/AnnouncementCard.tsx | 53 ++ .../announcements/AnnouncementsSkeleton.tsx | 46 + .../tabs/announcements/AnnouncementsTab.tsx | 39 + .../tabs/announcements/EmptyAnnouncements.tsx | 6 + .../announcements/useGetAnnouncements.tsx | 28 + 
.../content/tabs/discovery/DiscoveryTab.tsx | 26 + .../tabs/discovery/sections/Section.tsx | 61 ++ .../sections/dataProducts/DataProductCard.tsx | 86 ++ .../sections/dataProducts/DataProducts.tsx | 43 + .../dataProducts/useGetDataProducts.tsx | 46 + .../discovery/sections/domains/DomainCard.tsx | 90 ++ .../discovery/sections/domains/Domains.tsx | 50 ++ .../sections/domains/useGetDomains.tsx | 38 + .../insight/InsightStatusProvider.tsx | 72 ++ .../discovery/sections/insight/Insights.tsx | 137 +++ .../insight/cards/FoundationalAssetsCard.tsx | 17 + .../insight/cards/InsightLoadingCard.tsx | 21 + .../insight/cards/MostFrequentlyUpdated.tsx | 19 + .../insight/cards/MostQueriedCard.tsx | 19 + .../sections/insight/cards/MostRowsCard.tsx | 18 + .../sections/insight/cards/MostUsersCard.tsx | 19 + .../insight/cards/MostViewedDashboards.tsx | 19 + .../insight/cards/NewAssetsGraphCard.tsx | 6 + .../insight/cards/PopularGlossaryTerms.tsx | 118 +++ .../cards/RecentlyCreatedDatasetsCard.tsx | 21 + .../cards/RecentlyUpdatedDatasetsCard.tsx | 21 + .../insight/cards/SearchListInsightCard.tsx | 118 +++ .../cards/useGetFoundationalAssets.tsx | 35 + .../insight/cards/useGetMostQueried.tsx | 26 + .../sections/insight/cards/useGetMostRows.tsx | 26 + .../insight/cards/useGetMostUpdated.tsx | 26 + .../insight/cards/useGetMostUsers.tsx | 26 + .../cards/useGetMostViewedDashboards.tsx | 26 + .../insight/cards/useGetSearchAssets.tsx | 61 ++ .../cards/useRecentlyCreatedDatasets.tsx | 31 + .../cards/useRecentlyUpdatedDatasets.tsx | 31 + .../sections/insight/shared/InsightCard.tsx | 60 ++ .../insight/shared/InsightCardSkeleton.tsx | 92 ++ .../sections/platform/PlatformCard.tsx | 96 ++ .../discovery/sections/platform/Platforms.tsx | 47 + .../sections/platform/useGetDataPlatforms.tsx | 30 + .../sections/platform/useGetPlatforms.tsx | 41 + .../src/app/homeV2/content/tabs/tabs.tsx | 52 ++ .../src/app/homeV2/content/tabs/types.ts | 3 + .../homeV2/content/tabs/useGetVisibleTabs.tsx | 83 ++ 
.../homeV2/introduce/IntroduceYourself.tsx | 26 + .../IntroduceYourselfLeftSidebar.tsx | 53 ++ .../IntroduceYourselfMainContent.tsx | 553 ++++++++++++ .../app/homeV2/introduce/PersonaSelector.tsx | 79 ++ .../useRedirectToIntroduceYourself.tsx | 19 + .../src/app/homeV2/layout/CenterContent.tsx | 68 ++ .../src/app/homeV2/layout/CustomNavLink.tsx | 90 ++ .../src/app/homeV2/layout/LeftSidebar.tsx | 127 +++ .../src/app/homeV2/layout/NavLinks.tsx | 17 + .../src/app/homeV2/layout/NavLinksMenu.tsx | 295 ++++++ .../src/app/homeV2/layout/NavSidebar.tsx | 142 +++ .../src/app/homeV2/layout/RightSidebar.tsx | 55 ++ .../layout/navBarRedesign/NavBarContext.tsx | 111 +++ .../layout/navBarRedesign/NavBarHeader.tsx | 71 ++ .../layout/navBarRedesign/NavBarMenu.tsx | 77 ++ .../layout/navBarRedesign/NavBarMenuItem.tsx | 147 +++ .../navBarRedesign/NavBarMenuItemDropdown.tsx | 90 ++ .../navBarRedesign/NavBarMenuItemGroup.tsx | 18 + .../layout/navBarRedesign/NavBarSkeleton.tsx | 59 ++ .../layout/navBarRedesign/NavBarToggler.tsx | 37 + .../layout/navBarRedesign/NavSidebar.tsx | 294 ++++++ .../app/homeV2/layout/navBarRedesign/types.ts | 69 ++ .../layout/navBarRedesign/useSelectedKey.ts | 42 + .../homeV2/layout/shared/styledComponents.tsx | 25 + .../src/app/homeV2/layout/types.ts | 30 + .../persona/PersonalizationLoadingModal.tsx | 80 ++ .../app/homeV2/persona/useLoadUserPersona.tsx | 16 + .../src/app/homeV2/persona/useUserPersona.ts | 41 + .../homeV2/reference/header/GreetingText.tsx | 45 + .../homeV2/reference/header/UserHeader.tsx | 63 ++ .../reference/header/UserHeaderImage.tsx | 57 ++ .../reference/header/getGreetingText.ts | 10 + .../sections/DefaultEmptyEntityList.tsx | 12 + .../homeV2/reference/sections/EntityLink.tsx | 150 ++++ .../reference/sections/EntityLinkList.tsx | 108 +++ .../sections/EntityLinkListSkeleton.tsx | 50 ++ .../sections/assets/AssetsYouOwn.tsx | 63 ++ .../sections/assets/EmptyAssetsYouOwn.tsx | 25 + .../sections/assets/useGetAssetsYouOwn.tsx | 41 + 
.../sections/domains/DataProductsYouOwn.tsx | 0 .../sections/domains/DomainsYouOwn.tsx | 60 ++ .../sections/domains/EmptyDomainsYouOwn.tsx | 21 + .../sections/domains/useGetDomainsYouOwn.ts | 40 + .../glossary/EmptyGlossaryNodesYouOwn.tsx | 24 + .../sections/glossary/GlossaryNodesYouOwn.tsx | 60 ++ .../glossary/useGetGlossaryNodesYouOwn.tsx | 40 + .../sections/groups/EmptyGroupsYouAreIn.tsx | 12 + .../sections/groups/GroupsYouAreIn.tsx | 49 + .../sections/groups/useGetGroupsYouAreIn.ts | 31 + .../reference/sections/pinned/PinnedLink.tsx | 89 ++ .../sections/pinned/PinnedLinkList.tsx | 39 + .../sections/pinned/PinnedLinkLogo.tsx | 47 + .../reference/sections/pinned/PinnedLinks.tsx | 28 + .../sections/pinned/useGetPinnedLinks.tsx | 28 + .../sections/tags/EmptyTagsYouOwn.tsx | 21 + .../reference/sections/tags/TagsYouOwn.tsx | 65 ++ .../sections/tags/useGetTagsYouOwn.tsx | 37 + .../src/app/homeV2/reference/types.ts | 3 + .../app/homeV2/shared/__tests__/utils.test.ts | 16 + .../src/app/homeV2/shared/types.ts | 88 ++ .../updateLastViewedAnnouncementTime.tsx | 33 + .../useGetLastViewedAnnouncementTime.tsx | 19 + .../src/app/homeV2/shared/utils.ts | 8 + .../src/app/identity/user/UserUtils.tsx | 4 + .../src/app/identity/user/useUserTitle.ts | 30 + .../src/app/ingest/ManageIngestionPage.tsx | 17 +- .../src/app/ingest/secret/SecretsList.tsx | 16 +- .../app/ingest/source/IngestionSourceList.tsx | 4 +- .../ingest/source/IngestionSourceTable.tsx | 21 +- .../source/IngestionSourceTableColumns.tsx | 11 +- datahub-web-react/src/app/lineage/types.ts | 6 + .../src/app/lineage/utils/constructTree.ts | 2 +- .../app/lineage/utils/extendAsyncEntities.ts | 10 +- .../lineage/utils/useGetLineageTimeParams.ts | 21 +- .../src/app/lineageV2/LineageDisplay.tsx | 127 +++ .../LineageEdge/LineageTableEdge.tsx | 80 ++ .../lineageV2/LineageEdge/TentativeEdge.tsx | 52 ++ .../lineageV2/LineageEntityNode/Column.tsx | 286 ++++++ .../LineageEntityNode/ColumnSearch.tsx | 39 + 
.../lineageV2/LineageEntityNode/Columns.tsx | 235 +++++ .../LineageEntityNode/ContainerPath.tsx | 88 ++ .../ContractLineageButton.tsx | 45 + .../LineageEntityNode/ExpandLineageButton.tsx | 86 ++ .../LineageEntityNode/GhostEntityMenu.tsx | 82 ++ .../LineageEntityNode/LineageEntityNode.tsx | 147 +++ .../LineageEntityNode/ManageLineageMenu.tsx | 174 ++++ .../LineageEntityNode/NodeContents.tsx | 511 +++++++++++ .../LineageEntityNode/NodeSkeleton.tsx | 37 + .../SchemaFieldNodeContents.tsx | 394 +++++++++ .../LineageEntityNode/components.tsx | 42 + .../useAvoidIntersections.ts | 96 ++ .../LineageEntityNode/useDisplayedColumns.ts | 151 ++++ .../useOnClickExpandLineage.ts | 26 + .../src/app/lineageV2/LineageExplorer.tsx | 137 +++ .../LineageFilterNodeBasic.tsx | 174 ++++ .../LineageFilterNode/LineageFilterSearch.tsx | 118 +++ .../LineageFilterNode/ShowMoreButton.tsx | 138 +++ .../LineageFilterNode/computeOrFilters.ts | 43 + .../useFetchFilterNodeContents.ts | 81 ++ .../src/app/lineageV2/LineageGraph.tsx | 27 + .../src/app/lineageV2/LineageSidebar.tsx | 107 +++ .../src/app/lineageV2/LineageTimeSelector.tsx | 148 ++++ .../LineageTransformationNode.tsx | 166 ++++ .../app/lineageV2/LineageVisualization.tsx | 127 +++ .../lineageV2/LineageVisualizationContext.tsx | 21 + .../src/app/lineageV2/NodeBuilder.ts | 494 +++++++++++ datahub-web-react/src/app/lineageV2/common.ts | 355 ++++++++ .../src/app/lineageV2/constants.ts | 1 + .../DownloadLineageScreenshotButton.tsx | 64 ++ .../lineageV2/controls/LineageControls.tsx | 140 +++ .../controls/LineageSearchFilters.tsx | 95 ++ .../controls/LineageTimeRangeControls.tsx | 15 + .../app/lineageV2/controls/SearchControl.tsx | 197 +++++ .../lineageV2/controls/StyledPanelButton.tsx | 14 + .../app/lineageV2/controls/ZoomControls.tsx | 40 + .../src/app/lineageV2/controls/common.tsx | 32 + .../src/app/lineageV2/lineageUtils.ts | 111 +++ .../lineageV2/manualLineage/AddEntityEdge.tsx | 157 ++++ .../lineageV2/manualLineage/EntityEdge.tsx | 65 ++ 
.../lineageV2/manualLineage/LineageEdges.tsx | 86 ++ .../manualLineage/LineageEntityView.tsx | 67 ++ .../manualLineage/ManageLineageModal.tsx | 204 +++++ .../lineageV2/manualLineage/UserAvatar.tsx | 62 ++ .../manualLineage/_test_/utils.test.ts | 58 ++ .../manualLineage/updateNodeContext.ts | 52 ++ .../src/app/lineageV2/manualLineage/utils.ts | 34 + datahub-web-react/src/app/lineageV2/types.ts | 76 ++ .../app/lineageV2/useAvoidIntersections.ts | 87 ++ .../src/app/lineageV2/useBulkEntityLineage.ts | 188 ++++ .../app/lineageV2/useColumnHighlighting.ts | 312 +++++++ .../lineageV2/useComputeGraph/filterNodes.ts | 140 +++ .../useComputeGraph/getDisplayedNodes.ts | 268 ++++++ .../useComputeGraph/getFineGrainedLineage.ts | 171 ++++ .../lineageV2/useComputeGraph/orderNodes.ts | 41 + .../useComputeGraph/useComputeGraph.tsx | 98 ++ .../src/app/lineageV2/useLineageV2.ts | 6 + .../src/app/lineageV2/useNodeHighlighting.ts | 57 ++ .../app/lineageV2/useSearchAcrossLineage.ts | 333 +++++++ .../lineageV2/useShouldHideTransformations.ts | 21 + .../src/app/onboarding/OnboardingConfig.tsx | 2 + .../src/app/onboarding/OnboardingContext.tsx | 21 + .../onboarding/OnboardingContextProvider.tsx | 23 + .../src/app/onboarding/OnboardingTour.tsx | 10 +- .../config/EntityProfileOnboardingConfig.tsx | 15 + .../config/HomePageOnboardingConfig.tsx | 2 + .../EntityProfileOnboardingConfig.tsx | 175 ++++ .../configV2/HomePageOnboardingConfig.tsx | 158 ++++ .../src/app/onboarding/configV2/index.ts | 5 + .../app/onboarding/useHandleOnboardingTour.ts | 34 + .../onboarding/useShouldSkipOnboardingTour.ts | 8 + .../useUpdateEducationStepsAllowList.tsx | 28 +- .../policy/PolicyPrivilegeForm.tsx | 11 +- .../src/app/previewV2/BrowsePaths.tsx | 113 +++ .../src/app/previewV2/CardActionCircle.tsx | 40 + .../ColoredBackgroundPlatformIconGroup.tsx | 100 +++ .../src/app/previewV2/CompactView.tsx | 170 ++++ .../src/app/previewV2/ContextPath.tsx | 157 ++++ .../app/previewV2/ContextPathEntityIcon.tsx | 35 + 
.../app/previewV2/ContextPathEntityLink.tsx | 80 ++ .../app/previewV2/ContextPathSeparator.tsx | 10 + .../src/app/previewV2/DefaultPreviewCard.tsx | 412 +++++++++ .../previewV2/DefaultPreviewCardFooter.tsx | 146 +++ .../src/app/previewV2/EntityHeader.tsx | 126 +++ .../previewV2/EntityPaths/ColumnPathsText.tsx | 70 ++ .../EntityPaths/ColumnsRelationshipText.tsx | 39 + .../EntityPaths/DisplayedColumns.tsx | 37 + .../app/previewV2/EntityPaths/EntityPaths.tsx | 37 + .../EntityPaths/EntityPathsModal.tsx | 103 +++ .../src/app/previewV2/Freshness.tsx | 94 ++ .../src/app/previewV2/HealthIcon.tsx | 47 + .../src/app/previewV2/HealthPopover.tsx | 113 +++ .../previewV2/ImageWIthColoredBackground.tsx | 77 ++ .../src/app/previewV2/LineageBadge.tsx | 70 ++ .../src/app/previewV2/NotesIcon.tsx | 36 + datahub-web-react/src/app/previewV2/Pills.tsx | 119 +++ .../PreviewCardFooterRightSection.tsx | 103 +++ .../src/app/previewV2/QueryStat.tsx | 20 + .../src/app/previewV2/SearchPill.tsx | 126 +++ .../src/app/previewV2/SeeSummaryButton.tsx | 42 + .../src/app/previewV2/__tests__/utils.test.ts | 33 + .../src/app/previewV2/shared.tsx | 10 + datahub-web-react/src/app/previewV2/utils.ts | 157 ++++ .../HoverEntityTooltipContext.tsx | 14 + .../component/CompactEntityNameList.tsx | 54 +- .../renderer/component/EntityPreviewTag.tsx | 41 +- .../renderer/component/HoverEntityTooltip.tsx | 48 +- .../src/app/search/context/SearchContext.tsx | 6 +- .../search/context/SearchContextProvider.tsx | 22 +- .../search/context/SearchResultContext.tsx | 7 +- .../src/app/search/filters/utils.tsx | 43 +- .../combineSiblingsInSearchResults.test.ts | 120 ++- .../src/app/search/utils/constants.ts | 12 + ...ncedSearchFilterOverallUnionTypeSelect.tsx | 46 + .../src/app/searchV2/AnalyticsLink.tsx | 25 + .../src/app/searchV2/BrowseEntityCard.tsx | 65 ++ .../app/searchV2/ChooseEntityTypeModal.tsx | 66 ++ .../src/app/searchV2/CommandK.tsx | 45 + .../src/app/searchV2/EditTextModal.tsx | 37 + 
.../src/app/searchV2/EmbeddedSearchBar.tsx | 68 ++ .../src/app/searchV2/EmptySearchResults.tsx | 90 ++ .../src/app/searchV2/SaveAsViewButton.tsx | 52 ++ .../src/app/searchV2/SearchBar.tsx | 531 +++++++++++ .../searchV2/SearchEntitySidebarContainer.tsx | 43 + .../src/app/searchV2/SearchFilterLabel.tsx | 226 +++++ .../src/app/searchV2/SearchHeader.tsx | 176 ++++ .../src/app/searchV2/SearchPage.tsx | 267 ++++++ .../src/app/searchV2/SearchResultList.tsx | 291 ++++++ .../src/app/searchV2/SearchResults.tsx | 418 +++++++++ .../searchV2/SearchResultsLoadingSection.tsx | 33 + .../searchV2/SearchResultsRecommendations.tsx | 75 ++ .../src/app/searchV2/SearchablePage.tsx | 174 ++++ .../src/app/searchV2/SimpleSearchFilter.tsx | 132 +++ .../src/app/searchV2/SimpleSearchFilters.tsx | 97 ++ .../src/app/searchV2/ToggleSidebarButton.tsx | 50 ++ .../src/app/searchV2/ViewAllSearchItem.tsx | 46 + .../searchV2/__tests__/SearchPage.test.tsx | 62 ++ .../app/searchV2/__tests__/constants.test.tsx | 7 + .../searchV2/__tests__/filterUtils.test.tsx | 303 +++++++ .../advanced/AdvancedFilterCloseButton.tsx | 31 + .../EntitySubTypeAdvancedFilterLabel.tsx | 105 +++ .../src/app/searchV2/advanced/styles.ts | 20 + .../autoComplete/AutoCompleteEntity.tsx | 126 +++ .../autoComplete/AutoCompleteEntityIcon.tsx | 32 + .../autoComplete/AutoCompleteItem.tsx | 59 ++ .../AutoCompletePlatformNames.tsx | 22 + .../searchV2/autoComplete/AutoCompleteTag.tsx | 18 + .../AutoCompleteTooltipContent.tsx | 61 ++ .../autoComplete/AutoCompleteUser.tsx | 36 + .../autoComplete/ParentContainers.tsx | 58 ++ .../autoComplete/RecommendedOption.tsx | 29 + .../searchV2/autoComplete/SectionHeader.tsx | 39 + .../autoComplete/__tests__/utils.test.ts | 27 + .../autoComplete/quickFilters/QuickFilter.tsx | 99 +++ .../quickFilters/QuickFilters.tsx | 32 + .../autoComplete/quickFilters/utils.tsx | 35 + .../autoComplete/styledComponents.tsx | 11 + .../src/app/searchV2/autoComplete/utils.ts | 19 + .../src/app/searchV2/context/constants.ts | 
55 ++ .../src/app/searchV2/filters/ActiveFilter.tsx | 103 +++ .../searchV2/filters/AddFilterDropdown.tsx | 160 ++++ .../src/app/searchV2/filters/FilterOption.tsx | 218 +++++ .../app/searchV2/filters/MoreFilterOption.tsx | 78 ++ .../src/app/searchV2/filters/MoreFilters.tsx | 82 ++ .../app/searchV2/filters/OperatorSelector.tsx | 67 ++ .../searchV2/filters/OptionsDropdownMenu.tsx | 119 +++ .../app/searchV2/filters/ParentEntities.tsx | 84 ++ .../app/searchV2/filters/SaveViewButton.tsx | 69 ++ .../src/app/searchV2/filters/SearchFilter.tsx | 43 + .../searchV2/filters/SearchFilterOptions.tsx | 159 ++++ .../app/searchV2/filters/SearchFilterView.tsx | 58 ++ .../app/searchV2/filters/SearchFilters.tsx | 108 +++ .../searchV2/filters/SearchFiltersBuilder.tsx | 159 ++++ .../filters/SearchFiltersLoadingSection.tsx | 27 + .../searchV2/filters/SearchFiltersSection.tsx | 67 ++ .../app/searchV2/filters/SelectedFilter.tsx | 138 +++ .../filters/SelectedSearchFilters.tsx | 76 ++ .../filters/__tests__/operator.test.tsx | 79 ++ .../searchV2/filters/__tests__/utils.test.tsx | 469 ++++++++++ .../src/app/searchV2/filters/constants.ts | 57 ++ .../src/app/searchV2/filters/field/fields.tsx | 305 +++++++ .../app/searchV2/filters/mapFilterOption.tsx | 56 ++ .../searchV2/filters/operator/operator.tsx | 264 ++++++ .../filters/render/FilterRenderer.tsx | 29 + .../filters/render/FilterRendererRegistry.tsx | 42 + .../__tests__/FilterRendererRegistry.test.tsx | 80 ++ .../searchV2/filters/render/acrylRenderers.ts | 5 + .../assertion/HasFailingAssertionsFilter.tsx | 69 ++ .../HasFailingAssertionsRenderer.tsx | 20 + .../incident/HasActiveIncidentsFilter.tsx | 69 ++ .../incident/HasActiveIncidentsRenderer.tsx | 20 + .../render/shared/BooleanMoreFilter.tsx | 88 ++ .../render/shared/BooleanMoreFilterMenu.tsx | 50 ++ .../render/shared/BooleanSearchFilter.tsx | 77 ++ .../shared/BooleanSimpleSearchFilter.tsx | 75 ++ .../render/shared/styledComponents.tsx | 21 + .../src/app/searchV2/filters/render/types.ts | 29 
+ .../filters/render/useFilterRenderer.tsx | 16 + .../app/searchV2/filters/styledComponents.ts | 57 ++ .../src/app/searchV2/filters/types.ts | 114 +++ .../src/app/searchV2/filters/useFilterMode.ts | 16 + .../filters/useGetBrowseV2LabelOverride.ts | 32 + .../searchV2/filters/useHydrateFilters.tsx | 71 ++ .../filters/useSearchFilterAnalytics.ts | 23 + .../filters/useSearchFilterDropdown.tsx | 73 ++ .../src/app/searchV2/filters/utils.tsx | 674 ++++++++++++++ .../filters/value/BooleanValueMenu.tsx | 62 ++ .../searchV2/filters/value/DateRangeMenu.tsx | 90 ++ .../searchV2/filters/value/EntityTypeMenu.tsx | 96 ++ .../filters/value/EntityValueMenu.tsx | 85 ++ .../searchV2/filters/value/EnumValueMenu.tsx | 85 ++ .../searchV2/filters/value/TextValueInput.tsx | 19 + .../searchV2/filters/value/TextValueMenu.tsx | 46 + .../searchV2/filters/value/TimeBucketMenu.tsx | 72 ++ .../app/searchV2/filters/value/ValueMenu.tsx | 156 ++++ .../app/searchV2/filters/value/ValueName.tsx | 63 ++ .../searchV2/filters/value/ValueSelector.tsx | 61 ++ .../filters/value/styledComponents.tsx | 15 + .../filters/value/useDateRangeFilterValues.ts | 47 + .../src/app/searchV2/filters/value/utils.tsx | 143 +++ .../src/app/searchV2/matches/GroupedMatch.tsx | 75 ++ .../matches/MatchContextContainer.tsx | 127 +++ .../app/searchV2/matches/MatchedFieldList.tsx | 137 +++ .../matches/SearchTextHighlighter.tsx | 42 + .../src/app/searchV2/matches/constants.ts | 129 +++ .../matches/matchedFieldPathsRenderer.tsx | 8 + .../matches/matchedInputFieldRenderer.tsx | 40 + .../src/app/searchV2/matches/utils.test.ts | 234 +++++ .../src/app/searchV2/matches/utils.ts | 143 +++ .../searchV2/recommendation/FilterPill.tsx | 61 ++ .../recommendation/RecommendedFilters.tsx | 64 ++ .../src/app/searchV2/recommendation/types.ts | 6 + .../useGetRecommendedFilters.tsx | 98 ++ .../src/app/searchV2/recommendation/utils.ts | 16 + .../SearchCardSlideoutContent.tsx | 94 ++ .../app/searchV2/sidebar/BrowseContext.tsx | 247 ++++++ 
.../src/app/searchV2/sidebar/BrowseNode.tsx | 127 +++ .../app/searchV2/sidebar/BrowseSidebar.tsx | 189 ++++ .../src/app/searchV2/sidebar/EntityBrowse.tsx | 29 + .../src/app/searchV2/sidebar/EntityLink.tsx | 61 ++ .../src/app/searchV2/sidebar/EntityNode.tsx | 104 +++ .../app/searchV2/sidebar/EnvironmentNode.tsx | 85 ++ .../app/searchV2/sidebar/ExpandableNode.tsx | 187 ++++ .../app/searchV2/sidebar/PlatformBrowse.tsx | 64 ++ .../src/app/searchV2/sidebar/PlatformNode.tsx | 178 ++++ .../app/searchV2/sidebar/SidebarContext.tsx | 63 ++ .../searchV2/sidebar/SidebarLoadingError.tsx | 30 + .../__tests__/browseContextUtils.test.ts | 20 + .../searchV2/sidebar/browseContextUtils.ts | 8 + .../src/app/searchV2/sidebar/constants.ts | 4 + .../src/app/searchV2/sidebar/index.tsx | 3 + .../src/app/searchV2/sidebar/types.ts | 17 + .../searchV2/sidebar/useAggregationsQuery.ts | 91 ++ .../searchV2/sidebar/useBrowsePagination.tsx | 107 +++ .../searchV2/sidebar/useSidebarAnalytics.ts | 70 ++ .../searchV2/sidebar/useSidebarEntities.ts | 34 + .../app/searchV2/sidebar/useSidebarFilters.ts | 51 ++ .../searchV2/sidebar/useSidebarPlatforms.tsx | 34 + .../app/searchV2/sorting/SearchSortSelect.tsx | 55 ++ .../searchV2/sorting/useGetSortOptions.tsx | 7 + .../src/app/searchV2/sorting/useSortInput.ts | 13 + .../suggestions/SearchQuerySugggester.tsx | 39 + .../useAdvancedSearchSelectFilters.tsx | 29 + .../app/searchV2/useGetSearchQueryInputs.ts | 40 + .../app/searchV2/useSearchAndBrowseVersion.ts | 34 + .../src/app/searchV2/useSearchViewAll.ts | 16 + .../src/app/searchV2/useToggleSidebar.ts | 23 + .../__tests__/applyFilterOverrides.test.ts | 105 +++ .../utils/__tests__/filterUtils.test.ts | 45 + .../utils/__tests__/generateOrFilters.test.ts | 168 ++++ .../searchV2/utils/applyFilterOverrides.ts | 31 + .../utils/combineSiblingsInAutoComplete.ts | 31 + .../combineSiblingsInSearchResults.test.ts | 551 ++++++++++++ .../utils/combineSiblingsInSearchResults.ts | 34 + .../src/app/searchV2/utils/constants.ts 
| 187 ++++ .../src/app/searchV2/utils/csvUtils.ts | 25 + .../app/searchV2/utils/filterSearchQuery.ts | 5 + .../src/app/searchV2/utils/filterUtils.ts | 215 +++++ .../utils/filtersToQueryStringParams.ts | 36 + .../app/searchV2/utils/generateOrFilters.ts | 81 ++ .../app/searchV2/utils/hasAdvancedFilters.ts | 12 + .../app/searchV2/utils/navigateToSearchUrl.ts | 38 + .../src/app/searchV2/utils/types.ts | 35 + ...wnloadScrollAcrossEntitiesSearchResults.ts | 51 ++ .../src/app/searchV2/utils/useFilters.ts | 45 + .../src/app/settings/Preferences.tsx | 121 ++- .../src/app/settings/posts/PostsList.tsx | 8 +- .../app/settings/posts/PostsListColumns.tsx | 9 +- .../src/app/settingsV2/AccessTokenModal.tsx | 106 +++ .../src/app/settingsV2/AccessTokens.tsx | 451 ++++++++++ .../src/app/settingsV2/CreateTokenModal.tsx | 217 +++++ .../src/app/settingsV2/Preferences.tsx | 156 ++++ .../src/app/settingsV2/SettingsPage.tsx | 278 ++++++ .../src/app/settingsV2/features/Feature.tsx | 184 ++++ .../src/app/settingsV2/features/Features.tsx | 116 +++ .../features/useDocPropagationSettings.ts | 50 ++ .../src/app/settingsV2/personal/utils.tsx | 19 + .../app/settingsV2/posts/CreatePostForm.tsx | 134 +++ .../app/settingsV2/posts/CreatePostModal.tsx | 183 ++++ .../src/app/settingsV2/posts/ManagePosts.tsx | 46 + .../src/app/settingsV2/posts/PostItemMenu.tsx | 70 ++ .../src/app/settingsV2/posts/PostsList.tsx | 221 +++++ .../app/settingsV2/posts/PostsListColumns.tsx | 32 + .../src/app/settingsV2/posts/constants.ts | 13 + .../src/app/settingsV2/posts/utils.ts | 77 ++ .../src/app/settingsV2/settingsPaths.tsx | 27 + datahub-web-react/src/app/settingsV2/utils.ts | 42 + .../src/app/shared/LastUpdated.tsx | 107 +++ .../src/app/shared/LinkWrapper.tsx | 24 + .../src/app/shared/MatchesContext.tsx | 21 + .../src/app/shared/ShowMoreSection.tsx | 9 +- .../src/app/shared/TabFullsizedContext.tsx | 13 + .../app/shared/TruncatedTextWithTooltip.tsx | 31 + .../app/shared/__tests__/LinkWrapper.test.tsx | 37 + 
.../TruncatedTextWithTooltip.test.tsx | 27 + .../app/shared/__tests__/pluralize.test.ts | 34 + .../src/app/shared/avatar/getAvatarColor.ts | 2 +- .../AddBusinessAttributeModal.tsx | 2 +- .../src/app/shared/error/ErrorSection.tsx | 1 + .../src/app/shared/recommendation.tsx | 11 +- .../app/shared/share/v2/ShareButtonMenu.tsx | 30 + .../app/shared/share/v2/ShareMenuAction.tsx | 28 + .../share/v2/items/CopyLinkMenuItem.tsx | 51 ++ .../shared/share/v2/items/CopyUrnMenuItem.tsx | 40 + .../shared/share/v2/items/EmailMenuItem.tsx | 52 ++ .../app/shared/share/v2/styledComponents.tsx | 144 +++ .../src/app/shared/styleUtils.ts | 17 + .../src/app/shared/subscribe/drawer/utils.tsx | 37 + .../src/app/shared/tags/AddTagsTermsModal.tsx | 137 ++- datahub-web-react/src/app/shared/textUtil.ts | 17 +- .../src/app/shared/time/timeUtils.tsx | 8 +- .../shared/useEmbeddedProfileLinkProps.tsx | 14 + .../src/app/shared/usePrevious.ts | 4 +- .../src/app/sharedV2/EntitySidebarContext.tsx | 28 + .../src/app/sharedV2/__tests__/utils.test.tsx | 50 ++ .../src/app/sharedV2/ant/OptionalTooltip.tsx | 16 + .../src/app/sharedV2/buttons/BackButton.tsx | 48 + .../app/sharedV2/cards/EntityCountCard.tsx | 94 ++ .../app/sharedV2/cards/SummaryEntityCard.tsx | 17 + .../src/app/sharedV2/cards/components.tsx | 16 + .../src/app/sharedV2/cards/filterFromHex.ts | 313 +++++++ .../src/app/sharedV2/carousel/Carousel.tsx | 157 ++++ .../sharedV2/carousel/HorizontalScroller.tsx | 160 ++++ datahub-web-react/src/app/sharedV2/colors.ts | 34 + .../src/app/sharedV2/colors/colorUtils.ts | 34 + .../app/sharedV2/icons/CompactFieldIcon.tsx | 53 ++ .../sharedV2/icons/ImageColoredBackground.tsx | 62 ++ .../src/app/sharedV2/icons/InfoPopover.tsx | 26 + .../src/app/sharedV2/icons/InfoTooltip.tsx | 33 + .../src/app/sharedV2/icons/PlatformIcon.tsx | 96 ++ .../src/app/sharedV2/icons/colorUtils.ts | 7 + .../sharedV2/icons/customIcons/CustomIcon.tsx | 14 + .../sharedV2/icons/customIcons/add-term.svg | 3 + 
.../src/app/sharedV2/icons/getTypeIcon.tsx | 36 + .../app/sharedV2/pagination/usePagination.ts | 19 + .../DocumentationPropagationDetails.tsx | 109 +++ .../propagation/LabelPropagationDetails.tsx | 94 ++ .../propagation/PropagationEntityLink.tsx | 56 ++ .../sharedV2/propagation/PropagationIcon.tsx | 22 + .../src/app/sharedV2/propagation/utils.ts | 30 + .../app/sharedV2/search/DownloadButton.tsx | 42 + .../src/app/sharedV2/search/EditButton.tsx | 35 + .../app/sharedV2/search/SearchMenuItems.tsx | 50 ++ .../src/app/sharedV2/sidebar/components.tsx | 42 + .../app/sharedV2/sidebar/useSidebarWidth.ts | 22 + .../src/app/sharedV2/sorting/useSorting.ts | 29 + .../src/app/sharedV2/tags/AddTagTerm.tsx | 50 ++ .../src/app/sharedV2/tags/DataProductLink.tsx | 131 +++ .../src/app/sharedV2/tags/DomainLink.tsx | 117 +++ .../src/app/sharedV2/tags/TagLink.tsx | 69 ++ .../src/app/sharedV2/tags/TagTermGroup.tsx | 294 ++++++ .../src/app/sharedV2/tags/tag/Tag.tsx | 193 ++++ .../src/app/sharedV2/tags/term/Term.tsx | 74 ++ .../app/sharedV2/tags/term/TermContent.tsx | 211 +++++ .../tags/usePropagationContextEntities.ts | 25 + .../sharedV2/text/MatchTextSizeWrapper.tsx | 32 + .../src/app/sharedV2/text/OverflowTitle.tsx | 66 ++ .../src/app/sharedV2/useGetEntities.ts | 18 + datahub-web-react/src/app/sharedV2/utils.tsx | 38 + .../src/app/useBuildEntityRegistry.ts | 7 +- .../src/app/useEntityRegistry.ts | 5 + datahub-web-react/src/app/useIsThemeV2.tsx | 78 ++ .../src/app/useShowNavBarRedesign.tsx | 38 + .../src/app/utils/navigateToUrl.ts | 22 + datahub-web-react/src/app/utils/queryUtils.ts | 15 + datahub-web-react/src/appConfigContext.tsx | 13 + datahub-web-react/src/conf/Global.ts | 8 + .../src/conf/theme/global-overrides-v2.less | 51 ++ .../src/conf/theme/global-overrides.less | 29 + .../src/conf/theme/global-variables-v2.less | 2 + .../src/conf/theme/theme_dark.config.json | 4 +- .../src/conf/theme/theme_light.config.json | 2 + .../src/conf/theme/theme_v2.config.json | 57 ++ 
datahub-web-react/src/conf/theme/types.ts | 1 + .../src/entityRegistryContext.tsx | 7 +- datahub-web-react/src/graphql/app.graphql | 13 + .../src/graphql/assertion.graphql | 98 +- datahub-web-react/src/graphql/chart.graphql | 1 + .../src/graphql/container.graphql | 20 + .../src/graphql/dashboard.graphql | 1 + .../src/graphql/dataFlow.graphql | 1 + datahub-web-react/src/graphql/dataJob.graphql | 4 + .../src/graphql/dataProduct.graphql | 22 + datahub-web-react/src/graphql/dataset.graphql | 133 ++- datahub-web-react/src/graphql/domain.graphql | 25 + .../src/graphql/fragments.graphql | 316 ++++++- .../src/graphql/glossary.graphql | 2 +- .../src/graphql/glossaryNode.graphql | 2 + .../src/graphql/glossaryTerm.graphql | 1 + .../src/graphql/incident.graphql | 30 + datahub-web-react/src/graphql/lineage.graphql | 266 +++++- datahub-web-react/src/graphql/me.graphql | 7 + .../src/graphql/mlFeature.graphql | 1 + .../src/graphql/mlFeatureTable.graphql | 1 + datahub-web-react/src/graphql/mlModel.graphql | 1 + .../src/graphql/mlModelGroup.graphql | 1 + .../src/graphql/mlPrimaryKey.graphql | 1 + .../src/graphql/mutations.graphql | 4 + datahub-web-react/src/graphql/query.graphql | 32 + .../src/graphql/relationships.graphql | 21 + .../src/graphql/schemaField.graphql | 61 ++ datahub-web-react/src/graphql/search.graphql | 135 ++- .../src/graphql/timeline.graphql | 25 + datahub-web-react/src/graphql/user.graphql | 43 + datahub-web-react/src/images/acryl_hero.svg | 497 +++++++++++ .../src/images/ambulance-icon.svg | 8 + .../src/images/analyticsMenuIcon.svg | 1 + .../src/images/assertion_error_dot.svg | 8 + .../src/images/assertion_failure_dot.svg | 5 + .../src/images/assertion_init_dot.svg | 5 + .../src/images/assertion_no_results_dot.svg | 5 + .../src/images/assertion_success_dot.svg | 5 + .../src/images/assertion_v2_failure_dot.svg | 3 + .../src/images/assertion_v2_success_dot.svg | 3 + .../src/images/datahub-platforms.svg | 317 +++++++ .../src/images/deprecated-status.svg | 3 + 
.../src/images/deprecation-icon.svg | 4 + .../src/images/governMenuIcon.svg | 1 + datahub-web-react/src/images/help-icon.svg | 3 + .../src/images/ingestionMenuIcon.svg | 1 + datahub-web-react/src/images/introduceBg.svg | 1 + .../src/images/lineage-status.svg | 6 + datahub-web-react/src/images/no-docs.svg | 54 ++ .../src/images/no-stats-available.svg | 50 ++ datahub-web-react/src/images/row-icon.svg | 3 + .../src/images/settingsMenuIcon.svg | 1 + datahub-web-react/src/images/shield-check.svg | 5 + .../src/images/shield-exclamation.svg | 6 + .../src/images/sidebarBackArrow.svg | 3 + .../images/tableau-embedded-data-source.svg | 8 + .../images/tableau-published-data-source.svg | 5 + datahub-web-react/src/images/tableau-view.svg | 6 + .../src/images/tableau-workbook.svg | 7 + .../src/images/timeline-icon.svg | 3 + .../src/images/trending-down-icon.svg | 8 + .../src/images/trending-up-icon.svg | 8 + datahub-web-react/src/setupTests.ts | 14 +- .../utils/test-utils/TestPageContainer.tsx | 95 +- datahub-web-react/yarn.lock | 836 +++++++++++++++++- docker/docker-compose.dev.yml | 4 + .../aspect/utils/DeprecationUtils.java | 11 +- .../request/AggregationQueryBuilder.java | 4 + .../request/AggregationQueryBuilderTest.java | 34 +- .../linkedin/assertion/AssertionSource.pdl | 2 + .../com/linkedin/common/Deprecation.pdl | 3 + .../com/linkedin/common/DisplayProperties.pdl | 20 + .../com/linkedin/common/IconLibrary.pdl | 12 + .../com/linkedin/common/IconProperties.pdl | 22 + .../identity/CorpUserAppearanceSettings.pdl | 5 + .../pegasus/com/linkedin/post/PostInfo.pdl | 8 +- .../pegasus/com/linkedin/post/PostType.pdl | 4 + .../src/main/resources/entity-registry.yml | 1 + .../graphql/featureflags/FeatureFlags.java | 9 + .../config/ChromeExtensionConfiguration.java | 13 + .../config/DataHubAppConfiguration.java | 3 + .../src/main/resources/application.yaml | 15 +- .../factory/graphql/GraphQLEngineFactory.java | 1 + ...com.linkedin.entity.entities.snapshot.json | 4 + 
...m.linkedin.platform.platform.snapshot.json | 4 + .../datahubusage/DataHubUsageEventType.java | 14 +- .../war/src/main/resources/boot/policies.json | 10 +- .../authorization/PoliciesConfig.java | 7 + .../e2e/auto_completeV2/v2_auto_complete.js | 44 + .../cypress/e2e/containersV2/v2_containers.js | 16 + .../cypress/e2e/domains/nested_domains.js | 52 +- .../cypress/e2e/domainsV2/v2_domains.js | 15 + .../e2e/domainsV2/v2_nested_domains.js | 266 ++++++ .../cypress/e2e/glossaryV2/v2_glossary.js | 73 ++ .../cypress/e2e/glossaryV2/v2_glossaryTerm.js | 114 +++ .../e2e/glossaryV2/v2_glossary_navigation.js | 147 +++ .../cypress/cypress/e2e/homeV2/v2_home.js | 14 + .../cypress/e2e/incidentsV2/v2_incidents.js | 35 + .../e2e/lineage/download_lineage_results.js | 2 +- .../lineageV2/v2_download_lineage_results.js | 92 ++ .../e2e/lineageV2/v2_impact_analysis.js | 161 ++++ .../e2e/lineageV2/v2_lineage_column_level.js | 58 ++ .../e2e/lineageV2/v2_lineage_column_path.js | 105 +++ .../cypress/e2e/lineageV2/v2_lineage_graph.js | 129 +++ .../cypress/cypress/e2e/loginV2/v2_login.js | 14 + .../cypress/e2e/mutations/add_users.js | 2 +- .../e2e/mutations/dataset_ownership.js | 20 +- .../cypress/e2e/mutations/ingestion_source.js | 4 +- .../manage_ingestion_secret_privilege.js | 6 +- .../e2e/mutationsV2/v2_ingestion_source.js | 82 ++ .../e2e/mutationsV2/v2_managed_ingestion.js | 46 + .../e2e/mutationsV2/v2_managing_secrets.js | 130 +++ .../e2e/operrationsV2/v2_operations.js | 15 + .../e2e/ownershipV2/v2_manage_ownership.js | 45 + .../cypress/cypress/e2e/query/query_tab.js | 3 +- .../e2e/schema_blameV2/v2_schema_blame.js | 59 ++ .../cypress/cypress/e2e/search/search.js | 2 +- .../cypress/e2e/search/searchFilters.js | 4 +- .../cypress/e2e/settings/homePagePost.js | 2 +- .../cypress/e2e/settings/manage_policies.js | 8 +- .../cypress/e2e/settings/managing_groups.js | 2 +- .../cypress/e2e/settingsV2/v2_homePagePost.js | 107 +++ .../e2e/settingsV2/v2_manage_access_tokens.js | 53 ++ 
.../e2e/settingsV2/v2_manage_policies.js | 143 +++ .../e2e/settingsV2/v2_managing_groups.js | 154 ++++ .../cypress/e2e/siblingsV2/v2_siblings.js | 153 ++++ .../cypress/e2e/task_runV2/v2_task_runs.js | 46 + .../cypress/e2e/viewV2/v2_manage_views.js | 48 + .../cypress/e2e/viewV2/v2_view_select.js | 96 ++ .../cypress/cypress/e2e/views/view_select.js | 4 +- .../tests/cypress/cypress/support/commands.js | 76 +- smoke-test/tests/cypress/data.json | 366 ++++++++ smoke-test/tests/cypress/incidents_test.json | 155 ++++ smoke-test/tests/cypress/integration_test.py | 7 + 1802 files changed, 135920 insertions(+), 1140 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/IncidentUtils.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDisplayPropertiesResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetTimelineResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DisplayPropertiesMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostType.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeEventMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeTransactionMapper.java create mode 100644 datahub-graphql-core/src/main/resources/query.graphql create mode 100644 datahub-web-react/src/AppV2.less create mode 100644 datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.tsx create mode 100644 
datahub-web-react/src/alchemy-components/components/AvatarStack/components.ts create mode 100644 datahub-web-react/src/alchemy-components/components/AvatarStack/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Bar/Bar.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Bar/Bar.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Bar/components.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Bar/constant.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Bar/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Bar/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYAccessorToZeroValues.ts create mode 100644 datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYScaleToZeroValues.ts create mode 100644 datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMaxDataValue.ts create mode 100644 datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMergedProps.ts create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/getColorAccessors.test.ts create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/prepareCalendarDate.test.ts create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/components.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/constants.ts create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/index.ts create mode 100644 
datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisBottomMonths.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisLeftWeekdays.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Calendar.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/CalendarContainer.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Day.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Month.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/TickLabel.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Week.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/constants.ts create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/private/context.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/CalendarChart/utils.ts create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/components.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/constants.ts create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/hooks/useVariantProps.ts create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/index.ts create mode 100644 
datahub-web-react/src/alchemy-components/components/DatePicker/styles.css create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/variants/common/props.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/components.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/props.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/DatePicker/variants/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Drawer/Drawer.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Drawer/Drawer.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Drawer/components.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Drawer/constants.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Drawer/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Drawer/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/GraphCard/GraphCard.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/GraphCard/GraphCard.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/GraphCard/MoreInfoModal.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/GraphCard/components.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/GraphCard/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/GraphCard/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.stories.tsx create mode 100644 
datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/IconLabel/component.ts create mode 100644 datahub-web-react/src/alchemy-components/components/IconLabel/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/IconLabel/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/IconLabel/utils.ts create mode 100644 datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/constant.ts create mode 100644 datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/LineChart/constants.ts create mode 100644 datahub-web-react/src/alchemy-components/components/LineChart/utils.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Loader/Loader.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Loader/Loading.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Loader/components.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Loader/constants.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Loader/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Loader/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Select/private/SelectLabelRenderer/SelectLabelRenderer.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Select/private/SelectLabelRenderer/variants/MultiSelectDefault.tsx create 
mode 100644 datahub-web-react/src/alchemy-components/components/Select/private/SelectLabelRenderer/variants/MultiSelectLabeled.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Select/private/SelectLabelRenderer/variants/SingleSelectDefault.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Select/private/SelectLabelRenderer/variants/SingleSelectLabeled.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/SelectItemCheckboxGroup.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/SelectItemPopover.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/SelectItems.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/SelectItems.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/__mock.data.ts create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/hooks.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/select-items-popover.less create mode 100644 datahub-web-react/src/alchemy-components/components/SelectItemsPopover/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Timeline/Timeline.stories.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Timeline/Timeline.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Timeline/components.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Timeline/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Timeline/types.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Tooltip2/Tooltip2.tsx create mode 100644 
datahub-web-react/src/alchemy-components/components/Tooltip2/TooltipHeader.tsx create mode 100644 datahub-web-react/src/alchemy-components/components/Tooltip2/components.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Tooltip2/index.ts create mode 100644 datahub-web-react/src/alchemy-components/components/Tooltip2/types.ts create mode 100644 datahub-web-react/src/app/auth/useGetLogoutHandler.ts create mode 100644 datahub-web-react/src/app/buildEntityRegistryV2.ts create mode 100644 datahub-web-react/src/app/dataviz/ChartCard.tsx create mode 100644 datahub-web-react/src/app/dataviz/ChartLoading.tsx create mode 100644 datahub-web-react/src/app/dataviz/Legend.tsx create mode 100644 datahub-web-react/src/app/dataviz/bar/BarChart.tsx create mode 100644 datahub-web-react/src/app/dataviz/bar/HorizontalBarChart.tsx create mode 100644 datahub-web-react/src/app/dataviz/bar/HorizontalFullBarChart.tsx create mode 100644 datahub-web-react/src/app/dataviz/candle/CandleStick.tsx create mode 100644 datahub-web-react/src/app/dataviz/components.ts create mode 100644 datahub-web-react/src/app/dataviz/constants.ts create mode 100644 datahub-web-react/src/app/dataviz/index.ts create mode 100644 datahub-web-react/src/app/dataviz/line/SimpleLineChart.tsx create mode 100644 datahub-web-react/src/app/dataviz/pie/PieChart.tsx create mode 100644 datahub-web-react/src/app/dataviz/pie/usePieDataAnnotation.ts create mode 100644 datahub-web-react/src/app/dataviz/stat/SingleStat.tsx create mode 100644 datahub-web-react/src/app/dataviz/utils.ts create mode 100644 datahub-web-react/src/app/domainV2/CreateDomainModal.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainAutocompleteOptions.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainIcon.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainItemMenu.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainListColumns.tsx create mode 100644 
datahub-web-react/src/app/domainV2/DomainRoutes.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainSearch.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainSearchResultItem.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainsContext.tsx create mode 100644 datahub-web-react/src/app/domainV2/DomainsList.tsx create mode 100644 datahub-web-react/src/app/domainV2/EmptyDomainDescription.tsx create mode 100644 datahub-web-react/src/app/domainV2/EmptyDomainsSection.tsx create mode 100644 datahub-web-react/src/app/domainV2/ManageDomainsPage.tsx create mode 100644 datahub-web-react/src/app/domainV2/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/domainV2/nestedDomains/DomainsSidebarHeader.tsx create mode 100644 datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsPageV2.tsx create mode 100644 datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsSidebar.tsx create mode 100644 datahub-web-react/src/app/domainV2/nestedDomains/RootDomains.tsx create mode 100644 datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNavigator.tsx create mode 100644 datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNode.tsx create mode 100644 datahub-web-react/src/app/domainV2/useListDomains.tsx create mode 100644 datahub-web-react/src/app/domainV2/utils.ts create mode 100644 datahub-web-react/src/app/entity/query/QueryEntity.tsx create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/acrylTypes.tsx create mode 100644 datahub-web-react/src/app/entity/shared/useEntityState.ts create mode 100644 datahub-web-react/src/app/entityV2/Access/RoleEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/Access/RoleEntityProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/DefaultEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/Entity.tsx create mode 100644 datahub-web-react/src/app/entityV2/EntityPage.tsx create mode 100644 
datahub-web-react/src/app/entityV2/EntityRegistry.tsx create mode 100644 datahub-web-react/src/app/entityV2/businessAttribute/BusinessAttributeEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/businessAttribute/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/businessAttribute/preview/_tests_/Preview.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeDataTypeSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeRelatedEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/ChartEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/preview/ChartPreview.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/profile/stats/ChartStatsSummarySubHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/shared/ChartStatsSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/ChartFieldsTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryOverview.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/EmbedPreview.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/FieldTableByTag.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/SummaryQuerySection.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/TableauEmbed.tsx create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/styledComponents.ts create mode 100644 datahub-web-react/src/app/entityV2/chart/summary/useGetTagFields.ts create mode 100644 datahub-web-react/src/app/entityV2/container/ContainerEntitiesTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/container/ContainerEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/container/ContainerSummaryTab.tsx 
create mode 100644 datahub-web-react/src/app/entityV2/container/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/container/tableau/TableauDataSourcesSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/container/tableau/TableauViewsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/container/tableau/TableauWorkbookSummaryTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/dashboard/DashboardEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dashboard/preview/DashboardPreview.tsx create mode 100644 datahub-web-react/src/app/entityV2/dashboard/profile/DashboardStatsSummarySubHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/dashboard/shared/DashboardStatsSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryOverview.tsx create mode 100644 datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataFlow/DataFlowEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataFlow/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataJob/DataJobEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataJob/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataJob/tabs/RunsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataPlatform/DataPlatformEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataPlatformInstance/DataPlatformInstanceEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProcessInstance/DataProcessInstanceEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProcessInstance/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/AddOutputPortCard.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/AssetsSections.tsx create mode 100644 
datahub-web-react/src/app/entityV2/dataProduct/DataProductEntitiesTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/DataProductEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/DataProductSummaryTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/OutputPortsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/constants.ts create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssets.ts create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssetsCount.ts create mode 100644 datahub-web-react/src/app/entityV2/dataProduct/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/DatasetEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/Lineage.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/OperationsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/UsageFacepile.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Lineage.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Properties.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Schema.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/SchemaDescriptionField.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Stats.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPath.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPathSegment.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/utils.test.tsx create mode 100644 
datahub-web-react/src/app/entityV2/dataset/profile/schema/components/CustomPagination.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/InteriorTitleContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaDescriptionField.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaFilterSelectContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRawView.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRow.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaSearchInput.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaVersionSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/StructuredPropValues.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/TypeIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/components/VersionSelector.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/constants.ts create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTitleRenderer.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTypeRenderer.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPath.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPathSegment.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/types.ts create mode 100644 
datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/Stats.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/StatsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/HistoricalStatsView.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/ProfilingRunsChart.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/StatChart.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/LatestStatsView.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/SnapshotStatsView.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stats/stats/DatasetStatsSummarySubHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stories/documentation.ts create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stories/lineageEntities.ts create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stories/properties.ts create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stories/sampleSchema.ts create mode 100644 datahub-web-react/src/app/entityV2/dataset/profile/stories/stats.ts create mode 100644 datahub-web-react/src/app/entityV2/dataset/shared/DatasetStatsSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/shared/ExpandingStat.tsx create mode 100644 datahub-web-react/src/app/entityV2/dataset/shared/FormattedBytesStat.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/DataProductsTab/CreateDataProductModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductBuilderForm.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductResult.tsx create mode 100644 
datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/DataProductsTab/EditDataProductModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/DataProductsTab/types.ts create mode 100644 datahub-web-react/src/app/entityV2/domain/DomainEntitiesTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/DomainEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/preview/DomainEntitiesSnippet.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/summary/ContentSectionLoading.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/summary/ContentsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/summary/DataProductsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/summary/DocumentationSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/summary/DomainSummaryTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/summary/OwnerDetail.tsx create mode 100644 datahub-web-react/src/app/entityV2/domain/summary/OwnersSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryNode/ChildrenTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryNode/GlossaryNodeEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryNode/_tests_/utils.test.ts create mode 100644 datahub-web-react/src/app/entityV2/glossaryNode/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryNode/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryRelatedAssetsTabHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryTermEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/_tests_/utils.test.ts create mode 100644 
datahub-web-react/src/app/entityV2/glossaryTerm/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/preview/__tests__/Preview.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/AddRelatedTermsModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTerms.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTermsResult.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossarySidebarAboutSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryTermHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/RelatedTerm.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/SchemaView.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryRelatedTerms.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryTermHeader.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/profile/useRemoveRelatedTerms.tsx create mode 100644 datahub-web-react/src/app/entityV2/glossaryTerm/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/group/AddGroupMembersModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/Group.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupAssets.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupBasicInfoSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupEditModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupInfoHeaderSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupMemberLink.tsx create mode 100644 
datahub-web-react/src/app/entityV2/group/GroupMembers.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupMembersSidebarSectionContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupOwnerSidebarSectionContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupProfileInfoCard.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupSidebar.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupSidebarMembersSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/GroupSidebarOwnersSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/group/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeature/MLFeatureEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeature/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeatureTable/MLFeatureTableEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeatureTable/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeatureTable/profile/Sources.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureDataTypeIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/TableOfMlFeatures.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlModel/MLModelEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlModel/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlModel/profile/MLModelGroupsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlModel/profile/MLModelSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlModel/profile/MlModelFeaturesTab.tsx create mode 100644 
datahub-web-react/src/app/entityV2/mlModelGroup/MLModelGroupEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlModelGroup/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlModelGroup/profile/ModelGroupModels.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlPrimaryKey/MLPrimaryKeyEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/mlPrimaryKey/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/ManageOwnership.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/OwnershipBuilderModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/OwnershipList.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/table/ActionsColumn.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/table/DescriptionColumn.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/table/NameColumn.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/table/OwnershipTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/ownership/table/types.ts create mode 100644 datahub-web-react/src/app/entityV2/query/QueryEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/schemaField/SchemaFieldEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/schemaField/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/entityV2/schemaField/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/schemaField/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/ActorAvatar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/CreateGlossaryEntityModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/DeleteEntityMenuAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/DomainParentSelect.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/EntityDropdown/EntityDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/EntityMenuActions.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/ExternalUrlMenuAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/MoreOptionsMenuAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/MoveDomainModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/MoveEntityMenuAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/MoveGlossaryEntityModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/NodeParentSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/RaiseIncidentMenuAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationMenuAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/__tests__/NodeParentSelect.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/index.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/styledComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/useDeleteEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/useDeleteGlossaryEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/useHandleDeleteDomain.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/useHandleMoveDomainComplete.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/useParentSelector.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/EntityDropdown/utils.ts create mode 100644 
datahub-web-react/src/app/entityV2/shared/EntityGroups.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntitySearchInput/EntitySearchInput.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/EntitySearchInput/EntitySearchInputResult.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/GlossaryEntityContext.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/PreviewContext.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/SearchCardContext.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/SidebarStyledComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/StyledSyntaxHighlighter.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/UrlButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/__tests__/DeprecationPill.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/announce/CreateEntityAnnouncementModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/ListSearch/AcrylListSearch.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/legacy/DescriptionModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/legacy/Properties.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/AddLinkModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/DemoButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/DeprecationIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/EmptyTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/ExpandedActor.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/ExpandedActorGroup.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/components/styled/ExpandedOwner/ExpandedOwner.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/ExpandedOwner/OwnerContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/InfoItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/MarkAsDeprecatedButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/SeeMore.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/StatsSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/StripMarkdownText.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/StyledButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/StyledMDEditor.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/StyledTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/StyledTag.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/TabToolbar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/DownloadAsCsvModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/EmbeddedListSearch.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/EmbeddedListSearchEmbed.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/EmbeddedListSearchHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/EmbeddedListSearchModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/EmbeddedListSearchResults.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/EmbeddedListSearchSection.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/components/styled/search/EntitySearchResults.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/MatchingViewsLabel.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/SearchSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/SearchSelectActions.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/SearchSelectBar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/SearchSelectModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/ActionDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/DataProductsDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/DeleteDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/DeprecationDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/DomainsDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/GlossaryTermsDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/OwnersDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/action/TagsDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/downloadAsCsvUtil.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/navigateToEntitySearchUrl.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/types.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/components/styled/search/useInitializeColumnLineageCards.ts create mode 100644 
datahub-web-react/src/app/entityV2/shared/components/styled/search/useInitializeSearchResultCards.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/components/subtypes.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/constants.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/EntityProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/__tests__/EntityHeader.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/__tests__/EntityProfile.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/__tests__/PlatformContent.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/DefaultEntityHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityCount.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityHeaderLoadingSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityHealthPopover.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityHealthStatus.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityName.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityPlatformLoadingSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/EntityTabs.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/GlossaryPreviewCardDecoration.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/IconPicker/IconColorPicker.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/containers/profile/header/IconPicker/IconPicker.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/ContainerIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/ContainerLink.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/ParentNodesView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/ParentNodesViewForSearchRedesign.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/PlatformContentContainer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/PlatformContentView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/PlatformHeaderIcons.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/StackPlatformImages.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/constants.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/PlatformContent/index.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/StructuredPropertyBadge.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/header/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/AboutSection/DescriptionSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/AboutSection/EmptyContentSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/AboutSection/LinksSection.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/AboutSection/SidebarAboutSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/AboutSection/SourceRefSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Chart/Header/SidebarChartHeaderSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Chart/Header/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Container/ContainerSelectModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Container/SidebarContentsLoadingSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Container/SidebarContentsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Container/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Dashboard/Header/SidebarDashboardHeaderSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/DataProduct/DataProductSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/DataProduct/SetDataProductModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Dataset/Header/SidebarDatasetHeaderSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Dataset/StatsSidebarSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Dataset/View/SidebarViewDefinitionSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Domain/SetDomainModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Domain/SidebarDataProductsSection.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Domain/SidebarEntitiesLoadingSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Domain/SidebarEntitiesSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Domain/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EmptySectionText.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EntityBackButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EntityProfileSidebar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EntityProfileSidebarSearchHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EntitySidebar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EntitySidebarSectionsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/EntitySidebarTabs.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/ExploreLineageAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/FormInfo/CompletedView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/FormInfo/FormInfo.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/FormInfo/IncompleteView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/FormInfo/SidebarFormInfoWrapper.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/FormInfo/components.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/HeaderAndTabs.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/LastIngested.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Lineage/SidebarLineageLoadingSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Lineage/SidebarLineageSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Lineage/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/LinkButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Ownership/LdapFormItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Ownership/OwnershipTypesSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Ownership/ownershipUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Ownership/sidebar/OwnershipTypeSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Platform/SelectPlatformModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/ProfileSidebarResizer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Query/SidebarQueryCreatedAtSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Query/SidebarQueryDefinitionSection.tsx create mode 
100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Query/SidebarQueryDescriptionSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Query/SidebarQueryOperationsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Query/SidebarQueryUpdatedAtSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Recommendations/SidebarEntityRecommendations.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/Recommendations/SidebarRecommendationsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SectionActionButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarCollapseIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarCollapsibleHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarCompactSchemaSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarEntityHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarGlossaryTermsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarHeaderSectionColumns.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarLogicSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/SidebarTagsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/TitleAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/__tests__/LastIngested.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/__tests__/SidebarLogicSection.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/EntityProperty.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/SidebarPopularityHeaderSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/SidebarTopUsersHeaderSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/StatusSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/SyncedOrShared.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/SyncedOrSharedTooltip.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/TimeProperty.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/TopUsersFacepile.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/popularity/PopularityIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/popularity/PopularityIconBar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/styledComponents.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/shared/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/sidebar/tagRenderer.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/containers/profile/useGetDataForProfile.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/containers/profile/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/EmbeddedHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/EmbeddedHealthIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/EmbeddedProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/ActiveIncidents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/FailingAssertions.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/FailingEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/FailingInputs.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/UpstreamEntitiesList.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/UpstreamHealth.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/__tests__/utils.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/embed/UpstreamHealth/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/entity/EntityActions.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/entity/NonExistentEntityPage.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/entityForm/EntityForm.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/entityForm/EntityFormModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/externalUrl/ViewInPlatform.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/links/DataProductMiniPreview.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/links/DataProductMiniPreviewAddDataProduct.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/links/DomainColoredIcon.tsx create 
mode 100644 datahub-web-react/src/app/entityV2/shared/links/DomainMiniPreview.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/links/EntityExternalLink.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/links/GlossaryTermMiniPreview.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/links/__tests__/colorUtils.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/notes/NotesSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/sidebarSection/AboutSidebarSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/sidebarSection/ShowMoreSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/sidebarSection/SidebarNotesSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/sidebarSection/SidebarStructuredProperties.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/sidebarSection/UserGroupSidebarSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/sidebarSection/UserOwnershipSideBarSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/stats/PercentileLabel.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/stats/__tests__/statsUtils.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/stats/statsUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/summary/HeaderComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/summary/IconComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/summary/ListComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/summary/SummaryAboutSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/summary/SummaryCreatedBySection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/AccessManagement/AccessManagement.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/AccessManagement/AccessManagerDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/AccessManagement/__tests__/AccessManagement.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/AccessManagement/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Governance/TestResults.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Governance/TestResultsList.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Governance/TestResultsSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Governance/testUtils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/AddButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/CopyQuery.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/EmptyQueriesSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueriesListSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueriesTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/Query.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryBuilderForm.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryBuilderModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryCard.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryCardDetails.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryCardDetailsMenu.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryCardEditButton.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryCardHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryCardQuery.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryFilters/QueryFilters.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryFilters/useColumnsFilter.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryFilters/useUsersFilter.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/QueryModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/cacheUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/queryColumns.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/types.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/useDownstreamQueries.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/useHighlightedQueries.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/usePopularQueries.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/useQueryTableColumns.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/useRecentQueries.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/utils/constants.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/utils/filterQueries.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/utils/getCurrentPage.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/utils/getTopNQueries.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Queries/utils/mapQuery.ts create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/CompactSchemaTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/SchemaContext.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/SchemaTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/SchemaTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/__tests__/filterSchemaRows.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/ConstraintLabels.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/ExpandIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/MenuColumn.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/NestedRowIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/AboutFieldTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/DrawerFooter.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDetails.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldPath.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldPopularity.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTags.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTerms.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTitle.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldUsageStats.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/PopularityBars.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/SampleValuesSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/SchemaFieldDrawer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/SchemaFieldDrawerTabs.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/SchemaFieldQueriesSidebarTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/StatsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/StatsSidebarColumnTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/StatsSidebarContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/StatsSidebarHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/StatsSidebarView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/StatsSummaryRow.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/TrendDetail.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/components.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/components/TypeLabel.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/history/ChangeEvent.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/history/ChangeTransactionView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/history/HistorySidebar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/history/__tests__/changeEventToString.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/history/changeEventToString.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/history/historyUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/useGetEntitySchema.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/useKeyboardControls.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/useSchemaVersioning.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/__tests__/useExtractDescriptionInfo.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/__tests__/useExtractFieldGlossaryTermsInfo.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/__tests__/useExtractFieldTagsInfo.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/filterSchemaRows.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/getExpandedDrawerFieldPath.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/getFieldDescriptionDetails.ts create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/getSchemaFilterTypesFromUrl.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/queryStringUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/statsUtil.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/updateSchemaFilterQueryString.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useExtractFieldDescriptionInfo.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useExtractFieldGlossaryTermsInfo.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useExtractFieldTagsInfo.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useGetStructuredPropColumns.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useGetTableColumnProperties.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRendererFeatureTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Schema/utils/useUsageStatsRenderer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/StatsHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/StatsSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/StatsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/constants.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/historical/HistoricalStats.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/historical/LookbackWindowSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/historical/charts/ProfilingRunsChart.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/historical/charts/StatChart.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/historical/shared/PrefixedSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/lookbackWindows.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/snapshot/ColumnStats.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/snapshot/SampleValueTag.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/snapshot/TableStats.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Stats/viewType.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Timeline/SchemaTimelineSection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionDetails.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionDetailsHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionResultsChart.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionResultsChartHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionResultsChartTimeline.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionsList.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionsSummaryLoading.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionsTable.tsx 
create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylAssertionsTableColumns.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AcrylValidationsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionGroupHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcryAssertionTypeSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionFilters.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionList.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionListConstants.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionListFilters.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionListStatusDot.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionListTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionProgressBar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AcrylAssertionRecommendedFilters.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AssertionGroupHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AssertionListTitleContainer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/AssertionName.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/DataContractBadge.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/GroupByTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/StyledComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/Summary/AcrylAssertionSummaryCard.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/Summary/AcrylAssertionSummarySection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/Summary/AcrylAssertionSummaryTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/Summary/AcrylAssertionsSummaryLoading.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/Tags/AcrylAssertionTagColumn.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/constant.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/hooks.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/types.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionList/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionMenu.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionPlatformAvatar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/AssertionResultTimeline.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/Assertions.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/BooleanTimeline.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionDetails.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionLogicModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionResultDetails.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionsSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/FieldAssertionDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/FreshnessAssertionDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/SchemaAssertionDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/SchemaSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/SchemaSummaryModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/SqlAssertionDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/ValidationsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/VolumeAssertionDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/__tests__/useGetValidationsTab.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/acrylTypes.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/acrylUtils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/details/PrimaryButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/hooks.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/AssertionProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/AssertionProfileDrawer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/AssertionProfileFooter.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/AssertionProfileHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/AssertionProfileHeaderLoading.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/AssertionTabs.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/ActionItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/Actions.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/AssertionListItemActions.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/ContractAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/CopyLinkAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/CopyUrnAction.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/ExternalUrlAction.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/styledComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/actions/useIsContractsEnabled.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/shared/AssertionResultDot.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/shared/CloseButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/shared/isExternalAssertion.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/shared/result/AssertionResultPopover.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/shared/result/AssertionResultPopoverContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/shared/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/AssertionDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/AssertionSummaryContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/AssertionSummaryLoading.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/AssertionSummarySection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/AssertionSummaryTab.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/NoResultsSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/popoverUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/table/AssertionResultsLoadingItems.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/table/AssertionResultsTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/table/AssertionResultsTableItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/AssertionResultsTimeline.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/AssertionResultsTimelineViz.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/AssertionTimelineSkeleton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/ColumnMetricAssertionsResultsGraph.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/ColumnValueAssertionsResultsGraph.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/CustomSqlAssertionsResultsGraph.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/FreshnessAssertionsResultsGraph.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/TimeSelect.tsx create 
mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/VolumeAssertionResultsGraph.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/charts/StatusOverTimeAssertionResultChart.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/charts/types.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/charts/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/transformers.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/result/timeline/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/schedule/AssertionScheduleSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/schedule/AssertionScheduleSummarySection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/schedule/ProviderSummarySection.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/AssertionResultPill.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/DetailedErrorMessage.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/DetailedErrorMessageTooltip.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/SelectablePill.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/assertionUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/constants.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/resultExtractionUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/shared/resultMessageUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/profile/summary/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertionUtils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/DataContractAssertionStatus.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/DataContractSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/DataContractSummaryFooter.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/DataContractTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/DataQualityContractSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/FreshnessContractSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/FreshnessScheduleSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/SchemaContractSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/builder/DataContractAssertionGroupSelect.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/builder/DataContractBuilder.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/builder/DataContractBuilderModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/builder/types.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/builder/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/contract/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/fieldDescriptionUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/shared/constant.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/shared/styledComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/useGetValidationsTab.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/utils.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Dataset/View/ViewDefinitionTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/DocumentationTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/__tests__/components/editor/extensions/htmlToMarkdown.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/__tests__/components/editor/extensions/markdownToHtml.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/CompactMarkdownViewer.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/DescriptionEditor.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/DescriptionEditorToolbar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/DescriptionPreview.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/DescriptionPreviewModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/DescriptionPreviewToolbar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/DiscardDescriptionModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/LinkList.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/SourceDescription.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/Editor.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/EditorTheme.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/OnChangeMarkdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/__mocks__/Editor.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/__tests__/Editor.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/extensions/htmlToMarkdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/extensions/markdownToHtml.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/extensions/mentions/DataHubMentionsExtension.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/extensions/mentions/MentionsComponent.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/extensions/mentions/MentionsDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/extensions/mentions/MentionsNodeView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/extensions/mentions/useDataHubMentions.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/AddImageButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/AddLinkButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/CodeBlockToolbar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/CommandButton.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/FloatingToolbar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/HeadingMenu.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/Icons.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/LinkModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/TableCellMenu.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/toolbar/Toolbar.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/components/editor/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Documentation/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Embed/EmbedTab.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Entity/ChartDashboardsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/ColumnTabNameHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/DashboardChartsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/DashboardDatasetsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/DataFlowJobsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/DataJobFlowTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/InputFieldsTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/TabNameWithCount.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/__tests__/DataJobFlowTab.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/components/EntityList.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/weaklyTypedAspects/DynamicPropertiesTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/weaklyTypedAspects/DynamicTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/weaklyTypedAspects/DynamicTabularTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Entity/weaklyTypedAspects/TableValueElement.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Incident/IncidentTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Incident/components/AddIncidentModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Incident/components/IncidentListItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Incident/components/IncidentSummary.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Incident/components/IncidentsLoadingSection.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Incident/components/ResolveIncidentModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Incident/incidentUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/ColumnLineageSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/CompactLineageTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/ImpactAnalysis.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/LineageColumnView.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/LineageTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/LineageTabContext.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/LineageTabTimeSelector.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/LineageTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/generateUseDownloadScrollAcrossLineageSearchResultsHook.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/generateUseSearchResultsViaRelationshipHook.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/hooks.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Lineage/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/ML/MlFeatureFeatureTableTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/ML/MlPrimaryKeyFeatureTableTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/CardinalityLabel.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/NameColumn.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/PropertiesTab.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/StructuredPropertyTooltip.tsx create mode 100644 
datahub-web-react/src/app/entityV2/shared/tabs/Properties/StructuredPropertyValue.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/TabHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/ValuesColumn.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/__tests__/useStructuredProperties.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/types.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/useStructuredProperties.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/useUpdateExpandedRowsFromFilter.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/Properties/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/tabs/__tests__/utils.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/shared/types.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/useIsSeparateSiblingsMode.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/useQueryParamValue.ts create mode 100644 datahub-web-react/src/app/entityV2/shared/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/structuredProperty/StructuredPropertyEntity.tsx create mode 100644 datahub-web-react/src/app/entityV2/tag/CompactTagProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/tag/Tag.tsx create mode 100644 datahub-web-react/src/app/entityV2/tag/TagProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/tag/__tests__/TagProfile.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/User.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/UserAssets.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/UserBasicInfoContainer.tsx create mode 100644 
datahub-web-react/src/app/entityV2/user/UserEditProfileModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/UserGroups.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/UserHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/UserProfile.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/UserProfileInfoCard.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/UserSidebar.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/__tests__/UserHeader.test.tsx create mode 100644 datahub-web-react/src/app/entityV2/user/preview/Preview.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/ManageViews.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/ViewTypeLabel.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/ViewsList.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/ViewsTable.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/entityV2/view/builder/ViewBuilder.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/builder/ViewBuilderForm.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/builder/ViewBuilderModal.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/builder/ViewDefinitionBuilder.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/builder/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/entityV2/view/builder/types.ts create mode 100644 datahub-web-react/src/app/entityV2/view/builder/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/view/cacheUtils.ts create mode 100644 datahub-web-react/src/app/entityV2/view/menu/ViewDropdownMenu.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/DeleteViewItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/EditViewItem.tsx create mode 100644 
datahub-web-react/src/app/entityV2/view/menu/item/IconItemTitle.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/PreviewViewItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/RemoveGlobalDefaultItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/RemoveUserDefaultItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/SetGlobalDefaultItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/SetUserDefaultItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/menu/item/ViewItem.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewOption.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewOptionName.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewOptionTooltipTitle.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewSelect.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewSelectContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewSelectDropdown.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewSelectHeader.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewSelectPopoverContent.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/ViewsTableColumns.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/renderSelectedView.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/renderViewOptionGroup.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/style.css create mode 100644 datahub-web-react/src/app/entityV2/view/select/styledComponents.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/select/utils.ts create mode 100644 datahub-web-react/src/app/entityV2/view/shared/DefaultViewIcon.tsx create mode 100644 
datahub-web-react/src/app/entityV2/view/shared/GlobalDefaultViewIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/shared/UserDefaultViewIcon.tsx create mode 100644 datahub-web-react/src/app/entityV2/view/types.ts create mode 100644 datahub-web-react/src/app/entityV2/view/utils.ts create mode 100644 datahub-web-react/src/app/glossaryV2/BusinessGlossaryPage.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/EmptyGlossarySection.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryBrowser/GlossaryBrowser.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryBrowser/NodeItem.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryBrowser/TermItem.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryContentProvider.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryEntitiesList.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryEntityItem.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryListCard.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryNodeCard.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryRoutes.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossarySearch.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossarySidebar.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/GlossaryStatsProvider.tsx create mode 100644 datahub-web-react/src/app/glossaryV2/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/glossaryV2/cacheUtils.ts create mode 100644 datahub-web-react/src/app/glossaryV2/colorUtils.ts create mode 100644 datahub-web-react/src/app/glossaryV2/utils.ts create mode 100644 datahub-web-react/src/app/homeV2/HomePage.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/announcement/Announcement.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/announcement/Announcements.tsx create mode 100644 
datahub-web-react/src/app/homeV2/action/announcement/useGetUnseenAnnouncements.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/learn/Resources.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/nudge/Domains.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/nudge/Glossary.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/nudge/Ingestion.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/nudge/MetadataTests.tsx create mode 100644 datahub-web-react/src/app/homeV2/action/nudge/UserProfile.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/HorizontalListSkeletons.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/recent/EntityCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/recent/EntityCardList.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/recent/RecentActions.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/recent/RecentlyEditedOrViewed.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/recent/constants.ts create mode 100644 datahub-web-react/src/app/homeV2/content/recent/useGetRecentActions.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/CenterTab.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/CenterTabs.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/CountBadge.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/activity/ActivityTab.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/activity/cards/AssertionResult.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/activity/cards/DocumentationEdit.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/activity/cards/DomainCreate.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/activity/cards/IncidentStatus.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/announcements/AnnouncementCard.tsx create 
mode 100644 datahub-web-react/src/app/homeV2/content/tabs/announcements/AnnouncementsSkeleton.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/announcements/AnnouncementsTab.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/announcements/EmptyAnnouncements.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/announcements/useGetAnnouncements.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/DiscoveryTab.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/Section.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/dataProducts/DataProductCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/dataProducts/DataProducts.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/dataProducts/useGetDataProducts.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/domains/DomainCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/domains/Domains.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/domains/useGetDomains.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/InsightStatusProvider.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/Insights.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/FoundationalAssetsCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/InsightLoadingCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/MostFrequentlyUpdated.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/MostQueriedCard.tsx create mode 100644 
datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/MostRowsCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/MostUsersCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/MostViewedDashboards.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/NewAssetsGraphCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/PopularGlossaryTerms.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/RecentlyCreatedDatasetsCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/RecentlyUpdatedDatasetsCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/SearchListInsightCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useGetFoundationalAssets.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useGetMostQueried.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useGetMostRows.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useGetMostUpdated.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useGetMostUsers.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useGetMostViewedDashboards.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useGetSearchAssets.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useRecentlyCreatedDatasets.tsx create mode 100644 
datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/cards/useRecentlyUpdatedDatasets.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/shared/InsightCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/insight/shared/InsightCardSkeleton.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/platform/PlatformCard.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/platform/Platforms.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/platform/useGetDataPlatforms.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/discovery/sections/platform/useGetPlatforms.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/tabs.tsx create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/types.ts create mode 100644 datahub-web-react/src/app/homeV2/content/tabs/useGetVisibleTabs.tsx create mode 100644 datahub-web-react/src/app/homeV2/introduce/IntroduceYourself.tsx create mode 100644 datahub-web-react/src/app/homeV2/introduce/IntroduceYourselfLeftSidebar.tsx create mode 100644 datahub-web-react/src/app/homeV2/introduce/IntroduceYourselfMainContent.tsx create mode 100644 datahub-web-react/src/app/homeV2/introduce/PersonaSelector.tsx create mode 100644 datahub-web-react/src/app/homeV2/introduce/useRedirectToIntroduceYourself.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/CenterContent.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/CustomNavLink.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/LeftSidebar.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/NavLinks.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/NavLinksMenu.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/NavSidebar.tsx create mode 100644 
datahub-web-react/src/app/homeV2/layout/RightSidebar.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarContext.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarHeader.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarMenu.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarMenuItem.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarMenuItemDropdown.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarMenuItemGroup.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarSkeleton.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavBarToggler.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/NavSidebar.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/types.ts create mode 100644 datahub-web-react/src/app/homeV2/layout/navBarRedesign/useSelectedKey.ts create mode 100644 datahub-web-react/src/app/homeV2/layout/shared/styledComponents.tsx create mode 100644 datahub-web-react/src/app/homeV2/layout/types.ts create mode 100644 datahub-web-react/src/app/homeV2/persona/PersonalizationLoadingModal.tsx create mode 100644 datahub-web-react/src/app/homeV2/persona/useLoadUserPersona.tsx create mode 100644 datahub-web-react/src/app/homeV2/persona/useUserPersona.ts create mode 100644 datahub-web-react/src/app/homeV2/reference/header/GreetingText.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/header/UserHeader.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/header/UserHeaderImage.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/header/getGreetingText.ts create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/DefaultEmptyEntityList.tsx create mode 100644 
datahub-web-react/src/app/homeV2/reference/sections/EntityLink.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/EntityLinkList.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/EntityLinkListSkeleton.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/assets/AssetsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/assets/EmptyAssetsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/assets/useGetAssetsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/domains/DataProductsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/domains/DomainsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/domains/EmptyDomainsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/domains/useGetDomainsYouOwn.ts create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/glossary/EmptyGlossaryNodesYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/glossary/GlossaryNodesYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/glossary/useGetGlossaryNodesYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/groups/EmptyGroupsYouAreIn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/groups/GroupsYouAreIn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/groups/useGetGroupsYouAreIn.ts create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/pinned/PinnedLink.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/pinned/PinnedLinkList.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/pinned/PinnedLinkLogo.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/pinned/PinnedLinks.tsx create mode 100644 
datahub-web-react/src/app/homeV2/reference/sections/pinned/useGetPinnedLinks.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/tags/EmptyTagsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/tags/TagsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/sections/tags/useGetTagsYouOwn.tsx create mode 100644 datahub-web-react/src/app/homeV2/reference/types.ts create mode 100644 datahub-web-react/src/app/homeV2/shared/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/homeV2/shared/types.ts create mode 100644 datahub-web-react/src/app/homeV2/shared/updateLastViewedAnnouncementTime.tsx create mode 100644 datahub-web-react/src/app/homeV2/shared/useGetLastViewedAnnouncementTime.tsx create mode 100644 datahub-web-react/src/app/homeV2/shared/utils.ts create mode 100644 datahub-web-react/src/app/identity/user/useUserTitle.ts create mode 100644 datahub-web-react/src/app/lineageV2/LineageDisplay.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEdge/LineageTableEdge.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEdge/TentativeEdge.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/Column.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/ColumnSearch.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/Columns.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/ContainerPath.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/ContractLineageButton.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/ExpandLineageButton.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/GhostEntityMenu.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/LineageEntityNode.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/ManageLineageMenu.tsx 
create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/NodeContents.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/NodeSkeleton.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/SchemaFieldNodeContents.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/components.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/useAvoidIntersections.ts create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/useDisplayedColumns.ts create mode 100644 datahub-web-react/src/app/lineageV2/LineageEntityNode/useOnClickExpandLineage.ts create mode 100644 datahub-web-react/src/app/lineageV2/LineageExplorer.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageFilterNode/LineageFilterNodeBasic.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageFilterNode/LineageFilterSearch.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageFilterNode/ShowMoreButton.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageFilterNode/computeOrFilters.ts create mode 100644 datahub-web-react/src/app/lineageV2/LineageFilterNode/useFetchFilterNodeContents.ts create mode 100644 datahub-web-react/src/app/lineageV2/LineageGraph.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageSidebar.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageTimeSelector.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageTransformationNode/LineageTransformationNode.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageVisualization.tsx create mode 100644 datahub-web-react/src/app/lineageV2/LineageVisualizationContext.tsx create mode 100644 datahub-web-react/src/app/lineageV2/NodeBuilder.ts create mode 100644 datahub-web-react/src/app/lineageV2/common.ts create mode 100644 datahub-web-react/src/app/lineageV2/constants.ts create mode 100644 
datahub-web-react/src/app/lineageV2/controls/DownloadLineageScreenshotButton.tsx create mode 100644 datahub-web-react/src/app/lineageV2/controls/LineageControls.tsx create mode 100644 datahub-web-react/src/app/lineageV2/controls/LineageSearchFilters.tsx create mode 100644 datahub-web-react/src/app/lineageV2/controls/LineageTimeRangeControls.tsx create mode 100644 datahub-web-react/src/app/lineageV2/controls/SearchControl.tsx create mode 100644 datahub-web-react/src/app/lineageV2/controls/StyledPanelButton.tsx create mode 100644 datahub-web-react/src/app/lineageV2/controls/ZoomControls.tsx create mode 100644 datahub-web-react/src/app/lineageV2/controls/common.tsx create mode 100644 datahub-web-react/src/app/lineageV2/lineageUtils.ts create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/AddEntityEdge.tsx create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/EntityEdge.tsx create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/LineageEdges.tsx create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/LineageEntityView.tsx create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/ManageLineageModal.tsx create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/UserAvatar.tsx create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/_test_/utils.test.ts create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/updateNodeContext.ts create mode 100644 datahub-web-react/src/app/lineageV2/manualLineage/utils.ts create mode 100644 datahub-web-react/src/app/lineageV2/types.ts create mode 100644 datahub-web-react/src/app/lineageV2/useAvoidIntersections.ts create mode 100644 datahub-web-react/src/app/lineageV2/useBulkEntityLineage.ts create mode 100644 datahub-web-react/src/app/lineageV2/useColumnHighlighting.ts create mode 100644 datahub-web-react/src/app/lineageV2/useComputeGraph/filterNodes.ts create mode 100644 
datahub-web-react/src/app/lineageV2/useComputeGraph/getDisplayedNodes.ts create mode 100644 datahub-web-react/src/app/lineageV2/useComputeGraph/getFineGrainedLineage.ts create mode 100644 datahub-web-react/src/app/lineageV2/useComputeGraph/orderNodes.ts create mode 100644 datahub-web-react/src/app/lineageV2/useComputeGraph/useComputeGraph.tsx create mode 100644 datahub-web-react/src/app/lineageV2/useLineageV2.ts create mode 100644 datahub-web-react/src/app/lineageV2/useNodeHighlighting.ts create mode 100644 datahub-web-react/src/app/lineageV2/useSearchAcrossLineage.ts create mode 100644 datahub-web-react/src/app/lineageV2/useShouldHideTransformations.ts create mode 100644 datahub-web-react/src/app/onboarding/OnboardingContext.tsx create mode 100644 datahub-web-react/src/app/onboarding/OnboardingContextProvider.tsx create mode 100644 datahub-web-react/src/app/onboarding/configV2/EntityProfileOnboardingConfig.tsx create mode 100644 datahub-web-react/src/app/onboarding/configV2/HomePageOnboardingConfig.tsx create mode 100644 datahub-web-react/src/app/onboarding/configV2/index.ts create mode 100644 datahub-web-react/src/app/onboarding/useHandleOnboardingTour.ts create mode 100644 datahub-web-react/src/app/onboarding/useShouldSkipOnboardingTour.ts create mode 100644 datahub-web-react/src/app/previewV2/BrowsePaths.tsx create mode 100644 datahub-web-react/src/app/previewV2/CardActionCircle.tsx create mode 100644 datahub-web-react/src/app/previewV2/ColoredBackgroundPlatformIconGroup.tsx create mode 100644 datahub-web-react/src/app/previewV2/CompactView.tsx create mode 100644 datahub-web-react/src/app/previewV2/ContextPath.tsx create mode 100644 datahub-web-react/src/app/previewV2/ContextPathEntityIcon.tsx create mode 100644 datahub-web-react/src/app/previewV2/ContextPathEntityLink.tsx create mode 100644 datahub-web-react/src/app/previewV2/ContextPathSeparator.tsx create mode 100644 datahub-web-react/src/app/previewV2/DefaultPreviewCard.tsx create mode 100644 
datahub-web-react/src/app/previewV2/DefaultPreviewCardFooter.tsx create mode 100644 datahub-web-react/src/app/previewV2/EntityHeader.tsx create mode 100644 datahub-web-react/src/app/previewV2/EntityPaths/ColumnPathsText.tsx create mode 100644 datahub-web-react/src/app/previewV2/EntityPaths/ColumnsRelationshipText.tsx create mode 100644 datahub-web-react/src/app/previewV2/EntityPaths/DisplayedColumns.tsx create mode 100644 datahub-web-react/src/app/previewV2/EntityPaths/EntityPaths.tsx create mode 100644 datahub-web-react/src/app/previewV2/EntityPaths/EntityPathsModal.tsx create mode 100644 datahub-web-react/src/app/previewV2/Freshness.tsx create mode 100644 datahub-web-react/src/app/previewV2/HealthIcon.tsx create mode 100644 datahub-web-react/src/app/previewV2/HealthPopover.tsx create mode 100644 datahub-web-react/src/app/previewV2/ImageWIthColoredBackground.tsx create mode 100644 datahub-web-react/src/app/previewV2/LineageBadge.tsx create mode 100644 datahub-web-react/src/app/previewV2/NotesIcon.tsx create mode 100644 datahub-web-react/src/app/previewV2/Pills.tsx create mode 100644 datahub-web-react/src/app/previewV2/PreviewCardFooterRightSection.tsx create mode 100644 datahub-web-react/src/app/previewV2/QueryStat.tsx create mode 100644 datahub-web-react/src/app/previewV2/SearchPill.tsx create mode 100644 datahub-web-react/src/app/previewV2/SeeSummaryButton.tsx create mode 100644 datahub-web-react/src/app/previewV2/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/previewV2/shared.tsx create mode 100644 datahub-web-react/src/app/previewV2/utils.ts create mode 100644 datahub-web-react/src/app/recommendations/HoverEntityTooltipContext.tsx create mode 100644 datahub-web-react/src/app/searchV2/AdvancedSearchFilterOverallUnionTypeSelect.tsx create mode 100644 datahub-web-react/src/app/searchV2/AnalyticsLink.tsx create mode 100644 datahub-web-react/src/app/searchV2/BrowseEntityCard.tsx create mode 100644 
datahub-web-react/src/app/searchV2/ChooseEntityTypeModal.tsx create mode 100644 datahub-web-react/src/app/searchV2/CommandK.tsx create mode 100644 datahub-web-react/src/app/searchV2/EditTextModal.tsx create mode 100644 datahub-web-react/src/app/searchV2/EmbeddedSearchBar.tsx create mode 100644 datahub-web-react/src/app/searchV2/EmptySearchResults.tsx create mode 100644 datahub-web-react/src/app/searchV2/SaveAsViewButton.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchBar.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchEntitySidebarContainer.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchFilterLabel.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchHeader.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchPage.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchResultList.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchResults.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchResultsLoadingSection.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchResultsRecommendations.tsx create mode 100644 datahub-web-react/src/app/searchV2/SearchablePage.tsx create mode 100644 datahub-web-react/src/app/searchV2/SimpleSearchFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/SimpleSearchFilters.tsx create mode 100644 datahub-web-react/src/app/searchV2/ToggleSidebarButton.tsx create mode 100644 datahub-web-react/src/app/searchV2/ViewAllSearchItem.tsx create mode 100644 datahub-web-react/src/app/searchV2/__tests__/SearchPage.test.tsx create mode 100644 datahub-web-react/src/app/searchV2/__tests__/constants.test.tsx create mode 100644 datahub-web-react/src/app/searchV2/__tests__/filterUtils.test.tsx create mode 100644 datahub-web-react/src/app/searchV2/advanced/AdvancedFilterCloseButton.tsx create mode 100644 datahub-web-react/src/app/searchV2/advanced/EntitySubTypeAdvancedFilterLabel.tsx create mode 100644 
datahub-web-react/src/app/searchV2/advanced/styles.ts create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/AutoCompleteEntity.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/AutoCompleteEntityIcon.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/AutoCompleteItem.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/AutoCompletePlatformNames.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/AutoCompleteTag.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/AutoCompleteTooltipContent.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/AutoCompleteUser.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/ParentContainers.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/RecommendedOption.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/SectionHeader.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/__tests__/utils.test.ts create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/quickFilters/QuickFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/quickFilters/QuickFilters.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/quickFilters/utils.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/styledComponents.tsx create mode 100644 datahub-web-react/src/app/searchV2/autoComplete/utils.ts create mode 100644 datahub-web-react/src/app/searchV2/context/constants.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/ActiveFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/AddFilterDropdown.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/FilterOption.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/MoreFilterOption.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/MoreFilters.tsx create mode 100644 
datahub-web-react/src/app/searchV2/filters/OperatorSelector.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/OptionsDropdownMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/ParentEntities.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SaveViewButton.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SearchFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SearchFilterOptions.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SearchFilterView.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SearchFilters.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SearchFiltersBuilder.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SearchFiltersLoadingSection.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SearchFiltersSection.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SelectedFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/SelectedSearchFilters.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/__tests__/operator.test.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/__tests__/utils.test.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/constants.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/field/fields.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/mapFilterOption.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/operator/operator.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/FilterRenderer.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/FilterRendererRegistry.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/__tests__/FilterRendererRegistry.test.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/acrylRenderers.ts create mode 100644 
datahub-web-react/src/app/searchV2/filters/render/assertion/HasFailingAssertionsFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/assertion/HasFailingAssertionsRenderer.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/incident/HasActiveIncidentsFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/incident/HasActiveIncidentsRenderer.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/shared/BooleanMoreFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/shared/BooleanMoreFilterMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/shared/BooleanSearchFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/shared/BooleanSimpleSearchFilter.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/shared/styledComponents.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/render/types.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/render/useFilterRenderer.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/styledComponents.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/types.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/useFilterMode.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/useGetBrowseV2LabelOverride.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/useHydrateFilters.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/useSearchFilterAnalytics.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/useSearchFilterDropdown.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/utils.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/BooleanValueMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/DateRangeMenu.tsx create mode 100644 
datahub-web-react/src/app/searchV2/filters/value/EntityTypeMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/EntityValueMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/EnumValueMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/TextValueInput.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/TextValueMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/TimeBucketMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/ValueMenu.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/ValueName.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/ValueSelector.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/styledComponents.tsx create mode 100644 datahub-web-react/src/app/searchV2/filters/value/useDateRangeFilterValues.ts create mode 100644 datahub-web-react/src/app/searchV2/filters/value/utils.tsx create mode 100644 datahub-web-react/src/app/searchV2/matches/GroupedMatch.tsx create mode 100644 datahub-web-react/src/app/searchV2/matches/MatchContextContainer.tsx create mode 100644 datahub-web-react/src/app/searchV2/matches/MatchedFieldList.tsx create mode 100644 datahub-web-react/src/app/searchV2/matches/SearchTextHighlighter.tsx create mode 100644 datahub-web-react/src/app/searchV2/matches/constants.ts create mode 100644 datahub-web-react/src/app/searchV2/matches/matchedFieldPathsRenderer.tsx create mode 100644 datahub-web-react/src/app/searchV2/matches/matchedInputFieldRenderer.tsx create mode 100644 datahub-web-react/src/app/searchV2/matches/utils.test.ts create mode 100644 datahub-web-react/src/app/searchV2/matches/utils.ts create mode 100644 datahub-web-react/src/app/searchV2/recommendation/FilterPill.tsx create mode 100644 datahub-web-react/src/app/searchV2/recommendation/RecommendedFilters.tsx create mode 100644 
datahub-web-react/src/app/searchV2/recommendation/types.ts create mode 100644 datahub-web-react/src/app/searchV2/recommendation/useGetRecommendedFilters.tsx create mode 100644 datahub-web-react/src/app/searchV2/recommendation/utils.ts create mode 100644 datahub-web-react/src/app/searchV2/searchSlideout/SearchCardSlideoutContent.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/BrowseContext.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/BrowseNode.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/BrowseSidebar.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/EntityBrowse.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/EntityLink.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/EntityNode.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/EnvironmentNode.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/ExpandableNode.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/PlatformBrowse.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/PlatformNode.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/SidebarContext.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/SidebarLoadingError.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/__tests__/browseContextUtils.test.ts create mode 100644 datahub-web-react/src/app/searchV2/sidebar/browseContextUtils.ts create mode 100644 datahub-web-react/src/app/searchV2/sidebar/constants.ts create mode 100644 datahub-web-react/src/app/searchV2/sidebar/index.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/types.ts create mode 100644 datahub-web-react/src/app/searchV2/sidebar/useAggregationsQuery.ts create mode 100644 datahub-web-react/src/app/searchV2/sidebar/useBrowsePagination.tsx create mode 100644 datahub-web-react/src/app/searchV2/sidebar/useSidebarAnalytics.ts create mode 100644 
datahub-web-react/src/app/searchV2/sidebar/useSidebarEntities.ts create mode 100644 datahub-web-react/src/app/searchV2/sidebar/useSidebarFilters.ts create mode 100644 datahub-web-react/src/app/searchV2/sidebar/useSidebarPlatforms.tsx create mode 100644 datahub-web-react/src/app/searchV2/sorting/SearchSortSelect.tsx create mode 100644 datahub-web-react/src/app/searchV2/sorting/useGetSortOptions.tsx create mode 100644 datahub-web-react/src/app/searchV2/sorting/useSortInput.ts create mode 100644 datahub-web-react/src/app/searchV2/suggestions/SearchQuerySugggester.tsx create mode 100644 datahub-web-react/src/app/searchV2/useAdvancedSearchSelectFilters.tsx create mode 100644 datahub-web-react/src/app/searchV2/useGetSearchQueryInputs.ts create mode 100644 datahub-web-react/src/app/searchV2/useSearchAndBrowseVersion.ts create mode 100644 datahub-web-react/src/app/searchV2/useSearchViewAll.ts create mode 100644 datahub-web-react/src/app/searchV2/useToggleSidebar.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/__tests__/applyFilterOverrides.test.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/__tests__/filterUtils.test.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/__tests__/generateOrFilters.test.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/applyFilterOverrides.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/combineSiblingsInAutoComplete.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/combineSiblingsInSearchResults.test.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/combineSiblingsInSearchResults.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/constants.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/csvUtils.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/filterSearchQuery.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/filterUtils.ts create mode 100644 
datahub-web-react/src/app/searchV2/utils/filtersToQueryStringParams.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/generateOrFilters.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/hasAdvancedFilters.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/navigateToSearchUrl.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/types.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/useDownloadScrollAcrossEntitiesSearchResults.ts create mode 100644 datahub-web-react/src/app/searchV2/utils/useFilters.ts create mode 100644 datahub-web-react/src/app/settingsV2/AccessTokenModal.tsx create mode 100644 datahub-web-react/src/app/settingsV2/AccessTokens.tsx create mode 100644 datahub-web-react/src/app/settingsV2/CreateTokenModal.tsx create mode 100644 datahub-web-react/src/app/settingsV2/Preferences.tsx create mode 100644 datahub-web-react/src/app/settingsV2/SettingsPage.tsx create mode 100644 datahub-web-react/src/app/settingsV2/features/Feature.tsx create mode 100644 datahub-web-react/src/app/settingsV2/features/Features.tsx create mode 100644 datahub-web-react/src/app/settingsV2/features/useDocPropagationSettings.ts create mode 100644 datahub-web-react/src/app/settingsV2/personal/utils.tsx create mode 100644 datahub-web-react/src/app/settingsV2/posts/CreatePostForm.tsx create mode 100644 datahub-web-react/src/app/settingsV2/posts/CreatePostModal.tsx create mode 100644 datahub-web-react/src/app/settingsV2/posts/ManagePosts.tsx create mode 100644 datahub-web-react/src/app/settingsV2/posts/PostItemMenu.tsx create mode 100644 datahub-web-react/src/app/settingsV2/posts/PostsList.tsx create mode 100644 datahub-web-react/src/app/settingsV2/posts/PostsListColumns.tsx create mode 100644 datahub-web-react/src/app/settingsV2/posts/constants.ts create mode 100644 datahub-web-react/src/app/settingsV2/posts/utils.ts create mode 100644 datahub-web-react/src/app/settingsV2/settingsPaths.tsx create mode 100644 
datahub-web-react/src/app/settingsV2/utils.ts create mode 100644 datahub-web-react/src/app/shared/LastUpdated.tsx create mode 100644 datahub-web-react/src/app/shared/LinkWrapper.tsx create mode 100644 datahub-web-react/src/app/shared/MatchesContext.tsx create mode 100644 datahub-web-react/src/app/shared/TabFullsizedContext.tsx create mode 100644 datahub-web-react/src/app/shared/TruncatedTextWithTooltip.tsx create mode 100644 datahub-web-react/src/app/shared/__tests__/LinkWrapper.test.tsx create mode 100644 datahub-web-react/src/app/shared/__tests__/TruncatedTextWithTooltip.test.tsx create mode 100644 datahub-web-react/src/app/shared/__tests__/pluralize.test.ts create mode 100644 datahub-web-react/src/app/shared/share/v2/ShareButtonMenu.tsx create mode 100644 datahub-web-react/src/app/shared/share/v2/ShareMenuAction.tsx create mode 100644 datahub-web-react/src/app/shared/share/v2/items/CopyLinkMenuItem.tsx create mode 100644 datahub-web-react/src/app/shared/share/v2/items/CopyUrnMenuItem.tsx create mode 100644 datahub-web-react/src/app/shared/share/v2/items/EmailMenuItem.tsx create mode 100644 datahub-web-react/src/app/shared/share/v2/styledComponents.tsx create mode 100644 datahub-web-react/src/app/shared/subscribe/drawer/utils.tsx create mode 100644 datahub-web-react/src/app/shared/useEmbeddedProfileLinkProps.tsx create mode 100644 datahub-web-react/src/app/sharedV2/EntitySidebarContext.tsx create mode 100644 datahub-web-react/src/app/sharedV2/__tests__/utils.test.tsx create mode 100644 datahub-web-react/src/app/sharedV2/ant/OptionalTooltip.tsx create mode 100644 datahub-web-react/src/app/sharedV2/buttons/BackButton.tsx create mode 100644 datahub-web-react/src/app/sharedV2/cards/EntityCountCard.tsx create mode 100644 datahub-web-react/src/app/sharedV2/cards/SummaryEntityCard.tsx create mode 100644 datahub-web-react/src/app/sharedV2/cards/components.tsx create mode 100644 datahub-web-react/src/app/sharedV2/cards/filterFromHex.ts create mode 100644 
datahub-web-react/src/app/sharedV2/carousel/Carousel.tsx create mode 100644 datahub-web-react/src/app/sharedV2/carousel/HorizontalScroller.tsx create mode 100644 datahub-web-react/src/app/sharedV2/colors.ts create mode 100644 datahub-web-react/src/app/sharedV2/colors/colorUtils.ts create mode 100644 datahub-web-react/src/app/sharedV2/icons/CompactFieldIcon.tsx create mode 100644 datahub-web-react/src/app/sharedV2/icons/ImageColoredBackground.tsx create mode 100644 datahub-web-react/src/app/sharedV2/icons/InfoPopover.tsx create mode 100644 datahub-web-react/src/app/sharedV2/icons/InfoTooltip.tsx create mode 100644 datahub-web-react/src/app/sharedV2/icons/PlatformIcon.tsx create mode 100644 datahub-web-react/src/app/sharedV2/icons/colorUtils.ts create mode 100644 datahub-web-react/src/app/sharedV2/icons/customIcons/CustomIcon.tsx create mode 100644 datahub-web-react/src/app/sharedV2/icons/customIcons/add-term.svg create mode 100644 datahub-web-react/src/app/sharedV2/icons/getTypeIcon.tsx create mode 100644 datahub-web-react/src/app/sharedV2/pagination/usePagination.ts create mode 100644 datahub-web-react/src/app/sharedV2/propagation/DocumentationPropagationDetails.tsx create mode 100644 datahub-web-react/src/app/sharedV2/propagation/LabelPropagationDetails.tsx create mode 100644 datahub-web-react/src/app/sharedV2/propagation/PropagationEntityLink.tsx create mode 100644 datahub-web-react/src/app/sharedV2/propagation/PropagationIcon.tsx create mode 100644 datahub-web-react/src/app/sharedV2/propagation/utils.ts create mode 100644 datahub-web-react/src/app/sharedV2/search/DownloadButton.tsx create mode 100644 datahub-web-react/src/app/sharedV2/search/EditButton.tsx create mode 100644 datahub-web-react/src/app/sharedV2/search/SearchMenuItems.tsx create mode 100644 datahub-web-react/src/app/sharedV2/sidebar/components.tsx create mode 100644 datahub-web-react/src/app/sharedV2/sidebar/useSidebarWidth.ts create mode 100644 
datahub-web-react/src/app/sharedV2/sorting/useSorting.ts create mode 100644 datahub-web-react/src/app/sharedV2/tags/AddTagTerm.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/DataProductLink.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/DomainLink.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/TagLink.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/TagTermGroup.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/tag/Tag.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/term/Term.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/term/TermContent.tsx create mode 100644 datahub-web-react/src/app/sharedV2/tags/usePropagationContextEntities.ts create mode 100644 datahub-web-react/src/app/sharedV2/text/MatchTextSizeWrapper.tsx create mode 100644 datahub-web-react/src/app/sharedV2/text/OverflowTitle.tsx create mode 100644 datahub-web-react/src/app/sharedV2/useGetEntities.ts create mode 100644 datahub-web-react/src/app/sharedV2/utils.tsx create mode 100644 datahub-web-react/src/app/useIsThemeV2.tsx create mode 100644 datahub-web-react/src/app/useShowNavBarRedesign.tsx create mode 100644 datahub-web-react/src/app/utils/navigateToUrl.ts create mode 100644 datahub-web-react/src/app/utils/queryUtils.ts create mode 100644 datahub-web-react/src/conf/theme/global-overrides-v2.less create mode 100644 datahub-web-react/src/conf/theme/global-variables-v2.less create mode 100644 datahub-web-react/src/conf/theme/theme_v2.config.json create mode 100644 datahub-web-react/src/graphql/schemaField.graphql create mode 100644 datahub-web-react/src/graphql/timeline.graphql create mode 100644 datahub-web-react/src/images/acryl_hero.svg create mode 100644 datahub-web-react/src/images/ambulance-icon.svg create mode 100644 datahub-web-react/src/images/analyticsMenuIcon.svg create mode 100644 datahub-web-react/src/images/assertion_error_dot.svg create mode 100644 
datahub-web-react/src/images/assertion_failure_dot.svg create mode 100644 datahub-web-react/src/images/assertion_init_dot.svg create mode 100644 datahub-web-react/src/images/assertion_no_results_dot.svg create mode 100644 datahub-web-react/src/images/assertion_success_dot.svg create mode 100644 datahub-web-react/src/images/assertion_v2_failure_dot.svg create mode 100644 datahub-web-react/src/images/assertion_v2_success_dot.svg create mode 100644 datahub-web-react/src/images/datahub-platforms.svg create mode 100644 datahub-web-react/src/images/deprecated-status.svg create mode 100644 datahub-web-react/src/images/deprecation-icon.svg create mode 100644 datahub-web-react/src/images/governMenuIcon.svg create mode 100644 datahub-web-react/src/images/help-icon.svg create mode 100644 datahub-web-react/src/images/ingestionMenuIcon.svg create mode 100644 datahub-web-react/src/images/introduceBg.svg create mode 100644 datahub-web-react/src/images/lineage-status.svg create mode 100644 datahub-web-react/src/images/no-docs.svg create mode 100644 datahub-web-react/src/images/no-stats-available.svg create mode 100644 datahub-web-react/src/images/row-icon.svg create mode 100644 datahub-web-react/src/images/settingsMenuIcon.svg create mode 100644 datahub-web-react/src/images/shield-check.svg create mode 100644 datahub-web-react/src/images/shield-exclamation.svg create mode 100644 datahub-web-react/src/images/sidebarBackArrow.svg create mode 100644 datahub-web-react/src/images/tableau-embedded-data-source.svg create mode 100644 datahub-web-react/src/images/tableau-published-data-source.svg create mode 100644 datahub-web-react/src/images/tableau-view.svg create mode 100644 datahub-web-react/src/images/tableau-workbook.svg create mode 100644 datahub-web-react/src/images/timeline-icon.svg create mode 100644 datahub-web-react/src/images/trending-down-icon.svg create mode 100644 datahub-web-react/src/images/trending-up-icon.svg create mode 100644 
metadata-models/src/main/pegasus/com/linkedin/common/DisplayProperties.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/IconLibrary.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/common/IconProperties.pdl create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/ChromeExtensionConfiguration.java create mode 100644 smoke-test/tests/cypress/cypress/e2e/auto_completeV2/v2_auto_complete.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/containersV2/v2_containers.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/domainsV2/v2_domains.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/domainsV2/v2_nested_domains.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/glossaryV2/v2_glossary.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/glossaryV2/v2_glossaryTerm.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/glossaryV2/v2_glossary_navigation.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/homeV2/v2_home.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/incidentsV2/v2_incidents.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineageV2/v2_download_lineage_results.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineageV2/v2_impact_analysis.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineageV2/v2_lineage_column_level.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineageV2/v2_lineage_column_path.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineageV2/v2_lineage_graph.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/loginV2/v2_login.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/mutationsV2/v2_ingestion_source.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/mutationsV2/v2_managed_ingestion.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/mutationsV2/v2_managing_secrets.js create mode 100644 
smoke-test/tests/cypress/cypress/e2e/operrationsV2/v2_operations.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/ownershipV2/v2_manage_ownership.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/schema_blameV2/v2_schema_blame.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/settingsV2/v2_homePagePost.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/settingsV2/v2_manage_access_tokens.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/settingsV2/v2_manage_policies.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/settingsV2/v2_managing_groups.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/siblingsV2/v2_siblings.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/task_runV2/v2_task_runs.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/viewV2/v2_manage_views.js create mode 100644 smoke-test/tests/cypress/cypress/e2e/viewV2/v2_view_select.js create mode 100644 smoke-test/tests/cypress/incidents_test.json diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index aec5352dec1a64..c514a7c3944528 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -29,6 +29,7 @@ private Constants() {} public static final String CONTRACTS_SCHEMA_FILE = "contract.graphql"; public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql"; public static final String VERSION_SCHEMA_FILE = "versioning.graphql"; + public static final String QUERY_SCHEMA_FILE = "query.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; public static final String BROWSE_PATH_V2_DELIMITER = "␟"; public static final String VERSION_STAMP_FIELD_NAME = "versionStamp"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 2c5e841322f45f..68f1d851420258 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -257,6 +257,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.RemoveTagResolver; import com.linkedin.datahub.graphql.resolvers.mutate.RemoveTermResolver; import com.linkedin.datahub.graphql.resolvers.mutate.UpdateDescriptionResolver; +import com.linkedin.datahub.graphql.resolvers.mutate.UpdateDisplayPropertiesResolver; import com.linkedin.datahub.graphql.resolvers.mutate.UpdateNameResolver; import com.linkedin.datahub.graphql.resolvers.mutate.UpdateParentNodeResolver; import com.linkedin.datahub.graphql.resolvers.mutate.UpdateUserSettingResolver; @@ -297,6 +298,7 @@ import com.linkedin.datahub.graphql.resolvers.settings.user.UpdateCorpUserViewsSettingsResolver; import com.linkedin.datahub.graphql.resolvers.settings.view.GlobalViewsSettingsResolver; import com.linkedin.datahub.graphql.resolvers.settings.view.UpdateGlobalViewsSettingsResolver; +import com.linkedin.datahub.graphql.resolvers.siblings.SiblingsSearchResolver; import com.linkedin.datahub.graphql.resolvers.step.BatchGetStepStatesResolver; import com.linkedin.datahub.graphql.resolvers.step.BatchUpdateStepStatesResolver; import com.linkedin.datahub.graphql.resolvers.structuredproperties.CreateStructuredPropertyResolver; @@ -314,6 +316,7 @@ import com.linkedin.datahub.graphql.resolvers.test.UpdateTestResolver; import com.linkedin.datahub.graphql.resolvers.timeline.GetSchemaBlameResolver; import com.linkedin.datahub.graphql.resolvers.timeline.GetSchemaVersionListResolver; +import com.linkedin.datahub.graphql.resolvers.timeline.GetTimelineResolver; import com.linkedin.datahub.graphql.resolvers.type.AspectInterfaceTypeResolver; import 
com.linkedin.datahub.graphql.resolvers.type.EntityInterfaceTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.HyperParameterValueTypeResolver; @@ -376,6 +379,7 @@ import com.linkedin.datahub.graphql.types.notebook.NotebookType; import com.linkedin.datahub.graphql.types.ownership.OwnershipType; import com.linkedin.datahub.graphql.types.policy.DataHubPolicyType; +import com.linkedin.datahub.graphql.types.post.PostType; import com.linkedin.datahub.graphql.types.query.QueryType; import com.linkedin.datahub.graphql.types.restricted.RestrictedType; import com.linkedin.datahub.graphql.types.role.DataHubRoleType; @@ -389,6 +393,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.client.UsageStatsJavaClient; +import com.linkedin.metadata.config.ChromeExtensionConfiguration; import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.TestsConfiguration; @@ -496,6 +501,7 @@ public class GmsGraphQLEngine { private final TestsConfiguration testsConfiguration; private final DataHubConfiguration datahubConfiguration; private final ViewsConfiguration viewsConfiguration; + private final ChromeExtensionConfiguration chromeExtensionConfiguration; private final DatasetType datasetType; @@ -540,6 +546,7 @@ public class GmsGraphQLEngine { private final FormType formType; private final IncidentType incidentType; private final RestrictedType restrictedType; + private final PostType postType; private final DataProcessInstanceType dataProcessInstanceType; private final VersionSetType versionSetType; @@ -620,6 +627,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.datahubConfiguration = args.datahubConfiguration; this.viewsConfiguration = args.viewsConfiguration; this.featureFlags = args.featureFlags; + this.chromeExtensionConfiguration = args.chromeExtensionConfiguration; 
this.datasetType = new DatasetType(entityClient); this.roleType = new RoleType(entityClient); @@ -662,6 +670,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.formType = new FormType(entityClient); this.incidentType = new IncidentType(entityClient); this.restrictedType = new RestrictedType(entityClient, restrictedService); + this.postType = new PostType(entityClient); this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags); this.versionSetType = new VersionSetType(entityClient); @@ -713,6 +722,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { entityTypeType, formType, incidentType, + postType, versionSetType, restrictedType, businessAttributeType, @@ -873,7 +883,8 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)) .addSchema(fileBasedSchema(CONTRACTS_SCHEMA_FILE)) .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE)) - .addSchema(fileBasedSchema(VERSION_SCHEMA_FILE)); + .addSchema(fileBasedSchema(VERSION_SCHEMA_FILE)) + .addSchema(fileBasedSchema(QUERY_SCHEMA_FILE)); for (GmsGraphQLPlugin plugin : this.graphQLPlugins) { List pluginSchemaFiles = plugin.getSchemaFiles(); @@ -1002,7 +1013,8 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { this.testsConfiguration, this.datahubConfiguration, this.viewsConfiguration, - this.featureFlags)) + this.featureFlags, + this.chromeExtensionConfiguration)) .dataFetcher("me", new MeResolver(this.entityClient, featureFlags)) .dataFetcher("search", new SearchResolver(this.entityClient)) .dataFetcher( @@ -1088,6 +1100,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher( "executionRequest", new GetIngestionExecutionRequestResolver(this.entityClient)) .dataFetcher("getSchemaBlame", new GetSchemaBlameResolver(this.timelineService)) + .dataFetcher("getTimeline", new GetTimelineResolver(this.timelineService)) .dataFetcher( "getSchemaVersionList", new 
GetSchemaVersionListResolver(this.timelineService)) .dataFetcher("test", getResolver(testType)) @@ -1352,6 +1365,9 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { "createOwnershipType", new CreateOwnershipTypeResolver(this.ownershipTypeService)) .dataFetcher( "updateOwnershipType", new UpdateOwnershipTypeResolver(this.ownershipTypeService)) + .dataFetcher( + "updateDisplayProperties", + new UpdateDisplayPropertiesResolver(this.entityService)) .dataFetcher( "deleteOwnershipType", new DeleteOwnershipTypeResolver(this.ownershipTypeService)) .dataFetcher("submitFormPrompt", new SubmitFormPromptResolver(this.formService)) @@ -1735,7 +1751,10 @@ private void configureDatasetResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("exists", new EntityExistsResolver(entityService)) .dataFetcher("runs", new EntityRunsResolver(entityClient)) .dataFetcher("privileges", new EntityPrivilegesResolver(entityClient)) - .dataFetcher("parentContainers", new ParentContainersResolver(entityClient))) + .dataFetcher("parentContainers", new ParentContainersResolver(entityClient)) + .dataFetcher( + "siblingsSearch", + new SiblingsSearchResolver(this.entityClient, this.viewService))) .type( "Owner", typeWiring -> @@ -1768,6 +1787,13 @@ private void configureDatasetResolvers(final RuntimeWiring.Builder builder) { datasetType, (env) -> ((ForeignKeyConstraint) env.getSource()).getForeignDataset().getUrn()))) + .type( + "Deprecation", + typeWiring -> + typeWiring.dataFetcher( + "replacement", + new EntityTypeResolver( + entityTypes, (env) -> ((Deprecation) env.getSource()).getReplacement()))) .type( "SiblingProperties", typeWiring -> @@ -1878,10 +1904,16 @@ private void configureSchemaFieldResolvers(final RuntimeWiring.Builder builder) builder.type( "SchemaFieldEntity", typeWiring -> - typeWiring.dataFetcher( - "parent", - new EntityTypeResolver( - entityTypes, (env) -> ((SchemaFieldEntity) env.getSource()).getParent()))); + typeWiring + .dataFetcher( + 
"parent", + new EntityTypeResolver( + entityTypes, (env) -> ((SchemaFieldEntity) env.getSource()).getParent())) + .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) + .dataFetcher( + "lineage", + new EntityLineageResultResolver( + siblingGraphService, restrictedService, this.authorizationConfiguration))); } private void configureEntityPathResolvers(final RuntimeWiring.Builder builder) { @@ -1935,10 +1967,12 @@ private void configureCorpUserResolvers(final RuntimeWiring.Builder builder) { new LoadableTypeBatchResolver<>( dataPlatformType, (env) -> - ((CorpUserEditableProperties) env.getSource()) - .getPlatforms().stream() - .map(DataPlatform::getUrn) - .collect(Collectors.toList())))); + ((CorpUserEditableProperties) env.getSource()).getPlatforms() != null + ? ((CorpUserEditableProperties) env.getSource()) + .getPlatforms().stream() + .map(DataPlatform::getUrn) + .collect(Collectors.toList()) + : ImmutableList.of()))); } /** @@ -3065,11 +3099,20 @@ private void configureQueryEntityResolvers(final RuntimeWiring.Builder builder) .type( "QuerySubject", typeWiring -> - typeWiring.dataFetcher( - "dataset", - new LoadableTypeResolver<>( - datasetType, - (env) -> ((QuerySubject) env.getSource()).getDataset().getUrn()))); + typeWiring + .dataFetcher( + "dataset", + new LoadableTypeResolver<>( + datasetType, + (env) -> ((QuerySubject) env.getSource()).getDataset().getUrn())) + .dataFetcher( + "schemaField", + new LoadableTypeResolver<>( + schemaFieldType, + (env) -> + ((QuerySubject) env.getSource()).getSchemaField() != null + ? 
((QuerySubject) env.getSource()).getSchemaField().getUrn() + : null))); } private void configureOwnershipTypeResolver(final RuntimeWiring.Builder builder) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index 131f4e87637807..25fa43183126fe 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -13,6 +13,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.client.UsageStatsJavaClient; +import com.linkedin.metadata.config.ChromeExtensionConfiguration; import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.TestsConfiguration; @@ -87,6 +88,7 @@ public class GmsGraphQLEngineArgs { int graphQLQueryDepthLimit; boolean graphQLQueryIntrospectionEnabled; BusinessAttributeService businessAttributeService; + ChromeExtensionConfiguration chromeExtensionConfiguration; ConnectionService connectionService; AssertionService assertionService; EntityVersioningService entityVersioningService; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index 29d1c02dacb416..082352dc1917a9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -375,5 +375,21 @@ public static boolean isViewDatasetUsageAuthorized( new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())); } + public 
static boolean isViewDatasetProfileAuthorized( + final QueryContext context, final Urn resourceUrn) { + return AuthUtil.isAuthorized( + context.getOperationContext(), + PoliciesConfig.VIEW_DATASET_PROFILE_PRIVILEGE, + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())); + } + + public static boolean isViewDatasetOperationsAuthorized( + final QueryContext context, final Urn resourceUrn) { + return AuthUtil.isAuthorized( + context.getOperationContext(), + PoliciesConfig.VIEW_DATASET_OPERATIONS_PRIVILEGE, + new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())); + } + private AuthorizationUtils() {} } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java index 8cdc13a14be87c..3dbb5a5d8bc2bf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java @@ -7,6 +7,7 @@ import com.linkedin.datahub.graphql.generated.AnalyticsConfig; import com.linkedin.datahub.graphql.generated.AppConfig; import com.linkedin.datahub.graphql.generated.AuthConfig; +import com.linkedin.datahub.graphql.generated.ChromeExtensionConfig; import com.linkedin.datahub.graphql.generated.EntityProfileConfig; import com.linkedin.datahub.graphql.generated.EntityProfilesConfig; import com.linkedin.datahub.graphql.generated.EntityType; @@ -23,6 +24,7 @@ import com.linkedin.datahub.graphql.generated.TestsConfig; import com.linkedin.datahub.graphql.generated.ViewsConfig; import com.linkedin.datahub.graphql.generated.VisualConfig; +import com.linkedin.metadata.config.ChromeExtensionConfiguration; import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import 
com.linkedin.metadata.config.TestsConfiguration; @@ -50,6 +52,7 @@ public class AppConfigResolver implements DataFetcher get(final DataFetchingEnvironment environmen .setPlatformBrowseV2(_featureFlags.isPlatformBrowseV2()) .setDataContractsEnabled(_featureFlags.isDataContractsEnabled()) .setEditableDatasetNameEnabled(_featureFlags.isEditableDatasetNameEnabled()) + .setThemeV2Enabled(_featureFlags.isThemeV2Enabled()) + .setThemeV2Default(_featureFlags.isThemeV2Default()) + .setThemeV2Toggleable(_featureFlags.isThemeV2Toggleable()) + .setLineageGraphV2(_featureFlags.isLineageGraphV2()) .setShowSeparateSiblings(_featureFlags.isShowSeparateSiblings()) .setShowManageStructuredProperties(_featureFlags.isShowManageStructuredProperties()) + .setSchemaFieldCLLEnabled(_featureFlags.isSchemaFieldCLLEnabled()) + .setHideDbtSourceInLineage(_featureFlags.isHideDbtSourceInLineage()) + .setSchemaFieldLineageIgnoreStatus(_featureFlags.isSchemaFieldLineageIgnoreStatus()) + .setShowNavBarRedesign(_featureFlags.isShowNavBarRedesign()) + .setShowAutoCompleteResults(_featureFlags.isShowAutoCompleteResults()) .setEntityVersioningEnabled(_featureFlags.isEntityVersioning()) .build(); appConfig.setFeatureFlags(featureFlagsConfig); + final ChromeExtensionConfig chromeExtensionConfig = new ChromeExtensionConfig(); + chromeExtensionConfig.setEnabled(_chromeExtensionConfiguration.isEnabled()); + chromeExtensionConfig.setLineageEnabled(_chromeExtensionConfiguration.isLineageEnabled()); + appConfig.setChromeExtensionConfig(chromeExtensionConfig); + return CompletableFuture.completedFuture(appConfig); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java index a2230cf6b6e886..57d1a4439d260c 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java @@ -125,6 +125,14 @@ private static void updateDeprecation( // Note is required field in GMS. Set to empty string if not provided. deprecation.setNote(EMPTY_STRING); } + + try { + deprecation.setReplacement( + input.getReplacement() != null ? Urn.createFromString(input.getReplacement()) : null, + SetMode.REMOVE_IF_NULL); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } try { deprecation.setActor(Urn.createFromString(context.getActorUrn())); } catch (URISyntaxException e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java index 67ab9bb2878141..5aa7382a49e38b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java @@ -12,8 +12,17 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityPrivileges; +import com.linkedin.datahub.graphql.resolvers.assertion.AssertionUtils; +import com.linkedin.datahub.graphql.resolvers.dataproduct.DataProductAuthorizationUtils; +import com.linkedin.datahub.graphql.resolvers.incident.IncidentUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.DescriptionUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.util.DeprecationUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.EmbedUtils; import 
com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.util.LinkUtils; +import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import graphql.schema.DataFetcher; @@ -112,7 +121,18 @@ private boolean canEditEntityLineage(Urn urn, QueryContext context) { private EntityPrivileges getDatasetPrivileges(Urn urn, QueryContext context) { final EntityPrivileges result = new EntityPrivileges(); result.setCanEditEmbed(EmbedUtils.isAuthorizedToUpdateEmbedForEntity(urn, context)); + // Schema Field Edits are a bit of a hack. result.setCanEditQueries(AuthorizationUtils.canCreateQuery(ImmutableList.of(urn), context)); + result.setCanEditSchemaFieldTags(LabelUtils.isAuthorizedToUpdateTags(context, urn, "ignored")); + result.setCanEditSchemaFieldGlossaryTerms( + LabelUtils.isAuthorizedToUpdateTerms(context, urn, "ignored")); + result.setCanEditSchemaFieldDescription( + DescriptionUtils.isAuthorizedToUpdateFieldDescription(context, urn)); + result.setCanViewDatasetUsage(AuthorizationUtils.isViewDatasetUsageAuthorized(context, urn)); + result.setCanViewDatasetProfile( + AuthorizationUtils.isViewDatasetProfileAuthorized(context, urn)); + result.setCanViewDatasetOperations( + AuthorizationUtils.isViewDatasetOperationsAuthorized(context, urn)); addCommonPrivileges(result, urn, context); return result; } @@ -141,5 +161,19 @@ private void addCommonPrivileges( @Nonnull EntityPrivileges result, @Nonnull Urn urn, @Nonnull QueryContext context) { result.setCanEditLineage(canEditEntityLineage(urn, context)); result.setCanEditProperties(AuthorizationUtils.canEditProperties(urn, context)); + result.setCanEditAssertions( + AssertionUtils.isAuthorizedToEditAssertionFromAssertee(context, urn)); + 
result.setCanEditIncidents(IncidentUtils.isAuthorizedToEditIncidentForResource(urn, context)); + result.setCanEditDomains( + DomainUtils.isAuthorizedToUpdateDomainsForEntity(context, urn, _entityClient)); + result.setCanEditDataProducts( + DataProductAuthorizationUtils.isAuthorizedToUpdateDataProductsForEntity(context, urn)); + result.setCanEditDeprecation( + DeprecationUtils.isAuthorizedToUpdateDeprecationForEntity(context, urn)); + result.setCanEditGlossaryTerms(LabelUtils.isAuthorizedToUpdateTerms(context, urn, null)); + result.setCanEditTags(LabelUtils.isAuthorizedToUpdateTags(context, urn, null)); + result.setCanEditOwners(OwnerUtils.isAuthorizedToUpdateOwners(context, urn)); + result.setCanEditDescription(DescriptionUtils.isAuthorizedToUpdateDescription(context, urn)); + result.setCanEditLinks(LinkUtils.isAuthorizedToUpdateLinks(context, urn)); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/IncidentUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/IncidentUtils.java new file mode 100644 index 00000000000000..500813e01ad6bd --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/IncidentUtils.java @@ -0,0 +1,25 @@ +package com.linkedin.datahub.graphql.resolvers.incident; + +import com.datahub.authorization.ConjunctivePrivilegeGroup; +import com.datahub.authorization.DisjunctivePrivilegeGroup; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.metadata.authorization.PoliciesConfig; + +public class IncidentUtils { + + public static boolean isAuthorizedToEditIncidentForResource( + final Urn resourceUrn, final QueryContext context) { + final DisjunctivePrivilegeGroup orPrivilegeGroups = + new DisjunctivePrivilegeGroup( + ImmutableList.of( + 
AuthorizationUtils.ALL_PRIVILEGES_GROUP, + new ConjunctivePrivilegeGroup( + ImmutableList.of(PoliciesConfig.EDIT_ENTITY_INCIDENTS_PRIVILEGE.getType())))); + + return AuthorizationUtils.isAuthorized( + context, resourceUrn.getEntityType(), resourceUrn.toString(), orPrivilegeGroups); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java index 546694ede92697..c4c183b0b16b4f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchUpdateDeprecationResolver.java @@ -46,6 +46,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw input.getDeprecated(), input.getNote(), input.getDecommissionTime(), + input.getReplacement(), resources, context); return true; @@ -84,20 +85,24 @@ private void batchUpdateDeprecation( boolean deprecated, @Nullable String note, @Nullable Long decommissionTime, + @Nullable String replacementUrn, List resources, QueryContext context) { log.debug( - "Batch updating deprecation. deprecated: {}, note: {}, decommissionTime: {}, resources: {}", + "Batch updating deprecation. 
deprecated: {}, note: {}, decommissionTime: {}, resources: {}" + + "replacementUrn: {}, notificationConfig: {}", deprecated, note, decommissionTime, - resources); + resources, + replacementUrn); try { DeprecationUtils.updateDeprecationForResources( context.getOperationContext(), deprecated, note, decommissionTime, + replacementUrn, resources, UrnUtils.getUrn(context.getActorUrn()), _entityService); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDisplayPropertiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDisplayPropertiesResolver.java new file mode 100644 index 00000000000000..47aee01ad94fe5 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDisplayPropertiesResolver.java @@ -0,0 +1,106 @@ +package com.linkedin.datahub.graphql.resolvers.mutate; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.persistAspect; + +import com.linkedin.common.DisplayProperties; +import com.linkedin.common.IconLibrary; +import com.linkedin.common.IconProperties; +import com.linkedin.common.urn.CorpuserUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.generated.DisplayPropertiesUpdateInput; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityUtils; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletableFuture; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@RequiredArgsConstructor +public class 
UpdateDisplayPropertiesResolver implements DataFetcher> { + private final EntityService _entityService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + final DisplayPropertiesUpdateInput input = + bindArgument(environment.getArgument("input"), DisplayPropertiesUpdateInput.class); + final String urn = bindArgument(environment.getArgument("urn"), String.class); + + final QueryContext context = environment.getContext(); + Urn targetUrn = Urn.createFromString(urn); + + log.info( + "Updating display properties. urn: {} input: {}", targetUrn.toString(), input.toString()); + + if (!_entityService.exists(context.getOperationContext(), targetUrn, true)) { + throw new IllegalArgumentException( + String.format("Failed to update %s. %s does not exist.", targetUrn, targetUrn)); + } + + return CompletableFuture.supplyAsync( + () -> { + if (!AuthorizationUtils.canManageDomains(context)) { + throw new AuthorizationException( + "Unauthorized to perform this action. 
Please contact your DataHub administrator."); + } + + try { + DisplayProperties existingDisplayProperties = + (DisplayProperties) + EntityUtils.getAspectFromEntity( + context.getOperationContext(), + targetUrn.toString(), + Constants.DISPLAY_PROPERTIES_ASPECT_NAME, + _entityService, + new DisplayProperties()); + if (input.getColorHex() != null) { + existingDisplayProperties.setColorHex(input.getColorHex()); + } + if (input.getIcon() != null) { + if (!existingDisplayProperties.hasIcon()) { + existingDisplayProperties.setIcon(new IconProperties()); + } + if (input.getIcon().getName() != null) { + existingDisplayProperties.getIcon().setName(input.getIcon().getName()); + } + if (input.getIcon().getStyle() != null) { + existingDisplayProperties.getIcon().setStyle(input.getIcon().getStyle()); + } + if (input.getIcon().getIconLibrary() != null) { + existingDisplayProperties + .getIcon() + .setIconLibrary( + IconLibrary.valueOf(input.getIcon().getIconLibrary().toString())); + } + } + Urn actor = CorpuserUrn.createFromString(context.getActorUrn()); + persistAspect( + context.getOperationContext(), + targetUrn, + Constants.DISPLAY_PROPERTIES_ASPECT_NAME, + existingDisplayProperties, + actor, + _entityService); + return true; + } catch (Exception e) { + log.error( + "Failed to update DisplayProperties for urn: {}, properties: {}. {}", + targetUrn.toString(), + input.toString(), + e.getMessage()); + throw new RuntimeException( + String.format( + "Failed to update DisplayProperties for urn: {}, properties: {}. 
{}", + targetUrn.toString(), + input.toString(), + e.getMessage())); + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java index 8a598f8d8bbdda..a88c526552df1e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateUserSettingResolver.java @@ -18,7 +18,9 @@ import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; import java.util.concurrent.CompletableFuture; +import javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -44,11 +46,16 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw try { // In the future with more settings, we'll need to do a read-modify-write // for now though, we can just write since there is only 1 setting - CorpUserSettings newSettings = new CorpUserSettings(); - newSettings.setAppearance(new CorpUserAppearanceSettings()); + CorpUserSettings newSettings = + getCorpUserSettings(context.getOperationContext(), actor); + CorpUserAppearanceSettings appearanceSettings = + newSettings.hasAppearance() + ? 
newSettings.getAppearance() + : new CorpUserAppearanceSettings(); if (name.equals(UserSetting.SHOW_SIMPLIFIED_HOMEPAGE)) { - newSettings.setAppearance( - new CorpUserAppearanceSettings().setShowSimplifiedHomepage(value)); + newSettings.setAppearance(appearanceSettings.setShowSimplifiedHomepage(value)); + } else if (name.equals(UserSetting.SHOW_THEME_V2)) { + newSettings.setAppearance(appearanceSettings.setShowThemeV2(value)); } else { log.error("User Setting name {} not currently supported", name); throw new RuntimeException( @@ -77,4 +84,13 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw this.getClass().getSimpleName(), "get"); } + + @Nonnull + private CorpUserSettings getCorpUserSettings( + @Nonnull OperationContext opContext, @Nonnull final Urn urn) { + CorpUserSettings settings = + (CorpUserSettings) + _entityService.getAspect(opContext, urn, CORP_USER_SETTINGS_ASPECT_NAME, 0); + return settings == null ? new CorpUserSettings() : settings; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java index 541224b02c1b52..f4beb9e5a9b6c3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java @@ -16,8 +16,10 @@ import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityUtils; +import com.linkedin.metadata.utils.SchemaFieldUtils; import com.linkedin.mxe.MetadataChangeProposal; import io.datahubproject.metadata.context.OperationContext; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; @@ -50,6 +52,7 @@ public static void 
updateDeprecationForResources( boolean deprecated, @Nullable String note, @Nullable Long decommissionTime, + @Nullable String replacementUrn, List resources, Urn actor, EntityService entityService) { @@ -57,7 +60,14 @@ public static void updateDeprecationForResources( for (ResourceRefInput resource : resources) { changes.add( buildUpdateDeprecationProposal( - opContext, deprecated, note, decommissionTime, resource, actor, entityService)); + opContext, + deprecated, + note, + decommissionTime, + replacementUrn, + resource, + actor, + entityService)); } EntityUtils.ingestChangeProposals(opContext, changes, entityService, actor, false); } @@ -67,14 +77,38 @@ private static MetadataChangeProposal buildUpdateDeprecationProposal( boolean deprecated, @Nullable String note, @Nullable Long decommissionTime, + @Nullable String replacementUrn, ResourceRefInput resource, Urn actor, EntityService entityService) { String resourceUrn = resource.getResourceUrn(); + String subResource = resource.getSubResource(); + String targetUrn = ""; + + if (subResource == null) { + targetUrn = resourceUrn; + } else { + try { + targetUrn = + SchemaFieldUtils.generateSchemaFieldUrn(Urn.createFromString(resourceUrn), subResource) + .toString(); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + Deprecation deprecation = getDeprecation( - opContext, entityService, resourceUrn, actor, note, deprecated, decommissionTime); + opContext, + entityService, + resourceUrn, + actor, + note, + deprecated, + decommissionTime, + replacementUrn); + return MutationUtils.buildMetadataChangeProposalWithUrn( - UrnUtils.getUrn(resourceUrn), Constants.DEPRECATION_ASPECT_NAME, deprecation); + UrnUtils.getUrn(targetUrn), Constants.DEPRECATION_ASPECT_NAME, deprecation); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java 
index b9a12a19f617a3..c0951f926da42a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -337,4 +337,15 @@ public static String mapOwnershipTypeToEntity(String type) { final String typeName = SYSTEM_ID + type.toLowerCase(); return Urn.createFromTuple(Constants.OWNERSHIP_TYPE_ENTITY_NAME, typeName).toString(); } + + public static boolean isAuthorizedToUpdateOwners(@Nonnull QueryContext context, Urn resourceUrn) { + final DisjunctivePrivilegeGroup orPrivilegeGroups = + new DisjunctivePrivilegeGroup( + ImmutableList.of( + ALL_PRIVILEGES_GROUP, + new ConjunctivePrivilegeGroup( + ImmutableList.of(PoliciesConfig.EDIT_ENTITY_OWNERS_PRIVILEGE.getType())))); + return AuthorizationUtils.isAuthorized( + context, resourceUrn.getEntityType(), resourceUrn.toString(), orPrivilegeGroups); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java index dc7797882371b7..ba78466fa25898 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/post/ListPostsResolver.java @@ -4,26 +4,34 @@ import static com.linkedin.metadata.Constants.*; import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.generated.AndFilterInput; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.FilterOperator; import 
com.linkedin.datahub.graphql.generated.ListPostsInput; import com.linkedin.datahub.graphql.generated.ListPostsResult; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.post.PostMapper; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchResult; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; +import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -47,7 +55,9 @@ public CompletableFuture get(final DataFetchingEnvironment envi final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); - + final String maybeResourceUrn = input.getResourceUrn() == null ? null : input.getResourceUrn(); + final List filters = + input.getOrFilters() == null ? 
new ArrayList<>() : input.getOrFilters(); return GraphQLConcurrencyUtils.supplyAsync( () -> { try { @@ -63,7 +73,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), POST_ENTITY_NAME, query, - null, + buildFilters(maybeResourceUrn, filters), sortCriteria, start, count); @@ -95,4 +105,22 @@ public CompletableFuture get(final DataFetchingEnvironment envi this.getClass().getSimpleName(), "get"); } + + @Nullable + private Filter buildFilters(@Nullable String maybeResourceUrn, List filters) { + // Or between filters provided by the user and the maybeResourceUrn if present + if (maybeResourceUrn != null) { + filters.add( + new AndFilterInput( + List.of( + new FacetFilterInput( + "target", + null, + ImmutableList.of(maybeResourceUrn), + false, + FilterOperator.EQUAL)))); + } + + return ResolverUtils.buildFilter(null, filters); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java index 3c84884bedbae8..778e8e3b61cd60 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.query; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; import static com.linkedin.metadata.Constants.*; import com.google.common.collect.ImmutableList; @@ -14,6 +15,7 @@ import com.linkedin.datahub.graphql.generated.ListQueriesInput; import com.linkedin.datahub.graphql.generated.ListQueriesResult; import com.linkedin.datahub.graphql.generated.QueryEntity; +import 
com.linkedin.datahub.graphql.resolvers.search.SearchUtils; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -56,13 +58,26 @@ public CompletableFuture get(final DataFetchingEnvironment en final Integer start = input.getStart() == null ? DEFAULT_START : input.getStart(); final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount(); final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery(); + final Filter inputFilter = + input.getOrFilters() != null + ? buildFilter(Collections.emptyList(), input.getOrFilters()) + : null; + final Filter finalFilter = + inputFilter != null + ? SearchUtils.combineFilters(inputFilter, buildFilters(input)) + : buildFilters(input); return GraphQLConcurrencyUtils.supplyAsync( () -> { try { - final List sortCriteria = - Collections.singletonList( - new SortCriterion().setField(CREATED_AT_FIELD).setOrder(SortOrder.DESCENDING)); + List sortCriteria = + input.getSortInput() != null + ? Collections.singletonList( + mapSortCriterion(input.getSortInput().getSortCriterion())) + : Collections.singletonList( + new SortCriterion() + .setField(CREATED_AT_FIELD) + .setOrder(SortOrder.DESCENDING)); // First, get all Query Urns. 
final SearchResult gmsResult = @@ -73,7 +88,7 @@ public CompletableFuture get(final DataFetchingEnvironment en flags -> flags.setFulltext(true).setSkipHighlighting(true)), QUERY_ENTITY_NAME, query, - buildFilters(input), + finalFilter, sortCriteria, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java index f105a72a1273ee..fbcce1a5e6b065 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java @@ -23,11 +23,13 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.ScrollResults; import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.generated.SearchSortInput; import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.common.mappers.SearchFlagsInputMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.datahub.graphql.types.mappers.UrnScrollResultsMapper; import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.query.SearchFlags; @@ -38,6 +40,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.service.ViewService; import com.linkedin.view.DataHubViewInfo; @@ -362,6 +365,93 @@ public static List 
getSortCriteria(@Nullable final SearchSortInpu return sortCriteria; } + public static CompletableFuture scrollAcrossEntities( + QueryContext inputContext, + final EntityClient _entityClient, + final ViewService _viewService, + List inputEntityTypes, + String inputQuery, + Filter baseFilter, + String viewUrn, + com.linkedin.datahub.graphql.generated.SearchFlags inputSearchFlags, + Integer inputCount, + String scrollId, + String inputKeepAlive, + String className) { + + final List entityTypes = + (inputEntityTypes == null || inputEntityTypes.isEmpty()) + ? SEARCHABLE_ENTITY_TYPES + : inputEntityTypes; + final List entityNames = + entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList()); + + // escape forward slash since it is a reserved character in Elasticsearch, default to * if + // blank/empty + final String query = + StringUtils.isNotBlank(inputQuery) ? ResolverUtils.escapeForwardSlash(inputQuery) : "*"; + + final Optional searchFlags = + Optional.ofNullable(inputSearchFlags) + .map((flags) -> SearchFlagsInputMapper.map(inputContext, flags)); + final OperationContext context = + inputContext.getOperationContext().withSearchFlags(searchFlags::orElse); + + final int count = Optional.ofNullable(inputCount).orElse(DEFAULT_SCROLL_COUNT); + final String keepAlive = Optional.ofNullable(inputKeepAlive).orElse(DEFAULT_SCROLL_KEEP_ALIVE); + + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + final OperationContext baseContext = inputContext.getOperationContext(); + final Optional maybeResolvedView = + Optional.ofNullable(viewUrn) + .map((urn) -> resolveView(baseContext, _viewService, UrnUtils.getUrn(urn))); + + final List finalEntityNames = + maybeResolvedView + .map( + (view) -> + intersectEntityTypes(entityNames, view.getDefinition().getEntityTypes())) + .orElse(entityNames); + + final Filter finalFilters = + maybeResolvedView + .map((view) -> combineFilters(baseFilter, view.getDefinition().getFilter())) + .orElse(baseFilter); + + log.debug( 
+ "Executing search for multiple entities: entity types {}, query {}, filters: {}, scrollId: {}, count: {}", + finalEntityNames, + query, + finalFilters, + scrollId, + count); + + try { + final ScrollResult scrollResult = + _entityClient.scrollAcrossEntities( + context, finalEntityNames, query, finalFilters, scrollId, keepAlive, count); + return UrnScrollResultsMapper.map(inputContext, scrollResult); + } catch (Exception e) { + log.warn( + "Failed to execute search for multiple entities: entity types {}, query {}, filters: {}, searchAfter: {}, count: {}", + finalEntityNames, + query, + finalFilters, + scrollId, + count); + throw new RuntimeException( + "Failed to execute search: " + + String.format( + "entity types %s, query %s, filters: %s, start: %s, count: %s", + finalEntityNames, query, finalFilters, scrollId, count), + e); + } + }, + className, + "scrollAcrossEntities"); + } + public static CompletableFuture searchAcrossEntities( QueryContext inputContext, final EntityClient _entityClient, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java new file mode 100644 index 00000000000000..6e1425bc44166f --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/siblings/SiblingsSearchResolver.java @@ -0,0 +1,66 @@ +package com.linkedin.datahub.graphql.resolvers.siblings; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.ScrollAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.ScrollResults; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; +import com.linkedin.datahub.graphql.resolvers.search.SearchUtils; +import 
com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.concurrent.CompletableFuture; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** Resolver that executes a searchAcrossEntities only on an entity's siblings */ +@Slf4j +@RequiredArgsConstructor +public class SiblingsSearchResolver implements DataFetcher> { + + private static final String SIBLINGS_FIELD_NAME = "siblings"; + + private final EntityClient _entityClient; + private final ViewService _viewService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) { + final Entity entity = environment.getSource(); + final QueryContext context = environment.getContext(); + final ScrollAcrossEntitiesInput input = + bindArgument(environment.getArgument("input"), ScrollAcrossEntitiesInput.class); + + final Criterion siblingsFilter = + CriterionUtils.buildCriterion(SIBLINGS_FIELD_NAME, Condition.EQUAL, entity.getUrn()); + final Filter baseFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd(new CriterionArray(siblingsFilter)))); + final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + + return SearchUtils.scrollAcrossEntities( + context, + _entityClient, + _viewService, + input.getTypes(), + input.getQuery(), + SearchUtils.combineFilters(inputFilter, baseFilter), + input.getViewUrn(), + input.getSearchFlags(), + input.getCount(), + input.getScrollId(), 
+ input.getKeepAlive(), + this.getClass().getSimpleName()); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetTimelineResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetTimelineResolver.java new file mode 100644 index 00000000000000..94d925b6e82548 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/timeline/GetTimelineResolver.java @@ -0,0 +1,80 @@ +package com.linkedin.datahub.graphql.resolvers.timeline; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.ChangeCategoryType; +import com.linkedin.datahub.graphql.generated.GetTimelineInput; +import com.linkedin.datahub.graphql.generated.GetTimelineResult; +import com.linkedin.datahub.graphql.types.timeline.mappers.ChangeTransactionMapper; +import com.linkedin.metadata.timeline.TimelineService; +import com.linkedin.metadata.timeline.data.ChangeCategory; +import com.linkedin.metadata.timeline.data.ChangeTransaction; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; + +/* +Returns the timeline in its original form + */ +@Slf4j +public class GetTimelineResolver implements DataFetcher> { + private final TimelineService _timelineService; + + public GetTimelineResolver(TimelineService timelineService) { + _timelineService = timelineService; + } + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) + throws Exception { + final GetTimelineInput input = + bindArgument(environment.getArgument("input"), GetTimelineInput.class); + + final String datasetUrnString = input.getUrn(); + 
final List changeCategories = input.getChangeCategories(); + final long startTime = 0; + final long endTime = 0; + // final String version = input.getVersion() == null ? null : input.getVersion(); + + return CompletableFuture.supplyAsync( + () -> { + try { + final Set changeCategorySet = + changeCategories != null + ? changeCategories.stream() + .map( + changeCategoryType -> + ChangeCategory.valueOf(changeCategoryType.toString())) + .collect(Collectors.toSet()) + : Arrays.stream(ChangeCategory.values()).collect(Collectors.toSet()); + final Urn datasetUrn = Urn.createFromString(datasetUrnString); + final List changeTransactionList = + _timelineService.getTimeline( + datasetUrn, changeCategorySet, startTime, endTime, null, null, false); + GetTimelineResult result = new GetTimelineResult(); + result.setChangeTransactions( + changeTransactionList.stream() + .map(ChangeTransactionMapper::map) + .collect(Collectors.toList())); + return result; + } catch (URISyntaxException u) { + log.error( + String.format( + "Failed to list schema blame data, likely due to the Urn %s being invalid", + datasetUrnString), + u); + return null; + } catch (Exception e) { + log.error("Failed to list schema blame data", e); + return null; + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DeprecationMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DeprecationMapper.java index 6959a6dcbd0393..c7523f946b950a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DeprecationMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DeprecationMapper.java @@ -23,6 +23,9 @@ public Deprecation apply( result.setActorEntity(UrnToEntityMapper.map(context, input.getActor())); result.setDeprecated(input.isDeprecated()); result.setDecommissionTime(input.getDecommissionTime()); + if (input.getReplacement() != 
null) { + result.setReplacement(UrnToEntityMapper.map(context, input.getReplacement())); + } result.setNote(input.getNote()); return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DisplayPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DisplayPropertiesMapper.java new file mode 100644 index 00000000000000..4e62a2d42baaac --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DisplayPropertiesMapper.java @@ -0,0 +1,47 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DisplayProperties; +import com.linkedin.datahub.graphql.generated.IconLibrary; +import com.linkedin.datahub.graphql.generated.IconProperties; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import javax.annotation.Nullable; + +public class DisplayPropertiesMapper + implements ModelMapper { + public static final DisplayPropertiesMapper INSTANCE = new DisplayPropertiesMapper(); + + public static DisplayProperties map( + @Nullable final QueryContext context, com.linkedin.common.DisplayProperties input) { + return INSTANCE.apply(context, input); + } + + @Override + public DisplayProperties apply( + @Nullable final QueryContext context, com.linkedin.common.DisplayProperties input) { + final DisplayProperties result = new DisplayProperties(); + + if (input.hasColorHex()) { + result.setColorHex(input.getColorHex()); + } + if (input.hasIcon()) { + final com.linkedin.common.IconProperties iconPropertiesInput = input.getIcon(); + if (iconPropertiesInput != null) { + final IconProperties iconPropertiesResult = new IconProperties(); + if (iconPropertiesInput.hasIconLibrary()) { + iconPropertiesResult.setIconLibrary( + IconLibrary.valueOf(iconPropertiesInput.getIconLibrary().toString())); + } + if 
(iconPropertiesInput.hasName()) { + iconPropertiesResult.setName(iconPropertiesInput.getName()); + } + if (iconPropertiesInput.hasStyle()) { + iconPropertiesResult.setStyle(iconPropertiesInput.getStyle()); + } + result.setIcon(iconPropertiesResult); + } + } + + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java index b815c1b1c1dd9f..694cda13a70e26 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java @@ -25,6 +25,7 @@ import com.linkedin.datahub.graphql.generated.ERModelRelationship; import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.Form; import com.linkedin.datahub.graphql.generated.GlossaryNode; import com.linkedin.datahub.graphql.generated.GlossaryTerm; import com.linkedin.datahub.graphql.generated.MLFeature; @@ -34,6 +35,7 @@ import com.linkedin.datahub.graphql.generated.MLPrimaryKey; import com.linkedin.datahub.graphql.generated.Notebook; import com.linkedin.datahub.graphql.generated.OwnershipTypeEntity; +import com.linkedin.datahub.graphql.generated.Post; import com.linkedin.datahub.graphql.generated.QueryEntity; import com.linkedin.datahub.graphql.generated.Restricted; import com.linkedin.datahub.graphql.generated.Role; @@ -227,6 +229,16 @@ public Entity apply(@Nullable QueryContext context, Urn input) { ((BusinessAttribute) partialEntity).setUrn(input.toString()); ((BusinessAttribute) partialEntity).setType(EntityType.BUSINESS_ATTRIBUTE); } + if (input.getEntityType().equals(FORM_ENTITY_NAME)) { + partialEntity = new Form(); + ((Form) partialEntity).setUrn(input.toString()); 
+ ((Form) partialEntity).setType(EntityType.FORM); + } + if (input.getEntityType().equals(POST_ENTITY_NAME)) { + partialEntity = new Post(); + ((Post) partialEntity).setUrn(input.toString()); + ((Post) partialEntity).setType(EntityType.POST); + } if (input.getEntityType().equals(DATA_PROCESS_INSTANCE_ENTITY_NAME)) { partialEntity = new DataProcessInstance(); ((DataProcessInstance) partialEntity).setUrn(input.toString()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/ContainerType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/ContainerType.java index 37b021fcb1091a..fc47966b21903c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/ContainerType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/ContainerType.java @@ -53,7 +53,8 @@ public class ContainerType Constants.DATA_PRODUCTS_ASPECT_NAME, Constants.STRUCTURED_PROPERTIES_ASPECT_NAME, Constants.FORMS_ASPECT_NAME, - Constants.ACCESS_ASPECT_NAME); + Constants.ACCESS_ASPECT_NAME, + Constants.BROWSE_PATHS_V2_ASPECT_NAME); private static final Set FACET_FIELDS = ImmutableSet.of("origin", "platform"); private static final String ENTITY_NAME = "container"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java index 7ac00c46475bce..5393235de26b45 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java @@ -3,6 +3,7 @@ import static com.linkedin.metadata.Constants.*; import com.linkedin.common.Access; +import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.DataPlatformInstance; import 
com.linkedin.common.Deprecation; import com.linkedin.common.Forms; @@ -20,6 +21,7 @@ import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.common.mappers.BrowsePathsV2Mapper; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper; @@ -172,6 +174,13 @@ public static Container map( FormsMapper.map(new Forms(envelopedForms.getValue().data()), entityUrn.toString())); } + final EnvelopedAspect envelopedBrowsePathsV2 = aspects.get(BROWSE_PATHS_V2_ASPECT_NAME); + if (envelopedBrowsePathsV2 != null) { + result.setBrowsePathV2( + BrowsePathsV2Mapper.map( + context, new BrowsePathsV2(envelopedBrowsePathsV2.getValue().data()))); + } + return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java index a94b555daebdfb..e37322c3f5fd92 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java @@ -134,6 +134,7 @@ private CorpUserAppearanceSettings mapCorpUserAppearanceSettings( if (corpUserSettings.hasAppearance()) { appearanceResult.setShowSimplifiedHomepage( corpUserSettings.getAppearance().isShowSimplifiedHomepage()); + appearanceResult.setShowThemeV2(corpUserSettings.getAppearance().isShowThemeV2()); } return appearanceResult; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java index ffcb94a0b7e29e..f0792928bef089 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java @@ -4,6 +4,7 @@ import static com.linkedin.metadata.Constants.FORMS_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; +import com.linkedin.common.DisplayProperties; import com.linkedin.common.Forms; import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; @@ -12,6 +13,7 @@ import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.common.mappers.DisplayPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.form.FormsMapper; @@ -82,6 +84,14 @@ public static Domain map(@Nullable QueryContext context, final EntityResponse en FormsMapper.map(new Forms(envelopedForms.getValue().data()), entityUrn.toString())); } + final EnvelopedAspect envelopedDisplayProperties = + aspects.get(Constants.DISPLAY_PROPERTIES_ASPECT_NAME); + if (envelopedDisplayProperties != null) { + result.setDisplayProperties( + DisplayPropertiesMapper.map( + context, new DisplayProperties(envelopedDisplayProperties.getValue().data()))); + } + if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(result, Domain.class); } else { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java index a694b62999080e..f8425952bcdfe4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.authorization.AuthorizationUtils.canView; import static com.linkedin.metadata.Constants.*; +import com.linkedin.common.DisplayProperties; import com.linkedin.common.Forms; import com.linkedin.common.Ownership; import com.linkedin.common.urn.Urn; @@ -13,6 +14,7 @@ import com.linkedin.datahub.graphql.generated.GlossaryNode; import com.linkedin.datahub.graphql.generated.GlossaryNodeProperties; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.DisplayPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; import com.linkedin.datahub.graphql.types.form.FormsMapper; @@ -65,6 +67,11 @@ public GlossaryNode apply( FORMS_ASPECT_NAME, ((entity, dataMap) -> entity.setForms(FormsMapper.map(new Forms(dataMap), entityUrn.toString())))); + mappingHelper.mapToResult( + DISPLAY_PROPERTIES_ASPECT_NAME, + ((glossaryNode, dataMap) -> + glossaryNode.setDisplayProperties( + DisplayPropertiesMapper.map(context, new DisplayProperties(dataMap))))); if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), GlossaryNode.class); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java index 8fe58df2d2edec..f747ba7ee4bd25 
100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java @@ -86,7 +86,8 @@ public static FacetMetadata mapFacet( filterValue.getFacetCount(), filterValue.getEntity() == null ? null - : UrnToEntityMapper.map(context, filterValue.getEntity()))) + : UrnToEntityMapper.map(context, filterValue.getEntity()), + null)) .collect(Collectors.toList())); return facetMetadata; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java index 674011a4f2f288..e33db2a013190f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostMapper.java @@ -48,6 +48,11 @@ private void mapPostInfo(@Nonnull Post post, @Nonnull DataMap dataMap) { post.setContent(mapPostContent(postInfo.getContent())); AuditStamp lastModified = new AuditStamp(); lastModified.setTime(postInfo.getLastModified()); + if (postInfo.hasAuditStamp() + && postInfo.getAuditStamp() != null + && postInfo.getAuditStamp().hasActor()) { + lastModified.setActor(postInfo.getAuditStamp().getActor().toString()); + } post.setLastModified(lastModified); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostType.java new file mode 100644 index 00000000000000..c0b5ae8a32f065 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/post/PostType.java @@ -0,0 +1,80 @@ +package com.linkedin.datahub.graphql.types.post; + +import static com.linkedin.datahub.graphql.authorization.AuthorizationUtils.canView; +import static 
com.linkedin.metadata.Constants.POST_INFO_ASPECT_NAME; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.Post; +import com.linkedin.datahub.graphql.types.EntityType; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class PostType implements EntityType { + public static final Set ASPECTS_TO_FETCH = ImmutableSet.of(POST_INFO_ASPECT_NAME); + + private final EntityClient _entityClient; + + @Override + public com.linkedin.datahub.graphql.generated.EntityType type() { + return com.linkedin.datahub.graphql.generated.EntityType.POST; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return Post.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + try { + final List postUrns = urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + final Map postMap = + _entityClient.batchGetV2( + context.getOperationContext(), + Constants.POST_ENTITY_NAME, + postUrns.stream() + .filter(urn -> canView(context.getOperationContext(), urn)) + .collect(Collectors.toSet()), + ASPECTS_TO_FETCH); + + final List gmsResults = new ArrayList<>(urns.size()); + for (Urn urn : postUrns) { + gmsResults.add(postMap.getOrDefault(urn, null)); + } + + return gmsResults.stream() + .map( + gmsPost -> + gmsPost == 
null + ? null + : DataFetcherResult.newResult() + .data(PostMapper.map(context, gmsPost)) + .build()) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load Queries", e); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java index 916ebc772f545f..71b66b38fe05ac 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java @@ -4,6 +4,7 @@ import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.DataPlatform; @@ -11,6 +12,7 @@ import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.QueryEntity; import com.linkedin.datahub.graphql.generated.QuerySubject; +import com.linkedin.datahub.graphql.generated.SchemaFieldEntity; import com.linkedin.datahub.graphql.types.common.mappers.QueryPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; @@ -67,16 +69,36 @@ private void mapQuerySubjects(@Nonnull QueryEntity query, @Nonnull DataMap dataM QuerySubjects querySubjects = new QuerySubjects(dataMap); List res = querySubjects.getSubjects().stream() - .map(sub -> new QuerySubject(createPartialDataset(sub.getEntity()))) + .map(this::mapQuerySubject) .collect(Collectors.toList()); query.setSubjects(res); } + @Nonnull + private QuerySubject mapQuerySubject(com.linkedin.query.QuerySubject subject) { + QuerySubject result = new QuerySubject(); + if 
(subject.getEntity().getEntityType().equals(DATASET_ENTITY_NAME)) { + result.setDataset(createPartialDataset(subject.getEntity())); + } else if (subject.getEntity().getEntityType().equals(SCHEMA_FIELD_ENTITY_NAME)) { + String parentDataset = subject.getEntity().getEntityKey().get(0); + result.setDataset(createPartialDataset(UrnUtils.getUrn(parentDataset))); + result.setSchemaField(createPartialSchemaField(subject.getEntity())); + } + return result; + } + @Nonnull private Dataset createPartialDataset(@Nonnull Urn datasetUrn) { Dataset partialDataset = new Dataset(); partialDataset.setUrn(datasetUrn.toString()); return partialDataset; } + + @Nonnull + private SchemaFieldEntity createPartialSchemaField(@Nonnull Urn urn) { + SchemaFieldEntity partialSchemaField = new SchemaFieldEntity(); + partialSchemaField.setUrn(urn.toString()); + return partialSchemaField; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldMapper.java index 30eac54aede9bb..b476fa202d3097 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldMapper.java @@ -3,13 +3,17 @@ import static com.linkedin.metadata.Constants.*; import com.linkedin.businessattribute.BusinessAttributes; +import com.linkedin.common.Deprecation; import com.linkedin.common.Documentation; +import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.SchemaFieldEntity; import com.linkedin.datahub.graphql.types.businessattribute.mappers.BusinessAttributesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper; import 
com.linkedin.datahub.graphql.types.common.mappers.DocumentationMapper; +import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; @@ -52,6 +56,15 @@ public SchemaFieldEntity apply( DOCUMENTATION_ASPECT_NAME, (entity, dataMap) -> entity.setDocumentation(DocumentationMapper.map(context, new Documentation(dataMap)))); + mappingHelper.mapToResult( + STATUS_ASPECT_NAME, + (entity, dataMap) -> entity.setStatus(StatusMapper.map(context, new Status(dataMap)))); + mappingHelper.mapToResult( + DEPRECATION_ASPECT_NAME, + ((schemaField, dataMap) -> + schemaField.setDeprecation( + DeprecationMapper.map(context, new Deprecation((dataMap)))))); + return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldType.java index 2fa26d8cf2cdd7..5ed40f843bdf79 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/schemafield/SchemaFieldType.java @@ -31,7 +31,11 @@ public class SchemaFieldType public static final Set ASPECTS_TO_FETCH = ImmutableSet.of( - STRUCTURED_PROPERTIES_ASPECT_NAME, BUSINESS_ATTRIBUTE_ASPECT, DOCUMENTATION_ASPECT_NAME); + STRUCTURED_PROPERTIES_ASPECT_NAME, + DEPRECATION_ASPECT_NAME, + BUSINESS_ATTRIBUTE_ASPECT, + DOCUMENTATION_ASPECT_NAME, + STATUS_ASPECT_NAME); private final EntityClient _entityClient; private final FeatureFlags _featureFlags; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeEventMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeEventMapper.java new file mode 100644 index 00000000000000..8c39760f739487 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeEventMapper.java @@ -0,0 +1,47 @@ +package com.linkedin.datahub.graphql.types.timeline.mappers; + +import com.linkedin.datahub.graphql.generated.ChangeCategoryType; +import com.linkedin.datahub.graphql.generated.ChangeOperationType; +import com.linkedin.metadata.timeline.data.ChangeEvent; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +// Class for converting ChangeTransactions received from the Timeline API to SchemaFieldBlame +// structs for every schema +// at every semantic version. +@Slf4j +public class ChangeEventMapper { + + public static com.linkedin.datahub.graphql.generated.ChangeEvent map( + @Nonnull final ChangeEvent incomingChangeEvent) { + final com.linkedin.datahub.graphql.generated.ChangeEvent result = + new com.linkedin.datahub.graphql.generated.ChangeEvent(); + + // result.setAuditStamp(AuditStampMapper.map(incomingChangeEvent.getAuditStamp())); + result.setUrn("empty"); + result.setCategory(ChangeCategoryType.valueOf(incomingChangeEvent.getCategory().toString())); + result.setDescription(incomingChangeEvent.getDescription()); + result.setModifier(incomingChangeEvent.getModifier()); + result.setOperation(ChangeOperationType.valueOf(incomingChangeEvent.getOperation().toString())); + if (incomingChangeEvent.getParameters() != null) { + result.setParameters( + incomingChangeEvent.getParameters().entrySet().stream() + .map( + entry -> { + final com.linkedin.datahub.graphql.generated.TimelineParameterEntry + changeParameter = + new com.linkedin.datahub.graphql.generated.TimelineParameterEntry(); + changeParameter.setKey(entry.getKey()); + changeParameter.setValue(entry.getValue().toString()); + return 
changeParameter; + }) + .collect(Collectors.toList())); + } + result.setUrn(result.getUrn()); + + return result; + } + + private ChangeEventMapper() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeTransactionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeTransactionMapper.java new file mode 100644 index 00000000000000..91d30f93cf2e83 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/timeline/mappers/ChangeTransactionMapper.java @@ -0,0 +1,37 @@ +package com.linkedin.datahub.graphql.types.timeline.mappers; + +import com.linkedin.datahub.graphql.generated.ChangeOperationType; +import com.linkedin.metadata.timeline.data.ChangeTransaction; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +// Class for converting ChangeTransactions received from the Timeline API to SchemaFieldBlame +// structs for every schema +// at every semantic version. +@Slf4j +public class ChangeTransactionMapper { + + public static com.linkedin.datahub.graphql.generated.ChangeTransaction map( + @Nonnull final ChangeTransaction incomingChangeTransaction) { + final com.linkedin.datahub.graphql.generated.ChangeTransaction result = + new com.linkedin.datahub.graphql.generated.ChangeTransaction(); + + result.setLastSemanticVersion( + incomingChangeTransaction.getSemVer() == null + ? 
"none" + : incomingChangeTransaction.getSemVer()); + result.setTimestampMillis(incomingChangeTransaction.getTimestamp()); + result.setVersionStamp("none"); + result.setChangeType(ChangeOperationType.MODIFY); + + result.setChanges( + incomingChangeTransaction.getChangeEvents().stream() + .map(ChangeEventMapper::map) + .collect(Collectors.toList())); + + return result; + } + + private ChangeTransactionMapper() {} +} diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql index ca7f89415f6b87..6c45419b622a89 100644 --- a/datahub-graphql-core/src/main/resources/app.graphql +++ b/datahub-graphql-core/src/main/resources/app.graphql @@ -231,6 +231,11 @@ type AppConfig { Feature flags telling the UI whether a feature is enabled or not """ featureFlags: FeatureFlagsConfig! + + """ + Configuration related to the DataHub Chrome Extension + """ + chromeExtensionConfig: ChromeExtensionConfig! } """ @@ -487,6 +492,11 @@ type FeatureFlagsConfig { """ platformBrowseV2: Boolean! + """ + Whether to show the new lineage visualization. + """ + lineageGraphV2: Boolean! + """ Whether we should show CTAs in the UI related to moving to DataHub Cloud by Acryl. """ @@ -522,6 +532,30 @@ type FeatureFlagsConfig { """ editableDatasetNameEnabled: Boolean! + """ + Allows the V2 theme to be turned on. + Includes new UX for home page, search, entity profiles, and lineage. + If false, then the V2 experience will be unavailable even if themeV2Default or themeV2Toggleable are true. + """ + themeV2Enabled: Boolean! + + """ + Sets the default theme to V2. + If `themeV2Toggleable` is set, then users can toggle between V1 and V2. + If not, then the default is the only option. + """ + themeV2Default: Boolean! + + """ + Allows the V2 theme to be toggled by users. + """ + themeV2Toggleable: Boolean! + + """ + Enables links to schema field-level lineage on lineage explorer. + """ + schemaFieldCLLEnabled: Boolean! 
+ """ If turned on, all siblings will be separated with no way to get to a "combined" sibling view """ @@ -532,6 +566,29 @@ type FeatureFlagsConfig { """ showManageStructuredProperties: Boolean! + """ + If turned on, hides DBT Sources from lineage by: + i) Hiding the source in the lineage graph, if it has no downstreams + ii) Swapping to the source's sibling urn on V2 lineage graph + iii) Representing source sibling as a merged node, with both icons on graph and combined version in sidebar + """ + hideDbtSourceInLineage: Boolean! + + """ + If turned on, schema field lineage will always fetch ghost entities and present them as real + """ + schemaFieldLineageIgnoreStatus: Boolean! + + """ + If turned on, show the newly designed nav bar in the V2 experience + """ + showNavBarRedesign: Boolean! + + """ + If turned on, we display auto complete results. Otherwise, do not. + """ + showAutoCompleteResults: Boolean! + """ If turned on, exposes the versioning feature by allowing users to link entities in the UI. """ @@ -579,3 +636,18 @@ type DocPropagationSettings { """ docColumnPropagation: Boolean } +""" +Configurations related to DataHub Chrome extension +""" +type ChromeExtensionConfig { + """ + Whether the Chrome Extension is enabled + """ + enabled: Boolean! + + """ + Whether lineage is enabled + """ + lineageEnabled: Boolean! +} + diff --git a/datahub-graphql-core/src/main/resources/auth.graphql b/datahub-graphql-core/src/main/resources/auth.graphql index 5ce26067fe58e3..81d8689dcdfdd0 100644 --- a/datahub-graphql-core/src/main/resources/auth.graphql +++ b/datahub-graphql-core/src/main/resources/auth.graphql @@ -286,6 +286,86 @@ type EntityPrivileges { Whether or not a user can update the properties for the entity (e.g. 
dataset) """ canEditProperties: Boolean + + """ + Whether or not a user can update tags for the entity + """ + canEditTags: Boolean + + """ + Whether or not a user can update glossary terms for the entity + """ + canEditGlossaryTerms: Boolean + + """ + Whether or not a user can update the description for the entity + """ + canEditDescription: Boolean + + """ + Whether or not a user can update the links for the entity + """ + canEditLinks: Boolean + + """ + Whether or not a user can update the domain(s) for the entity + """ + canEditDomains: Boolean + + """ + Whether or not a user can update the data product(s) that the entity belongs to + """ + canEditDataProducts: Boolean + + """ + Whether or not a user can update the owners for the entity + """ + canEditOwners: Boolean + + """ + Whether or not a user can update the incidents for an asset + """ + canEditIncidents: Boolean + + """ + Whether or not a user can update assertions for an asset + """ + canEditAssertions: Boolean + + """ + Whether or not a user can update the deprecation status for an entity + """ + canEditDeprecation: Boolean + + """ + Whether or not a user can update the schema field tags for a dataset + """ + canEditSchemaFieldTags: Boolean + + """ + Whether or not a user can update the schema field tags for a dataset + """ + canEditSchemaFieldGlossaryTerms: Boolean + + """ + Whether or not a user can update the schema field tags for a dataset + """ + canEditSchemaFieldDescription: Boolean + + """ + Whether the user can view dataset usage stats + """ + canViewDatasetUsage: Boolean + + """ + Whether the user can view dataset profiling stats + """ + canViewDatasetProfile: Boolean + + """ + Whether the user can view dataset operations + """ + canViewDatasetOperations: Boolean } """ diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 64c79b05745ded..f6adf884b2badc 100644 --- 
a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,6 +956,11 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean + + """ + Update a particular entity's display properties + """ + updateDisplayProperties(urn: String!, input: DisplayPropertiesUpdateInput!): Boolean } """ @@ -1750,6 +1755,11 @@ type Dataset implements EntityWithRelationships & Entity & BrowsableEntity { """ siblings: SiblingProperties + """ + Executes a search on only the siblings of an entity + """ + siblingsSearch(input: ScrollAcrossEntitiesInput!): ScrollResults + """ Lineage information for the column-level. Includes a list of objects detailing which columns are upstream and which are downstream of each other. @@ -1862,6 +1872,68 @@ type RoleProperties { requestUrl: String } +""" +Properties related to how the entity is displayed in the Datahub UI +""" +type DisplayProperties { + """ + Color associated with the entity in Hex. For example #FFFFFF + """ + colorHex: String + + """ + The icon associated with the entity + """ + icon: IconProperties +} + +""" +Input for Properties describing an icon associated with an entity +""" +input IconPropertiesInput { + """ + The source of the icon: e.g. Antd, Material, etc + """ + iconLibrary: IconLibrary + + """ + The name of the icon + """ + name: String + + """ + Any modifier for the icon, this will be library-specific, e.g. filled/outlined, etc + """ + style: String +} + +""" +Properties describing an icon associated with an entity +""" +type IconProperties { + """ + The source of the icon: e.g. Antd, Material, etc + """ + iconLibrary: IconLibrary + + """ + The name of the icon + """ + name: String + + """ + Any modifier for the icon, this will be library-specific, e.g. 
filled/outlined, etc + """ + style: String +} + +enum IconLibrary { + """ + Icons from the Material UI icon library + """ + MATERIAL +} + type FineGrainedLineage { upstreams: [SchemaFieldRef!] downstreams: [SchemaFieldRef!] @@ -2434,6 +2506,11 @@ type GlossaryNode implements Entity { The forms associated with the Dataset """ forms: Forms + + """ + Display properties for the glossary node + """ + displayProperties: DisplayProperties } """ @@ -2875,6 +2952,11 @@ type Container implements Entity { Privileges given to a user relevant to this entity """ privileges: EntityPrivileges + + """ + The browse path V2 corresponding to an entity. If no Browse Paths V2 have been generated before, this will be null. + """ + browsePathV2: BrowsePathV2 } """ @@ -3235,7 +3317,7 @@ type KeyValueSchema { Standalone schema field entity. Differs from the SchemaField struct because it is not directly nested inside a schema field """ -type SchemaFieldEntity implements Entity { +type SchemaFieldEntity implements EntityWithRelationships & Entity { """ Primary key of the schema field """ @@ -3271,6 +3353,11 @@ type SchemaFieldEntity implements Entity { """ relationships(input: RelationshipsInput!): EntityRelationshipsResult + """ + Edges extending from this entity grouped by direction in the lineage graph + """ + lineage(input: LineageInput!): EntityLineageResult + """ Business Attribute associated with the field """ @@ -3280,6 +3367,16 @@ type SchemaFieldEntity implements Entity { Documentation aspect for this schema field """ documentation: Documentation + + """ + The status of the SchemaFieldEntity + """ + status: Status + + """ + deprecation status of the schema field + """ + deprecation: Deprecation } """ @@ -3984,6 +4081,11 @@ type CorpUserAppearanceSettings { who have less operational use cases for the datahub tool. """ showSimplifiedHomepage: Boolean + + """ + Flag controlling whether the V2 UI for DataHub is shown. 
+ """ + showThemeV2: Boolean } """ @@ -4450,6 +4552,21 @@ type CorpGroupEditableProperties { pictureLink: String } +""" +Update a particular entity's display properties +""" +input DisplayPropertiesUpdateInput { + """ + Color associated with the entity in Hex. For example #FFFFFF + """ + colorHex: String + + """ + The icon associated with the entity + """ + icon: IconPropertiesInput +} + """ Arguments provided to update a CorpGroup Entity """ @@ -7153,12 +7270,10 @@ type DashboardStatsSummary { The view count in the last 30 days """ viewCountLast30Days: Int - """ The unique user count in the past 30 days """ uniqueUserCountLast30Days: Int - """ The top users in the past 30 days """ @@ -7179,7 +7294,6 @@ type ChartStatsSummary { The view count in the last 30 days """ viewCountLast30Days: Int - """ The unique user count in the past 30 days """ @@ -8309,6 +8423,11 @@ type Deprecation { The hydrated user who will be credited for modifying this deprecation content """ actorEntity: Entity + + """ + The optional replacement entity + """ + replacement: Entity } """ @@ -8798,6 +8917,16 @@ input UpdateDeprecationInput { """ urn: String! + """ + An optional type of a sub resource to set the deprecation for + """ + subResourceType: SubResourceType + + """ + An optional sub resource identifier to set the deprecation for + """ + subResource: String + """ Whether the Entity is marked as deprecated. """ @@ -8812,6 +8941,11 @@ input UpdateDeprecationInput { Optional - Additional information about the entity deprecation plan """ note: String + + """ + Optional - URN to replace the entity with + """ + replacement: String } @@ -8838,6 +8972,11 @@ input BatchUpdateDeprecationInput { The target assets to attach the tags to """ resources: [ResourceRefInput]! 
+ + """ + Optional - URN to replace the entity with + """ + replacement: String } """ @@ -10878,6 +11017,11 @@ type Domain implements Entity { """ forms: Forms + """ + Display properties for the domain + """ + displayProperties: DisplayProperties + """ Privileges given to a user relevant to this entity """ @@ -11277,6 +11421,11 @@ enum UserSetting { Show simplified homepage """ SHOW_SIMPLIFIED_HOMEPAGE + + """ + Show theme v2 + """ + SHOW_THEME_V2 } """ @@ -11428,6 +11577,11 @@ enum PostType { Posts on the home page """ HOME_PAGE_ANNOUNCEMENT, + + """ + Posts on an entity page + """ + ENTITY_ANNOUNCEMENT, } """ @@ -11568,6 +11722,16 @@ input ListPostsInput { Optional search query """ query: String + + """ + Optional resource urn + """ + resourceUrn: String + + """ + A list of disjunctive criterion for the filter. (or operation to combine filters) + """ + orFilters: [AndFilterInput!] } """ @@ -12044,6 +12208,12 @@ type QuerySubject { The dataset which is the subject of the Query """ dataset: Dataset! + + """ + The schema field which is the subject of the Query. + This will be populated if the subject is specifically a schema field. + """ + schemaField: SchemaFieldEntity } """ @@ -12214,6 +12384,16 @@ input ListQueriesInput { An optional Urn for the parent dataset that the query is associated with. """ datasetUrn: String + + """ + Optional - Information on how to sort the list queries result + """ + sortInput: SortQueriesInput + + """ + A list of disjunctive criterion for the filter. (or operation to combine filters) + """ + orFilters: [AndFilterInput!] } """ diff --git a/datahub-graphql-core/src/main/resources/query.graphql b/datahub-graphql-core/src/main/resources/query.graphql new file mode 100644 index 00000000000000..f4d2aae90a0ca5 --- /dev/null +++ b/datahub-graphql-core/src/main/resources/query.graphql @@ -0,0 +1,6 @@ +input SortQueriesInput { + """ + A criterion to sort query results on + """ + sortCriterion: SortCriterion! 
+} diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index d8f17faa3d11c2..34169d8c6d18aa 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -829,7 +829,7 @@ type EntityPath { """ Path of entities between source and destination nodes """ - path: [Entity] + path: [Entity]! } @@ -911,6 +911,11 @@ type AggregationMetadata { Entity corresponding to the facet field """ entity: Entity + + """ + Optional display name to show in the UI for this filter value + """ + displayName: String } """ diff --git a/datahub-graphql-core/src/main/resources/timeline.graphql b/datahub-graphql-core/src/main/resources/timeline.graphql index a6f7dfcf3865b8..37c0a43cf0aa5d 100644 --- a/datahub-graphql-core/src/main/resources/timeline.graphql +++ b/datahub-graphql-core/src/main/resources/timeline.graphql @@ -8,6 +8,11 @@ extend type Query { Returns the list of schema versions for a dataset. """ getSchemaVersionList(input: GetSchemaVersionListInput!): GetSchemaVersionListResult + + """ + Returns a list of timeline actions for an entity based on the filter criteria + """ + getTimeline(input: GetTimelineInput!): GetTimelineResult } """ @@ -82,6 +87,22 @@ type GetSchemaVersionListResult { semanticVersionList: [SemanticVersionStruct!] } +""" +Input for getting timeline from a specific version. +Todo: this is where additional filtering would go such as start & end times/versions, change types, etc +""" +input GetTimelineInput { + """ + The urn to fetch timeline for + """ + urn: String! + + """ + The change category types to filter by. If left empty, will fetch all. + """ + changeCategories: [ChangeCategoryType!] +} + """ Input for getting schema changes computed at a specific version. 
@@ -167,4 +188,93 @@ type SemanticVersionStruct { Version stamp of the change """ versionStamp: String -} \ No newline at end of file +} + +""" +Result of getting timeline from a specific version. +""" +type GetTimelineResult { + changeTransactions: [ChangeTransaction!]! +} + +""" +A change transaction is a set of changes that were committed together. +""" +type ChangeTransaction { + """ + The time at which the transaction was committed + """ + timestampMillis: Long! + """ + The last semantic version that this schema was changed in + """ + lastSemanticVersion: String! + """ + Version stamp of the change + """ + versionStamp: String! + """ + The type of the change + """ + changeType: ChangeOperationType! + """ + The list of changes in this transaction + """ + changes: [ChangeEvent!] +} + + +""" +An individual change in a transaction +""" +type ChangeEvent { + """ + The urn of the entity that was changed + """ + urn: String! + + """ + The category of the change + """ + category: ChangeCategoryType + + """ + The operation of the change + """ + operation: ChangeOperationType + + """ + The modifier of the change + """ + modifier: String + + """ + The parameters of the change + """ + parameters: [TimelineParameterEntry!] 
+ + """ + The audit stamp of the change + """ + auditStamp: AuditStamp + + """ + description of the change + """ + description: String +} + +""" +A timeline parameter entry +""" +type TimelineParameterEntry { + """ + The key of the parameter + """ + key: String + + """ + The value of the parameter + """ + value: String +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java index 265a1a2e0af5ba..094dd31ab10f07 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java @@ -69,7 +69,8 @@ public void testGetSuccessNoExistingDeprecation() throws Exception { "test", ImmutableList.of( new ResourceRefInput(TEST_ENTITY_URN_1, null, null), - new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + new ResourceRefInput(TEST_ENTITY_URN_2, null, null)), + null); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); @@ -134,7 +135,8 @@ public void testGetSuccessExistingDeprecation() throws Exception { "test", ImmutableList.of( new ResourceRefInput(TEST_ENTITY_URN_1, null, null), - new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + new ResourceRefInput(TEST_ENTITY_URN_2, null, null)), + null); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); assertTrue(resolver.get(mockEnv).get()); @@ -192,7 +194,8 @@ public void testGetFailureResourceDoesNotExist() throws Exception { "test", ImmutableList.of( new ResourceRefInput(TEST_ENTITY_URN_1, null, null), - new 
ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + new ResourceRefInput(TEST_ENTITY_URN_2, null, null)), + null); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); @@ -215,7 +218,8 @@ public void testGetUnauthorized() throws Exception { "test", ImmutableList.of( new ResourceRefInput(TEST_ENTITY_URN_1, null, null), - new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + new ResourceRefInput(TEST_ENTITY_URN_2, null, null)), + null); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); QueryContext mockContext = getMockDenyContext(); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); @@ -244,7 +248,8 @@ public void testGetEntityClientException() throws Exception { "test", ImmutableList.of( new ResourceRefInput(TEST_ENTITY_URN_1, null, null), - new ResourceRefInput(TEST_ENTITY_URN_2, null, null))); + new ResourceRefInput(TEST_ENTITY_URN_2, null, null)), + null); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java index ab180724da46df..59275fe9337665 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolverTest.java @@ -36,8 +36,27 @@ public class UpdateDeprecationResolverTest { private static final String TEST_ENTITY_URN = "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + // Create SchemaField URN for the entity being deprecated + private static final String TEST_SCHEMA_FIELD_URN = + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD),old_user_id)"; + + // Create SchemaField URN for the replacement + private static final String REPLACEMENT_SCHEMA_FIELD_URN = + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD),new_user_id)"; + private static final UpdateDeprecationInput TEST_DEPRECATION_INPUT = - new UpdateDeprecationInput(TEST_ENTITY_URN, true, 0L, "Test note"); + new UpdateDeprecationInput(TEST_ENTITY_URN, null, null, true, 0L, "Test note", null); + + private static final UpdateDeprecationInput TEST_DEPRECATION_INPUT_WITH_REPLACEMENT = + new UpdateDeprecationInput( + TEST_ENTITY_URN, + null, + null, + true, + 0L, + "Test note", + "urn:li:dataset:(urn:li:dataPlatform:mysql,replacement-dataset,PROD)"); private static final CorpuserUrn TEST_ACTOR_URN = new CorpuserUrn("test"); @Test @@ -220,4 +239,125 @@ public void testGetEntityClientException() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); } + + @Test + public void testGetSuccessNoExistingDeprecationWithReplacement() throws Exception { + // Create resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + + Mockito.when( + mockClient.batchGetV2( + any(), + eq(Constants.DATASET_ENTITY_NAME), + eq(new HashSet<>(ImmutableSet.of(Urn.createFromString(TEST_ENTITY_URN)))), + eq(ImmutableSet.of(Constants.DEPRECATION_ASPECT_NAME)))) + .thenReturn( + ImmutableMap.of( + Urn.createFromString(TEST_ENTITY_URN), + new EntityResponse() + .setEntityName(Constants.DATASET_ENTITY_NAME) + .setUrn(Urn.createFromString(TEST_ENTITY_URN)) + .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); + + EntityService mockService = getMockEntityService(); + Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true))) + .thenReturn(true); + + UpdateDeprecationResolver resolver = new UpdateDeprecationResolver(mockClient, mockService); + + // Execute resolver + QueryContext 
mockContext = getMockAllowContext(); + Mockito.when(mockContext.getActorUrn()).thenReturn(TEST_ACTOR_URN.toString()); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(eq("input"))) + .thenReturn(TEST_DEPRECATION_INPUT_WITH_REPLACEMENT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + resolver.get(mockEnv).get(); + + final Deprecation newDeprecation = + new Deprecation() + .setDeprecated(true) + .setDecommissionTime(0L) + .setNote("Test note") + .setReplacement( + Urn.createFromString( + "urn:li:dataset:(urn:li:dataPlatform:mysql,replacement-dataset,PROD)")) + .setActor(TEST_ACTOR_URN); + final MetadataChangeProposal proposal = + MutationUtils.buildMetadataChangeProposalWithUrn( + UrnUtils.getUrn(TEST_ENTITY_URN), DEPRECATION_ASPECT_NAME, newDeprecation); + + verifyIngestProposal(mockClient, 1, proposal); + + Mockito.verify(mockService, Mockito.times(1)) + .exists(any(), eq(Urn.createFromString(TEST_ENTITY_URN)), eq(true)); + } + + @Test + public void testGetSuccessSchemaFieldDeprecation() throws Exception { + // Create test input using schema field URNs + UpdateDeprecationInput testSchemaFieldInput = + new UpdateDeprecationInput( + TEST_SCHEMA_FIELD_URN, + null, + null, + true, + 0L, + "Deprecating old_user_id in favor of new_user_id", + REPLACEMENT_SCHEMA_FIELD_URN); + + // Create resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + + Mockito.when( + mockClient.batchGetV2( + any(), + eq(Constants.SCHEMA_FIELD_ENTITY_NAME), + eq(new HashSet<>(ImmutableSet.of(Urn.createFromString(TEST_SCHEMA_FIELD_URN)))), + eq(ImmutableSet.of(Constants.DEPRECATION_ASPECT_NAME)))) + .thenReturn( + ImmutableMap.of( + Urn.createFromString(TEST_SCHEMA_FIELD_URN), + new EntityResponse() + .setEntityName(Constants.SCHEMA_FIELD_ENTITY_NAME) + .setUrn(Urn.createFromString(TEST_SCHEMA_FIELD_URN)) + .setAspects(new EnvelopedAspectMap(Collections.emptyMap())))); + + EntityService mockService 
= getMockEntityService(); + Mockito.when( + mockService.exists(any(), eq(Urn.createFromString(TEST_SCHEMA_FIELD_URN)), eq(true))) + .thenReturn(true); + + UpdateDeprecationResolver resolver = new UpdateDeprecationResolver(mockClient, mockService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + Mockito.when(mockContext.getActorUrn()).thenReturn(TEST_ACTOR_URN.toString()); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(eq("input"))).thenReturn(testSchemaFieldInput); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + // Execute the resolver + resolver.get(mockEnv).get(); + + // Create expected deprecation object + final Deprecation expectedDeprecation = + new Deprecation() + .setDeprecated(true) + .setDecommissionTime(0L) + .setNote("Deprecating old_user_id in favor of new_user_id") + .setReplacement(Urn.createFromString(REPLACEMENT_SCHEMA_FIELD_URN)) + .setActor(TEST_ACTOR_URN); + + // Verify the correct proposal was made + final MetadataChangeProposal expectedProposal = + MutationUtils.buildMetadataChangeProposalWithUrn( + UrnUtils.getUrn(TEST_SCHEMA_FIELD_URN), DEPRECATION_ASPECT_NAME, expectedDeprecation); + + verifyIngestProposal(mockClient, 1, expectedProposal); + + // Verify that existence check was performed + Mockito.verify(mockService, Mockito.times(1)) + .exists(any(), eq(Urn.createFromString(TEST_SCHEMA_FIELD_URN)), eq(true)); + } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java index 414c62b693b698..fef87b76c56527 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java @@ -1,7 +1,7 
@@ package com.linkedin.datahub.graphql.resolvers.query; import static com.linkedin.datahub.graphql.TestUtils.*; -import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.*; import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; @@ -41,11 +41,12 @@ public class ListQueriesResolverTest { private static final Urn TEST_QUERY_URN = Urn.createFromTuple("query", "test-id"); private static final ListQueriesInput TEST_INPUT_FULL_FILTERS = - new ListQueriesInput(0, 20, null, QuerySource.MANUAL, TEST_DATASET_URN.toString()); + new ListQueriesInput( + 0, 20, null, QuerySource.MANUAL, TEST_DATASET_URN.toString(), null, null); private static final ListQueriesInput TEST_INPUT_SOURCE_FILTER = - new ListQueriesInput(0, 30, null, QuerySource.MANUAL, null); + new ListQueriesInput(0, 30, null, QuerySource.MANUAL, null, null, null); private static final ListQueriesInput TEST_INPUT_ENTITY_FILTER = - new ListQueriesInput(0, 40, null, null, TEST_DATASET_URN.toString()); + new ListQueriesInput(0, 40, null, null, TEST_DATASET_URN.toString(), null, null); @DataProvider(name = "inputs") public static Object[][] inputs() { diff --git a/datahub-web-react/.eslintrc.js b/datahub-web-react/.eslintrc.js index 3fdf7b6a3042ca..605cde8b9711e4 100644 --- a/datahub-web-react/.eslintrc.js +++ b/datahub-web-react/.eslintrc.js @@ -55,4 +55,12 @@ module.exports = { version: 'detect', // Tells eslint-plugin-react to automatically detect the version of React to use }, }, + overrides: [ + { + files: ['src/app/searchV2/**/*.tsx', 'src/app/entityV2/**/*.tsx'], + rules: { + 'import/no-cycle': 'off', + }, + }, + ], }; diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 0ff68de2481ed0..d932945acf5068 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -15,6 +15,7 @@ "@monaco-editor/react": "^4.3.1", "@mui/icons-material": "^5.15.21", "@mui/material": "^5.15.21", + "@phosphor-icons/react": 
"^2.1.7", "@react-hook/window-size": "^3.0.7", "@react-spring/web": "^9.7.3", "@remirror/pm": "^2.0.3", @@ -22,6 +23,7 @@ "@remirror/styles": "^2.0.3", "@testing-library/jest-dom": "^6.1.4", "@testing-library/react": "^12.0.0", + "@testing-library/react-hooks": "^8.0.1", "@tommoor/remove-markdown": "^0.3.2", "@types/diff": "^5.0.0", "@types/dompurify": "^2.3.3", @@ -38,6 +40,7 @@ "@visx/group": "^3.0.0", "@visx/hierarchy": "^3.0.0", "@visx/legend": "^3.2.0", + "@visx/marker": "^3.5.0", "@visx/scale": "^3.2.0", "@visx/shape": "^3.2.0", "@visx/xychart": "^3.2.0", @@ -45,6 +48,8 @@ "analytics": "^0.8.9", "antd": "4.24.7", "color-hash": "^2.0.1", + "colorthief": "^2.4.0", + "cron-parser": "^4.8.1", "cronstrue": "^1.122.0", "d3-scale": "^4.0.2", "dayjs": "^1.11.7", @@ -53,14 +58,17 @@ "dompurify": "^2.5.4", "dotenv": "^8.2.0", "faker": "5.5.3", + "fuse.js": "^7.0.0", "graphql": "^15.5.0", "graphql-tag": "2.10.3", "graphql.macro": "^1.4.2", "history": "^5.0.0", + "html-to-image": "^1.11.11", "js-cookie": "^2.2.1", "moment": "^2.29.4", "moment-timezone": "^0.5.35", "monaco-editor": "^0.28.1", + "phosphor-react": "^1.4.1", "prosemirror-autocomplete": "^0.4.3", "query-string": "^6.13.8", "rc-table": "^7.13.1", @@ -75,7 +83,10 @@ "react-router": "^5.3", "react-router-dom": "^5.3", "react-syntax-highlighter": "^15.4.4", + "react-vertical-timeline-component": "^3.6.0", "react-visibility-sensor": "^5.1.1", + "react-window": "^1.8.10", + "reactflow": "^11.10.1", "reactour": "^1.19.3", "remirror": "^2.0.23", "styled-components": "^5.2.1", diff --git a/datahub-web-react/src/App.tsx b/datahub-web-react/src/App.tsx index 81f137417f1f8a..9fef14b895a753 100644 --- a/datahub-web-react/src/App.tsx +++ b/datahub-web-react/src/App.tsx @@ -5,6 +5,7 @@ import { ApolloClient, ApolloProvider, createHttpLink, InMemoryCache, ServerErro import { onError } from '@apollo/client/link/error'; import { Helmet, HelmetProvider } from 'react-helmet-async'; import './App.less'; +import './AppV2.less'; 
import { Routes } from './app/Routes'; import { PageRoutes } from './conf/Global'; import { isLoggedInVar } from './app/auth/checkAuthStatus'; diff --git a/datahub-web-react/src/AppV2.less b/datahub-web-react/src/AppV2.less new file mode 100644 index 00000000000000..3980245cb486e3 --- /dev/null +++ b/datahub-web-react/src/AppV2.less @@ -0,0 +1,57 @@ +@import 'antd/dist/antd.less'; +@import './conf/theme/global-variables.less'; +@import './conf/theme/global-variables-v2.less'; +@import './conf/theme/global-overrides.less'; +@import './conf/theme/global-overrides-v2.less'; + +@font-face { + font-family: 'Manrope'; + font-style: normal; + src: local('Manrope'), url('./fonts/manrope.woff2') format('woff2'); +} + +@font-face { + font-family: 'Mulish'; + font-style: normal; + font-weight: 400; // Regular weight + src: url('./fonts/Mulish-Regular.ttf') format('truetype'); +} + +@font-face { + font-family: 'Mulish'; + font-style: normal; + font-weight: 500; // Medium weight + src: url('./fonts/Mulish-Medium.ttf') format('truetype'); +} + +@font-face { + font-family: 'Mulish'; + font-style: normal; + font-weight: 600; // Semi-bold weight + src: url('./fonts/Mulish-SemiBold.ttf') format('truetype'); +} + +@font-face { + font-family: 'Mulish'; + font-style: normal; + font-weight: 700; // Bold weight + src: url('./fonts/Mulish-Bold.ttf') format('truetype'); +} + +@font-face { + font-family: 'Mulish'; + font-style: italic; + font-weight: 400; // Regular Italic + src: url('./fonts/Mulish-Italic.ttf') format('truetype'); +} + +@font-face { + font-family: 'Mulish'; + font-style: italic; + font-weight: 700; // Bold Italic + src: url('./fonts/Mulish-BoldItalic.ttf') format('truetype'); +} + +.themeV2 * { + font-family: @font-family-v2; +} diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 063b784920e234..e3a5632c9f5fcd 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -1,3 +1,4 @@ +import React from 'react'; 
import { GetDatasetDocument, UpdateDatasetDocument, GetDatasetSchemaDocument } from './graphql/dataset.generated'; import { GetDataFlowDocument } from './graphql/dataFlow.generated'; import { GetDataJobDocument } from './graphql/dataJob.generated'; @@ -31,6 +32,16 @@ import { AppConfig, EntityPrivileges, BusinessAttribute, + EntityRelationshipsResult, + Maybe, + SearchResult, + DataHubViewType, + LogicalOperator, + DataHubView, + DataHubViewFilter, + GlobalTags, + OwnershipType, + Owner, } from './types.generated'; import { GetTagDocument } from './graphql/tag.generated'; import { GetMlModelDocument } from './graphql/mlModel.generated'; @@ -44,14 +55,34 @@ import { DEFAULT_APP_CONFIG } from './appConfigContext'; import { GetQuickFiltersDocument } from './graphql/quickFilters.generated'; import { GetGrantedPrivilegesDocument } from './graphql/policy.generated'; import { VIEW_ENTITY_PAGE } from './app/entity/shared/constants'; +import { Entity } from './app/entity/Entity'; +import { GenericEntityProperties } from './app/entity/shared/types'; +import { ViewBuilderState } from './app/entity/view/types'; +import { EntityCapabilityType } from './app/entityV2/Entity'; export const entityPrivileges: EntityPrivileges = { canEditLineage: true, + canEditDomains: true, + canEditDataProducts: true, + canEditTags: true, + canEditGlossaryTerms: true, + canEditDescription: true, + canEditLinks: true, + canEditOwners: true, + canEditAssertions: true, + canEditIncidents: true, + canEditDeprecation: true, + canEditSchemaFieldTags: true, + canEditSchemaFieldGlossaryTerms: true, + canEditSchemaFieldDescription: true, + canEditQueries: true, + canEditEmbed: true, canManageEntity: true, canManageChildren: true, - canEditEmbed: true, - canEditQueries: true, canEditProperties: true, + canViewDatasetUsage: true, + canViewDatasetProfile: true, + canViewDatasetOperations: true, __typename: 'EntityPrivileges', }; @@ -94,7 +125,7 @@ export const user1 = { }, settings: { __typename: 
'CorpUserSettings', - appearance: { __typename: 'CorpUserAppearanceSettings', showSimplifiedHomepage: false }, + appearance: { __typename: 'CorpUserAppearanceSettings', showSimplifiedHomepage: false, showThemeV2: false }, views: { __typename: 'CorpUserViewSettings', defaultView: null }, }, editableInfo: null, @@ -126,6 +157,8 @@ const user2 = { skills: [], __typename: 'CorpUserEditableProperties', email: 'john@domain.com', + persona: null, + platforms: null, }, groups: { __typename: 'EntityRelationshipsResult', @@ -164,7 +197,7 @@ const user2 = { }, settings: { __typename: 'CorpUserSettings', - appearance: { __typename: 'CorpUserAppearanceSettings', showSimplifiedHomepage: false }, + appearance: { __typename: 'CorpUserAppearanceSettings', showSimplifiedHomepage: false, showThemeV2: false }, views: { __typename: 'CorpUserViewSettings', defaultView: null }, }, editableInfo: null, @@ -549,6 +582,12 @@ export const dataset3 = { parentNodes: null, }, associatedUrn: 'urn:li:dataset:3', + actor: { + __typename: 'CorpUser', + urn: 'urn:li:corpuser:admin', + type: EntityType.CorpUser, + username: '', + }, }, ], }, @@ -585,6 +624,8 @@ export const dataset3 = { }, deprecation: null, usageStats: null, + latestFullTableProfile: null, + latestPartitionProfile: null, operations: null, datasetProfiles: [ { @@ -619,6 +660,7 @@ export const dataset3 = { aspectName: 'autoRenderAspect', payload: '{ "values": [{ "autoField1": "autoValue1", "autoField2": "autoValue2" }] }', renderSpec: { + __typename: 'AutoRenderSpec', displayType: 'tabular', displayName: 'Auto Render Aspect Custom Tab Name', key: 'values', @@ -635,16 +677,20 @@ export const dataset3 = { runs: null, testResults: null, siblings: null, + siblingsSearch: null, statsSummary: null, embed: null, - browsePathV2: { __typename: 'BrowsePathV2', path: [{ name: 'test', entity: null }] }, + browsePathV2: { __typename: 'BrowsePathV2', path: [{ name: 'test', entity: null, __typename: 'BrowsePathEntry' }] }, access: null, dataProduct: 
null, lastProfile: null, lastOperation: null, structuredProperties: null, forms: null, + notes: [], activeIncidents: null, + upstream: null, + downstream: null, versionProperties: null, } as Dataset; @@ -708,7 +754,9 @@ export const dataset3WithSchema = { foreignKeys: [], }, editableSchemaMetadata: null, + documentation: null, siblings: null, + siblingsSearch: null, }, }; @@ -3796,6 +3844,7 @@ export const mocks = [ EntityType.MlfeatureTable, EntityType.Mlmodel, EntityType.MlmodelGroup, + EntityType.DataProduct, ], }, }, @@ -3961,3 +4010,325 @@ export const platformPrivileges: PlatformPrivileges = { manageStructuredProperties: true, viewStructuredPropertiesPage: true, }; + +export const DomainMock1 = { + urn: 'urn:li:domain:afbdad41-c523-469f-9b62-de94f938f702', + id: 'afbdad41-c523-469f-9b62-de94f938f702', + type: 'DOMAIN', + icon: () => <>, + isSearchEnabled: () => false, + isBrowseEnabled: () => false, + isLineageEnabled: () => false, + getCollectionName: () => 'domain1_mock_1', + getPathName: () => 'domain_path_1', + getGraphName: () => 'domain_graph_1', + displayName: () => 'MOCK_DOMAIN_1', + parentDomains: { + domains: [], + }, + renderProfile: () => <>, + renderPreview: () => <>, + renderSearch: () => <>, + getGenericEntityProperties: () => { + return { + parentDomains: { + count: 1, + domains: [ + { + urn: 'urn:li:domain:afbdad41-c523-469f-9b62-de94f938f702', + type: 'DOMAIN', + name: 'DOMAIN_1', + }, + ], + }, + }; + }, + supportedCapabilities: () => new Set(), +} as Entity; + +export const DomainMock2 = { + urn: 'urn:li:domain:bebdad41-c523-469f-9b62-de94f938f603', + id: 'bebdad41-c523-469f-9b62-de94f938f603', + type: 'DOMAIN', + icon: () => <>, + isSearchEnabled: () => false, + isBrowseEnabled: () => false, + isLineageEnabled: () => false, + getCollectionName: () => 'domain_mock_2', + getPathName: () => 'domain_path_2', + getGraphName: () => 'domain_graph_2', + displayName: () => 'MOCK_DOMAIN_2', + parentDomains: { + domains: [], + }, + renderProfile: () 
=> <>, + renderPreview: () => <>, + renderSearch: () => <>, + getGenericEntityProperties: () => { + return { + parentDomains: { + count: 1, + domains: [ + { + urn: 'urn:li:domain:afbdad41-c523-469f-9b62-de94f938f603', + type: 'DOMAIN', + name: 'DOMAIN_2', + }, + ], + }, + }; + }, + supportedCapabilities: () => new Set(), +} as Entity; + +export const DomainMock3 = [DomainMock1, DomainMock2] as Array>; + +export const expectedResult = [ + { + type: 'DOMAIN', + urn: 'urn:li:domain:afbdad41-c523-469f-9b62-de94f938f702', + name: 'DOMAIN_1', + }, +]; + +export const owners: Owner[] = [ + { + __typename: 'Owner', + owner: { + __typename: 'CorpUser', + username: 'john', + urn: 'urn:li:corpuser:3', + type: EntityType.CorpUser, + }, + associatedUrn: 'urn:li:dataset:1', + type: OwnershipType.Developer, + }, + { + owner: { + __typename: 'CorpUser', + username: 'john', + urn: 'urn:li:corpuser:3', + type: EntityType.CorpUser, + }, + associatedUrn: 'urn:li:dataset:1', + type: OwnershipType.Delegate, + }, + { + owner: { + __typename: 'CorpUser', + username: 'sdas', + urn: 'urn:li:corpuser:1', + type: EntityType.CorpUser, + }, + associatedUrn: 'urn:li:dataset:2', + type: OwnershipType.Dataowner, + }, + { + owner: { + __typename: 'CorpUser', + username: 'sdas', + urn: 'urn:li:corpuser:1', + type: EntityType.CorpUser, + }, + type: OwnershipType.Delegate, + associatedUrn: 'urn:li:dataset:2', + }, +]; + +export const globalTags: GlobalTags = { + __typename: 'GlobalTags', + tags: [ + { + __typename: 'TagAssociation', + tag: { + __typename: 'Tag', + type: EntityType.Tag, + urn: 'urn:li:tag:abc-sample-tag', + name: 'abc-sample-tag', + description: 'sample tag', + properties: { + __typename: 'TagProperties', + name: 'abc-sample-tag', + description: 'sample tag', + colorHex: 'sample tag color', + }, + }, + associatedUrn: 'urn:li:corpuser:1', + }, + ], +}; + +export const entityCapabilities: Set = new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + 
EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.TEST, + EntityCapabilityType.ROLES, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.HEALTH, + EntityCapabilityType.LINEAGE, +]); + +const filters: DataHubViewFilter = { + filters: [ + { + condition: FilterOperator.Equal, + field: 'mockField1', + negated: false, + values: ['value1', 'value2', 'value3'], + }, + { + condition: FilterOperator.Exists, + field: 'mockField2', + negated: true, + values: ['value4', 'value5', 'value6'], + }, + ], + operator: LogicalOperator.And, +}; + +export const viewBuilderStateMock: ViewBuilderState = { + viewType: DataHubViewType.Global, + name: 'VIEW_BUILDER_TEST', + description: 'A description for testing convertStateToUpdateInput', + definition: { + entityTypes: [EntityType.AccessToken, EntityType.Domain, EntityType.Container, EntityType.DataFlow], + filter: filters, + }, +}; + +export const searchViewsMock: Array = [ + { + urn: 'test-urn1', + type: EntityType.DatahubView, + viewType: DataHubViewType.Global, + name: 'VIEW_BUILDER_TEST', + description: 'A description for testing convertStateToUpdateInput', + definition: { + entityTypes: [EntityType.AccessToken, EntityType.Domain, EntityType.Container, EntityType.DataFlow], + filter: { + operator: LogicalOperator.And, + filters: [ + { + field: 'mockField1', + condition: FilterOperator.Equal, + values: ['value1', 'value2', 'value3'], + negated: false, + }, + { + field: 'mockField2', + condition: FilterOperator.Exists, + values: ['value4', 'value5', 'value6'], + negated: true, + }, + ], + }, + }, + }, + { + urn: 'test-urn2', + type: EntityType.DatahubView, + viewType: DataHubViewType.Global, + name: 'MOCK_TEST_VIEW', + description: 'Lorem ipsum dolor sit amet, consectetu', + definition: { + entityTypes: [EntityType.AccessToken, EntityType.Container, EntityType.DataFlow], + filter: { + operator: LogicalOperator.Or, + 
filters: [ + { + field: 'mockField1', + condition: FilterOperator.GreaterThan, + values: ['value1', 'value2', 'value3'], + negated: false, + }, + { + field: 'mockField2', + condition: FilterOperator.In, + values: ['value4', 'value6'], + negated: false, + }, + ], + }, + }, + }, +]; + +export const mockEntityRelationShipResult: Maybe = { + start: 0, + count: 0, + total: 0, + relationships: [ + { + type: 'Test1', + direction: RelationshipDirection.Outgoing, + entity: { + urn: 'urn:li:glossaryTerm:schema.Field16Schema_v1', + type: EntityType.GlossaryTerm, + }, + }, + { + type: 'Test2', + direction: RelationshipDirection.Incoming, + entity: { + urn: 'urn:li:glossaryTerm:schema.Field16Schema_v2', + type: EntityType.Assertion, + }, + }, + ], + __typename: 'EntityRelationshipsResult', +}; + +export const mockSearchResult: SearchResult = { + __typename: 'SearchResult', + entity: { + __typename: 'Dataset', + ...dataset3, + }, + matchedFields: [], + insights: [], + extraProperties: [ + { name: 'isOutputPort', value: 'true' }, + { name: 'test2_name', value: 'test2_value' }, + ], +}; + +export const mockRecord: Record = { + key1: 'value1', + key2: true, + key3: 'value2', + key4: false, + key5: 'value3', + key6: true, +}; + +export const mockFineGrainedLineages1: GenericEntityProperties = { + siblings: { + isPrimary: true, + siblings: [{ type: EntityType.Dataset, urn: 'test_urn' }], + }, + siblingsSearch: { + count: 1, + total: 1, + searchResults: [{ entity: { type: EntityType.Dataset, urn: 'test_urn' }, matchedFields: [] }], + }, + fineGrainedLineages: [ + { + upstreams: [ + { + urn: 'urn:li:glossaryTerm:example.glossaryterm1', + path: 'test_downstream1', + }, + ], + downstreams: [ + { + urn: 'urn:li:glossaryTerm:example.glossaryterm2', + path: 'test_downstream2', + }, + ], + }, + ], +}; diff --git a/datahub-web-react/src/alchemy-components/components/Avatar/types.ts b/datahub-web-react/src/alchemy-components/components/Avatar/types.ts index 98c554b620dcbd..8371384b9300de 
100644 --- a/datahub-web-react/src/alchemy-components/components/Avatar/types.ts +++ b/datahub-web-react/src/alchemy-components/components/Avatar/types.ts @@ -2,7 +2,7 @@ import { AvatarSizeOptions } from '@src/alchemy-components/theme/config'; export interface AvatarProps { name: string; - imageUrl?: string; + imageUrl?: string | null; onClick?: () => void; size?: AvatarSizeOptions; showInPill?: boolean; diff --git a/datahub-web-react/src/alchemy-components/components/Avatar/utils.ts b/datahub-web-react/src/alchemy-components/components/Avatar/utils.ts index 46b2ee25488b89..f5c2468e2ab813 100644 --- a/datahub-web-react/src/alchemy-components/components/Avatar/utils.ts +++ b/datahub-web-react/src/alchemy-components/components/Avatar/utils.ts @@ -48,6 +48,7 @@ export const getAvatarSizes = (size) => { sm: { width: '18px', height: '18px', fontSize: '8px' }, md: { width: '24px', height: '24px', fontSize: '12px' }, lg: { width: '28px', height: '28px', fontSize: '14px' }, + xl: { width: '32px', height: '32px', fontSize: '14px' }, default: { width: '20px', height: '20px', fontSize: '10px' }, }; diff --git a/datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.stories.tsx b/datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.stories.tsx new file mode 100644 index 00000000000000..d9d46f7ce84c27 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.stories.tsx @@ -0,0 +1,76 @@ +import React from 'react'; +import { Meta, StoryObj } from '@storybook/react'; +import { AvatarStack, avatarListDefaults } from './AvatarStack'; + +// Meta Configuration +const meta = { + title: 'Components / AvatarStack', + component: AvatarStack, + + // Component-level parameters + parameters: { + layout: 'centered', + docs: { + subtitle: 'Displays a list of assignees with avatars.', + }, + }, + + // Component-level argTypes + argTypes: { + avatars: { + description: 'List of avatar objects with name and image 
source.', + control: 'object', + table: { + type: { + summary: 'Array<{ name: string; imageUrl: string }>', + }, + defaultValue: { summary: '[]' }, + }, + }, + }, + + // Default props + args: { + avatars: avatarListDefaults.avatars, + size: avatarListDefaults.size, + }, +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +// Sandbox Story +export const sandbox: Story = { + render: (props) => , +}; + +// Example Stories +export const withMultipleAvatar = () => ( + +); + +// Example Stories +export const witouthImagesMultipleAvatar = () => ( + +); + +export const withSingleAvatar = () => ( + +); + +export const withNoAvatar = () => ; diff --git a/datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.tsx b/datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.tsx new file mode 100644 index 00000000000000..e9d8323676bffd --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/AvatarStack/AvatarStack.tsx @@ -0,0 +1,23 @@ +import React from 'react'; +import { AvatarContainer, AvatarStackContainer } from './components'; +import { Avatar } from '../Avatar'; +import { AvatarItemProps, AvatarStackProps } from './types'; + +export const avatarListDefaults: AvatarStackProps = { + avatars: [ + { name: 'John Doe', imageUrl: 'https://randomuser.me/api/portraits/men/1.jpg' }, + { name: 'Test User', imageUrl: 'https://robohash.org/sample-profile.png' }, + { name: 'Micky Test', imageUrl: 'https://randomuser.me/api/portraits/women/1.jpg' }, + ], + size: 'md', +}; + +export const AvatarStack = ({ avatars, size = 'md' }: AvatarStackProps) => { + if (avatars?.length === 0) return

; + const renderAvatarStack = avatars?.map((avatar: AvatarItemProps) => ( + + + + )); + return {renderAvatarStack}; +}; diff --git a/datahub-web-react/src/alchemy-components/components/AvatarStack/components.ts b/datahub-web-react/src/alchemy-components/components/AvatarStack/components.ts new file mode 100644 index 00000000000000..8a437605fbc185 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/AvatarStack/components.ts @@ -0,0 +1,14 @@ +import styled from 'styled-components'; + +export const AvatarStackContainer = styled.div` + position: relative; + display: flex; + align-items: flex-start; +`; + +export const AvatarContainer = styled.div` + margin-left: -10px; + &:first-child { + margin-left: 0; + } +`; diff --git a/datahub-web-react/src/alchemy-components/components/AvatarStack/types.ts b/datahub-web-react/src/alchemy-components/components/AvatarStack/types.ts new file mode 100644 index 00000000000000..d32cf763db54a2 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/AvatarStack/types.ts @@ -0,0 +1,11 @@ +import { AvatarSizeOptions } from '@src/alchemy-components/theme/config'; + +export interface AvatarItemProps { + name: string; + imageUrl?: string | null; +} + +export type AvatarStackProps = { + avatars: AvatarItemProps[]; + size?: AvatarSizeOptions; +}; diff --git a/datahub-web-react/src/alchemy-components/components/Bar/Bar.stories.tsx b/datahub-web-react/src/alchemy-components/components/Bar/Bar.stories.tsx new file mode 100644 index 00000000000000..e2a1d378c2086d --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Bar/Bar.stories.tsx @@ -0,0 +1,92 @@ +import React from 'react'; +import { Meta, StoryObj } from '@storybook/react'; +import colors from '@src/alchemy-components/theme/foundations/colors'; + +import { Bar } from './Bar'; + +const meta: Meta = { + title: 'Components / Bar', + component: Bar, + + // Component-level parameters + parameters: { + layout: 'centered', + docs: { + subtitle: 
'Displays a bar component with up to three segments that can be dynamically colored.', + }, + }, + + // Component-level argTypes + argTypes: { + color: { + description: 'Color of the filled bars.', + control: 'color', + table: { + defaultValue: { summary: colors.violet[500] }, + }, + }, + coloredBars: { + description: 'Number of bars to color.', + control: { type: 'number', min: 0, max: 3 }, + table: { + defaultValue: { summary: '2' }, + }, + }, + size: { + description: 'Size of the bars.', + control: 'select', + options: ['sm', 'lg', 'default'], + table: { + defaultValue: { summary: 'default' }, + }, + }, + }, + + // Default props + args: { + color: colors.violet[500], + coloredBars: 2, + size: 'default', + }, +}; + +export default meta; + +type Story = StoryObj; + +// Sandbox Story +export const sandbox: Story = { + render: (props) => , +}; + +// Example Stories +export const withCustomColors: Story = { + args: { + color: '#ff6b6b', + coloredBars: 3, + }, +}; + +export const smallBars: Story = { + args: { + size: 'sm', + coloredBars: 1, + color: '#6bc1ff', + }, +}; + +export const defaultBars: Story = { + args: { + size: 'default', + coloredBars: 2, + color: colors.violet[500], + }, +}; + +export const noColoredBars: Story = { + args: { + size: 'default', + coloredBars: 0, + color: '#C6C0E0', + }, +}; diff --git a/datahub-web-react/src/alchemy-components/components/Bar/Bar.tsx b/datahub-web-react/src/alchemy-components/components/Bar/Bar.tsx new file mode 100644 index 00000000000000..2c3a4aea8984d6 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Bar/Bar.tsx @@ -0,0 +1,25 @@ +import React from 'react'; +import { colors } from '@src/alchemy-components/theme'; + +import { BarComponentProps } from './types'; +import { BarContainer, IndividualBar } from './components'; +import { BAR_HEIGHT_MULTIPLIER } from './constant'; + +const defaultProps: BarComponentProps = { + color: colors.violet[500], + coloredBars: 2, + size: 'default', +}; +export 
const Bar = ({ + color = defaultProps.color, + coloredBars = defaultProps.coloredBars, + size = defaultProps.size, +}: BarComponentProps) => { + const Bars = Array.from({ length: 3 }, (_, index) => { + const barHeight = (index + 2) * BAR_HEIGHT_MULTIPLIER[size]; + return ( + + ); + }); + return {Bars}; +}; diff --git a/datahub-web-react/src/alchemy-components/components/Bar/components.ts b/datahub-web-react/src/alchemy-components/components/Bar/components.ts new file mode 100644 index 00000000000000..be0e5774d1595f --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Bar/components.ts @@ -0,0 +1,15 @@ +import styled from 'styled-components'; + +export const BarContainer = styled.div` + display: flex; + gap: 2px; + align-items: baseline; +`; + +export const IndividualBar = styled.div<{ height: number; isColored: boolean; color: string; size: string }>` + width: ${(props) => (props.size === 'default' ? '5px' : '3px')}; + height: ${(props) => props.height}px; + background-color: ${(props) => (props.isColored ? 
props.color : '#C6C0E0')}; + border-radius: 20px; + transition: background-color 0.3s ease, height 0.3s ease; +`; diff --git a/datahub-web-react/src/alchemy-components/components/Bar/constant.ts b/datahub-web-react/src/alchemy-components/components/Bar/constant.ts new file mode 100644 index 00000000000000..ad50dd32dd887e --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Bar/constant.ts @@ -0,0 +1,5 @@ +export const BAR_HEIGHT_MULTIPLIER = { + lg: 5, + sm: 4, + default: 5, +}; diff --git a/datahub-web-react/src/alchemy-components/components/Bar/index.ts b/datahub-web-react/src/alchemy-components/components/Bar/index.ts new file mode 100644 index 00000000000000..856726b09bcaa1 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Bar/index.ts @@ -0,0 +1 @@ +export { Bar } from './Bar'; diff --git a/datahub-web-react/src/alchemy-components/components/Bar/types.ts b/datahub-web-react/src/alchemy-components/components/Bar/types.ts new file mode 100644 index 00000000000000..0f8d6773a1b8ad --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Bar/types.ts @@ -0,0 +1,5 @@ +export interface BarComponentProps { + color: string; + coloredBars: number; + size: 'default' | 'sm'; +} diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.stories.tsx b/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.stories.tsx index 1258ff398c0a7e..f0ee44725ad578 100644 --- a/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.stories.tsx +++ b/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.stories.tsx @@ -3,6 +3,8 @@ import { BADGE } from '@geometricpanda/storybook-addon-badges'; import type { Meta, StoryObj } from '@storybook/react'; import { BarChart } from './BarChart'; import { getMockedProps } from './utils'; +import { DEFAULT_MIN_VALUE } from './hooks/useAdaptYAccessorToZeroValues'; +import { DEFAULT_MAX_DOMAIN_VALUE } from 
'./hooks/useAdaptYScaleToZeroValues'; const meta = { title: 'Charts / BarChart', @@ -28,24 +30,26 @@ const meta = { yAccessor: { description: 'A function to convert datum to value of Y', }, - renderTooltipContent: { + maxYDomainForZeroData: { + description: + 'For the case where the data has only zero values, you can set the yScale domain to better display the chart', + table: { + defaultValue: { summary: `${DEFAULT_MAX_DOMAIN_VALUE}` }, + }, + }, + minYForZeroData: { + description: + 'For the case where the data has only zero values, you can set minimal Y value to better display the chart', + table: { + defaultValue: { summary: `${DEFAULT_MIN_VALUE}` }, + }, + }, + popoverRenderer: { description: 'A function to replace default rendering of toolbar', }, margin: { description: 'Add margins to chart', }, - leftAxisTickFormat: { - description: 'A function to format labels of left axis', - }, - leftAxisTickLabelProps: { - description: 'Props for label of left axis', - }, - bottomAxisTickFormat: { - description: 'A function to format labels of bottom axis', - }, - bottomAxisTickLabelProps: { - description: 'Props for label of bottom axis', - }, barColor: { description: 'Color of bar', control: { @@ -58,11 +62,14 @@ const meta = { type: 'color', }, }, - gridColor: { - description: "Color of grid's lines", - control: { - type: 'color', - }, + leftAxisProps: { + description: 'The props for the left axis', + }, + bottomAxisProps: { + description: 'The props for the bottom axis', + }, + gridProps: { + description: 'The props for the grid', }, renderGradients: { description: 'A function to render different gradients that can be used as colors', @@ -72,7 +79,7 @@ const meta = { // Define defaults args: { ...getMockedProps(), - renderTooltipContent: (datum) => <>DATUM: {JSON.stringify(datum)}, + popoverRenderer: (datum) => <>DATUM: {JSON.stringify(datum)}, }, } satisfies Meta; diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.tsx 
b/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.tsx index eb5465a1d1217b..96c2b4f3ebeae9 100644 --- a/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.tsx +++ b/datahub-web-react/src/alchemy-components/components/BarChart/BarChart.tsx @@ -1,5 +1,6 @@ import React, { useState } from 'react'; import { colors } from '@src/alchemy-components/theme'; +import { abbreviateNumber } from '@src/app/dataviz/utils'; import { TickLabelProps } from '@visx/axis'; import { LinearGradient } from '@visx/gradient'; import { ParentSize } from '@visx/responsive'; @@ -7,8 +8,12 @@ import { Axis, AxisScale, BarSeries, Grid, Tooltip, XYChart } from '@visx/xychar import dayjs from 'dayjs'; import { Popover } from '../Popover'; import { ChartWrapper, StyledBarSeries } from './components'; -import { BarChartProps } from './types'; -import { abbreviateNumber } from '../dataviz/utils'; +import { AxisProps, BarChartProps } from './types'; +import { getMockedProps } from './utils'; +import useMergedProps from './hooks/useMergedProps'; +import useAdaptYScaleToZeroValues from './hooks/useAdaptYScaleToZeroValues'; +import useAdaptYAccessorToZeroValue from './hooks/useAdaptYAccessorToZeroValues'; +import useMaxDataValue from './hooks/useMaxDataValue'; const commonTickLabelProps: TickLabelProps = { fontSize: 10, @@ -18,39 +23,66 @@ const commonTickLabelProps: TickLabelProps = { export const barChartDefault: BarChartProps = { data: [], + xAccessor: (datum) => datum?.x, yAccessor: (datum) => datum?.y, - leftAxisTickFormat: abbreviateNumber, - leftAxisTickLabelProps: { - ...commonTickLabelProps, - textAnchor: 'end', - }, - bottomAxisTickFormat: (value) => dayjs(value).format('DD MMM'), - bottomAxisTickLabelProps: { - ...commonTickLabelProps, - textAnchor: 'middle', - verticalAnchor: 'start', - width: 20, - }, + xScale: { type: 'band', paddingInner: 0.4, paddingOuter: 0.1 }, + yScale: { type: 'linear', nice: true, round: true }, + barColor: 
'url(#bar-gradient)', barSelectedColor: colors.violet[500], - gridColor: '#e0e0e0', + + leftAxisProps: { + tickFormat: abbreviateNumber, + tickLabelProps: { + ...commonTickLabelProps, + textAnchor: 'end', + }, + hideAxisLine: true, + hideTicks: true, + }, + bottomAxisProps: { + tickFormat: (value) => dayjs(value).format('DD MMM'), + tickLabelProps: { + ...commonTickLabelProps, + textAnchor: 'middle', + verticalAnchor: 'start', + width: 20, + }, + hideAxisLine: true, + hideTicks: true, + }, + gridProps: { + rows: true, + columns: false, + stroke: '#e0e0e0', + strokeWidth: 1, + lineStyle: {}, + }, + renderGradients: () => , }; export function BarChart({ data, + isEmpty, + xAccessor = barChartDefault.xAccessor, yAccessor = barChartDefault.yAccessor, - renderTooltipContent, - margin, - leftAxisTickFormat = barChartDefault.leftAxisTickFormat, - leftAxisTickLabelProps = barChartDefault.leftAxisTickLabelProps, - bottomAxisTickFormat = barChartDefault.bottomAxisTickFormat, - bottomAxisTickLabelProps = barChartDefault.bottomAxisTickLabelProps, + xScale = barChartDefault.xScale, + yScale = barChartDefault.yScale, + maxYDomainForZeroData, + minYForZeroData, + barColor = barChartDefault.barColor, barSelectedColor = barChartDefault.barSelectedColor, - gridColor = barChartDefault.gridColor, + margin, + + leftAxisProps = barChartDefault.leftAxisProps, + bottomAxisProps = barChartDefault.bottomAxisProps, + gridProps = barChartDefault.gridProps, + + popoverRenderer, renderGradients = barChartDefault.renderGradients, }: BarChartProps) { const [hasSelectedBar, setHasSelectedBar] = useState(false); @@ -63,7 +95,27 @@ export function BarChart({ left: (margin?.left ?? 
0) + 40, }; - const accessors = { xAccessor, yAccessor }; + const maxDataValue = useMaxDataValue(data, yAccessor); + const adaptedYScale = useAdaptYScaleToZeroValues(yScale, maxDataValue, maxYDomainForZeroData); + const adaptedYAccessor = useAdaptYAccessorToZeroValue(yAccessor, maxDataValue, minYForZeroData); + + const accessors = { xAccessor, yAccessor: adaptedYAccessor }; + + const { computeNumTicks: computeLeftAxisNumTicks, ...mergedLeftAxisProps } = useMergedProps>( + leftAxisProps, + barChartDefault.leftAxisProps, + ); + + const { computeNumTicks: computeBottomAxisNumTicks, ...mergedBottomAxisProps } = useMergedProps< + AxisProps + >(bottomAxisProps, barChartDefault.bottomAxisProps); + + // In case of no data we should render empty graph with axises + // but they don't render at all without any data. + // To handle this case we will render the same graph with fake data and hide bars + if (!data.length) { + return ; + } return ( @@ -73,8 +125,8 @@ export function BarChart({ @@ -82,19 +134,14 @@ export function BarChart({ ({ x2={internalMargin.left} y1={0} y2={height - internalMargin.bottom} - stroke={gridColor} + stroke={gridProps?.stroke} /> - + } $hasSelectedItem={hasSelectedBar} $color={barColor} $selectedColor={barSelectedColor} + $isEmpty={isEmpty} dataKey="bar-seria-0" data={data} radius={4} @@ -136,8 +184,10 @@ export function BarChart({ tooltipData?.nearestDatum && ( ) ); diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/components.tsx b/datahub-web-react/src/alchemy-components/components/BarChart/components.tsx index aa8f1320ef21dd..a0a3f7a2cebfc1 100644 --- a/datahub-web-react/src/alchemy-components/components/BarChart/components.tsx +++ b/datahub-web-react/src/alchemy-components/components/BarChart/components.tsx @@ -12,11 +12,18 @@ export const StyledBarSeries = styled(BarSeries)<{ $hasSelectedItem?: boolean; $color?: string; $selectedColor?: string; + $isEmpty?: boolean; }>` & { cursor: pointer; - fill: ${(props) => 
(props.$hasSelectedItem ? props.$selectedColor : props.$color) || colors.violet[500]}; + ${(props) => props.$isEmpty && 'pointer-events: none;'} + + fill: ${(props) => { + if (props.$isEmpty) return colors.transparent; + return (props.$hasSelectedItem ? props.$selectedColor : props.$color) || colors.violet[500]; + }}; + ${(props) => props.$hasSelectedItem && 'opacity: 0.3;'} :hover { diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYAccessorToZeroValues.ts b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYAccessorToZeroValues.ts new file mode 100644 index 00000000000000..19831af296262a --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYAccessorToZeroValues.ts @@ -0,0 +1,18 @@ +import { useMemo } from 'react'; +import { YAccessor } from '../types'; + +export const DEFAULT_MIN_VALUE = 0.1; + +export default function useAdaptYAccessorToZeroValue( + yAccessor: YAccessor, + maxDataValue: number, + minimalValue: number | undefined, +): YAccessor { + return useMemo(() => { + // Data contains non zero values, skip adaptation + if (maxDataValue > 0) return yAccessor; + + // add minimal `y` value + return (value) => Math.max(yAccessor(value), minimalValue ?? 
DEFAULT_MIN_VALUE); + }, [yAccessor, maxDataValue, minimalValue]); +} diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYScaleToZeroValues.ts b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYScaleToZeroValues.ts new file mode 100644 index 00000000000000..f46dd7297f3093 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useAdaptYScaleToZeroValues.ts @@ -0,0 +1,23 @@ +import { AxisScaleOutput } from '@visx/axis'; +import { ScaleConfig } from '@visx/scale'; +import { useMemo } from 'react'; + +export const DEFAULT_MAX_DOMAIN_VALUE = 10; + +export default function useAdaptYScaleToZeroValues( + yScale: ScaleConfig | undefined, + maxDataValue: number, + maxDomainValue: number | undefined, +): ScaleConfig | undefined { + return useMemo(() => { + // yScale should be passed for adaptation otherwise return it as is + if (!yScale) return yScale; + + // Data contains non zero values, no need to adapt + if (maxDataValue > 0) return yScale; + + // Add domain with max value to show data with only zero values correctly + const domain: [number, number] = [0, maxDomainValue ?? DEFAULT_MAX_DOMAIN_VALUE]; + return { domain, ...yScale }; + }, [maxDataValue, maxDomainValue, yScale]); +} diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMaxDataValue.ts b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMaxDataValue.ts new file mode 100644 index 00000000000000..331756a1bca89e --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMaxDataValue.ts @@ -0,0 +1,6 @@ +import { useMemo } from 'react'; +import { YAccessor } from '../types'; + +export default function useMaxDataValue(data: T[], yAccessor: YAccessor): number { + return useMemo(() => Math.max(...data.map(yAccessor)) ?? 
0, [data, yAccessor]); +} diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMergedProps.ts b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMergedProps.ts new file mode 100644 index 00000000000000..cf1e3f1cb2c02d --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/BarChart/hooks/useMergedProps.ts @@ -0,0 +1,8 @@ +import { useMemo } from 'react'; + +export default function useMergedProps( + props?: PropsType, + defaultProps?: PropsType, +): PropsType { + return useMemo(() => ({ ...(defaultProps || {}), ...(props || {}) }), [props, defaultProps]) as PropsType; +} diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/types.ts b/datahub-web-react/src/alchemy-components/components/BarChart/types.ts index 5fd7e2e63e2411..26e216ef2c2fc5 100644 --- a/datahub-web-react/src/alchemy-components/components/BarChart/types.ts +++ b/datahub-web-react/src/alchemy-components/components/BarChart/types.ts @@ -1,18 +1,38 @@ -import { TickFormatter, TickLabelProps } from '@visx/axis'; +import { AxisScaleOutput } from '@visx/axis'; +import { ScaleConfig } from '@visx/scale'; import { Margin } from '@visx/xychart'; +import { AxisProps as VisxAxisProps } from '@visx/xychart/lib/components/axis/Axis'; +import { GridProps as VisxGridProps } from '@visx/xychart/lib/components/grid/Grid'; + +export type AxisProps = Omit & { + computeNumTicks?: (width: number, height: number, margin: Margin, data: DatumType[]) => number | undefined; +}; + +export type GridProps = Omit & { + computeNumTicks?: (width: number, height: number, margin: Margin, data: DatumType[]) => number | undefined; +}; + +export type YAccessor = (datum: T) => number; export type BarChartProps = { data: DatumType[]; + isEmpty?: boolean; + xAccessor: (datum: DatumType) => string | number; - yAccessor: (datum: DatumType) => number; - renderTooltipContent?: (datum: DatumType) => React.ReactNode; - margin?: Margin; - leftAxisTickFormat?: 
TickFormatter; - leftAxisTickLabelProps?: TickLabelProps; - bottomAxisTickFormat?: TickFormatter; - bottomAxisTickLabelProps?: TickLabelProps; + yAccessor: YAccessor; + xScale?: ScaleConfig; + yScale?: ScaleConfig; + maxYDomainForZeroData?: number; + minYForZeroData?: number; + barColor?: string; barSelectedColor?: string; - gridColor?: string; + margin?: Margin; + + leftAxisProps?: AxisProps; + bottomAxisProps?: AxisProps; + gridProps?: GridProps; + + popoverRenderer?: (datum: DatumType) => React.ReactNode; renderGradients?: () => React.ReactNode; }; diff --git a/datahub-web-react/src/alchemy-components/components/BarChart/utils.ts b/datahub-web-react/src/alchemy-components/components/BarChart/utils.ts index 0b592da7f59b08..48f100a2813004 100644 --- a/datahub-web-react/src/alchemy-components/components/BarChart/utils.ts +++ b/datahub-web-react/src/alchemy-components/components/BarChart/utils.ts @@ -7,7 +7,8 @@ export function generateMockData(length = 30, maxValue = 50_000, minValue = 0) { const date = dayjs() .startOf('day') .add(index - length, 'days') - .toDate(); + .toDate() + .getTime(); const value = Math.max(Math.random() * maxValue, minValue); return { diff --git a/datahub-web-react/src/alchemy-components/components/Button/Button.stories.tsx b/datahub-web-react/src/alchemy-components/components/Button/Button.stories.tsx index e2d7c2852da519..22cd810c246bfe 100644 --- a/datahub-web-react/src/alchemy-components/components/Button/Button.stories.tsx +++ b/datahub-web-react/src/alchemy-components/components/Button/Button.stories.tsx @@ -61,6 +61,19 @@ const meta = { type: 'select', }, }, + iconSize: { + description: 'The optional size of the Icon.', + options: ['xs', 'sm', 'md', 'lg', 'xl', '2xl', '3xl', '4xl'], + table: { + defaultValue: { + summary: 'undefined', + detail: 'The size of the Button will be used as the size of the Icon', + }, + }, + control: { + type: 'select', + }, + }, icon: { description: 'The icon to display in the Button.', type: 'string', 
diff --git a/datahub-web-react/src/alchemy-components/components/Button/Button.tsx b/datahub-web-react/src/alchemy-components/components/Button/Button.tsx index a727b0faf97a99..daf3d39dccdb8c 100644 --- a/datahub-web-react/src/alchemy-components/components/Button/Button.tsx +++ b/datahub-web-react/src/alchemy-components/components/Button/Button.tsx @@ -24,6 +24,7 @@ export const Button = ({ size = buttonDefaults.size, icon, // default undefined iconPosition = buttonDefaults.iconPosition, + iconSize, isCircle = buttonDefaults.isCircle, isLoading = buttonDefaults.isLoading, isDisabled = buttonDefaults.isDisabled, @@ -52,9 +53,9 @@ export const Button = ({ return ( - {icon && iconPosition === 'left' && } + {icon && iconPosition === 'left' && } {!isCircle && children} - {icon && iconPosition === 'right' && } + {icon && iconPosition === 'right' && } ); }; diff --git a/datahub-web-react/src/alchemy-components/components/Button/types.ts b/datahub-web-react/src/alchemy-components/components/Button/types.ts index f510ff4c6c13c5..f0f50b0e0cf8b6 100644 --- a/datahub-web-react/src/alchemy-components/components/Button/types.ts +++ b/datahub-web-react/src/alchemy-components/components/Button/types.ts @@ -1,7 +1,7 @@ import { ButtonHTMLAttributes } from 'react'; import type { IconNames } from '@components'; -import type { SizeOptions, ColorOptions } from '@components/theme/config'; +import type { SizeOptions, ColorOptions, FontSizeOptions } from '@components/theme/config'; export interface ButtonProps extends ButtonHTMLAttributes { variant?: 'filled' | 'outline' | 'text'; @@ -9,6 +9,7 @@ export interface ButtonProps extends ButtonHTMLAttributes { size?: SizeOptions; icon?: IconNames; iconPosition?: 'left' | 'right'; + iconSize?: FontSizeOptions; isCircle?: boolean; isLoading?: boolean; isDisabled?: boolean; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.stories.tsx 
b/datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.stories.tsx new file mode 100644 index 00000000000000..1221da45be815a --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.stories.tsx @@ -0,0 +1,82 @@ +import React from 'react'; +import { BADGE } from '@geometricpanda/storybook-addon-badges'; +import type { Meta, StoryObj } from '@storybook/react'; +import { CalendarChart } from './CalendarChart'; +import { getMockedProps } from './utils'; + +const meta = { + title: 'Charts / CalendarChart', + component: CalendarChart, + + // Display Properties + parameters: { + layout: 'centered', + badges: [BADGE.EXPERIMENTAL], + docs: { + subtitle: 'A component that is used to show CalendarChart', + }, + }, + + // Component-level argTypes + argTypes: { + data: { + description: 'Array of datum to show', + }, + startDate: { + description: 'The first day of calendar (it will be rounded by weeks)', + }, + endDate: { + description: 'The last day of calendar (it will be rounded by weeks)', + }, + colorAccessor: { + description: "A function to get color by datum's value", + }, + showPopover: { + description: 'Enable to add popovers', + table: { + defaultValue: { summary: 'true' }, + }, + control: { + type: 'boolean', + }, + }, + popoverRenderer: { + description: "A function to render popover's content of datum's value", + }, + leftAxisLabelProps: { + description: 'Props for label of left axis', + }, + bottomAxisLabelProps: { + description: 'Props for label of bottom axis', + }, + margin: { + description: 'Margins for CalendarChart', + }, + selectedDay: { + description: 'Set a date in format `YYYY-MM-DD` to select this day on calendar', + control: { + type: 'text', + }, + }, + }, + + // Define defaults + args: { + ...getMockedProps(), + popoverRenderer: (day) => <>{JSON.stringify(day)}, + onDayClick: () => null, + }, +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +export const 
sandbox: Story = { + tags: ['dev'], + render: (props) => ( +
+ +
+ ), +}; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.tsx new file mode 100644 index 00000000000000..ca51e63b6feb85 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/CalendarChart.tsx @@ -0,0 +1,80 @@ +import { colors } from '@src/alchemy-components/theme'; +import { ParentSize } from '@visx/responsive'; +import React, { useMemo } from 'react'; +import { ChartWrapper } from './components'; +import { AxisBottomMonths } from './private/components/AxisBottomMonths'; +import { AxisLeftWeekdays } from './private/components/AxisLeftWeekdays'; +import { Calendar } from './private/components/Calendar'; +import { CalendarProvider } from './private/context'; +import { CalendarChartProps } from './types'; +import { prepareCalendarData } from './utils'; +import { CalendarContainer } from './private/components/CalendarContainer'; + +const commonLabelProps = { + fill: colors.gray[1700], + fontFamily: 'Mulish', + fontSize: 10, +}; + +export const calendarChartDefault: Omit, 'colorAccessor' | 'startDate' | 'endDate'> = { + data: [], + leftAxisLabelProps: { + ...commonLabelProps, + textAnchor: 'end', + }, + bottomAxisLabelProps: { + ...commonLabelProps, + textAnchor: 'middle', + }, + maxHeight: 350, + showPopover: true, +}; + +export function CalendarChart({ + data = calendarChartDefault.data, + startDate, + endDate, + colorAccessor, + showPopover = calendarChartDefault.showPopover, + popoverRenderer, + leftAxisLabelProps = calendarChartDefault.leftAxisLabelProps, + bottomAxisLabelProps = calendarChartDefault.bottomAxisLabelProps, + margin, + maxHeight = calendarChartDefault.maxHeight, + selectedDay, + onDayClick, +}: CalendarChartProps) { + const preparedData = useMemo( + () => prepareCalendarData(data, startDate, endDate), + [data, startDate, endDate], + ); + + return ( + + + {({ width, height }) => { + return ( 
+ + data={preparedData} + width={width} + height={maxHeight ?? height} + margin={margin} + colorAccessor={colorAccessor} + showPopover={showPopover} + popoverRenderer={popoverRenderer} + selectedDay={selectedDay} + onDayClick={onDayClick} + > + + labelProps={leftAxisLabelProps} /> + labelProps={bottomAxisLabelProps} /> + + data={preparedData} /> + + + ); + }} + + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/getColorAccessors.test.ts b/datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/getColorAccessors.test.ts new file mode 100644 index 00000000000000..788b611f1ec219 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/getColorAccessors.test.ts @@ -0,0 +1,82 @@ +import { CalendarData } from '../types'; +import { getColorAccessor } from '../utils'; + +type Datum = { + value1: number; + value2: number; +}; + +const sampleData: CalendarData[] = [ + { day: '2024-01-01', value: { value1: 10, value2: 10 } }, + { day: '2024-01-02', value: { value1: 20, value2: 15 } }, + { day: '2024-01-03', value: { value1: 0, value2: 0 } }, +]; + +const DEFAULT_COLOR = '#EBECF0'; + +const sampleColorAccessors = getColorAccessor( + sampleData, + { + value1: { + valueAccessor: (datum) => datum.value1, + colors: ['#fff', '#000'], // white -> black + }, + value2: { + valueAccessor: (datum) => datum.value2, + colors: ['#ff0000', '#00ff00', '#0000ff'], // red -> green -> blue + }, + }, + DEFAULT_COLOR, +); + +const sampleColorAccessorsEmpty = getColorAccessor(sampleData, {}, DEFAULT_COLOR); + +describe('getColorAccessors', () => { + it('should return default color if there are zero values', () => { + const result = sampleColorAccessors({ value1: 0, value2: 0 }); + + expect(result).toBe(DEFAULT_COLOR); + }); + + it('should return defalut color when value is negative', () => { + const result = sampleColorAccessors({ value1: -50, value2: -10 }); + + expect(result).toBe(DEFAULT_COLOR); 
+ }); + + it('should return defalut color when color accessors are no provided', () => { + const result = sampleColorAccessorsEmpty({ value1: 10, value2: 10 }); + + expect(result).toBe(DEFAULT_COLOR); + }); + + it('should return correctly interpolated color for average value', () => { + const result = sampleColorAccessors({ value1: 10, value2: 0 }); + + expect(result).toBe('rgb(128, 128, 128)'); // the color's between white and black + }); + + it('should return correctly interpolated color for max value', () => { + const result = sampleColorAccessors({ value1: 20, value2: 0 }); + + expect(result).toBe('rgb(0, 0, 0)'); + }); + + it('should return colors for value2 when value2 great then value1', () => { + const result = sampleColorAccessors({ value1: 10, value2: 20 }); + + expect(result).toBe('rgb(0, 0, 255)'); // blue + }); + + it('should return colors for the first max value', () => { + const result = sampleColorAccessors({ value1: 20, value2: 20 }); + + expect(result).toBe('rgb(0, 0, 0)'); + }); + + it('should return max color if value is great then max value', () => { + const result = sampleColorAccessors({ value1: 50, value2: 20 }); + + expect(result).toBe('rgb(0, 0, 0)'); + }); +}); diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/prepareCalendarDate.test.ts b/datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/prepareCalendarDate.test.ts new file mode 100644 index 00000000000000..97a0fe382ee116 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/_tests_/prepareCalendarDate.test.ts @@ -0,0 +1,109 @@ +import { CalendarData } from '../types'; +import { prepareCalendarData } from '../utils'; + +const sampleData: CalendarData[] = [ + { day: '2024-12-02', value: 1 }, + { day: '2024-12-03', value: 2 }, + { day: '2024-12-04', value: 3 }, + { day: '2024-12-05', value: 4 }, + { day: '2024-12-06', value: 5 }, + { day: '2024-12-07', value: 6 }, + { day: '2024-12-08', value: 7 }, 
+]; + +describe('prepareCalendarData', () => { + it('should prepare data for a full month', () => { + const result = prepareCalendarData(sampleData, '2024-12-01', '2024-12-31'); + + // 3 months (Nov / Dec / Jan) because edge weeks aren't belong to Dec + // because of there are less Dec days then of another month + expect(result).toHaveLength(3); + // Nov 1 week from 2024-11-25 to 2024-12-01 + expect(result[0].weeks).toHaveLength(1); + expect(result[0].weeks[0].days[0].day).toBe('2024-11-25'); + expect(result[0].weeks[0].days[6].day).toBe('2024-12-01'); + expect(result[0].weeks[0].days.some((day) => day.value !== undefined)).toBeFalsy(); // no values + // Dec 4 weeks from 2024-12-02 to 2024-12-29 + expect(result[1].weeks).toHaveLength(4); + expect(result[1].weeks[0].days[0].day).toBe('2024-12-02'); + expect(result[1].weeks[3].days[6].day).toBe('2024-12-29'); + expect(result[1].weeks[0].days.some((day) => day.value !== undefined)).toBeTruthy(); // has values + sampleData.forEach((datum, index) => { + expect(result[1].weeks[0].days[index].value).toBe(datum.value); + }); + expect(result[1].weeks[1].days.some((day) => day.value !== undefined)).toBeFalsy(); // no values + expect(result[1].weeks[2].days.some((day) => day.value !== undefined)).toBeFalsy(); // no values + expect(result[1].weeks[3].days.some((day) => day.value !== undefined)).toBeFalsy(); // no values + // Jan 1 wekk from 2024-12-30 to 2025-01-05 + expect(result[2].weeks).toHaveLength(1); + expect(result[2].weeks[0].days[0].day).toBe('2024-12-30'); + expect(result[2].weeks[0].days[6].day).toBe('2025-01-05'); + expect(result[2].weeks[0].days.some((day) => day.value !== undefined)).toBeFalsy(); // no values + }); + + it('should prepare data for a full week', () => { + const result = prepareCalendarData(sampleData, '2024-12-02', '2024-12-08'); + + expect(result).toHaveLength(1); + expect(result[0]).toHaveProperty('weeks'); + expect(result[0].weeks).toHaveLength(1); + 
expect(result[0].weeks[0].days).toHaveLength(7); + sampleData.forEach((datum, index) => { + expect(result[0].weeks[0].days[index].value).toBe(datum.value); + }); + }); + + it('should handle a month transition', () => { + const result = prepareCalendarData(sampleData, '2024-11-04', '2024-12-29'); + + // 2 months (Nov / Dec) + expect(result).toHaveLength(2); + // Nov 4 weeks from 2024-11-04 to 2024-12-01 + expect(result[0].key).toBe('2024-11'); + expect(result[0].weeks).toHaveLength(4); + expect(result[0].weeks[0].days[0].day).toBe('2024-11-04'); + expect(result[0].weeks[3].days[6].day).toBe('2024-12-01'); + // Dec 4 weeks from 2024-12-02 to 2024-12-29 + expect(result[1].key).toBe('2024-12'); + expect(result[1].weeks).toHaveLength(4); + expect(result[1].weeks[0].days[0].day).toBe('2024-12-02'); + expect(result[1].weeks[3].days[6].day).toBe('2024-12-29'); + }); + + it('should handle a year transition', () => { + const result = prepareCalendarData(sampleData, '2024-12-02', '2025-01-05'); + + // 2 months (Dec / Jan) + expect(result).toHaveLength(2); + // Dec 4 weeks from 2024-12-02 to 2024-12-29 + expect(result[0].key).toBe('2024-12'); + expect(result[0].weeks).toHaveLength(4); + expect(result[0].weeks[0].days[0].day).toBe('2024-12-02'); + expect(result[0].weeks[3].days[6].day).toBe('2024-12-29'); + // Jan 1 week from 2024-12-30 to 2025-01-05 + expect(result[1].key).toBe('2025-01'); + expect(result[1].weeks).toHaveLength(1); + expect(result[1].weeks[0].days[0].day).toBe('2024-12-30'); + expect(result[1].weeks[0].days[6].day).toBe('2025-01-05'); + }); + + it('should handle when the start and end date are the same', () => { + const result = prepareCalendarData(sampleData, '2024-12-01', '2024-12-01'); + + expect(result).toHaveLength(1); + expect(result[0].weeks).toHaveLength(1); + expect(result[0].weeks[0].days).toHaveLength(7); // rounding by start/end of week + expect(result[0].weeks[0].days[0].day).toBe('2024-11-25'); + 
expect(result[0].weeks[0].days[6].day).toBe('2024-12-01'); + }); + + it('should handle a range with no data for the given dates', () => { + const result = prepareCalendarData([], '2024-01-01', '2024-01-07'); + + expect(result).toHaveLength(1); + expect(result[0]).toHaveProperty('weeks'); + expect(result[0].weeks).toHaveLength(1); + expect(result[0].weeks[0].days).toHaveLength(7); + expect(result[0].weeks[0].days.some((day) => day.value !== undefined)).toBeFalsy(); + }); +}); diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/components.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/components.tsx new file mode 100644 index 00000000000000..28524fe43085e4 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/components.tsx @@ -0,0 +1,30 @@ +import { Bar } from '@visx/shape'; +import styled from 'styled-components'; + +export const ChartWrapper = styled.div` + width: 100%; + height: 100%; + position: relative; +`; + +export const CalendarWrapper = styled.div` + width: 100%; + height: 100%; + display: flex; + justify-content: left; + overflow-x: auto; +`; + +export const CalendarInnerWrapper = styled.div<{ $width: string }>` + width: ${(props) => props.$width}; +`; + +export const StyledBar = styled(Bar)<{ $addTransparency?: boolean }>` + cursor: pointer; + + ${(props) => props.$addTransparency && 'filter: opacity(0.3);'} + + :hover { + filter: drop-shadow(0px 0px 4px rgba(0 0 0 / 0.25)) brightness(90%); + } +`; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/constants.ts b/datahub-web-react/src/alchemy-components/components/CalendarChart/constants.ts new file mode 100644 index 00000000000000..fb20520c9b7a23 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/constants.ts @@ -0,0 +1 @@ +export const CALENDAR_DATE_FORMAT = 'YYYY-MM-DD'; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/index.ts 
b/datahub-web-react/src/alchemy-components/components/CalendarChart/index.ts new file mode 100644 index 00000000000000..5ac232084cbf2d --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/index.ts @@ -0,0 +1,2 @@ +export { CalendarChart } from './CalendarChart'; +export { CALENDAR_DATE_FORMAT } from './constants'; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisBottomMonths.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisBottomMonths.tsx new file mode 100644 index 00000000000000..eabbf12c45511f --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisBottomMonths.tsx @@ -0,0 +1,34 @@ +import React from 'react'; +import { DAYS_IN_WEEK } from '../constants'; +import { useCalendarState } from '../context'; +import { TickLabel } from './TickLabel'; +import { AxisBottomMonthsProps } from '../../types'; + +export function AxisBottomMonths({ labelProps }: AxisBottomMonthsProps) { + const { squareSize, squareGap, margin, data } = useCalendarState(); + const weeksInMonth = data.map((group) => group.weeks.length); + const axisTopMargin = 25; + + return ( + <> + {data.map((month, monthIndex) => { + // Do not show the first label when there are only one week in month to prevent labels overlay + if (monthIndex === 0 && month.weeks.length === 1) return null; + + const weeksBefore = weeksInMonth.slice(0, monthIndex).reduce((acc, value) => acc + value, 0); + const yLabel = DAYS_IN_WEEK * (squareSize + squareGap) + margin.top + axisTopMargin; + const xLabel = weeksBefore * (squareSize + squareGap) + squareGap * monthIndex + margin.left + 10; + + return ( + + ); + })} + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisLeftWeekdays.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisLeftWeekdays.tsx 
new file mode 100644 index 00000000000000..7888115d7fb289 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/AxisLeftWeekdays.tsx @@ -0,0 +1,35 @@ +import React from 'react'; +import { DAYS_IN_WEEK } from '../constants'; +import { AxisLeftWeekdaysProps } from '../../types'; +import { useCalendarState } from '../context'; +import { TickLabel } from './TickLabel'; + +const WEEKDAYS = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']; + +export function AxisLeftWeekdays({ labelProps }: AxisLeftWeekdaysProps) { + const { margin, squareSize, squareGap } = useCalendarState(); + + const yLineOffset = 5; + const xLineOffset = 4; + + const lineHeight = squareSize + squareGap; + const lineOffset = Math.floor(squareSize / 2) + squareGap; + + const x = margin.left - xLineOffset; + const y = lineHeight * DAYS_IN_WEEK + margin.top + yLineOffset; + + const renderTickLabel = (number: number, text: string) => { + const labelXOffset = 12; + const xLabel = margin.left - labelXOffset; + const yLabel = lineHeight * number + lineOffset + margin.top; + + return ; + }; + + return ( + <> + {WEEKDAYS.map((weekday, index) => renderTickLabel(index, weekday))} + + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Calendar.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Calendar.tsx new file mode 100644 index 00000000000000..61323ee48b1256 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Calendar.tsx @@ -0,0 +1,13 @@ +import React from 'react'; +import { CalendarProps } from '../../types'; +import { Month } from './Month'; + +export function Calendar({ data }: CalendarProps) { + return ( + <> + {data.map((month, monthIndex) => { + return ; + })} + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/CalendarContainer.tsx 
b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/CalendarContainer.tsx new file mode 100644 index 00000000000000..169a14f0a5d4ec --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/CalendarContainer.tsx @@ -0,0 +1,31 @@ +import React, { useMemo } from 'react'; +import { useCalendarState } from '../context'; +import { DAYS_IN_WEEK } from '../constants'; +import { CalendarContainerProps } from '../../types'; +import { CalendarInnerWrapper, CalendarWrapper } from '../../components'; + +export function CalendarContainer({ children }: CalendarContainerProps) { + const { squareSize, squareGap, margin, parentHeight, countOfWeeks, countOfMonths } = useCalendarState(); + + const svgHeight = useMemo( + () => Math.min((squareSize + squareGap) * DAYS_IN_WEEK + margin.top + margin.bottom, parentHeight), + [squareSize, squareGap, margin.top, margin.bottom, parentHeight], + ); + + // Calendar can be wider then the parent container because of minimal size of square and gap + // In this case the horizontal sroll should be shown + const svgWidth = useMemo( + () => (squareSize + squareGap) * countOfWeeks + squareGap * countOfMonths + margin.left + margin.right, + [squareSize, squareGap, countOfWeeks, countOfMonths, margin.left, margin.right], + ); + + return ( + + + + {children} + + + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Day.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Day.tsx new file mode 100644 index 00000000000000..229be8d2aa4b6b --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Day.tsx @@ -0,0 +1,40 @@ +import { Popover } from '@src/alchemy-components/components/Popover'; +import React, { useMemo } from 'react'; +import { DayProps } from '../../types'; +import { useCalendarState } from '../context'; +import { StyledBar } 
from '../../components'; + +export function Day({ day, weekOffset, dayIndex }: DayProps) { + const { squareSize, squareGap, margin, colorAccessor, showPopover, popoverRenderer, selectedDay, onDayClick } = + useCalendarState(); + const color = useMemo(() => colorAccessor(day.value), [colorAccessor, day.value]); + + const y = useMemo( + () => (squareGap + squareSize) * dayIndex + margin.top, + [squareGap, squareSize, dayIndex, margin], + ); + + const renderBar = () => { + return ( + onDayClick?.(day)} + $addTransparency={!!selectedDay && selectedDay !== day.day} + /> + ); + }; + + if (showPopover) { + return ( + + {renderBar()} + + ); + } + return renderBar(); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Month.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Month.tsx new file mode 100644 index 00000000000000..312935716d57f6 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Month.tsx @@ -0,0 +1,28 @@ +import React, { useMemo } from 'react'; +import { MonthProps } from '../../types'; +import { useCalendarState } from '../context'; +import { Week } from './Week'; + +export function Month({ month, monthIndex }: MonthProps) { + const { squareGap, data } = useCalendarState(); + const monthOffset = useMemo(() => squareGap * monthIndex, [squareGap, monthIndex]); + const countOfWeeksBefore = useMemo( + () => data.slice(0, monthIndex).reduce((countOfWeeks, monthItem) => countOfWeeks + monthItem.weeks.length, 0), + [data, monthIndex], + ); + + return ( + <> + {month.weeks.map((week, weekIndex) => { + return ( + + ); + })} + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/TickLabel.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/TickLabel.tsx new file mode 100644 index 00000000000000..b3af03cb176a0b --- /dev/null +++ 
b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/TickLabel.tsx @@ -0,0 +1,13 @@ +import React, { SVGAttributes } from 'react'; + +export type TickLabelProps = SVGAttributes & { + text: string; +}; + +export const TickLabel = ({ text, ...props }: TickLabelProps) => { + return ( + + {text} + + ); +}; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Week.tsx b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Week.tsx new file mode 100644 index 00000000000000..973ad8c5165f1f --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/components/Week.tsx @@ -0,0 +1,21 @@ +import React, { useMemo } from 'react'; +import { WeekProps } from '../../types'; +import { useCalendarState } from '../context'; +import { Day } from './Day'; + +export function Week({ week, weekNumber, monthOffset }: WeekProps) { + const { squareSize, squareGap, margin } = useCalendarState(); + + const x = useMemo(() => { + const weekOffset = weekNumber * (squareGap + squareSize); + return monthOffset + weekOffset + margin.left; + }, [squareGap, squareSize, monthOffset, weekNumber, margin.left]); + + return ( + <> + {week.days.map((day, dayIndex) => { + return key={day.key} day={day} weekOffset={x} dayIndex={dayIndex} />; + })} + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/constants.ts b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/constants.ts new file mode 100644 index 00000000000000..89baf1700b1431 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/constants.ts @@ -0,0 +1,4 @@ +export const DAYS_IN_WEEK = 7; +export const MIN_DAYS_IN_WEEK = 4; +export const MIN_SQUARE_SIZE = 16; +export const MIN_GAP_SIZE = 4; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/private/context.tsx 
b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/context.tsx new file mode 100644 index 00000000000000..a83fd6c7f56cfc --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/private/context.tsx @@ -0,0 +1,109 @@ +import { Margin } from '@visx/xychart'; +import React, { createContext, PropsWithChildren, useContext, useMemo } from 'react'; +import { DayData, MonthData } from '../types'; +import { DAYS_IN_WEEK, MIN_GAP_SIZE, MIN_SQUARE_SIZE } from './constants'; + +export type CalendarContextState = { + data: MonthData[]; + parentWidth: number; + parentHeight: number; + squareSize: number; + squareGap: number; + margin: Margin; + countOfWeeks: number; + countOfMonths: number; + showPopover?: boolean; + popoverRenderer?: (day: DayData) => React.ReactNode; + colorAccessor: (value?: ValueType) => string; + selectedDay?: string | null; + onDayClick?: (day: DayData) => void; +}; + +export const CalendarContext = createContext(null); + +export type CalendarProviderProps = { + data: MonthData[]; + width: number; + height: number; + margin?: Margin; + showPopover?: boolean; + popoverRenderer?: (day: DayData) => React.ReactNode; + colorAccessor: (value?: ValueType) => string; + selectedDay?: string | null; + onDayClick?: (day: DayData) => void; +}; + +export function CalendarProvider({ + children, + data, + width, + height, + margin, + showPopover, + popoverRenderer, + colorAccessor, + selectedDay, + onDayClick, +}: PropsWithChildren>) { + const calendarMargin = useMemo( + () => ({ + // additional top margin + top: (margin?.top ?? 0) + 10, + right: (margin?.right ?? 0) + 0, + // additional space for bottom axis + bottom: (margin?.bottom ?? 0) + 40, + // additional space for left axis + left: (margin?.left ?? 
0) + 50, + }), + [margin], + ); + + const calendarWidth = useMemo(() => width - calendarMargin.right - calendarMargin.left, [width, calendarMargin]); + const calendarHeight = useMemo(() => height - calendarMargin.top - calendarMargin.bottom, [height, calendarMargin]); + + const countOfWeeks = useMemo(() => data.reduce((acc, value) => acc + value.weeks.length, 0), [data]); + const countOfMonths = useMemo(() => data.length, [data.length]); + + const squareGap = MIN_GAP_SIZE; + + const squareSize = useMemo(() => { + // Compute size of square depending of width + const maxSizeByWidth = Math.floor( + (calendarWidth - squareGap * Math.floor(countOfWeeks) - squareGap * Math.floor(countOfMonths)) / + Math.floor(countOfWeeks), + ); + + // Compute size of square depending of height + const maxSizeByHeight = Math.floor((calendarHeight - squareGap * DAYS_IN_WEEK) / DAYS_IN_WEEK); + + return Math.max(Math.min(maxSizeByWidth, maxSizeByHeight), MIN_SQUARE_SIZE); + }, [squareGap, countOfWeeks, countOfMonths, calendarHeight, calendarWidth]); + + return ( + + {children} + + ); +} + +export function useCalendarState() { + const context = useContext | null>(CalendarContext); + if (context === null) throw new Error(`${useCalendarState.name} must be used under a ${CalendarProvider.name}`); + return context; +} diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/types.ts b/datahub-web-react/src/alchemy-components/components/CalendarChart/types.ts new file mode 100644 index 00000000000000..198775527c3cfa --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/types.ts @@ -0,0 +1,81 @@ +import { Margin } from '@visx/xychart'; +import { SVGAttributes } from 'react'; + +export type DayData = { + day: string; + key: string; + value?: ValueType; +}; + +export type WeekData = { + days: DayData[]; + key: string; +}; + +export type MonthData = { + weeks: WeekData[]; + key: string; + label: string; +}; + +export type CalendarData = { + day: 
string; + value: ValueType; +}; + +export type LabelProps = Omit, 'x, y'>; + +export type Accessor = (value: ValueType) => ResponseType; + +export type ColorAccessor = { + valueAccessor: Accessor; + colors: string[]; +}; + +export type CalendarChartProps = { + data: CalendarData[]; + startDate: string | Date; + endDate: string | Date; + colorAccessor: (value: any) => string; + showPopover?: boolean; + popoverRenderer?: (day: DayData) => React.ReactNode; + leftAxisLabelProps?: LabelProps; + bottomAxisLabelProps?: LabelProps; + margin?: Margin; + maxHeight?: number; + selectedDay?: string | null; + onDayClick?: (day: DayData) => void; +}; + +export type CalendarProps = { + data: MonthData[]; +}; + +export type MonthProps = { + month: MonthData; + monthIndex: number; +}; + +export type WeekProps = { + week: WeekData; + weekNumber: number; + monthOffset: number; +}; + +export type DayProps = { + day: DayData; + weekOffset: number; + dayIndex: number; +}; + +export type AxisLeftWeekdaysProps = { + labelProps?: LabelProps; +}; + +export type AxisBottomMonthsProps = { + labelProps?: LabelProps; +}; + +export type CalendarContainerProps = { + children: React.ReactNode; +}; diff --git a/datahub-web-react/src/alchemy-components/components/CalendarChart/utils.ts b/datahub-web-react/src/alchemy-components/components/CalendarChart/utils.ts new file mode 100644 index 00000000000000..a4acb1dbbf5dfb --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/CalendarChart/utils.ts @@ -0,0 +1,208 @@ +import { scaleLinear } from '@visx/scale'; +import * as d3interpolate from '@visx/vendor/d3-interpolate'; +import dayjs from 'dayjs'; +import isoWeek from 'dayjs/plugin/isoWeek'; +import utc from 'dayjs/plugin/utc'; +import advancedFormat from 'dayjs/plugin/advancedFormat'; +import { CalendarChartProps, CalendarData, ColorAccessor, DayData, MonthData, WeekData } from './types'; +import { DAYS_IN_WEEK, MIN_DAYS_IN_WEEK } from './private/constants'; +import { 
CALENDAR_DATE_FORMAT } from './constants'; + +dayjs.extend(isoWeek); +dayjs.extend(utc); +dayjs.extend(advancedFormat); + +export function prepareCalendarData( + data: CalendarData[], + start: string | Date, + end: string | Date, + labelFormat = 'MMM ’YY', +): MonthData[] { + // Round start/end date to the start/end of the week + const startDate = dayjs(start).utc(true).startOf('isoWeek').startOf('day'); + const endDate = dayjs(end).utc(true).endOf('isoWeek').startOf('day'); + + // Helper functions to create day, week, and month objects + const createDay = (keyDay, value?: ValueType): DayData => ({ + day: keyDay.format(CALENDAR_DATE_FORMAT), + key: keyDay.format(CALENDAR_DATE_FORMAT), + value, + }); + + const createWeek = (keyDay, days?: DayData[]): WeekData => ({ + days: days ?? [], + key: keyDay.format('YYYY-MM-WW'), + }); + + const createMonth = (keyDay, weeks?: WeekData[]): MonthData => ({ + weeks: weeks ?? [], + key: keyDay.format('YYYY-MM'), + label: keyDay.format(labelFormat), + }); + + const months: MonthData[] = []; + let month: MonthData = createMonth(startDate.endOf('isoWeek')); + let week: WeekData | null = createWeek(startDate); + + // Iterate through each day from startDate to endDate + for (let day = dayjs(startDate); !day.isAfter(endDate); day = day.add(1, 'day')) { + // Find a value corresponding to the current day + const value = data.filter((datum) => dayjs(datum.day).utc(true).isSame(day, 'date'))?.[0]?.value; + + const dayItem: DayData = createDay(day, value); + + if (week === null) week = createWeek(day); + week.days.push(dayItem); // Add the day item to the current week + + // Check if it's the last day of the week + if (day.isoWeekday() === DAYS_IN_WEEK) { + const startOfWeek = day.startOf('isoWeek'); + const endOfWeek = day; + const firstDayOfNextWeek = day.add(1, 'day'); + + if (startOfWeek.month() !== endOfWeek.month()) { + // Handle month transition in current week + + const lastWeekdayOfMonth = startOfWeek.endOf('month').isoWeekday(); + 
+ if (lastWeekdayOfMonth >= MIN_DAYS_IN_WEEK) { + // This week belongs to the current month + month.weeks.push(week); + months.push(month); + week = null; + month = createMonth(firstDayOfNextWeek); + } else { + // This week belongs to the next month + if (month.weeks.length) months.push(month); // add current month if it has any weeks + month = createMonth(firstDayOfNextWeek, [week]); + week = null; + } + } else if (firstDayOfNextWeek.month() !== endOfWeek.month()) { + // Handle if month changed on next week + month.weeks.push(week); + months.push(month); + week = null; + month = createMonth(firstDayOfNextWeek); + } else { + month.weeks.push(week); + week = null; + } + + if (endOfWeek.isSame(endDate)) { + // Handle the end day + if (week && month) month.weeks.push(week); + if (month.weeks.length) months.push(month); + } + } + } + + return months; +} + +export function getColorAccessor( + data: CalendarData[], + colorAccessors: { [key: string]: ColorAccessor }, + defaultColor: string, + correctiveMaxValue = 0, +) { + if (Object.keys(colorAccessors).length === 0) return () => defaultColor; + + const scales = Object.entries(colorAccessors).reduce((acc, [key, accessor]) => { + return { + ...acc, + ...{ + [key]: scaleLinear({ + domain: [ + 0, + Math.max(...data.map((datum) => accessor.valueAccessor(datum.value)), correctiveMaxValue), + ], + range: [0, 1], + clamp: true, + }), + }, + }; + }, {}); + + const colorInterpolators = Object.entries(colorAccessors).reduce((acc, [key, accessor]) => { + return { + ...acc, + ...{ + [key]: d3interpolate.interpolateRgbBasis(accessor.colors), + }, + }; + }, {}); + + return function (datumValue?: ValueType) { + if (datumValue === undefined) return defaultColor; + + // Get key and value of item with max value + const [key, value] = Object.entries(colorAccessors) + .map(([accessorKey, colorAccessor]) => [accessorKey, colorAccessor.valueAccessor(datumValue)]) + .reduce((max, current) => (current[1] > max[1] ? 
current : max)); + + if ((value as number) <= 0) return defaultColor; + + const scaledValue = scales[key](value); + return colorInterpolators[key](scaledValue); + }; +} + +export type MockCalendarValue = { + inserts: number; + updates: number; + deletes: number; +}; + +export function generateMockData( + length: number, + startDate: string | Date = '2024-11-30', + maxValue = 3_000, + minValue = 0, +): CalendarData[] { + return Array(length) + .fill(0) + .map((_, index) => { + const day = dayjs(startDate) + .startOf('day') + .add(index - length + 1, 'days') + .format(CALENDAR_DATE_FORMAT); + + return { + day, + value: { + inserts: Math.max(Math.random() * maxValue, minValue), + updates: Math.max(Math.random() * maxValue, minValue), + deletes: Math.max(Math.random() * maxValue, minValue), + }, + }; + }); +} + +export function getMockedProps( + startDate = '2024-01-01', + endDate = '2024-12-31', +): CalendarChartProps { + const data = generateMockData(150, '2024-11-30'); + + const colorAccessor = getColorAccessor( + data, + { + insertsAndUpdates: { + valueAccessor: (datum) => datum.inserts + datum.updates, + colors: ['#CAC3F1', '#705EE4', '#3E2F9D'], + }, + deletes: { + valueAccessor: (datum) => datum.deletes, + colors: ['#f1c3ca', '#CF6D6D', '#ab4242'], + }, + }, + '#EBECF0', + ); + + return { + data, + startDate, + endDate, + colorAccessor, + }; +} diff --git a/datahub-web-react/src/alchemy-components/components/Card/Card.stories.tsx b/datahub-web-react/src/alchemy-components/components/Card/Card.stories.tsx index 336831fd15cfab..67966dbaa12d82 100644 --- a/datahub-web-react/src/alchemy-components/components/Card/Card.stories.tsx +++ b/datahub-web-react/src/alchemy-components/components/Card/Card.stories.tsx @@ -70,9 +70,30 @@ const meta = { type: 'text', }, }, + maxWidth: { + description: 'The maximum width of the card', + control: { + type: 'text', + }, + }, + height: { + description: 'The height of the card', + control: { + type: 'text', + }, + }, onClick: { 
description: 'The on click function for the card', }, + isEmpty: { + description: 'Whether the card is in empty state with No data', + table: { + defaultValue: { summary: `${cardDefaults.isEmpty}` }, + }, + control: { + type: 'boolean', + }, + }, }, // Define default args @@ -80,7 +101,7 @@ const meta = { title: 'Title', subTitle: 'Subtitle', iconAlignment: 'horizontal', - width: '150px', + isEmpty: false, }, } satisfies Meta; diff --git a/datahub-web-react/src/alchemy-components/components/Card/Card.tsx b/datahub-web-react/src/alchemy-components/components/Card/Card.tsx index 55c581251bea99..397ae2a664e6f4 100644 --- a/datahub-web-react/src/alchemy-components/components/Card/Card.tsx +++ b/datahub-web-react/src/alchemy-components/components/Card/Card.tsx @@ -6,6 +6,7 @@ import { Pill } from '../Pills'; export const cardDefaults: CardProps = { title: 'Title', iconAlignment: 'horizontal', + isEmpty: false, }; export const Card = ({ @@ -18,31 +19,45 @@ export const Card = ({ icon, children, width, + maxWidth, + height, + isEmpty, }: CardProps) => { return ( - -
- {icon &&
{icon}
} - - - {title} - {!!percent && ( - <Pill - label={`${Math.abs(percent)}%`} - size="sm" - colorScheme={percent < 0 ? 'red' : 'green'} - leftIcon={percent < 0 ? 'TrendingDown' : 'TrendingUp'} - clickable={false} - /> - )} - - + <> + {isEmpty ? ( + + + No Data {subTitle} - {button} - - -
- {children} -
+ + + ) : ( + +
+ {icon} + + + {title} + {!!percent && ( + <Pill + label={`${Math.abs(percent)}%`} + size="sm" + colorScheme={percent < 0 ? 'red' : 'green'} + leftIcon={percent < 0 ? 'TrendingDown' : 'TrendingUp'} + clickable={false} + /> + )} + + + {subTitle} + {button} + + +
+ {children} +
+ )} + ); }; diff --git a/datahub-web-react/src/alchemy-components/components/Card/components.ts b/datahub-web-react/src/alchemy-components/components/Card/components.ts index bb3821fffc7f58..f9510a85cb7a67 100644 --- a/datahub-web-react/src/alchemy-components/components/Card/components.ts +++ b/datahub-web-react/src/alchemy-components/components/Card/components.ts @@ -2,25 +2,30 @@ import { colors, radius, spacing, typography } from '@src/alchemy-components/the import { IconAlignmentOptions } from '@src/alchemy-components/theme/config'; import styled from 'styled-components'; -export const CardContainer = styled.div<{ hasButton: boolean; width?: string }>(({ hasButton, width }) => ({ - border: `1px solid ${colors.gray[100]}`, - borderRadius: radius.lg, - padding: spacing.md, - minWidth: '150px', - boxShadow: '0px 1px 2px 0px rgba(33, 23, 95, 0.07)', - backgroundColor: colors.white, - display: 'flex', - flexDirection: 'column', - gap: spacing.md, - width, +export const CardContainer = styled.div<{ hasButton?: boolean; width?: string; maxWidth?: string; height?: string }>( + ({ hasButton, width, maxWidth, height }) => ({ + border: `1px solid ${colors.gray[100]}`, + borderRadius: radius.lg, + padding: spacing.md, + display: 'flex', + flex: `1 1 ${maxWidth}`, + minWidth: '150px', + boxShadow: '0px 1px 2px 0px rgba(33, 23, 95, 0.07)', + backgroundColor: colors.white, + flexDirection: 'column', + gap: spacing.md, + maxWidth, + width, + height, - '&:hover': hasButton - ? { - border: `1px solid ${colors.violet[500]}`, - cursor: 'pointer', - } - : {}, -})); + '&:hover': hasButton + ? 
{ + border: `1px solid ${colors.violet[500]}`, + cursor: 'pointer', + } + : {}, + }), +); export const Header = styled.div<{ iconAlignment?: IconAlignmentOptions }>(({ iconAlignment }) => ({ display: 'flex', @@ -37,14 +42,14 @@ export const TitleContainer = styled.div({ width: '100%', }); -export const Title = styled.div({ +export const Title = styled.div<{ $isEmpty?: boolean }>(({ $isEmpty }) => ({ fontSize: typography.fontSizes.lg, fontWeight: typography.fontWeights.bold, - color: colors.gray[600], + color: $isEmpty ? colors.gray[1800] : colors.gray[600], display: 'flex', alignItems: 'center', gap: spacing.xsm, -}); +})); export const SubTitleContainer = styled.div({ display: 'flex', diff --git a/datahub-web-react/src/alchemy-components/components/Card/types.ts b/datahub-web-react/src/alchemy-components/components/Card/types.ts index e5b0e36f83e4ce..991215f0bed066 100644 --- a/datahub-web-react/src/alchemy-components/components/Card/types.ts +++ b/datahub-web-react/src/alchemy-components/components/Card/types.ts @@ -10,4 +10,7 @@ export interface CardProps { iconAlignment?: IconAlignmentOptions; children?: React.ReactNode; width?: string; + maxWidth?: string; + height?: string; + isEmpty?: boolean; } diff --git a/datahub-web-react/src/alchemy-components/components/Checkbox/Checkbox.tsx b/datahub-web-react/src/alchemy-components/components/Checkbox/Checkbox.tsx index 6ab4db74610e49..d69a28195bebbe 100644 --- a/datahub-web-react/src/alchemy-components/components/Checkbox/Checkbox.tsx +++ b/datahub-web-react/src/alchemy-components/components/Checkbox/Checkbox.tsx @@ -12,7 +12,6 @@ import { } from './components'; export const checkboxDefaults: CheckboxProps = { - label: 'Label', error: '', isChecked: false, isDisabled: false, @@ -42,9 +41,11 @@ export const Checkbox = ({ return ( - + {label ? 
( + + ) : null} { if (!isDisabled) { diff --git a/datahub-web-react/src/alchemy-components/components/Checkbox/components.ts b/datahub-web-react/src/alchemy-components/components/Checkbox/components.ts index 6a4ad08c9c4ce6..7193f8577c4357 100644 --- a/datahub-web-react/src/alchemy-components/components/Checkbox/components.ts +++ b/datahub-web-react/src/alchemy-components/components/Checkbox/components.ts @@ -57,7 +57,7 @@ export const Checkmark = styled.div<{ intermediate?: boolean; error: string; che content: '""', position: 'absolute', display: 'none', - left: !intermediate ? '6px' : '8px', + left: !intermediate ? '5px' : '8px', top: !intermediate ? '1px' : '3px', width: !intermediate ? '5px' : '0px', height: '10px', diff --git a/datahub-web-react/src/alchemy-components/components/Checkbox/types.ts b/datahub-web-react/src/alchemy-components/components/Checkbox/types.ts index 7ee10011689397..e4bbe8808378e8 100644 --- a/datahub-web-react/src/alchemy-components/components/Checkbox/types.ts +++ b/datahub-web-react/src/alchemy-components/components/Checkbox/types.ts @@ -1,7 +1,7 @@ import { InputHTMLAttributes } from 'react'; export interface CheckboxProps extends InputHTMLAttributes { - label: string; + label?: string; error?: string; isChecked?: boolean; setIsChecked?: React.Dispatch>; diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.stories.tsx b/datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.stories.tsx new file mode 100644 index 00000000000000..c116003505bf72 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.stories.tsx @@ -0,0 +1,61 @@ +import type { Meta, StoryObj } from '@storybook/react'; +import moment from 'moment'; +import React, { useState } from 'react'; +import { DatePicker, datePickerDefault } from './DatePicker'; +import { DatePickerValue } from './types'; + +const meta = { + title: 'Forms / DatePicker', + component: DatePicker, + parameters: { 
+ layout: 'centered', + docs: { + subtitle: 'A component to select a date.', + }, + }, + argTypes: { + value: { + description: 'The value of the component can be a Moment object or null | undefined', + table: { + defaultValue: { summary: 'undefined' }, + type: { summary: 'Moment | null | undefined' }, + }, + }, + onChange: { + description: 'Callback function, can be executed when the selected date is changing', + table: { + defaultValue: { summary: 'undefined' }, + }, + }, + disabled: { + description: 'Determine whether the DatePicker is disabled', + table: { + defaultValue: { summary: 'false' }, + }, + control: { + type: 'boolean', + }, + }, + variant: { + description: 'Preset of predefined props', + table: { + defaultValue: { summary: 'DEFAULT' }, + }, + }, + }, + args: { ...datePickerDefault }, +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +function WrappedDatePicker(props) { + const [value, setValue] = useState(moment()); + return setValue(v)} {...props} />; +} + +export const sandbox: Story = { + tags: ['dev'], + render: WrappedDatePicker, +}; diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.tsx b/datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.tsx new file mode 100644 index 00000000000000..765cfdce14877d --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/DatePicker.tsx @@ -0,0 +1,54 @@ +import React, { useEffect, useMemo, useState } from 'react'; +import { StyledAntdDatePicker } from './components'; +import useVariantProps from './hooks/useVariantProps'; +import { DatePickerVariant } from './constants'; +import { DatePickerProps, DatePickerValue } from './types'; + +export const datePickerDefault: DatePickerProps = { + variant: DatePickerVariant.Default, + disabled: false, +}; + +export function DatePicker({ + value, + onChange, + variant = datePickerDefault.variant, + disabled = datePickerDefault.disabled, + disabledDate, +}: 
DatePickerProps) { + const [internalValue, setInternalValue] = useState(value); + + const [isOpen, setIsOpen] = useState(false); + const presetProps = useVariantProps(variant); + const { inputRender, ...datePickerProps } = presetProps; + + useEffect(() => onChange?.(internalValue), [onChange, internalValue]); + + const wrappedInputRender = useMemo(() => { + if (!inputRender) return undefined; + + return (props: React.InputHTMLAttributes) => + inputRender({ + ...props, + datePickerProps: { + disabled, + }, + datePickerState: { + open: isOpen, + setValue: setInternalValue, + }, + }); + }, [disabled, isOpen, inputRender]); + + return ( + wrappedInputRender?.(props))} + onChange={(newValue) => setInternalValue(newValue)} + onOpenChange={(open) => setIsOpen(open)} + disabled={disabled} + disabledDate={disabledDate} + /> + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/components.tsx b/datahub-web-react/src/alchemy-components/components/DatePicker/components.tsx new file mode 100644 index 00000000000000..8d44635c622219 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/components.tsx @@ -0,0 +1,32 @@ +import { colors } from '@src/alchemy-components/theme'; +import { DatePicker as AntdDatePicker } from 'antd'; +import styled from 'styled-components'; + +export const StyledAntdDatePicker = styled(AntdDatePicker)<{ $noDefaultPaddings?: boolean }>` + &.ant-picker { + ${(props) => props.$noDefaultPaddings && 'padding: 0;'} + width: 100%; + } + + &.acryl-date-picker .ant-picker-cell-today > .ant-picker-cell-inner::before { + border: 1px solid ${colors.violet[500]} !important; + } +`; + +export const StyledCalendarWrapper = styled.div` + & .ant-picker-cell-selected > .ant-picker-cell-inner { + background: ${colors.violet[500]} !important; + } + + & .ant-picker-cell-today > .ant-picker-cell-inner::before { + border: 1px solid ${colors.violet[500]} !important; + } + + & .ant-picker-today-btn { + color: 
${colors.violet[500]}; + } + + & .ant-picker-header-view button:hover { + color: ${colors.violet[500]}; + } +`; diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/constants.ts b/datahub-web-react/src/alchemy-components/components/DatePicker/constants.ts new file mode 100644 index 00000000000000..b452978fa2555e --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/constants.ts @@ -0,0 +1,4 @@ +export enum DatePickerVariant { + Default = 'DEFAULT', + DateSwitcher = 'DATE_SWITCHER', +} diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/hooks/useVariantProps.ts b/datahub-web-react/src/alchemy-components/components/DatePicker/hooks/useVariantProps.ts new file mode 100644 index 00000000000000..65932ba58a517e --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/hooks/useVariantProps.ts @@ -0,0 +1,15 @@ +import { useMemo } from 'react'; +import { DatePickerVariant } from '../constants'; +import { VariantProps } from '../types'; +import { CommonVariantProps, DateSwitcherVariantProps } from '../variants'; + +export default function useVariantProps(variant: DatePickerVariant | undefined): VariantProps { + return useMemo(() => { + switch (variant) { + case DatePickerVariant.DateSwitcher: + return DateSwitcherVariantProps; + default: + return CommonVariantProps; + } + }, [variant]); +} diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/index.ts b/datahub-web-react/src/alchemy-components/components/DatePicker/index.ts new file mode 100644 index 00000000000000..f0563e87283cb7 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/index.ts @@ -0,0 +1,3 @@ +export { DatePicker } from './DatePicker'; +export { DatePickerVariant } from './constants'; +export type { DatePickerValue } from './types'; diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/styles.css 
b/datahub-web-react/src/alchemy-components/components/DatePicker/styles.css new file mode 100644 index 00000000000000..64a7be7eb39b4b --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/styles.css @@ -0,0 +1,15 @@ +.acryl-date-picker .ant-picker-cell-today > .ant-picker-cell-inner::before { + border: 1px solid #533fd1 !important; +} + +.acryl-date-picker .ant-picker-cell-selected > .ant-picker-cell-inner { + background: #533fd1 !important; +} + +.acryl-date-picker .ant-picker-today-btn { + color: #533fd1; +} + +.acryl-date-picker .ant-picker-header-view button:hover { + color: #533fd1; +} diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/types.ts b/datahub-web-react/src/alchemy-components/components/DatePicker/types.ts new file mode 100644 index 00000000000000..4e16346edd679b --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/types.ts @@ -0,0 +1,31 @@ +import { DatePicker as AntdDatePicker } from 'antd'; +import { Moment } from 'moment'; +import { DatePickerVariant } from './constants'; + +export type DatePickerProps = { + value?: DatePickerValue; + onChange?: (value: DatePickerValue) => void; + disabled?: boolean; + disabledDate?: (value: DatePickerValue) => boolean; + variant?: DatePickerVariant; +}; + +export type DatePickerState = { + open?: boolean; + value?: DatePickerValue; + setValue?: React.Dispatch>; +}; + +export type ExtendedInputRenderProps = React.InputHTMLAttributes & { + datePickerProps: DatePickerProps; + datePickerState: DatePickerState; +}; + +export type AntdDatePickerProps = React.ComponentProps; + +export type DatePickerValue = Moment | null | undefined; + +export type VariantProps = Omit & { + inputRender?: (props: ExtendedInputRenderProps) => React.ReactNode; + $noDefaultPaddings?: boolean; +}; diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/variants/common/props.tsx 
b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/common/props.tsx new file mode 100644 index 00000000000000..9323530d332a8e --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/common/props.tsx @@ -0,0 +1,7 @@ +import React from 'react'; +import { StyledCalendarWrapper } from '../../components'; +import { VariantProps } from '../../types'; + +export const CommonVariantProps: VariantProps = { + panelRender: (panel) => {panel}, +}; diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/components.tsx b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/components.tsx new file mode 100644 index 00000000000000..4e3f0067f907d1 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/components.tsx @@ -0,0 +1,122 @@ +import { colors } from '@components'; +import { CaretLeft, CaretRight } from 'phosphor-react'; +import React, { useCallback, useMemo } from 'react'; +import styled from 'styled-components'; +import { Text } from '../../../Text/Text'; +import { SwitcherDirection } from './types'; +import { ExtendedInputRenderProps } from '../../types'; + +const StyledContainer = styled.div<{ $opened?: boolean; $disabled?: boolean }>` + border: 1px solid ${(props) => (props.$opened || props.$disabled ? 
colors.gray[1800] : colors.gray[100])}; + ${(props) => props.$opened && !props.$disabled && `outline: 2px solid ${colors.violet[300]};`} + border-radius: 8px; + padding: 8px; + display: flex; + flex-direction: row; + gap: 8px; + justify-content: space-between; + align-items: center; + width: 100%; + + box-shadow: 0px 1px 2px 0px rgba(33, 23, 95, 0.07); + + ${(props) => + props.$disabled && + ` + background: ${colors.gray[1500]}; + cursor: not-allowed; + `} + + :hover, + :focus, + :active { + ${(props) => !props.$disabled && 'box-shadow: 0px 1px 2px 1px rgba(33, 23, 95, 0.07);'} + } +`; + +const Content = styled(Text)<{ $disabled?: boolean }>` + color: ${colors.gray[1800]}; + user-select: none; + cursor: ${(props) => (props.$disabled ? 'not-allowed' : 'pointer')}; + + :hover { + ${(props) => !props.$disabled && `color: ${colors.violet[500]};`} + } +`; + +const CaretWrapper = styled.div<{ $disabled?: boolean }>` + & svg { + color: ${colors.gray[1800]}; + display: flex; + align-items: center; + cursor: ${(props) => (props.$disabled ? 'not-allowed' : 'pointer')}; + + :hover { + ${(props) => !props.$disabled && `color: ${colors.violet[500]};`} + } + } +`; + +type SwitcherButtonProps = { + direction: SwitcherDirection; + onClick: (direction: SwitcherDirection) => void; + disabled?: boolean; +}; + +function SwitcherButton({ direction, onClick, disabled }: SwitcherButtonProps) { + const Icon = direction === SwitcherDirection.Left ? 
CaretLeft : CaretRight; + + const onClickHandler = useCallback(() => { + if (disabled) return null; + return onClick?.(direction); + }, [direction, disabled, onClick]); + + return ( + + + + ); +} + +export function DateSwitcherInput({ datePickerProps, datePickerState, ...props }: ExtendedInputRenderProps) { + const { disabled } = datePickerProps; + const { setValue, open } = datePickerState; + + const onSwitcherClick = useCallback( + (direction: SwitcherDirection) => { + if (disabled) return null; + + return setValue?.((currentValue) => { + if (!currentValue) return currentValue; + // FYI: clone value to trigger rerendering after changes + const cloneOfCurrentValue = currentValue.clone(); + const sign = direction === SwitcherDirection.Left ? -1 : 1; + const newValue = cloneOfCurrentValue.add(1 * sign, 'day'); + return newValue; + }); + }, + [setValue, disabled], + ); + + const isDateSwitchingDisabled = useMemo(() => disabled || !props.title, [disabled, props.title]); + + return ( + + + + + {props.title ? 
props.title : props.placeholder} + + + + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/props.tsx b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/props.tsx new file mode 100644 index 00000000000000..8deb9ad8902dc8 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/props.tsx @@ -0,0 +1,14 @@ +import React from 'react'; +import { VariantProps } from '../../types'; +import { CommonVariantProps } from '../common/props'; +import { DateSwitcherInput } from './components'; + +export const DateSwitcherVariantProps: VariantProps = { + ...CommonVariantProps, + bordered: false, + allowClear: false, + format: 'll', + suffixIcon: null, + inputRender: (props) => , + $noDefaultPaddings: true, +}; diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/types.ts b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/types.ts new file mode 100644 index 00000000000000..f0b73721ea0a61 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/dateSwitcher/types.ts @@ -0,0 +1,4 @@ +export enum SwitcherDirection { + Left = 'LEFT', + Right = 'RIGHT', +} diff --git a/datahub-web-react/src/alchemy-components/components/DatePicker/variants/index.ts b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/index.ts new file mode 100644 index 00000000000000..e9ce5e764fa631 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/DatePicker/variants/index.ts @@ -0,0 +1,4 @@ +import { CommonVariantProps } from './common/props'; +import { DateSwitcherVariantProps } from './dateSwitcher/props'; + +export { CommonVariantProps, DateSwitcherVariantProps }; diff --git a/datahub-web-react/src/alchemy-components/components/Drawer/Drawer.stories.tsx 
b/datahub-web-react/src/alchemy-components/components/Drawer/Drawer.stories.tsx new file mode 100644 index 00000000000000..df0014ae7824e4 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Drawer/Drawer.stories.tsx @@ -0,0 +1,97 @@ +import { BADGE } from '@geometricpanda/storybook-addon-badges'; +import type { Meta, StoryObj } from '@storybook/react'; +import React, { useState } from 'react'; +import { Button } from '../Button'; +import { Drawer, drawerDefault } from './Drawer'; +import { DrawerProps } from './types'; + +// Auto Docs +const meta = { + title: 'Components / Drawer', + component: Drawer, + + // Display Properties + parameters: { + layout: 'centered', + badges: [BADGE.EXPERIMENTAL], + docs: { + subtitle: 'A panel which slides in from the edge of the screen.', + }, + }, + + // Component-level argTypes + argTypes: { + title: { + description: 'The title of the drawer', + control: { + type: 'text', + }, + }, + open: { + description: 'is the drawer opened', + control: { + type: 'boolean', + }, + }, + onClose: { + description: 'The handler called when the drawer is closed', + }, + width: { + description: 'Width of the drawer', + table: { + defaultValue: { summary: `${drawerDefault.width}` }, + }, + control: { + type: 'number', + }, + }, + closable: { + description: 'Whether a close (x) button is visible on top left of the Drawer dialog or not', + table: { + defaultValue: { summary: `${drawerDefault.closable}` }, + }, + control: { + type: 'boolean', + }, + }, + maskTransparent: { + description: 'Whether the mask is visible', + table: { + defaultValue: { summary: `${drawerDefault.maskTransparent}` }, + }, + control: { + type: 'boolean', + }, + }, + }, + + // Define default args + args: { + title: 'Title', + ...drawerDefault, + }, +} satisfies Meta; + +export default meta; + +// Stories + +type Story = StoryObj; + +const WrappedDrawer = ({ ...props }: DrawerProps) => { + const [isOpen, setIsOpen] = useState(false); + return ( + <> + + 
setIsOpen(false)}> + Content + + + ); +}; +// Basic story is what is displayed 1st in storybook +// Pass props to this so that it can be customized via the UI props panel +export const sandbox: Story = { + tags: ['dev'], + render: (props) => , +}; diff --git a/datahub-web-react/src/alchemy-components/components/Drawer/Drawer.tsx b/datahub-web-react/src/alchemy-components/components/Drawer/Drawer.tsx new file mode 100644 index 00000000000000..65ec4a14286eab --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Drawer/Drawer.tsx @@ -0,0 +1,53 @@ +import React from 'react'; +import { Button } from '../Button'; +import { Text } from '../Text'; +import { StyledDrawer, TitleContainer } from './components'; +import { maskTransparentStyle } from './constants'; +import { DrawerProps } from './types'; + +export const drawerDefault: Omit = { + width: 600, + closable: true, + maskTransparent: false, +}; + +export const Drawer = ({ + title, + children, + open, + onClose, + width = drawerDefault.width, + closable = drawerDefault.closable, + maskTransparent = drawerDefault.maskTransparent, +}: React.PropsWithChildren) => { + return ( + + + {title} + + {closable && ( + + + + + ), + renderGraph: () => ( + <>{JSON.stringify(day)}} /> + ), + graphHeight: 'fit-content', + }, +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +export const sandbox: Story = { + tags: ['dev'], + render: (props) => ( +
+ +
+ ), +}; diff --git a/datahub-web-react/src/alchemy-components/components/GraphCard/GraphCard.tsx b/datahub-web-react/src/alchemy-components/components/GraphCard/GraphCard.tsx new file mode 100644 index 00000000000000..a706c66317f462 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/GraphCard/GraphCard.tsx @@ -0,0 +1,90 @@ +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { CardContainer } from '../Card/components'; +import { Loader } from '../Loader'; +import { PageTitle } from '../PageTitle'; +import { Text } from '../Text'; +import { + ControlsContainer, + EmptyMessageContainer, + GraphCardBody, + GraphCardHeader, + GraphContainer, + LoaderContainer, +} from './components'; +import { GraphCardProps } from './types'; +import MoreInfoModal from './MoreInfoModal'; + +const EmptyMessageWrapper = styled.div` + text-align: center; +`; + +const LinkText = styled(Text)` + display: inline; + :hover { + cursor: pointer; + } +`; + +export function GraphCard({ + title, + subTitle, + loading, + graphHeight = '350px', + width = '100%', + renderGraph, + renderControls, + isEmpty, + emptyContent, + moreInfoModalContent, +}: GraphCardProps) { + const [showInfoModal, setShowInfoModal] = useState(false); + + const handleModalClose = () => { + setShowInfoModal(false); + }; + + return ( + + + + {renderControls?.()} + + + {loading && ( + + + + )} + + {!loading && ( + + + {renderGraph()} + + {isEmpty && + (emptyContent || ( + + + + No Data + + No stats collected for this asset at the moment. 
+ {moreInfoModalContent && ( + setShowInfoModal(true)}> + More info + + )} + + + + ))} + + )} + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/GraphCard/MoreInfoModal.tsx b/datahub-web-react/src/alchemy-components/components/GraphCard/MoreInfoModal.tsx new file mode 100644 index 00000000000000..eb3045a1aaef43 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/GraphCard/MoreInfoModal.tsx @@ -0,0 +1,55 @@ +import { Text, typography } from '@components'; +import { Modal } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; + +export const StyledModal = styled(Modal)` + font-family: ${typography.fonts.body}; + + &&& .ant-modal-content { + box-shadow: 0px 4px 12px 0px rgba(9, 1, 61, 0.12); + border-radius: 12px; + } + + .ant-modal-header { + border-bottom: 0; + padding-bottom: 0; + border-radius: 12px !important; + } + + .ant-modal-body { + padding: 8px 24px 24px 24px; + } + + .ant-modal-close-x { + svg { + font-size: 18px; + } + } +`; + +interface Props { + showModal: boolean; + handleClose: () => void; + modalContent: React.ReactNode; +} + +const MoreInfoModal = ({ showModal, handleClose, modalContent }: Props) => { + return ( + + No Data + + } + > + {modalContent} + + ); +}; + +export default MoreInfoModal; diff --git a/datahub-web-react/src/alchemy-components/components/GraphCard/components.tsx b/datahub-web-react/src/alchemy-components/components/GraphCard/components.tsx new file mode 100644 index 00000000000000..025bb896df2f7f --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/GraphCard/components.tsx @@ -0,0 +1,46 @@ +import styled from 'styled-components'; + +export const GraphCardHeader = styled.div` + display: flex; + flex-direction: row; + justify-content: space-between; +`; + +export const GraphCardBody = styled.div` + width: 100%; + position: relative; +`; + +export const ControlsContainer = styled.div` + height: 42px; + display: flex; + flex-direction: row; + 
gap: 8px; +`; + +export const GraphContainer = styled.div<{ $isEmpty?: boolean; $height: string }>` + width: 100%; + height: ${(props) => props.$height}; + + ${(props) => + props.$isEmpty && + ` + position: relative; + pointer-events: none; + filter: blur(2px); + `} +`; + +export const EmptyMessageContainer = styled.div` + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + position: absolute; +`; + +export const LoaderContainer = styled.div<{ $height: string }>` + display: flex; + width: 100%; + height: ${(props) => props.$height}; + min-height: 200px; +`; diff --git a/datahub-web-react/src/alchemy-components/components/GraphCard/index.ts b/datahub-web-react/src/alchemy-components/components/GraphCard/index.ts new file mode 100644 index 00000000000000..5796bf09a240f7 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/GraphCard/index.ts @@ -0,0 +1 @@ +export { GraphCard } from './GraphCard'; diff --git a/datahub-web-react/src/alchemy-components/components/GraphCard/types.ts b/datahub-web-react/src/alchemy-components/components/GraphCard/types.ts new file mode 100644 index 00000000000000..b63e6b8999f747 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/GraphCard/types.ts @@ -0,0 +1,12 @@ +export type GraphCardProps = { + title: string; + subTitle?: string | React.ReactNode; + loading?: boolean; + graphHeight?: string; + width?: string; + renderGraph: () => React.ReactNode; + renderControls?: () => React.ReactNode; + isEmpty?: boolean; + emptyContent?: React.ReactNode; + moreInfoModalContent?: React.ReactNode; +}; diff --git a/datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.stories.tsx b/datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.stories.tsx new file mode 100644 index 00000000000000..3f831d1b4c7c6f --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.stories.tsx @@ -0,0 +1,95 @@ +import React from 'react'; +import { Meta, 
StoryObj } from '@storybook/react'; +import { Airplane } from '@phosphor-icons/react'; + +import { IconLabel } from './IconLabel'; +import { IconType } from './types'; + +const meta: Meta = { + title: 'Components / IconLabel', + component: IconLabel, + + // Component-level parameters + parameters: { + layout: 'centered', + docs: { + subtitle: 'Displays a label with an icon or image.', + }, + }, + + // Component-level argTypes + argTypes: { + icon: { + description: 'The icon or image source to display.', + control: false, + table: { + type: { summary: 'ReactNode' }, + }, + }, + name: { + description: 'The label text.', + control: 'text', + table: { + type: { summary: 'string' }, + }, + }, + imageUrl: { + description: 'The image url.', + control: 'text', + table: { + type: { summary: 'string' }, + }, + }, + type: { + description: 'The type of icon to render.', + control: 'radio', + options: [IconType.ICON, IconType.IMAGE], + table: { + defaultValue: { summary: IconType.ICON }, + type: { summary: 'IconType.ICON | IconType.IMAGE' }, + }, + }, + }, + + // Default props + args: { + icon: , // Default to an emoji for demonstration + name: 'Icon Label', + type: IconType.ICON, + }, +}; + +export default meta; + +type Story = StoryObj; + +// Sandbox Story +export const sandbox: Story = { + render: (props) => , +}; + +// Example Stories +export const withIcon: Story = { + args: { + icon: , + name: 'Airplane Icon', + type: IconType.ICON, + }, +}; + +export const withImage: Story = { + args: { + imageUrl: 'https://png.pngtree.com/png-vector/20230209/ourmid/pngtree-test-icon-png-image_6591706.png', + name: 'Placeholder Image', + type: IconType.IMAGE, + icon: , + }, +}; + +export const longTextLabel: Story = { + args: { + icon: , + name: 'This is a very long label text for testing purposes', + type: IconType.ICON, + }, +}; diff --git a/datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.tsx 
b/datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.tsx new file mode 100644 index 00000000000000..2fcf046c925c46 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IconLabel/IconLabel.tsx @@ -0,0 +1,33 @@ +import React, { useEffect, useState } from 'react'; +import { IconLabelProps, IconType } from './types'; +import { IconLabelContainer, ImageContainer, Label } from './component'; +import { isValidImageUrl } from './utils'; + +export const IconLabel = ({ icon, name, type, style, imageUrl }: IconLabelProps) => { + const [isValidImage, setIsValidImage] = useState(false); + + useEffect(() => { + if (type === IconType.IMAGE && typeof imageUrl === 'string') { + isValidImageUrl(imageUrl).then(setIsValidImage); // Validate the image URL + } + }, [imageUrl, type]); + + const renderIcons = () => { + if (type === IconType.ICON) { + return icon; + } + + if (type === IconType.IMAGE && isValidImage) { + return {name}; + } + + return null; // Render the fallback (e.g., emoji or placeholder) + }; + + return ( + + {renderIcons()} + + + ); +}; diff --git a/datahub-web-react/src/alchemy-components/components/IconLabel/component.ts b/datahub-web-react/src/alchemy-components/components/IconLabel/component.ts new file mode 100644 index 00000000000000..a2878cdbf30385 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IconLabel/component.ts @@ -0,0 +1,21 @@ +import styled from 'styled-components'; + +export const IconLabelContainer = styled.div` + display: flex; + align-items: center; + gap: 10px; +`; + +export const ImageContainer = styled.div` + display: flex; + align-items: center; + margin-right: 0px; +`; + +export const Label = styled.span` + font-family: Mulish; + font-size: 14px; + font-weight: 400; + color: #374066; + white-space: normal; +`; diff --git a/datahub-web-react/src/alchemy-components/components/IconLabel/index.ts b/datahub-web-react/src/alchemy-components/components/IconLabel/index.ts new file mode 
100644 index 00000000000000..3f4085aa2b9364 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IconLabel/index.ts @@ -0,0 +1 @@ +export { IconLabel } from './IconLabel'; diff --git a/datahub-web-react/src/alchemy-components/components/IconLabel/types.ts b/datahub-web-react/src/alchemy-components/components/IconLabel/types.ts new file mode 100644 index 00000000000000..3b38b45ee024a9 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IconLabel/types.ts @@ -0,0 +1,13 @@ +export interface IconLabelProps { + icon: JSX.Element; + name: string; + type: IconType; + marginRight?: string; + imageUrl?: string; + style?: React.CSSProperties; +} + +export enum IconType { + ICON = 'ICON', + IMAGE = 'IMAGE', +} diff --git a/datahub-web-react/src/alchemy-components/components/IconLabel/utils.ts b/datahub-web-react/src/alchemy-components/components/IconLabel/utils.ts new file mode 100644 index 00000000000000..0b5e6e0cb8a6c3 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IconLabel/utils.ts @@ -0,0 +1,10 @@ +// Helper function to validate image URLs +export const isValidImageUrl = async (url: string): Promise => { + return new Promise((resolve) => { + const img = new Image(); + img.src = url; + + img.onload = () => resolve(true); // Image is valid + img.onerror = () => resolve(false); // Image is invalid + }); +}; diff --git a/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.stories.tsx b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.stories.tsx new file mode 100644 index 00000000000000..7e21c5c9fc2a8a --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.stories.tsx @@ -0,0 +1,85 @@ +import React from 'react'; +import { Meta, StoryObj } from '@storybook/react'; +import { IncidentPriorityLabel } from './IncidentPriorityLabel'; + +const meta: Meta = { + title: 
'Components / IncidentPriorityLabel', + component: IncidentPriorityLabel, + + // Component-level parameters + parameters: { + layout: 'centered', + docs: { + subtitle: 'Displays a priority label for incidents with an icon and title.', + }, + }, + + // Component-level argTypes + argTypes: { + priority: { + description: 'Incident string containing priority', + control: { type: 'text' }, + table: { + type: { summary: '"CRITICAL" | "HIGH" | "MEDIUM" | "LOW"' }, + }, + }, + title: { + description: 'Incident string containing title', + control: { type: 'text' }, + table: { + type: { summary: 'title: string' }, + }, + }, + }, + + // Default props + args: { + priority: 'CRITICAL', + title: 'Critical', + }, +}; + +export default meta; + +type Story = StoryObj; + +// Sandbox Story +export const sandbox: Story = { + render: (props) => , +}; + +// Example Stories +export const criticalPriority: Story = { + args: { + priority: 'CRITICAL', + title: 'Critical', + }, +}; + +export const highPriority: Story = { + args: { + priority: 'HIGH', + title: 'High', + }, +}; + +export const mediumPriority: Story = { + args: { + priority: 'MEDIUM', + title: 'Medium', + }, +}; + +export const lowPriority: Story = { + args: { + priority: 'LOW', + title: 'Low', + }, +}; + +export const unknownPriority: Story = { + args: { + priority: 'UNKNOWN', + title: 'Unknown', + }, +}; diff --git a/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.tsx b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.tsx new file mode 100644 index 00000000000000..d3e0ba6ab9316b --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/IncidentPriorityLabel.tsx @@ -0,0 +1,43 @@ +import React from 'react'; +import { ExclamationMark } from '@phosphor-icons/react'; +import colors from '@src/alchemy-components/theme/foundations/colors'; + +import { IconLabel } from '../IconLabel'; +import { 
IncidentPriorityLabelProps } from './types'; +import { Bar } from '../Bar'; +import { PRIORITIES } from './constant'; +import { IconType } from '../IconLabel/types'; + +const PRIORITY_LEVEL = { + [PRIORITIES.HIGH]: 3, + [PRIORITIES.MEDIUM]: 2, + [PRIORITIES.LOW]: 1, +}; + +const renderBars = (priority: string) => { + return ; +}; + +const Icons = { + [PRIORITIES.CRITICAL]: { + icon: , + type: IconType.ICON, + }, + [PRIORITIES.HIGH]: { + icon: renderBars(PRIORITIES.HIGH), + type: IconType.ICON, + }, + [PRIORITIES.MEDIUM]: { + icon: renderBars(PRIORITIES.MEDIUM), + type: IconType.ICON, + }, + [PRIORITIES.LOW]: { + icon: renderBars(PRIORITIES.LOW), + type: IconType.ICON, + }, +}; + +export const IncidentPriorityLabel = ({ priority, title, style }: IncidentPriorityLabelProps) => { + const { icon, type } = Icons[priority] || {}; + return ; +}; diff --git a/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/constant.ts b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/constant.ts new file mode 100644 index 00000000000000..55533c3ab7b6a1 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/constant.ts @@ -0,0 +1,7 @@ +export const PRIORITIES = { + NONE: 'NONE', + LOW: 'LOW', + MEDIUM: 'MEDIUM', + HIGH: 'HIGH', + CRITICAL: 'CRITICAL', +}; diff --git a/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/index.ts b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/index.ts new file mode 100644 index 00000000000000..2638ed25db9e52 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/index.ts @@ -0,0 +1 @@ +export { IncidentPriorityLabel } from './IncidentPriorityLabel'; diff --git a/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/types.ts b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/types.ts new file mode 100644 index 
00000000000000..2eb4dbefa8fac0 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/IncidentPriorityLabel/types.ts @@ -0,0 +1,5 @@ +export interface IncidentPriorityLabelProps { + priority: string; + title: string; + style?: React.CSSProperties; +} diff --git a/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.stories.tsx b/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.stories.tsx index 8cce0369918a2e..fce3921ff1ecd0 100644 --- a/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.stories.tsx +++ b/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.stories.tsx @@ -3,6 +3,7 @@ import { BADGE } from '@geometricpanda/storybook-addon-badges'; import type { Meta, StoryObj } from '@storybook/react'; import { LineChart } from './LineChart'; import { getMockedProps } from '../BarChart/utils'; +import { DEFAULT_MAX_DOMAIN_VALUE } from '../BarChart/hooks/useAdaptYScaleToZeroValues'; const meta = { title: 'Charts / LineChart', @@ -28,24 +29,19 @@ const meta = { yAccessor: { description: 'A function to convert datum to value of Y', }, - renderTooltipContent: { + maxYDomainForZeroData: { + description: + 'For the case where the data has only zero values, you can set the yScale domain to better display the chart', + table: { + defaultValue: { summary: `${DEFAULT_MAX_DOMAIN_VALUE}` }, + }, + }, + popoverRenderer: { description: 'A function to replace default rendering of toolbar', }, margin: { description: 'Add margins to chart', }, - leftAxisTickFormat: { - description: 'A function to format labels of left axis', - }, - leftAxisTickLabelProps: { - description: 'Props for label of left axis', - }, - bottomAxisTickFormat: { - description: 'A function to format labels of bottom axis', - }, - bottomAxisTickLabelProps: { - description: 'Props for label of bottom axis', - }, lineColor: { description: 'Color of line on chart', control: { @@ -58,11 +54,14 @@ const meta = { type: 
'color', }, }, - gridColor: { - description: "Color of grid's lines", - control: { - type: 'color', - }, + leftAxisProps: { + description: 'The props for the left axis', + }, + bottomAxisProps: { + description: 'The props for the bottom axis', + }, + gridProps: { + description: 'The props for the grid', }, renderGradients: { description: 'A function to render different gradients that can be used as colors', @@ -78,7 +77,7 @@ const meta = { // Define defaults args: { ...getMockedProps(), - renderTooltipContent: (datum) => <>DATUM: {JSON.stringify(datum)}, + popoverRenderer: (datum) => <>DATUM: {JSON.stringify(datum)}, }, } satisfies Meta; diff --git a/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.tsx b/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.tsx index 22580122ccf84f..540b798f1ab1d1 100644 --- a/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.tsx +++ b/datahub-web-react/src/alchemy-components/components/LineChart/LineChart.tsx @@ -1,16 +1,22 @@ import { colors } from '@src/alchemy-components/theme'; -// import { abbreviateNumber } from '@src/app/dataviz/utils'; +import { abbreviateNumber } from '@src/app/dataviz/utils'; import { TickLabelProps } from '@visx/axis'; import { curveMonotoneX } from '@visx/curve'; import { LinearGradient } from '@visx/gradient'; import { ParentSize } from '@visx/responsive'; -import { AreaSeries, Axis, AxisScale, Grid, LineSeries, Tooltip, XYChart } from '@visx/xychart'; +import { AreaSeries, Axis, AxisScale, Grid, Tooltip, XYChart } from '@visx/xychart'; import dayjs from 'dayjs'; import React, { useState } from 'react'; import { Popover } from '../Popover'; -import { ChartWrapper } from './components'; +import { ChartWrapper, TooltipGlyph } from './components'; import { LineChartProps } from './types'; -import { abbreviateNumber } from '../dataviz/utils'; +import { getMockedProps } from '../BarChart/utils'; +import useMergedProps from 
'../BarChart/hooks/useMergedProps'; +import { roundToEven } from './utils'; +import { AxisProps, GridProps } from '../BarChart/types'; +import { GLYPH_DROP_SHADOW_FILTER } from './constants'; +import useAdaptYScaleToZeroValues from '../BarChart/hooks/useAdaptYScaleToZeroValues'; +import useMaxDataValue from '../BarChart/hooks/useMaxDataValue'; const commonTickLabelProps: TickLabelProps = { fontSize: 10, @@ -18,30 +24,55 @@ const commonTickLabelProps: TickLabelProps = { fill: colors.gray[1700], }; -const GLYPH_DROP_SHADOW_FILTER = ` - drop-shadow(0px 1px 3px rgba(33, 23, 95, 0.30)) - drop-shadow(0px 2px 5px rgba(33, 23, 95, 0.25)) - drop-shadow(0px -2px 5px rgba(33, 23, 95, 0.25) -`; - export const lineChartDefault: LineChartProps = { data: [], + isEmpty: false, + xAccessor: (datum) => datum?.x, yAccessor: (datum) => datum?.y, - leftAxisTickFormat: abbreviateNumber, - leftAxisTickLabelProps: { - ...commonTickLabelProps, - textAnchor: 'end', - }, - bottomAxisTickFormat: (x) => dayjs(x).format('D MMM'), - bottomAxisTickLabelProps: { - ...commonTickLabelProps, - textAnchor: 'middle', - verticalAnchor: 'start', - }, + xScale: { type: 'time' }, + yScale: { type: 'log', nice: true, round: true, base: 2 }, + lineColor: colors.violet[500], areaColor: 'url(#line-gradient)', - gridColor: '#e0e0e0', + margin: { top: 0, right: 0, bottom: 0, left: 0 }, + + leftAxisProps: { + tickFormat: abbreviateNumber, + tickLabelProps: { + ...commonTickLabelProps, + textAnchor: 'end', + width: 50, + }, + computeNumTicks: () => 5, + hideAxisLine: true, + hideTicks: true, + }, + bottomAxisProps: { + tickFormat: (x) => dayjs(x).format('D MMM'), + tickLabelProps: { + ...commonTickLabelProps, + textAnchor: 'middle', + verticalAnchor: 'start', + }, + computeNumTicks: (width, _, margin, data) => { + const widthOfTick = 80; + const widthOfAxis = width - margin.right - margin.left; + const maxCountOfTicks = Math.ceil(widthOfAxis / widthOfTick); + const numOfTicks = roundToEven(maxCountOfTicks / 2); + 
return Math.min(numOfTicks, data.length - 1); + }, + hideAxisLine: true, + hideTicks: true, + }, + gridProps: { + rows: true, + columns: false, + stroke: '#e0e0e0', + computeNumTicks: () => 5, + lineStyle: {}, + }, + renderGradients: () => ( ), @@ -50,29 +81,28 @@ export const lineChartDefault: LineChartProps = { strokeWidth: 2, filter: GLYPH_DROP_SHADOW_FILTER, }, - renderTooltipGlyph: (props) => { - return ( - <> - - - - ); - }, + renderTooltipGlyph: (props) => , }; export function LineChart({ data, + isEmpty, + xAccessor = lineChartDefault.xAccessor, yAccessor = lineChartDefault.yAccessor, - renderTooltipContent, - margin, - leftAxisTickFormat = lineChartDefault.leftAxisTickFormat, - leftAxisTickLabelProps = lineChartDefault.leftAxisTickLabelProps, - bottomAxisTickFormat = lineChartDefault.bottomAxisTickFormat, - bottomAxisTickLabelProps = lineChartDefault.bottomAxisTickLabelProps, + xScale = lineChartDefault.xScale, + yScale = lineChartDefault.yScale, + maxYDomainForZeroData, + lineColor = lineChartDefault.lineColor, areaColor = lineChartDefault.areaColor, - gridColor = lineChartDefault.gridColor, + margin, + + leftAxisProps, + bottomAxisProps, + gridProps, + + popoverRenderer, renderGradients = lineChartDefault.renderGradients, toolbarVerticalCrosshairStyle = lineChartDefault.toolbarVerticalCrosshairStyle, renderTooltipGlyph = lineChartDefault.renderTooltipGlyph, @@ -82,13 +112,37 @@ export function LineChart({ // FYI: additional margins to show left and bottom axises const internalMargin = { top: (margin?.top ?? 0) + 30, - right: (margin?.right ?? 0) + 20, + right: (margin?.right ?? 0) + 30, bottom: (margin?.bottom ?? 0) + 35, left: (margin?.left ?? 
0) + 40, }; + const maxDataValue = useMaxDataValue(data, yAccessor); + const adaptedYScale = useAdaptYScaleToZeroValues(yScale, maxDataValue, maxYDomainForZeroData); + const accessors = { xAccessor, yAccessor }; + const { computeNumTicks: computeLeftAxisNumTicks, ...mergedLeftAxisProps } = useMergedProps>( + leftAxisProps, + lineChartDefault.leftAxisProps, + ); + + const { computeNumTicks: computeBottomAxisNumTicks, ...mergedBottomAxisProps } = useMergedProps< + AxisProps + >(bottomAxisProps, lineChartDefault.bottomAxisProps); + + const { computeNumTicks: computeGridNumTicks, ...mergedGridProps } = useMergedProps>( + gridProps, + lineChartDefault.gridProps, + ); + + // In case of no data we should render empty graph with axises + // but they don't render at all without any data. + // To handle this case we will render the same graph with fake data and hide bars + if (!data.length) { + return ; + } + return ( setShowGrid(true)} onMouseLeave={() => setShowGrid(false)}> @@ -97,54 +151,54 @@ export function LineChart({ {renderGradients?.()} - + {/* Left vertical line for y-axis */} + {/* Bottom horizontal line for x-axis */} + {showGrid && ( - + )} dataKey="line-chart-seria-01" data={data} - fill={areaColor} - curve={curveMonotoneX} - {...accessors} - /> - - dataKey="line-chart-seria-01" - data={data} - stroke={lineColor} + fill={!isEmpty ? areaColor : 'transparent'} curve={curveMonotoneX} + lineProps={{ stroke: !isEmpty ? 
lineColor : 'transparent' }} {...accessors} /> @@ -162,8 +216,10 @@ export function LineChart({ tooltipData?.nearestDatum && ( ) ); diff --git a/datahub-web-react/src/alchemy-components/components/LineChart/components.tsx b/datahub-web-react/src/alchemy-components/components/LineChart/components.tsx index fb6c0cf1ced784..904f4dbbc360d1 100644 --- a/datahub-web-react/src/alchemy-components/components/LineChart/components.tsx +++ b/datahub-web-react/src/alchemy-components/components/LineChart/components.tsx @@ -1,4 +1,8 @@ +import { colors } from '@src/alchemy-components/theme'; +import React, { useEffect, useRef } from 'react'; import styled from 'styled-components'; +import { GLYPH_DROP_SHADOW_FILTER } from './constants'; +import { TooltipGlyphProps } from './types'; export const ChartWrapper = styled.div` width: 100%; @@ -6,3 +10,27 @@ export const ChartWrapper = styled.div` position: relative; cursor: pointer; `; + +export const TooltipGlyph = ({ x, y }: TooltipGlyphProps) => { + const ref = useRef(null); + + // FYI: Change size of parent SVG to prevent showing window's horizontal scrolling + // There are no any another ways to do it without fixing the library + useEffect(() => { + if (ref.current) { + const parent = ref.current.closest('svg'); + + if (parent) { + parent.setAttribute('width', '1'); + parent.setAttribute('height', '1'); + } + } + }, [ref]); + + return ( + + + + + ); +}; diff --git a/datahub-web-react/src/alchemy-components/components/LineChart/constants.ts b/datahub-web-react/src/alchemy-components/components/LineChart/constants.ts new file mode 100644 index 00000000000000..d0952ded2be2e9 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/LineChart/constants.ts @@ -0,0 +1,5 @@ +export const GLYPH_DROP_SHADOW_FILTER = ` + drop-shadow(0px 1px 3px rgba(33, 23, 95, 0.30)) + drop-shadow(0px 2px 5px rgba(33, 23, 95, 0.25)) + drop-shadow(0px -2px 5px rgba(33, 23, 95, 0.25) +`; diff --git 
a/datahub-web-react/src/alchemy-components/components/LineChart/types.ts b/datahub-web-react/src/alchemy-components/components/LineChart/types.ts index cf45662ba7cf90..818a353720e6a5 100644 --- a/datahub-web-react/src/alchemy-components/components/LineChart/types.ts +++ b/datahub-web-react/src/alchemy-components/components/LineChart/types.ts @@ -1,22 +1,35 @@ -import { TickFormatter, TickLabelProps } from '@visx/axis'; +import { AxisScaleOutput } from '@visx/axis'; +import { ScaleConfig } from '@visx/scale'; import { Margin } from '@visx/xychart'; import { RenderTooltipGlyphProps } from '@visx/xychart/lib/components/Tooltip'; import React from 'react'; +import { AxisProps, GridProps } from '../BarChart/types'; export type LineChartProps = { data: DatumType[]; + isEmpty?: boolean; + xAccessor: (datum: DatumType) => string | number; yAccessor: (datum: DatumType) => number; - renderTooltipContent?: (datum: DatumType) => React.ReactNode; - margin?: Margin; - leftAxisTickFormat?: TickFormatter; - leftAxisTickLabelProps?: TickLabelProps; - bottomAxisTickFormat?: TickFormatter; - bottomAxisTickLabelProps?: TickLabelProps; + xScale?: ScaleConfig; + yScale?: ScaleConfig; + maxYDomainForZeroData?: number; + lineColor?: string; areaColor?: string; - gridColor?: string; + margin?: Margin; + + leftAxisProps?: AxisProps; + bottomAxisProps?: AxisProps; + gridProps?: GridProps; + + popoverRenderer?: (datum: DatumType) => React.ReactNode; renderGradients?: () => React.ReactNode; toolbarVerticalCrosshairStyle?: React.SVGProps; renderTooltipGlyph?: (props: RenderTooltipGlyphProps) => React.ReactNode | undefined; }; + +export type TooltipGlyphProps = { + x: number; + y: number; +}; diff --git a/datahub-web-react/src/alchemy-components/components/LineChart/utils.ts b/datahub-web-react/src/alchemy-components/components/LineChart/utils.ts new file mode 100644 index 00000000000000..1f3e8e55a9e663 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/LineChart/utils.ts @@ 
-0,0 +1,4 @@ +export function roundToEven(value: number) { + const rounded = Math.ceil(value); + return rounded % 2 === 0 ? rounded : rounded + 1; +} diff --git a/datahub-web-react/src/alchemy-components/components/Loader/Loader.tsx b/datahub-web-react/src/alchemy-components/components/Loader/Loader.tsx new file mode 100644 index 00000000000000..53ec36d34ee587 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Loader/Loader.tsx @@ -0,0 +1,26 @@ +import React from 'react'; +import { LoaderBackRing, LoaderWrapper, StyledLoadingOutlined } from './components'; +import { LoaderSizes, RingWidths } from './constants'; +import { LoaderProps } from './types'; + +export const loaderDefault: LoaderProps = { + size: 'md', + justifyContent: 'center', + alignItems: 'none', +}; + +export function Loader({ + size = loaderDefault.size, + justifyContent = loaderDefault.justifyContent, + alignItems = loaderDefault.alignItems, +}: LoaderProps) { + const loaderSize = LoaderSizes[size || 'md']; + const ringWidth = RingWidths[size || 'md']; + + return ( + + + + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/Loader/Loading.stories.tsx b/datahub-web-react/src/alchemy-components/components/Loader/Loading.stories.tsx new file mode 100644 index 00000000000000..8d06d54c3201fb --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Loader/Loading.stories.tsx @@ -0,0 +1,82 @@ +import { BADGE } from '@geometricpanda/storybook-addon-badges'; +import { GridList } from '@src/alchemy-components/.docs/mdx-components'; +import type { Meta, StoryObj } from '@storybook/react'; +import React from 'react'; +import { Loader, loaderDefault } from './Loader'; + +const meta = { + title: 'Components / Loader', + component: Loader, + + // Display Properties + parameters: { + // layout: 'flex', + badges: [BADGE.EXPERIMENTAL], + docs: { + subtitle: 'A component that is used to show the loading spinner', + }, + }, + + // Component-level argTypes + 
argTypes: { + size: { + description: 'The size of the Loader', + type: 'string', + options: ['xs', 'sm', 'md', 'lg', 'xl'], + table: { + defaultValue: { summary: loaderDefault.size }, + }, + control: { + type: 'select', + }, + }, + justifyContent: { + description: 'The option to configure css-option: justify-content', + type: 'string', + options: ['center', 'flex-start'], + table: { + defaultValue: { summary: loaderDefault.justifyContent }, + }, + control: { + type: 'select', + }, + }, + alignItems: { + description: 'The option to configure css-option: align-items', + type: 'string', + options: ['center', 'flex-start', 'none'], + table: { + defaultValue: { summary: loaderDefault.alignItems }, + }, + control: { + type: 'select', + }, + }, + }, + + // Define defaults + args: loaderDefault, +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +export const sandbox: Story = { + args: { + justifyContent: 'flex-start', + }, + + tags: ['dev'], + render: (props) => , +}; + +export const sizes = () => ( + + + + + + + +); diff --git a/datahub-web-react/src/alchemy-components/components/Loader/components.tsx b/datahub-web-react/src/alchemy-components/components/Loader/components.tsx new file mode 100644 index 00000000000000..161d342204e5da --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Loader/components.tsx @@ -0,0 +1,34 @@ +import { LoadingOutlined } from '@ant-design/icons'; +import { colors } from '@src/alchemy-components/theme'; +import styled from 'styled-components'; +import { AlignItemsOptions, JustifyContentOptions } from './types'; + +export const LoaderWrapper = styled.div<{ + $marginTop?: number; + $justifyContent: JustifyContentOptions; + $alignItems: AlignItemsOptions; +}>` + display: flex; + justify-content: ${(props) => props.$justifyContent}; + align-items: ${(props) => props.$alignItems}; + margin: auto; + width: 100%; + position: relative; +`; + +export const StyledLoadingOutlined = styled(LoadingOutlined)<{ 
$height: number }>` + font-size: ${(props) => props.$height}px; + height: ${(props) => props.$height}px; + position: absolute; + + svg { + fill: ${colors.violet[500]}; + } +`; + +export const LoaderBackRing = styled.span<{ $height: number; $ringWidth: number }>` + width: ${(props) => props.$height}px; + height: ${(props) => props.$height}px; + border: ${(props) => props.$ringWidth}px solid ${colors.gray[100]}; + border-radius: 50%; +`; diff --git a/datahub-web-react/src/alchemy-components/components/Loader/constants.ts b/datahub-web-react/src/alchemy-components/components/Loader/constants.ts new file mode 100644 index 00000000000000..383046fa5a72c0 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Loader/constants.ts @@ -0,0 +1,16 @@ +export const LoaderSizes = { + xs: 14, + sm: 22, + md: 38, + lg: 54, + xl: 78, +}; + +// FYI: we have to adjust the width of border (ring) to size of the svg loading icon +export const RingWidths = { + xs: 1, + sm: 2, + md: 3, + lg: 4, + xl: 6, +}; diff --git a/datahub-web-react/src/alchemy-components/components/Loader/index.ts b/datahub-web-react/src/alchemy-components/components/Loader/index.ts new file mode 100644 index 00000000000000..d70278852510b5 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Loader/index.ts @@ -0,0 +1 @@ +export { Loader } from './Loader'; diff --git a/datahub-web-react/src/alchemy-components/components/Loader/types.ts b/datahub-web-react/src/alchemy-components/components/Loader/types.ts new file mode 100644 index 00000000000000..ede553b11e73fb --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Loader/types.ts @@ -0,0 +1,11 @@ +import { SizeOptions } from '@src/alchemy-components/theme/config'; + +export type JustifyContentOptions = 'center' | 'flex-start'; + +export type AlignItemsOptions = 'center' | 'flex-start' | 'none'; + +export type LoaderProps = { + size?: SizeOptions; + justifyContent?: JustifyContentOptions; + alignItems?: 
AlignItemsOptions; +}; diff --git a/datahub-web-react/src/alchemy-components/components/Pills/Pill.tsx b/datahub-web-react/src/alchemy-components/components/Pills/Pill.tsx index 898ec89fce5957..86a12149272047 100644 --- a/datahub-web-react/src/alchemy-components/components/Pills/Pill.tsx +++ b/datahub-web-react/src/alchemy-components/components/Pills/Pill.tsx @@ -1,6 +1,5 @@ import { Icon } from '@components'; import React from 'react'; - import { PillContainer, PillText } from './components'; import { PillProps } from './types'; @@ -23,6 +22,8 @@ export function Pill({ onClickRightIcon, onClickLeftIcon, onPillClick, + customStyle, + customIconRenderer, }: PillProps) { return ( - {leftIcon && } - {label} + {customIconRenderer + ? customIconRenderer() + : leftIcon && } + {label} {rightIcon && } ); diff --git a/datahub-web-react/src/alchemy-components/components/Pills/components.ts b/datahub-web-react/src/alchemy-components/components/Pills/components.ts index 79734561a92da6..bd3b8e0e5578e7 100644 --- a/datahub-web-react/src/alchemy-components/components/Pills/components.ts +++ b/datahub-web-react/src/alchemy-components/components/Pills/components.ts @@ -30,4 +30,6 @@ export const PillText = styled.span({ whiteSpace: 'nowrap', overflow: 'hidden', textOverflow: 'ellipsis', + fontSize: '12px', + fontWeight: 400, }); diff --git a/datahub-web-react/src/alchemy-components/components/Pills/types.ts b/datahub-web-react/src/alchemy-components/components/Pills/types.ts index 17d4d12465e1ef..51685019f79b9c 100644 --- a/datahub-web-react/src/alchemy-components/components/Pills/types.ts +++ b/datahub-web-react/src/alchemy-components/components/Pills/types.ts @@ -12,6 +12,8 @@ export interface PillProps extends HTMLAttributes, PillStyleProps { label: string; rightIcon?: string; leftIcon?: string; + customStyle?: React.CSSProperties; + customIconRenderer?: () => void; onClickRightIcon?: (e: React.MouseEvent) => void; onClickLeftIcon?: (e: React.MouseEvent) => void; onPillClick?: 
(e: React.MouseEvent) => void; diff --git a/datahub-web-react/src/alchemy-components/components/SearchBar/SearchBar.tsx b/datahub-web-react/src/alchemy-components/components/SearchBar/SearchBar.tsx index f39f761058d8c7..8e9eea5eb907ca 100644 --- a/datahub-web-react/src/alchemy-components/components/SearchBar/SearchBar.tsx +++ b/datahub-web-react/src/alchemy-components/components/SearchBar/SearchBar.tsx @@ -4,7 +4,7 @@ import { StyledSearchBar } from './components'; import { SearchBarProps } from './types'; export const searchBarDefaults: SearchBarProps = { - placeholder: 'Search..', + placeholder: 'Search...', value: '', width: '272px', allowClear: true, diff --git a/datahub-web-react/src/alchemy-components/components/Select/BasicSelect.tsx b/datahub-web-react/src/alchemy-components/components/Select/BasicSelect.tsx index b49159ba38a758..c4b6404cc0f54e 100644 --- a/datahub-web-react/src/alchemy-components/components/Select/BasicSelect.tsx +++ b/datahub-web-react/src/alchemy-components/components/Select/BasicSelect.tsx @@ -1,75 +1,31 @@ -import { Button, Icon, Pill, Text } from '@components'; +import { Button, Text } from '@components'; import { isEqual } from 'lodash'; import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { ActionButtonsContainer, Container, - DescriptionContainer, Dropdown, FooterBase, LabelContainer, - LabelsWrapper, OptionContainer, OptionLabel, OptionList, - Placeholder, SearchIcon, SearchInput, SearchInputContainer, SelectAllOption, SelectBase, SelectLabel, - SelectValue, + SelectLabelContainer, StyledCancelButton, StyledCheckbox, StyledClearButton, + StyledIcon, } from './components'; -import { ActionButtonsProps, SelectLabelDisplayProps, SelectOption, SelectProps } from './types'; +import SelectLabelRenderer from './private/SelectLabelRenderer/SelectLabelRenderer'; +import { ActionButtonsProps, SelectOption, SelectProps } from './types'; import { getFooterButtonSize } from './utils'; -const 
SelectLabelDisplay = ({ - selectedValues, - options, - placeholder, - isMultiSelect, - removeOption, - disabledValues, - showDescriptions, -}: SelectLabelDisplayProps) => { - const selectedOptions = options.filter((opt) => selectedValues.includes(opt.value)); - return ( - - {!!selectedOptions.length && - isMultiSelect && - selectedOptions.map((o) => { - const isDisabled = disabledValues?.includes(o.value); - return ( - { - e.stopPropagation(); - removeOption?.(o); - }} - clickable={!isDisabled} - /> - ); - })} - {!selectedValues.length && {placeholder}} - {!isMultiSelect && ( - <> - {selectedOptions[0]?.label} - {showDescriptions && !!selectedValues.length && ( - {selectedOptions[0]?.description} - )} - - )} - - ); -}; - const SelectActionButtons = ({ selectedValues, isOpen, @@ -77,14 +33,13 @@ const SelectActionButtons = ({ isReadOnly, showClear, handleClearSelection, - fontSize = 'md', }: ActionButtonsProps) => { return ( {showClear && selectedValues.length > 0 && !isDisabled && !isReadOnly && ( - + )} - + ); }; @@ -110,6 +65,7 @@ export const BasicSelect = ({ options = selectDefaults.options, label = selectDefaults.label, values = [], + initialValues, onCancel, onUpdate, showSearch = selectDefaults.showSearch, @@ -124,11 +80,12 @@ export const BasicSelect = ({ showSelectAll = selectDefaults.showSelectAll, selectAllLabel = selectDefaults.selectAllLabel, showDescriptions = selectDefaults.showDescriptions, + icon, ...props }: SelectProps) => { const [searchQuery, setSearchQuery] = useState(''); const [isOpen, setIsOpen] = useState(false); - const [selectedValues, setSelectedValues] = useState(values); + const [selectedValues, setSelectedValues] = useState(initialValues || values); const [tempValues, setTempValues] = useState(values); const selectRef = useRef(null); const [areAllSelected, setAreAllSelected] = useState(false); @@ -237,22 +194,24 @@ export const BasicSelect = ({ fontSize={size} {...props} > - + + {icon && } + + @@ -305,9 +264,16 @@ export const 
BasicSelect = ({ ) : ( - - {option.label} - + + {option.icon} + + {option.label} + + {!!option.description && ( {option.description} diff --git a/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedOption.tsx b/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedOption.tsx index 8a7d3670b2b1b9..2de5c4284fe4a4 100644 --- a/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedOption.tsx +++ b/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedOption.tsx @@ -85,6 +85,7 @@ interface OptionProps { isMultiSelect?: boolean; isLoadingParentChildList?: boolean; setSelectedOptions: React.Dispatch>; + hideParentCheckbox?: boolean; } export const NestedOption = ({ @@ -99,6 +100,7 @@ export const NestedOption = ({ areParentsSelectable, isLoadingParentChildList, setSelectedOptions, + hideParentCheckbox, }: OptionProps) => { const [autoSelectChildren, setAutoSelectChildren] = useState(false); const [loadingParentUrns, setLoadingParentUrns] = useState([]); @@ -240,7 +242,12 @@ export const NestedOption = ({ }} isSelected={!isMultiSelect && isSelected} // added hack to show cursor in wait untill we get the inline spinner - style={{ width: '100%', cursor: loadingParentUrns.includes(option.value) ? 'wait' : 'pointer' }} + style={{ + width: '100%', + cursor: loadingParentUrns.includes(option.value) ? 'wait' : 'pointer', + display: 'flex', + justifyContent: hideParentCheckbox ? 
'space-between' : 'normal', + }} > {option.isParent && {option.label}} {!option.isParent && <>{option.label}} @@ -262,27 +269,29 @@ export const NestedOption = ({ style={{ cursor: 'pointer', marginLeft: '4px' }} /> )} - { - e.preventDefault(); - if (isImplicitlySelected) { - return; + {!(hideParentCheckbox && option.isParent) && ( + { + e.preventDefault(); + if (isImplicitlySelected) { + return; } - } - selectOption(); - }} - disabled={isImplicitlySelected} - /> + e.stopPropagation(); + if (isParentMissingChildren) { + loadData?.(option); + if (!areParentsSelectable) { + setAutoSelectChildren(true); + } + } + selectOption(); + }} + disabled={isImplicitlySelected} + /> + )} {isOpen && ( diff --git a/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedSelect.tsx b/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedSelect.tsx index 744c7bfcfec0d2..d2748658e161c7 100644 --- a/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedSelect.tsx +++ b/datahub-web-react/src/alchemy-components/components/Select/Nested/NestedSelect.tsx @@ -1,11 +1,11 @@ import React, { useCallback, useEffect, useRef, useState } from 'react'; import styled from 'styled-components'; - -import { Icon, Pill } from '@components'; +import { Icon, Pill, colors } from '@components'; import { ActionButtonsContainer, Container, + CountBadge, Dropdown, OptionList, Placeholder, @@ -30,17 +30,35 @@ const LabelDisplayWrapper = styled.div` max-height: 125px; min-height: 16px; `; +const StyledCountBadgeContainer = styled.div` + display: flex; + align-items: center; + gap: 4px; + color: ${colors.gray[1800]}; +`; interface SelectLabelDisplayProps { selectedOptions: SelectOption[]; placeholder: string; handleOptionChange: (node: SelectOption) => void; + showCount?: boolean; } -const SelectLabelDisplay = ({ selectedOptions, placeholder, handleOptionChange }: SelectLabelDisplayProps) => { +const SelectLabelDisplay = ({ + selectedOptions, + placeholder, + 
handleOptionChange, + showCount, +}: SelectLabelDisplayProps) => { return ( - {!!selectedOptions.length && + {showCount && selectedOptions.length > 0 ? ( + + {placeholder} + {selectedOptions.length} + + ) : ( + !!selectedOptions.length && selectedOptions.map((o) => ( - ))} + )) + )} {!selectedOptions.length && {placeholder}} ); @@ -64,6 +83,7 @@ export interface ActionButtonsProps { isDisabled: boolean; isReadOnly: boolean; handleClearSelection: () => void; + showCount?: boolean; } const SelectActionButtons = ({ @@ -73,10 +93,11 @@ const SelectActionButtons = ({ isReadOnly, handleClearSelection, fontSize = 'md', + showCount = false, }: ActionButtonsProps) => { return ( - {!!selectedOptions.length && !isDisabled && !isReadOnly && ( + {!showCount && !!selectedOptions.length && !isDisabled && !isReadOnly && ( { const [searchQuery, setSearchQuery] = useState(''); @@ -151,6 +178,18 @@ export const NestedSelect = ({ const [selectedOptions, setSelectedOptions] = useState(initialValues); const selectRef = useRef(null); + useEffect(() => { + if (initialValues && shouldAlwaysSyncParentValues) { + const filteredOptions = selectedOptions.filter((option) => + initialValues.some((initial) => initial.value === option.value), + ); + if (filteredOptions.length !== selectedOptions.length) { + setSelectedOptions(filteredOptions); + } + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [initialValues]); + // TODO: handle searching inside of a nested component on the FE only const handleDocumentClick = useCallback((e: MouseEvent) => { @@ -257,12 +296,14 @@ export const NestedSelect = ({ onClick={handleSelectClick} fontSize={size} data-testid="nested-options-dropdown-container" + width={props.width} {...props} > {isOpen && ( @@ -302,6 +344,7 @@ export const NestedSelect = ({ setSelectedOptions={setSelectedOptions} areParentsSelectable={areParentsSelectable} isLoadingParentChildList={isLoadingParentChildList} + hideParentCheckbox={hideParentCheckbox} /> ))} diff --git 
a/datahub-web-react/src/alchemy-components/components/Select/Select.stories.tsx b/datahub-web-react/src/alchemy-components/components/Select/Select.stories.tsx index 0ec20b15e771ab..fefdb05c375a2f 100644 --- a/datahub-web-react/src/alchemy-components/components/Select/Select.stories.tsx +++ b/datahub-web-react/src/alchemy-components/components/Select/Select.stories.tsx @@ -5,6 +5,7 @@ import React from 'react'; import { Select, selectDefaults } from './Select'; import { SimpleSelect } from './SimpleSelect'; import { SelectSizeOptions } from './types'; +import { AVAILABLE_ICONS } from '../Icon'; // Auto Docs const meta: Meta = { @@ -158,6 +159,33 @@ const meta: Meta = { defaultValue: { summary: selectDefaults.showDescriptions?.toString() }, }, }, + icon: { + description: `The name of the icon to display.`, + type: 'string', + options: AVAILABLE_ICONS, + table: { + defaultValue: { summary: 'undefined' }, + }, + control: { + type: 'select', + }, + }, + optionSwitchable: { + description: 'Set to `true` to uselect option by clicking on it', + type: 'boolean', + table: { + defaultValue: { summary: 'undefined' }, + }, + control: { + type: 'boolean', + }, + }, + selectLabelProps: { + description: 'Additional props for rendering of select label', + table: { + defaultValue: { summary: 'undefined' }, + }, + }, }, // Define defaults @@ -212,6 +240,7 @@ export const simpleSelectSandbox: Story = { isMultiSelect={props.isMultiSelect} placeholder={props.placeholder} disabledValues={props.disabledValues} + icon={props.icon} /> ), }; @@ -321,6 +350,18 @@ export const simpleSelectSizes = () => ( ); +export const simpleSelectWithIcon = () => ( + +); + // Basic story is what is displayed 1st in storybook & is used as the code sandbox // Pass props to this so that it can be customized via the UI props panel export const BasicSelectSandbox: Story = { @@ -341,6 +382,7 @@ export const BasicSelectSandbox: Story = { width={props.width} isMultiSelect={props.isMultiSelect} 
placeholder={props.placeholder} + icon={props.icon} /> ), }; @@ -429,3 +471,16 @@ export const footerActions = () => ( /> ); + +export const withIcon = () => ( +
, }; +// Scrollable Table with a maximum height export const withScroll = () => (
( /> ); +// Table with custom column widths export const withCustomColumnWidths = () => (
( /> ); +// Table with column sorting functionality export const withColumnSorting = () => (
( /> ); +// Table without a header export const withoutHeader = () => (
( showHeader={false} /> ); + +// Table with clickable rows +export const ClickableRows = () => { + const [selectedRow, setSelectedRow] = useState(); + + return ( +
setSelectedRow(record.key)} + rowClassName={(record) => (record.key === selectedRow ? 'selected-row' : '')} + /> + ); +}; + +// Define columns for the grouped table. Each column specifies its title, data key, and optional functionalities like sorting or custom rendering. +const groupByColumns = [ + { + key: 'id', + title: 'ID', + width: '10%', + render: (record) =>
{record.id}
, + }, + { + key: 'name', + title: 'Name', + width: '30%', + render: (record) =>
{record.name}
, + sorter: (a: any, b: any) => a - b, + }, + { + key: 'status', + title: 'Status', + width: '20%', + sorter: (a: any, b: any) => a - b, + render: (record) =>
{record.status}
, + }, + { + key: 'createdAt', + title: 'Created At', + width: '25%', + render: (record) =>
{record.createdAt}
, + }, + { + title: '', + dataIndex: '', + key: 'actions', + width: '15%', + render: (record) => { + return !record.children &&
actions
; + }, + alignment: 'right' as AlignmentOptions, + }, +]; + +// Hierarchical data with parent rows and nested child rows for group-by table functionality. +const groupByData = [ + { + id: '1', + name: 'Parent Row 1', + status: 'Active', + createdAt: '2024-11-20', + children: [ + { + id: '1.1', + name: 'Child Row 1.1', + status: 'Active', + createdAt: '2024-11-21', + }, + { + id: '1.2', + name: 'Child Row 1.2', + status: 'Inactive', + createdAt: '2024-11-22', + }, + ], + }, + { + id: '2', + name: 'Parent Row 2', + status: 'Inactive', + createdAt: '2024-11-19', + children: [ + { + id: '2.1', + name: 'Child Row 2.1', + status: 'Active', + createdAt: '2024-11-21', + }, + { + id: '2.2', + name: 'Child Row 2.2', + status: 'Inactive', + createdAt: '2024-11-22', + }, + ], + }, +]; + +// Table with group-by functionality and expandable rows +export const WithGroupByFunctionality = () => { + const [expandedRowKeys, setExpandedRowKeys] = useState(['Parent Row 1']); + const [sortedOptions, setSortedOptions] = useState<{ sortColumn: string; sortOrder: SortingState }>({ + sortColumn: '', + sortOrder: SortingState.ORIGINAL, + }); + const onExapand = (record: any) => { + const key = record.name; + setExpandedRowKeys((prev: any) => (prev.includes(key) ? prev.filter((k) => k !== key) : [...prev, key])); + }; + const getSortedRecord = (record) => { + const { sortOrder, sortColumn } = sortedOptions; + if (sortOrder === SortingState.ORIGINAL) { + return record.children; + } + + const sortFunctions = { + status: { + [SortingState.ASCENDING]: (a, b) => a.name.localeCompare(b.name), + [SortingState.DESCENDING]: (a, b) => b.name.localeCompare(a.name), + }, + name: { + [SortingState.ASCENDING]: (a, b) => a.name.localeCompare(b.name), + [SortingState.DESCENDING]: (a, b) => b.name.localeCompare(a.name), + }, + }; + const sortFunction = sortFunctions[sortColumn]?.[sortOrder]; + + const data = sortFunction ? 
[...record.children].sort(sortFunction) : record.children; + + return data; + }; + + return ( +
+ setSortedOptions({ sortColumn, sortOrder }) + } + expandable={{ + expandedRowRender: (record) => { + let sortedRecord = record.children; + if (sortedOptions.sortColumn && sortedOptions.sortOrder) { + sortedRecord = getSortedRecord(record); + } + + return ( +
+ ); + }, + rowExpandable: () => true, + expandIconPosition: 'end', + expandedRowKeys, + }} + onExpand={onExapand} + /> + ); +}; diff --git a/datahub-web-react/src/alchemy-components/components/Table/Table.tsx b/datahub-web-react/src/alchemy-components/components/Table/Table.tsx index 11e598f8d4e0f7..92a75c7a9f0b1a 100644 --- a/datahub-web-react/src/alchemy-components/components/Table/Table.tsx +++ b/datahub-web-react/src/alchemy-components/components/Table/Table.tsx @@ -1,6 +1,7 @@ +import { CaretUp, CaretDown } from 'phosphor-react'; import { LoadingOutlined } from '@ant-design/icons'; import { Text } from '@components'; -import React, { useState } from 'react'; +import React, { useEffect, useState } from 'react'; import { BaseTable, HeaderContainer, @@ -13,8 +14,8 @@ import { TableHeaderCell, TableRow, } from './components'; -import { TableProps } from './types'; -import { getSortedData, handleActiveSort, renderCell, SortingState } from './utils'; +import { SortingState, TableProps } from './types'; +import { getSortedData, handleActiveSort, renderCell } from './utils'; export const tableDefaults: TableProps = { columns: [], @@ -23,6 +24,7 @@ export const tableDefaults: TableProps = { isLoading: false, isScrollable: false, maxHeight: '100%', + isBorderless: false, }; export const Table = ({ @@ -32,12 +34,28 @@ export const Table = ({ isLoading = tableDefaults.isLoading, isScrollable = tableDefaults.isScrollable, maxHeight = tableDefaults.maxHeight, + expandable, + isBorderless = tableDefaults.isBorderless, + onRowClick, + onExpand, + rowClassName, + handleSortColumnChange = undefined, + rowRefs, + headerRef, ...props }: TableProps) => { const [sortColumn, setSortColumn] = useState(null); const [sortOrder, setSortOrder] = useState(SortingState.ORIGINAL); const sortedData = getSortedData(columns, data, sortColumn, sortOrder); + const isRowClickable = !!onRowClick; + + useEffect(() => { + if (handleSortColumnChange && sortOrder && sortColumn) { + 
handleSortColumnChange({ sortColumn, sortOrder }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [sortOrder, sortColumn]); if (isLoading) { return ( @@ -49,19 +67,24 @@ export const Table = ({ } return ( - + + {/* Render the table header if enabled */} {showHeader && ( - + - {columns.map((column) => ( - - + {/* Map through columns to create header cells */} + {columns.map((column, index) => ( + + {column.title} - {column.sorter && ( + {column.sorter && ( // Render sort icons if the column is sortable - column.sorter && handleActiveSort( column.key, sortColumn, @@ -70,6 +93,7 @@ export const Table = ({ ) } > + {/* Sort icons for ascending and descending */} ({ ))} + {/* Placeholder for expandable icon if enabled */} )} + {/* Render table body with rows and cells */} - {sortedData.map((row, index) => ( - - {columns.map((column) => { - return ( - - {renderCell(column, row, index)} - - ); - })} - - ))} + {sortedData.map((row: any, index) => { + const isExpanded = expandable?.expandedRowKeys?.includes(row?.name); // Check if row is expanded + const canExpand = expandable?.rowExpandable?.(row); // Check if row is expandable + + return ( + <> + {/* Render the main row */} + { + if (canExpand) onExpand?.(row); // Handle row expansion + onRowClick?.(row); // Handle row click + }} + className={rowClassName?.(row)} // Add row-specific class + ref={(el) => { + if (rowRefs && el) { + const currentRefs = rowRefs.current; + currentRefs[index] = el; + } + }} + isRowClickable={isRowClickable} + > + {/* Render each cell in the row */} + + {columns.map((column, i) => { + return ( + + {/* Add expandable icon if applicable or render row */} + {columns.length - 1 === i && canExpand ? ( +
+ {isExpanded ? ( + // Expanded icon + ) : ( + // Collapsed icon + )} +
+ ) : ( + renderCell(column, row, index) + )} +
+ ); + })} +
+ {/* Render expanded content if row is expanded */} + {isExpanded && expandable?.expandedRowRender && ( + + +
+ {expandable.expandedRowRender(row, index)} {/* Expanded content */} +
+
+
+ )} + + ); + })} diff --git a/datahub-web-react/src/alchemy-components/components/Table/components.ts b/datahub-web-react/src/alchemy-components/components/Table/components.ts index 8908256a81ddf2..fae98fd1e97293 100644 --- a/datahub-web-react/src/alchemy-components/components/Table/components.ts +++ b/datahub-web-react/src/alchemy-components/components/Table/components.ts @@ -1,15 +1,19 @@ import { Icon } from '@components'; -import { colors, radius, spacing, typography } from '@src/alchemy-components/theme'; +import { colors, radius, spacing, typography, borders } from '@src/alchemy-components/theme'; import { AlignmentOptions } from '@src/alchemy-components/theme/config'; import styled from 'styled-components'; -export const TableContainer = styled.div<{ isScrollable?: boolean; maxHeight?: string }>( - ({ isScrollable, maxHeight }) => ({ - borderRadius: radius.lg, - border: `1px solid ${colors.gray[1400]}`, +export const TableContainer = styled.div<{ isScrollable?: boolean; maxHeight?: string; isBorderless?: boolean }>( + ({ isScrollable, maxHeight, isBorderless }) => ({ + borderRadius: isBorderless ? radius.none : radius.lg, + border: isBorderless ? borders.none : `1px solid ${colors.gray[1400]}`, overflow: isScrollable ? 
'auto' : 'hidden', width: '100%', maxHeight: maxHeight || '100%', + + '& .selected-row': { + background: `${colors.gray[100]} !important`, + }, }), ); @@ -26,37 +30,53 @@ export const TableHeader = styled.thead({ zIndex: 100, }); -export const TableHeaderCell = styled.th<{ width?: string }>(({ width }) => ({ - padding: `${spacing.sm} ${spacing.md}`, - color: colors.gray[600], - fontSize: typography.fontSizes.sm, - fontWeight: typography.fontWeights.medium, - textAlign: 'start', - width: width || 'auto', -})); +export const TableHeaderCell = styled.th<{ width?: string; shouldAddRightBorder?: boolean }>( + ({ width, shouldAddRightBorder }) => ({ + padding: `${spacing.sm} ${spacing.md}`, + color: colors.gray[600], + fontSize: typography.fontSizes.sm, + fontWeight: typography.fontWeights.medium, + textAlign: 'start', + width: width || 'auto', + borderRight: shouldAddRightBorder ? `1px solid ${colors.gray[1400]}` : borders.none, + }), +); -export const HeaderContainer = styled.div({ +export const HeaderContainer = styled.div<{ alignment?: AlignmentOptions }>(({ alignment }) => ({ display: 'flex', alignItems: 'center', gap: spacing.sm, -}); + fontSize: '12px', + fontWeight: 700, + justifyContent: alignment, +})); -export const TableRow = styled.tr({ - '&:last-child': { - '& td': { - borderBottom: 'none', +export const TableRow = styled.tr<{ canExpand?: boolean; isRowClickable?: boolean }>( + ({ canExpand, isRowClickable }) => ({ + background: canExpand ? colors.gray[100] : 'transparent', + cursor: isRowClickable ? 
'pointer' : 'normal', + '&:last-child': { + '& td': { + borderBottom: 'none', + }, }, - }, - '& td:first-child': { - fontWeight: typography.fontWeights.medium, - color: colors.gray[600], - }, -}); + '& td:first-child': { + fontWeight: typography.fontWeights.medium, + color: colors.gray[600], + }, + }), +); -export const TableCell = styled.td<{ width?: string; alignment?: AlignmentOptions }>(({ width, alignment }) => ({ - padding: spacing.md, - borderBottom: `1px solid ${colors.gray[1400]}`, +export const TableCell = styled.td<{ + width?: string; + alignment?: AlignmentOptions; + isGroupHeader?: boolean; +}>(({ width, alignment, isGroupHeader }) => ({ + padding: isGroupHeader + ? `${spacing.xsm} ${spacing.xsm} ${spacing.xsm} ${spacing.md}` + : `${spacing.md} ${spacing.xsm} ${spacing.md} ${spacing.md}`, + borderBottom: isGroupHeader ? `1px solid ${colors.gray[200]}` : `1px solid ${colors.gray[100]}`, color: colors.gray[1700], fontSize: typography.fontSizes.md, fontWeight: typography.fontWeights.normal, diff --git a/datahub-web-react/src/alchemy-components/components/Table/types.ts b/datahub-web-react/src/alchemy-components/components/Table/types.ts index b3e0357d5cf147..1a980c7c81b4ca 100644 --- a/datahub-web-react/src/alchemy-components/components/Table/types.ts +++ b/datahub-web-react/src/alchemy-components/components/Table/types.ts @@ -18,4 +18,27 @@ export interface TableProps extends TableHTMLAttributes { isLoading?: boolean; isScrollable?: boolean; maxHeight?: string; + isBorderless?: boolean; + isExpandedInnerTable?: boolean; + expandable?: ExpandableProps; + onRowClick?: (record: T) => void; + rowClassName?: (record: T) => string; + onExpand?: (record: T) => void; + handleSortColumnChange?: ({ sortColumn, sortOrder }: { sortColumn: string; sortOrder: SortingState }) => void; + rowRefs?: React.MutableRefObject; + headerRef?: React.RefObject; +} + +export interface ExpandableProps { + expandedRowRender?: (record: T, index: number) => React.ReactNode; + 
rowExpandable?: (record: T) => boolean; + defaultExpandedRowKeys?: string[]; + expandIconPosition?: 'start' | 'end'; // Configurable position of the expand icon + expandedRowKeys?: string[]; +} + +export enum SortingState { + ASCENDING = 'ascending', + DESCENDING = 'descending', + ORIGINAL = 'original', } diff --git a/datahub-web-react/src/alchemy-components/components/Table/utils.ts b/datahub-web-react/src/alchemy-components/components/Table/utils.ts index c76494d32ca633..601af2beea8e21 100644 --- a/datahub-web-react/src/alchemy-components/components/Table/utils.ts +++ b/datahub-web-react/src/alchemy-components/components/Table/utils.ts @@ -1,10 +1,4 @@ -import { Column } from './types'; - -export enum SortingState { - ASCENDING = 'ascending', - DESCENDING = 'descending', - ORIGINAL = 'original', -} +import { Column, SortingState } from './types'; export const handleActiveSort = ( key: string, diff --git a/datahub-web-react/src/alchemy-components/components/Timeline/Timeline.stories.tsx b/datahub-web-react/src/alchemy-components/components/Timeline/Timeline.stories.tsx new file mode 100644 index 00000000000000..191c0e522c17b0 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Timeline/Timeline.stories.tsx @@ -0,0 +1,69 @@ +import { BADGE } from '@geometricpanda/storybook-addon-badges'; +import type { Meta, StoryObj } from '@storybook/react'; +import React from 'react'; +import { Timeline } from './Timeline'; + +// Auto Docs +const meta = { + title: 'Components / Timeline', + component: Timeline, + + // Display Properties + parameters: { + layout: 'centered', + badges: [BADGE.EXPERIMENTAL], + docs: { + subtitle: 'Vertical display timeline.', + }, + }, + + // Component-level argTypes + argTypes: { + items: { + description: 'A list of items to render in the timeline', + }, + renderContent: { + description: "A function to render a content of the timeline's item", + }, + renderDot: { + description: "A function to render a dot of the timeline's 
item", + }, + }, + + // Define default args + args: { + items: [ + { + key: '1', + }, + { + key: '2', + }, + { + key: '3', + }, + ], + renderContent: (item) => `Content for item ${item.key}`, + renderDot: (item) => { + if (item.key === '2') { + return
; + } + return undefined; + }, + }, +} satisfies Meta; + +export default meta; + +// Stories + +type Story = StoryObj; + +// Basic story is what is displayed 1st in storybook +// Pass props to this so that it can be customized via the UI props panel +export const sandbox: Story = { + tags: ['dev'], + render: (props) => { + return ; + }, +}; diff --git a/datahub-web-react/src/alchemy-components/components/Timeline/Timeline.tsx b/datahub-web-react/src/alchemy-components/components/Timeline/Timeline.tsx new file mode 100644 index 00000000000000..d6d994430f545b --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Timeline/Timeline.tsx @@ -0,0 +1,20 @@ +import { Timeline as AntdTimeline } from 'antd'; +import React from 'react'; +import { StyledAntdTimeline } from './components'; +import { BaseItemType, TimelineProps } from './types'; + +export const Timeline = ({ + items, + renderDot, + renderContent, +}: TimelineProps) => { + return ( + + {items.map((item) => ( + + {renderContent(item)} + + ))} + + ); +}; diff --git a/datahub-web-react/src/alchemy-components/components/Timeline/components.tsx b/datahub-web-react/src/alchemy-components/components/Timeline/components.tsx new file mode 100644 index 00000000000000..cbcf6540149293 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Timeline/components.tsx @@ -0,0 +1,14 @@ +import { colors } from '@src/alchemy-components/theme'; +import { Timeline as AntdTimeline } from 'antd'; +import styled from 'styled-components'; + +export const StyledAntdTimeline = styled(AntdTimeline)` + .ant-timeline-item-head { + padding: 0; + } + + .ant-timeline-item-tail { + border-width: 1px; + border-color: ${colors.gray[100]}; + } +` as typeof AntdTimeline; diff --git a/datahub-web-react/src/alchemy-components/components/Timeline/index.ts b/datahub-web-react/src/alchemy-components/components/Timeline/index.ts new file mode 100644 index 00000000000000..f3667d487e7bab --- /dev/null +++ 
b/datahub-web-react/src/alchemy-components/components/Timeline/index.ts @@ -0,0 +1 @@ +export { Timeline } from './Timeline'; diff --git a/datahub-web-react/src/alchemy-components/components/Timeline/types.ts b/datahub-web-react/src/alchemy-components/components/Timeline/types.ts new file mode 100644 index 00000000000000..a1a01dccf7f506 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Timeline/types.ts @@ -0,0 +1,17 @@ +import React from 'react'; + +export type TimelineItem = { + key: string; + content: React.ReactNode; + dot?: React.ReactNode; +}; + +export interface BaseItemType { + key: string; +} + +export type TimelineProps = { + items: ItemType[]; + renderContent: (item: ItemType) => React.ReactNode; + renderDot?: (item: ItemType) => React.ReactNode; +}; diff --git a/datahub-web-react/src/alchemy-components/components/Tooltip2/Tooltip2.tsx b/datahub-web-react/src/alchemy-components/components/Tooltip2/Tooltip2.tsx new file mode 100644 index 00000000000000..8b77195fb40726 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Tooltip2/Tooltip2.tsx @@ -0,0 +1,60 @@ +import { Tooltip, TooltipProps } from 'antd'; +import * as React from 'react'; +import { + Container, + Content, + Section, + SectionHeader, + SectionsContainer, + SectionTitle, + Title, + TitleSuffix, +} from './components'; +import { Tooltip2Props } from './types'; + +export function Tooltip2(props: Tooltip2Props & TooltipProps) { + const { header: Header, width, title, sections, ...otherProps } = props; + + if (!Header && !title) return null; + + const renderTitle = () => { + return ( + + {Header &&
} + {title} + {sections && ( + + {sections?.map((section) => ( +
+ + {section.title} + {section.titleSuffix && {section.titleSuffix}} + + {section?.content && {section.content}} +
+ ))} +
+ )} + + ); + }; + + return ( + + {props.children} + + ); +} diff --git a/datahub-web-react/src/alchemy-components/components/Tooltip2/TooltipHeader.tsx b/datahub-web-react/src/alchemy-components/components/Tooltip2/TooltipHeader.tsx new file mode 100644 index 00000000000000..7b3ac460b4d8ce --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Tooltip2/TooltipHeader.tsx @@ -0,0 +1,77 @@ +import colors from '@src/alchemy-components/theme/foundations/colors'; +import React from 'react'; +import styled from 'styled-components'; +import { TooltipHeaderProps } from './types'; + +const Container = styled.div` + display: flex; + align-items: center; +`; + +const TitleContainer = styled.div` + display: flex; + flex-direction: column; + min-width: 0; + margin-right: 8px; +`; + +const PrimaryTitle = styled.div` + display: flex; + align-items: center; +`; + +const Title = styled.div` + font-weight: 500; + font-size: 14px; + color: ${colors.gray[600]}; +`; + +const TitleSuffix = styled.div` + margin-left: 4px; +`; + +export const SubTitle = styled.div` + font-weight: 400; + font-size: 12px; + flex-shrink: 1; + min-width: 0; + color: ${colors.gray[1700]}; +`; + +const ActionContainer = styled.div` + margin-left: auto; + flex-shrink: 0; +`; + +const Image = styled.img` + width: 32px; + height: 32px; + border-radius: 200px; + margin-right: 8px; + flex-shrink: 0; + object-fit: contain; +`; + +export function TooltipHeader({ title, subTitle, image, action: Action, titleSuffix }: TooltipHeaderProps) { + if (!title) return null; + + return ( + + {image && } + + + {title} + {titleSuffix && {titleSuffix}} + + {subTitle && {subTitle}} + + {Action && ( + + + + )} + + ); +} + +export default TooltipHeader; diff --git a/datahub-web-react/src/alchemy-components/components/Tooltip2/components.ts b/datahub-web-react/src/alchemy-components/components/Tooltip2/components.ts new file mode 100644 index 00000000000000..bb2af3a1c9eccd --- /dev/null +++ 
b/datahub-web-react/src/alchemy-components/components/Tooltip2/components.ts @@ -0,0 +1,41 @@ +import colors from '@src/alchemy-components/theme/foundations/colors'; +import styled from 'styled-components'; + +export const Title = styled.div` + color: ${colors.gray[1700]}; + font-size: 14px; + font-weight: 400; +`; + +export const TitleSuffix = styled.div` + margin-left: 4px; +`; + +export const SectionsContainer = styled.div` + margin-top: 8px; +`; + +export const Section = styled.div` + margin-top: 12px; +`; + +export const SectionHeader = styled.div` + display: flex; + align-items: flex-start; +`; + +export const SectionTitle = styled.div` + font-weight: 700; + font-size: 12px; + color: ${colors.gray[600]}; +`; + +export const Content = styled.div` + margin: 4px 0; +`; + +export const Container = styled.div` + ${Section}:first-child { + margin-top: 0px; + } +`; diff --git a/datahub-web-react/src/alchemy-components/components/Tooltip2/index.ts b/datahub-web-react/src/alchemy-components/components/Tooltip2/index.ts new file mode 100644 index 00000000000000..b3f01cbaac7d08 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Tooltip2/index.ts @@ -0,0 +1 @@ +export { Tooltip2 } from './Tooltip2'; diff --git a/datahub-web-react/src/alchemy-components/components/Tooltip2/types.ts b/datahub-web-react/src/alchemy-components/components/Tooltip2/types.ts new file mode 100644 index 00000000000000..5438a0b408a956 --- /dev/null +++ b/datahub-web-react/src/alchemy-components/components/Tooltip2/types.ts @@ -0,0 +1,20 @@ +export interface SectionType { + title: string; + titleSuffix?: string | React.ReactNode; + content: string | React.ReactNode; +} + +export interface Tooltip2Props { + header?: React.ComponentType; + sections?: SectionType[]; + children?: React.ReactNode; + width?: number; +} + +export interface TooltipHeaderProps { + title: string; + titleSuffix?: React.ReactNode; + subTitle?: string; + image?: string; + action?: React.ComponentType; +} 
diff --git a/datahub-web-react/src/alchemy-components/index.ts b/datahub-web-react/src/alchemy-components/index.ts index 7e40d343e884dc..ac05349437fd04 100644 --- a/datahub-web-react/src/alchemy-components/index.ts +++ b/datahub-web-react/src/alchemy-components/index.ts @@ -6,12 +6,17 @@ export * from './components/Avatar'; export * from './components/Badge'; export * from './components/BarChart'; export * from './components/Button'; +export * from './components/CalendarChart'; export * from './components/Card'; export * from './components/Checkbox'; +export * from './components/DatePicker'; +export * from './components/Drawer'; +export * from './components/GraphCard'; export * from './components/Heading'; export * from './components/Icon'; export * from './components/Input'; export * from './components/LineChart'; +export * from './components/Loader'; export * from './components/PageTitle'; export * from './components/Pills'; export * from './components/Popover'; @@ -21,4 +26,5 @@ export * from './components/Switch'; export * from './components/Table'; export * from './components/Text'; export * from './components/TextArea'; +export * from './components/Timeline'; export * from './components/Tooltip'; diff --git a/datahub-web-react/src/alchemy-components/theme/config/types.ts b/datahub-web-react/src/alchemy-components/theme/config/types.ts index 79ba2e27018f76..f4d06f4c043ba6 100644 --- a/datahub-web-react/src/alchemy-components/theme/config/types.ts +++ b/datahub-web-react/src/alchemy-components/theme/config/types.ts @@ -41,7 +41,7 @@ export type VariantOptions = 'filled' | 'outline'; export type AlignmentOptions = 'left' | 'right' | 'center' | 'justify'; // Avatar Size options -export type AvatarSizeOptions = 'sm' | 'md' | 'lg' | 'default'; +export type AvatarSizeOptions = 'sm' | 'md' | 'lg' | 'xl' | 'default'; // Icon Alignment types export type IconAlignmentOptions = 'horizontal' | 'vertical'; diff --git 
a/datahub-web-react/src/alchemy-components/theme/foundations/colors.ts b/datahub-web-react/src/alchemy-components/theme/foundations/colors.ts index 760127a8b0f781..013a1f55cfb323 100644 --- a/datahub-web-react/src/alchemy-components/theme/foundations/colors.ts +++ b/datahub-web-react/src/alchemy-components/theme/foundations/colors.ts @@ -23,6 +23,7 @@ const colors = { 1600: '#F5F6FA', 1700: '#5F6685', 1800: '#8088A3', + 1900: '#A3A7B9', }, violet: { diff --git a/datahub-web-react/src/app/AppProviders.tsx b/datahub-web-react/src/app/AppProviders.tsx index 00597e1cf76406..767dd5e778c114 100644 --- a/datahub-web-react/src/app/AppProviders.tsx +++ b/datahub-web-react/src/app/AppProviders.tsx @@ -6,6 +6,7 @@ import QuickFiltersProvider from '../providers/QuickFiltersProvider'; import SearchContextProvider from './search/context/SearchContextProvider'; import EntityRegistryProvider from './EntityRegistryProvider'; import { BrowserTitleProvider } from './shared/BrowserTabTitleContext'; +import { NavBarProvider } from './homeV2/layout/navBarRedesign/NavBarContext'; interface Props { children: React.ReactNode; @@ -19,7 +20,9 @@ export default function AppProviders({ children }: Props) { - {children} + + {children} + diff --git a/datahub-web-react/src/app/EntityRegistryProvider.tsx b/datahub-web-react/src/app/EntityRegistryProvider.tsx index 9e283c0d07fc8a..000858bae6e150 100644 --- a/datahub-web-react/src/app/EntityRegistryProvider.tsx +++ b/datahub-web-react/src/app/EntityRegistryProvider.tsx @@ -1,9 +1,13 @@ +import buildEntityRegistryV2 from '@app/buildEntityRegistryV2'; import React from 'react'; import { EntityRegistryContext } from '../entityRegistryContext'; +import EntityRegistry from './entity/EntityRegistry'; import useBuildEntityRegistry from './useBuildEntityRegistry'; +export const globalEntityRegistryV2 = buildEntityRegistryV2(); + const EntityRegistryProvider = ({ children }: { children: React.ReactNode }) => { - const entityRegistry = 
useBuildEntityRegistry(); + const entityRegistry = useBuildEntityRegistry() as EntityRegistry; return {children}; }; diff --git a/datahub-web-react/src/app/ProtectedRoutes.tsx b/datahub-web-react/src/app/ProtectedRoutes.tsx index d975e6d4d99c2d..9d9f24e3da7936 100644 --- a/datahub-web-react/src/app/ProtectedRoutes.tsx +++ b/datahub-web-react/src/app/ProtectedRoutes.tsx @@ -1,16 +1,36 @@ import React, { useEffect } from 'react'; import { Switch, Route, useLocation, useHistory } from 'react-router-dom'; import { Layout } from 'antd'; +import styled from 'styled-components'; import { HomePage } from './home/HomePage'; +import { HomePage as HomePageV2 } from './homeV2/HomePage'; import { SearchRoutes } from './SearchRoutes'; import EmbedRoutes from './EmbedRoutes'; import { NEW_ROUTE_MAP, PageRoutes } from '../conf/Global'; +import { useIsThemeV2, useSetThemeIsV2 } from './useIsThemeV2'; import { getRedirectUrl } from '../conf/utils'; +import { IntroduceYourself } from './homeV2/introduce/IntroduceYourself'; +import { useSetUserTitle } from './identity/user/useUserTitle'; +import { useSetUserPersona } from './homeV2/persona/useUserPersona'; +import { useSetNavBarRedesignEnabled } from './useShowNavBarRedesign'; +import { OnboardingContextProvider } from './onboarding/OnboardingContextProvider'; + +const StyledLayout = styled(Layout)` + background-color: transparent; +`; /** * Container for all views behind an authentication wall. */ export const ProtectedRoutes = (): JSX.Element => { + useSetThemeIsV2(); + useSetUserPersona(); + useSetUserTitle(); + useSetNavBarRedesignEnabled(); + + const isThemeV2 = useIsThemeV2(); + const FinalHomePage = isThemeV2 ? 
HomePageV2 : HomePage; + const location = useLocation(); const history = useHistory(); @@ -22,12 +42,15 @@ export const ProtectedRoutes = (): JSX.Element => { }, [location]); return ( - - - } /> - } /> - } /> - - + + + + } /> + } /> + } /> + } /> + + + ); }; diff --git a/datahub-web-react/src/app/SearchRoutes.tsx b/datahub-web-react/src/app/SearchRoutes.tsx index 024b6e0add15a3..9a635bec046373 100644 --- a/datahub-web-react/src/app/SearchRoutes.tsx +++ b/datahub-web-react/src/app/SearchRoutes.tsx @@ -1,16 +1,9 @@ import React from 'react'; -import { Switch, Route, Redirect } from 'react-router-dom'; -import { NoPageFound } from './shared/NoPageFound'; +import { Redirect, Route, Switch } from 'react-router-dom'; import { PageRoutes } from '../conf/Global'; -import { SearchablePage } from './search/SearchablePage'; -import { useEntityRegistry } from './useEntityRegistry'; -import { EntityPage } from './entity/EntityPage'; -import { BrowseResultsPage } from './browse/BrowseResultsPage'; -import { SearchPage } from './search/SearchPage'; import { AnalyticsPage } from './analyticsDashboard/components/AnalyticsPage'; -import { ManageIngestionPage } from './ingest/ManageIngestionPage'; -import GlossaryRoutes from './glossary/GlossaryRoutes'; -import { SettingsPage } from './settings/SettingsPage'; +import { BrowseResultsPage } from './browse/BrowseResultsPage'; +import { BusinessAttributes } from './businessAttribute/BusinessAttributes'; import { useUserContext } from './context/useUserContext'; import DomainRoutes from './domain/DomainRoutes'; import { @@ -20,8 +13,26 @@ import { useIsNestedDomainsEnabled, } from './useAppConfig'; import { ManageDomainsPage } from './domain/ManageDomainsPage'; -import { BusinessAttributes } from './businessAttribute/BusinessAttributes'; import StructuredProperties from './govern/structuredProperties/StructuredProperties'; + +import { EntityPage } from './entity/EntityPage'; +import { EntityPage as EntityPageV2 } from 
'./entityV2/EntityPage'; +import GlossaryRoutes from './glossary/GlossaryRoutes'; +import GlossaryRoutesV2 from './glossaryV2/GlossaryRoutes'; +import { ManageIngestionPage } from './ingest/ManageIngestionPage'; +import { SearchPage } from './search/SearchPage'; +import { SearchablePage } from './search/SearchablePage'; +import { SearchPage as SearchPageV2 } from './searchV2/SearchPage'; +import { SearchablePage as SearchablePageV2 } from './searchV2/SearchablePage'; +import { SettingsPage } from './settings/SettingsPage'; +import { SettingsPage as SettingsPageV2 } from './settingsV2/SettingsPage'; +import { NoPageFound } from './shared/NoPageFound'; +import { useEntityRegistry } from './useEntityRegistry'; + +import DomainRoutesV2 from './domainV2/DomainRoutes'; +import { ManageDomainsPage as ManageDomainsPageV2 } from './domainV2/ManageDomainsPage'; +import { useIsThemeV2 } from './useIsThemeV2'; + /** * Container for all searchable page routes */ @@ -33,6 +44,8 @@ export const SearchRoutes = (): JSX.Element => { ? entityRegistry.getEntitiesForSearchRoutes() : entityRegistry.getNonGlossaryEntities(); const { config } = useAppConfig(); + const isThemeV2 = useIsThemeV2(); + const FinalSearchablePage = isThemeV2 ? SearchablePageV2 : SearchablePage; const businessAttributesFlag = useBusinessAttributesFlag(); const appConfigContextLoaded = useIsAppConfigContextLoaded(); @@ -42,16 +55,25 @@ export const SearchRoutes = (): JSX.Element => { (me.platformPrivileges?.manageStructuredProperties || me.platformPrivileges?.viewStructuredPropertiesPage); return ( - + {entities.map((entity) => ( } + render={() => + isThemeV2 ? ( + + ) : ( + + ) + } /> ))} - } /> + (isThemeV2 ? : )} + /> } /> } /> } /> @@ -61,11 +83,25 @@ export const SearchRoutes = (): JSX.Element => { /> } /> } /> - {isNestedDomainsEnabled && } />} - {!isNestedDomainsEnabled && } />} + {isNestedDomainsEnabled && ( + (isThemeV2 ? : )} + /> + )} + {!isNestedDomainsEnabled && ( + (isThemeV2 ? 
: )} + /> + )} + } /> - } /> - } /> + (isThemeV2 ? : )} /> + (isThemeV2 ? : )} + /> {showStructuredProperties && ( } /> )} @@ -83,6 +119,6 @@ export const SearchRoutes = (): JSX.Element => { /> - + ); }; diff --git a/datahub-web-react/src/app/analytics/event.ts b/datahub-web-react/src/app/analytics/event.ts index 9152f2fb8eedb8..7d213acb42a59e 100644 --- a/datahub-web-react/src/app/analytics/event.ts +++ b/datahub-web-react/src/app/analytics/event.ts @@ -2,6 +2,7 @@ import { AllowedValue, DataHubViewType, EntityType, + LineageDirection, PropertyCardinality, PropertyValueInput, RecommendationRenderType, @@ -64,6 +65,8 @@ export enum EventType { ActivatePolicyEvent, ShowSimplifiedHomepageEvent, ShowStandardHomepageEvent, + ShowV2ThemeEvent, + RevertV2ThemeEvent, CreateGlossaryEntityEvent, CreateDomainEvent, MoveDomainEvent, @@ -98,6 +101,16 @@ export enum EventType { ApplyStructuredPropertyEvent, UpdateStructuredPropertyOnAssetEvent, RemoveStructuredPropertyEvent, + ClickDocRequestCTA, + IntroduceYourselfViewEvent, + IntroduceYourselfSubmitEvent, + IntroduceYourselfSkipEvent, + ExpandLineageEvent, + ContractLineageEvent, + ShowHideLineageColumnsEvent, + SearchLineageColumnsEvent, + FilterLineageColumnsEvent, + DrillDownLineageEvent, } /** @@ -119,6 +132,29 @@ export interface PageViewEvent extends BaseEvent { originPath: string; } +/** + * Viewed the Introduce Yourself page on the UI. + */ +export interface IntroduceYourselfViewEvent extends BaseEvent { + type: EventType.IntroduceYourselfViewEvent; +} + +/** + * Submitted the "Introduce Yourself" page through the UI. + */ +export interface IntroduceYourselfSubmitEvent extends BaseEvent { + type: EventType.IntroduceYourselfSubmitEvent; + role: string; + platformUrns: Array; +} + +/** + * Skipped the "Introduce Yourself" page through the UI. + */ +export interface IntroduceYourselfSkipEvent extends BaseEvent { + type: EventType.IntroduceYourselfSkipEvent; +} + /** * Viewed the Home Page on the UI. 
*/ @@ -474,6 +510,14 @@ export interface ShowStandardHomepageEvent extends BaseEvent { type: EventType.ShowStandardHomepageEvent; } +export interface ShowV2ThemeEvent extends BaseEvent { + type: EventType.ShowV2ThemeEvent; +} + +export interface RevertV2ThemeEvent extends BaseEvent { + type: EventType.RevertV2ThemeEvent; +} + export interface HomePageExploreAllClickEvent extends BaseEvent { type: EventType.HomePageExploreAllClickEvent; } @@ -656,6 +700,64 @@ export interface CreateBusinessAttributeEvent extends BaseEvent { name: string; } +export enum DocRequestCTASource { + TaskCenter = 'TaskCenter', + AssetPage = 'AssetPage', +} + +export interface ClickDocRequestCTA extends BaseEvent { + type: EventType.ClickDocRequestCTA; + source: DocRequestCTASource; +} + +export interface ExpandLineageEvent extends BaseEvent { + type: EventType.ExpandLineageEvent; + direction: LineageDirection; + levelsExpanded: '1' | 'all'; + entityUrn: string; + entityType: EntityType; +} + +export interface ContractLineageEvent extends BaseEvent { + type: EventType.ContractLineageEvent; + direction: LineageDirection; + entityUrn: string; + entityType?: EntityType; +} + +export interface ShowHideLineageColumnsEvent extends BaseEvent { + type: EventType.ShowHideLineageColumnsEvent; + action: 'show' | 'hide'; + entityUrn: string; + entityType: EntityType; + entityPlatformUrn?: string; +} + +export interface SearchLineageColumnsEvent extends BaseEvent { + type: EventType.SearchLineageColumnsEvent; + entityUrn: string; + entityType: EntityType; + searchTextLength: number; +} + +export interface FilterLineageColumnsEvent extends BaseEvent { + type: EventType.FilterLineageColumnsEvent; + action: 'enable' | 'disable'; + entityUrn: string; + entityType: EntityType; + shownCount: number; +} + +export interface DrillDownLineageEvent extends BaseEvent { + type: EventType.DrillDownLineageEvent; + action: 'select' | 'deselect'; + entityUrn: string; + entityType: EntityType; + parentUrn: string; + 
parentEntityType: EntityType; + dataType?: string; +} + export interface CreateStructuredPropertyClickEvent extends BaseEvent { type: EventType.CreateStructuredPropertyClickEvent; } @@ -720,6 +822,9 @@ export interface RemoveStructuredPropertyEvent extends StructuredPropertyOnAsset export type Event = | PageViewEvent | HomePageViewEvent + | IntroduceYourselfViewEvent + | IntroduceYourselfSubmitEvent + | IntroduceYourselfSkipEvent | SignUpEvent | LogInEvent | LogOutEvent @@ -774,6 +879,8 @@ export type Event = | DeleteIngestionSourceEvent | ExecuteIngestionSourceEvent | ShowStandardHomepageEvent + | ShowV2ThemeEvent + | RevertV2ThemeEvent | SsoEvent | CreateViewEvent | UpdateViewEvent @@ -793,6 +900,12 @@ export type Event = | EmbedProfileViewInDataHubEvent | EmbedLookupNotFoundEvent | CreateBusinessAttributeEvent + | ExpandLineageEvent + | ContractLineageEvent + | ShowHideLineageColumnsEvent + | SearchLineageColumnsEvent + | FilterLineageColumnsEvent + | DrillDownLineageEvent | CreateStructuredPropertyClickEvent | CreateStructuredPropertyEvent | EditStructuredPropertyEvent @@ -800,4 +913,5 @@ export type Event = | ViewStructuredPropertyEvent | ApplyStructuredPropertyEvent | UpdateStructuredPropertyOnAssetEvent - | RemoveStructuredPropertyEvent; + | RemoveStructuredPropertyEvent + | ClickDocRequestCTA; diff --git a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsChart.tsx b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsChart.tsx index d466a8f2ccd6d7..f8bf1350b783d1 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsChart.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsChart.tsx @@ -33,7 +33,7 @@ export const AnalyticsChart = ({ chartData, width, height }: Props) => { } return ( - +
{chartData.title} diff --git a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx index f139ef7368a74d..fed1b0c15e2cad 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx @@ -2,6 +2,7 @@ import React, { useState } from 'react'; import styled from 'styled-components'; import { Alert, Divider, Input, Select } from 'antd'; import { SearchOutlined } from '@ant-design/icons'; +import { useShowNavBarRedesign } from '@src/app/useShowNavBarRedesign'; import { ChartGroup } from './ChartGroup'; import { useGetAnalyticsChartsQuery, useGetMetadataAnalyticsChartsQuery } from '../../../graphql/analytics.generated'; import { useGetHighlightsQuery } from '../../../graphql/highlights.generated'; @@ -11,13 +12,38 @@ import { useListDomainsQuery } from '../../../graphql/domain.generated'; import filterSearchQuery from '../../search/utils/filterSearchQuery'; import { ANTD_GRAY } from '../../entity/shared/constants'; import { useUserContext } from '../../context/useUserContext'; +import { useIsThemeV2 } from '../../useIsThemeV2'; + +const PageContainer = styled.div<{ isV2: boolean; $isShowNavBarRedesign?: boolean }>` + background-color: ${(props) => (props.isV2 ? '#fff' : 'inherit')}; + ${(props) => + props.$isShowNavBarRedesign && + ` + height: 100%; + margin: 5px; + overflow: auto; + box-shadow: ${props.theme.styles['box-shadow-navbar-redesign']}; + `} + ${(props) => + !props.$isShowNavBarRedesign && + ` + margin-right: ${props.isV2 ? '24px' : '0'}; + margin-bottom: ${props.isV2 ? '24px' : '0'}; + `} + border-radius: ${(props) => { + if (props.isV2 && props.$isShowNavBarRedesign) return props.theme.styles['border-radius-navbar-redesign']; + return props.isV2 ? 
'8px' : '0'; + }}; +`; const HighlightGroup = styled.div` - display: flex; - align-items: space-between; - justify-content: center; - padding-top: 20px; + margin-top: 20px; + padding: 0 20px; margin-bottom: 10px; + display: grid; + grid-template-rows: auto auto; + grid-template-columns: repeat(4, 1fr); + gap: 10px; `; const MetadataAnalyticsInput = styled.div` @@ -47,6 +73,8 @@ const StyledSearchBar = styled(Input)` `; export const AnalyticsPage = () => { + const isV2 = useIsThemeV2(); + const isShowNavBarRedesign = useShowNavBarRedesign(); const me = useUserContext(); const canManageDomains = me?.platformPrivileges?.createDomains; const { data: chartData, loading: chartLoading, error: chartError } = useGetAnalyticsChartsQuery(); @@ -88,14 +116,14 @@ export const AnalyticsPage = () => { const isLoading = highlightLoading || chartLoading || domainLoading || metadataAnalyticsLoading; return ( - <> - {isLoading && } + + {isLoading && } {highlightError && ( )} {highlightData?.getHighlights?.map((highlight) => ( - + ))} <> @@ -105,7 +133,7 @@ export const AnalyticsPage = () => { {chartData?.getAnalyticsCharts ?.filter((chartGroup) => chartGroup.groupId === 'GlobalMetadataAnalytics') .map((chartGroup) => ( - + ))} <> @@ -126,7 +154,7 @@ export const AnalyticsPage = () => { > All {domainData?.listDomains?.domains?.map((domainChoice) => ( - + {domainChoice?.properties?.name} ))} @@ -159,7 +187,7 @@ export const AnalyticsPage = () => { ) : metadataAnalyticsData?.getMetadataAnalyticsCharts?.map((chartGroup) => ( - + ))} <> @@ -168,12 +196,12 @@ export const AnalyticsPage = () => { chartData?.getAnalyticsCharts ?.filter((chartGroup) => chartGroup.groupId === 'DataHubUsageAnalytics') .map((chartGroup) => ( - <> + - + ))} - + ); }; diff --git a/datahub-web-react/src/app/analyticsDashboard/components/ChartCard.tsx b/datahub-web-react/src/app/analyticsDashboard/components/ChartCard.tsx index 1fe247b878543a..e975d2ecf92901 100644 --- 
a/datahub-web-react/src/app/analyticsDashboard/components/ChartCard.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/ChartCard.tsx @@ -1,9 +1,9 @@ import { Card } from 'antd'; import styled from 'styled-components'; -export const ChartCard = styled(Card)<{ shouldScroll: boolean }>` +export const ChartCard = styled(Card)<{ $shouldScroll: boolean }>` margin: 12px; box-shadow: ${(props) => props.theme.styles['box-shadow']}; height: 440px; - overflow-y: ${(props) => (props.shouldScroll ? 'scroll' : 'hidden')}; + overflow-y: ${(props) => (props.$shouldScroll ? 'scroll' : 'hidden')}; `; diff --git a/datahub-web-react/src/app/auth/useGetLogoutHandler.ts b/datahub-web-react/src/app/auth/useGetLogoutHandler.ts new file mode 100644 index 00000000000000..a4b35b4de8db23 --- /dev/null +++ b/datahub-web-react/src/app/auth/useGetLogoutHandler.ts @@ -0,0 +1,17 @@ +import { useCallback } from 'react'; +import Cookies from 'js-cookie'; +import { GlobalCfg } from '@src/conf'; +import analytics, { EventType } from '../analytics'; +import { useUserContext } from '../context/useUserContext'; +import { isLoggedInVar } from './checkAuthStatus'; + +export default function useGetLogoutHandler() { + const me = useUserContext(); + + return useCallback(() => { + analytics.event({ type: EventType.LogOutEvent }); + isLoggedInVar(false); + Cookies.remove(GlobalCfg.CLIENT_AUTH_COOKIE); + me.updateLocalState({ selectedViewUrn: undefined }); + }, [me]); +} diff --git a/datahub-web-react/src/app/buildEntityRegistryV2.ts b/datahub-web-react/src/app/buildEntityRegistryV2.ts new file mode 100644 index 00000000000000..7461034bf81270 --- /dev/null +++ b/datahub-web-react/src/app/buildEntityRegistryV2.ts @@ -0,0 +1,58 @@ +import EntityRegistry from './entityV2/EntityRegistry'; +import { DashboardEntity } from './entityV2/dashboard/DashboardEntity'; +import { ChartEntity } from './entityV2/chart/ChartEntity'; +import { UserEntity } from './entityV2/user/User'; +import { GroupEntity } from 
'./entityV2/group/Group'; +import { DatasetEntity } from './entityV2/dataset/DatasetEntity'; +import { DataFlowEntity } from './entityV2/dataFlow/DataFlowEntity'; +import { DataJobEntity } from './entityV2/dataJob/DataJobEntity'; +import { TagEntity } from './entityV2/tag/Tag'; +import { GlossaryTermEntity } from './entityV2/glossaryTerm/GlossaryTermEntity'; +import { MLFeatureEntity } from './entityV2/mlFeature/MLFeatureEntity'; +import { MLPrimaryKeyEntity } from './entityV2/mlPrimaryKey/MLPrimaryKeyEntity'; +import { MLFeatureTableEntity } from './entityV2/mlFeatureTable/MLFeatureTableEntity'; +import { MLModelEntity } from './entityV2/mlModel/MLModelEntity'; +import { MLModelGroupEntity } from './entityV2/mlModelGroup/MLModelGroupEntity'; +import { DomainEntity } from './entityV2/domain/DomainEntity'; +import { ContainerEntity } from './entityV2/container/ContainerEntity'; +import GlossaryNodeEntity from './entityV2/glossaryNode/GlossaryNodeEntity'; +import { DataPlatformEntity } from './entityV2/dataPlatform/DataPlatformEntity'; +import { DataProductEntity } from './entityV2/dataProduct/DataProductEntity'; +import { DataPlatformInstanceEntity } from './entityV2/dataPlatformInstance/DataPlatformInstanceEntity'; +import { RoleEntity } from './entityV2/Access/RoleEntity'; +import { QueryEntity } from './entityV2/query/QueryEntity'; +import { SchemaFieldEntity } from './entityV2/schemaField/SchemaFieldEntity'; +import { StructuredPropertyEntity } from './entityV2/structuredProperty/StructuredPropertyEntity'; +import { DataProcessInstanceEntity } from './entityV2/dataProcessInstance/DataProcessInstanceEntity'; +import { BusinessAttributeEntity } from './entityV2/businessAttribute/BusinessAttributeEntity'; + +export default function buildEntityRegistryV2() { + const registry = new EntityRegistry(); + registry.register(new DatasetEntity()); + registry.register(new DashboardEntity()); + registry.register(new ChartEntity()); + registry.register(new UserEntity()); + 
registry.register(new GroupEntity()); + registry.register(new TagEntity()); + registry.register(new DataFlowEntity()); + registry.register(new DataJobEntity()); + registry.register(new GlossaryTermEntity()); + registry.register(new MLFeatureEntity()); + registry.register(new MLPrimaryKeyEntity()); + registry.register(new MLFeatureTableEntity()); + registry.register(new MLModelEntity()); + registry.register(new MLModelGroupEntity()); + registry.register(new DomainEntity()); + registry.register(new ContainerEntity()); + registry.register(new GlossaryNodeEntity()); + registry.register(new RoleEntity()); + registry.register(new DataPlatformEntity()); + registry.register(new DataProductEntity()); + registry.register(new DataPlatformInstanceEntity()); + registry.register(new QueryEntity()); + registry.register(new SchemaFieldEntity()); + registry.register(new StructuredPropertyEntity()); + registry.register(new DataProcessInstanceEntity()); + registry.register(new BusinessAttributeEntity()); + return registry; +} diff --git a/datahub-web-react/src/app/dataviz/ChartCard.tsx b/datahub-web-react/src/app/dataviz/ChartCard.tsx new file mode 100644 index 00000000000000..6d25daca9bc1e2 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/ChartCard.tsx @@ -0,0 +1,55 @@ +import React from 'react'; + +import { Typography } from 'antd'; +import styled from 'styled-components'; +import { ANTD_GRAY } from '../entity/shared/constants'; + +const Card = styled.div` + display: flex; + flex-direction: column; + padding: 1rem; + background-color: white; + box-shadow: 0px 3px 6px 0px ${ANTD_GRAY[5]}; + border-radius: 8px; + + text { + fill: ${ANTD_GRAY[8]}; + font-weight: 400 !important; + } +`; + +const Header = styled.div` + display: flex; + align-items: center; + justify-content: space-between; +`; + +const Body = styled.div` + position: relative; + display: flex; + align-items: center; + justify-content: center; +`; + +const Heading = styled(Typography.Text)` + display: block; + 
font-size: 14px;s + font-weight: 600; + color: ${ANTD_GRAY[8]}; + min-width: 300px; +`; + +interface Props { + title: string; + chart: React.ReactElement; + flex?: number; +} + +export const ChartCard = ({ title, chart, flex = 1 }: Props) => ( + +
+ {title} +
+ {chart} +
+); diff --git a/datahub-web-react/src/app/dataviz/ChartLoading.tsx b/datahub-web-react/src/app/dataviz/ChartLoading.tsx new file mode 100644 index 00000000000000..393791aba6603f --- /dev/null +++ b/datahub-web-react/src/app/dataviz/ChartLoading.tsx @@ -0,0 +1,3 @@ +import React from 'react'; + +export const ChartLoading = () => <>Chart; diff --git a/datahub-web-react/src/app/dataviz/Legend.tsx b/datahub-web-react/src/app/dataviz/Legend.tsx new file mode 100644 index 00000000000000..a199894ae39d67 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/Legend.tsx @@ -0,0 +1,31 @@ +import React from 'react'; + +import { LegendOrdinal, LegendItem, LegendLabel } from '@visx/legend'; + +interface Props { + scale: any; +} + +export const Legend = ({ scale }: Props) => { + return ( + + {(labels) => ( +
+ {labels.map((label) => ( + + + + + + {label.text} + + + ))} +
+ )} +
+ ); +}; diff --git a/datahub-web-react/src/app/dataviz/bar/BarChart.tsx b/datahub-web-react/src/app/dataviz/bar/BarChart.tsx new file mode 100644 index 00000000000000..1176bc7ea7ebf6 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/bar/BarChart.tsx @@ -0,0 +1,108 @@ +import React from 'react'; + +import dayjs from 'dayjs'; + +import { Axis, BarSeries, BarStack, Grid, XYChart } from '@visx/xychart'; +import { ParentSize } from '@visx/responsive'; + +import { Legend } from '../Legend'; +import { ChartWrapper } from '../components'; + +import { abbreviateNumber } from '../utils'; + +export const BarChart = ({ + data, + dataKeys, + xAccessor, + yAccessor, + colorAccessor, + tickFormat = 'MMM D', + yAxisLabel, +}: { + data: any; + dataKeys: DataKeys; + xAccessor: (d: Data) => string; + yAccessor: (d: any, key: string) => string; + colorAccessor: (d: string) => string; + tickFormat?: string; + yAxisLabel?: string; +}) => { + if (!Array.isArray(dataKeys)) throw new Error('Datakeys must be an array'); + + const multipleData = dataKeys.length > 1; + const margin = { top: 20, right: 20, bottom: 30, left: 60 }; + const tickCount = Math.max(1, Math.min(data.length, 10)); + + return ( + + + {({ width }) => { + if (!width) return null; + + return ( + <> + + + {multipleData ? ( + + {dataKeys + .slice() + .reverse() + .map((dK) => ( + yAccessor(d, dK)} + colorAccessor={() => colorAccessor(dK)} + /> + ))} + + ) : ( + yAccessor(d, dataKeys[0])} + colorAccessor={() => colorAccessor(dataKeys[0])} + radiusTop + /> + )} + dayjs(d).format(tickFormat)} + hideAxisLine + /> + {/* Left Axis is for COUNT/NUMBER values only */} + (Number.isInteger(value) ? 
value : '')} + tickComponent={({ x, y, formattedValue }) => ( + + {abbreviateNumber(formattedValue)} + + )} + hideAxisLine + /> + + + + ); + }} + + + ); +}; diff --git a/datahub-web-react/src/app/dataviz/bar/HorizontalBarChart.tsx b/datahub-web-react/src/app/dataviz/bar/HorizontalBarChart.tsx new file mode 100644 index 00000000000000..9a963addc834d6 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/bar/HorizontalBarChart.tsx @@ -0,0 +1,157 @@ +/* eslint-disable @typescript-eslint/dot-notation, no-param-reassign */ + +import React from 'react'; + +import { Grid, XYChart } from '@visx/xychart'; +import { BarStackHorizontal, Bar } from '@visx/shape'; +import { AxisLeft } from '@visx/axis'; +import { scaleLinear, scaleBand } from '@visx/scale'; +import { ParentSize } from '@visx/responsive'; + +import { Legend } from '../Legend'; +import { ChartWrapper } from '../components'; + +import { COMPLETED_COLOR, NOT_STARTED_COLOR, IN_PROGRESS_COLOR } from '../constants'; + +export const HorizontalBarChart = ({ + data, + dataKeys, + yAccessor, + colorAccessor, +}: { + data: Data[]; + dataKeys: DataKeys; + yAccessor: (d: Data) => string; + colorAccessor: (d: string) => string; +}) => { + if (!data || !data.length || !dataKeys) return null; + if (!Array.isArray(dataKeys)) throw new Error('Datakeys must be an array'); + + const totals = data.reduce((allTotals, currentDate) => { + const total = dataKeys.reduce((dailyTotal, k) => { + dailyTotal += Number(currentDate[k]); + return dailyTotal; + }, 0); + allTotals.push(total); + return allTotals; + }, [] as number[]); + + const xScale = scaleLinear({ + domain: [0, Math.max(...totals)], + nice: true, + }); + + const yScale = scaleBand({ + domain: data.map(yAccessor), + padding: 0.2, + }); + + return ( + + + {({ width: parentWidth }) => { + if (!parentWidth) return null; + + const margin = { top: 20, right: 0, bottom: 0, left: 120 }; + const baseHeight = data.length < 2 ? 
180 : 280; + const xMax = parentWidth - margin.left - margin.right; + const yMax = baseHeight - margin.top - margin.bottom; + + xScale.rangeRound([0, xMax]); + yScale.rangeRound([yMax, 0]); + + return ( + <> + + + + + {(barStacks) => + barStacks.map((barStack) => + barStack.bars.map((bar) => { + // Use the bar color to determine which label to display for Doc Initiatives + let label = null; + if (bar.color === COMPLETED_COLOR) label = bar.bar.data['Completed']; + if (bar.color === IN_PROGRESS_COLOR) + label = bar.bar.data['In Progress']; + if (bar.color === NOT_STARTED_COLOR) + label = bar.bar.data['Not Started']; + + if (label === '0') return null; + + const { x, y, width, height } = bar; + const { left } = margin; + + const newX = x + left + 10; + const barWidth = width + 5; + + let textX = barWidth <= 20 ? newX + barWidth - 5 : newX + barWidth - 15; + if (barWidth > 300) textX = newX + barWidth - 20; + + return ( + + + {label && ( + + {label} + + )} + + ); + }), + ) + } + + + + + + ); + }} + + + ); +}; diff --git a/datahub-web-react/src/app/dataviz/bar/HorizontalFullBarChart.tsx b/datahub-web-react/src/app/dataviz/bar/HorizontalFullBarChart.tsx new file mode 100644 index 00000000000000..4bb5848a38008b --- /dev/null +++ b/datahub-web-react/src/app/dataviz/bar/HorizontalFullBarChart.tsx @@ -0,0 +1,157 @@ +/* eslint-disable @typescript-eslint/dot-notation, no-param-reassign */ + +import React from 'react'; + +import { Grid, XYChart } from '@visx/xychart'; +import { BarStackHorizontal, Bar } from '@visx/shape'; +import { AxisLeft } from '@visx/axis'; +import { scaleLinear, scaleBand } from '@visx/scale'; +import { ParentSize } from '@visx/responsive'; + +import { Legend } from '../Legend'; +import { ChartWrapper } from '../components'; + +import { COMPLETED_COLOR, NOT_STARTED_COLOR, IN_PROGRESS_COLOR } from '../constants'; + +export const HorizontalFullBarChart = ({ + data, + dataKeys, + yAccessor, + colorAccessor, +}: { + data: Data[]; + dataKeys: DataKeys; + 
yAccessor: (d: Data) => string; + colorAccessor: (d: string) => string; +}) => { + if (!data || !data.length || !dataKeys) return null; + if (!Array.isArray(dataKeys)) throw new Error('Datakeys must be an array'); + + const totals = data.reduce((allTotals, currentDate) => { + const total = dataKeys.reduce((dailyTotal, k) => { + dailyTotal += Number(currentDate[k]); + return dailyTotal; + }, 0); + allTotals.push(total); + return allTotals; + }, [] as number[]); + + const xScale = scaleLinear({ + domain: [0, Math.max(...totals)], + nice: true, + }); + + const yScale = scaleBand({ + domain: data.map(yAccessor), + padding: 0.2, + }); + + return ( + + + {({ width: parentWidth }) => { + if (!parentWidth) return null; + + const margin = { top: 20, right: 0, bottom: 0, left: 120 }; + const baseHeight = 280; + const calculatedHeight = data.length > 4 ? baseHeight + data.length * 10 : baseHeight; + const xMax = parentWidth - margin.left - margin.right; + const yMax = calculatedHeight - margin.top - margin.bottom; + + xScale.rangeRound([0, xMax]); + yScale.rangeRound([yMax, 0]); + + return ( + <> + + + + + {(barStacks) => + barStacks.map((barStack) => + barStack.bars.map((bar) => { + // Use the bar color to determine which label to display for Doc Initiatives + let label = null; + if (bar.color === COMPLETED_COLOR) label = bar.bar.data['Completed']; + if (bar.color === IN_PROGRESS_COLOR) + label = bar.bar.data['In Progress']; + if (bar.color === NOT_STARTED_COLOR) + label = bar.bar.data['Not Started']; + + if (label === '0') return null; + + const { x, y, width, height } = bar; + const { left } = margin; + + const newX = x + left + 10; + const barWidth = width + 5; + + let textX = barWidth <= 20 ? 
newX + barWidth - 5 : newX + barWidth - 10; + if (barWidth > 300) textX = newX + barWidth - 20; + + return ( + + + {label && ( + + {label} + + )} + + ); + }), + ) + } + + + + + + ); + }} + + + ); +}; diff --git a/datahub-web-react/src/app/dataviz/candle/CandleStick.tsx b/datahub-web-react/src/app/dataviz/candle/CandleStick.tsx new file mode 100644 index 00000000000000..437c677c7f11e5 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/candle/CandleStick.tsx @@ -0,0 +1,79 @@ +import React from 'react'; + +import { Group } from '@visx/group'; +import { Bar } from '@visx/shape'; +import { BarProps } from '@visx/shape/lib/shapes/Bar'; +import { GlyphCircle, GlyphDiamond } from '@visx/glyph'; +import { GlyphDiamondProps } from '@visx/glyph/lib/glyphs/GlyphDiamond'; +import { GlyphCircleProps } from '@visx/glyph/lib/glyphs/GlyphCircle'; +import { AddSVGProps } from '@visx/shape/lib/types'; + +type DiamondProps = GlyphDiamondProps & Omit, keyof GlyphDiamondProps>; +type CircleProps = GlyphCircleProps & Omit, keyof GlyphCircleProps>; +type CandleBarProps = AddSVGProps; + +type Props = { + parentChartHeight: number; + candleHeight: number; + barWidth: number; + shapeSize: number; + leftOffset: number; + color: string; + shape: + | { + type: 'diamond'; + extraProps?: DiamondProps; + } + | { + type: 'circle'; + extraProps?: CircleProps; + }; + markerOverlapPx?: number; + extraBarProps?: CandleBarProps; + opacity?: number; + wrapper?: (children: JSX.Element) => JSX.Element; +}; +export const CandleStick = ({ + parentChartHeight, + candleHeight, + barWidth, + shapeSize, + leftOffset, + color, + shape, + wrapper, + opacity, + markerOverlapPx, + extraBarProps, +}: Props) => { + const yOffset = parentChartHeight - candleHeight; + + const shapeProps: DiamondProps | CircleProps = { + top: yOffset, + left: leftOffset, + fill: color, + stroke: 'white', + strokeWidth: (markerOverlapPx ?? 1) > 1 ? 1 / (markerOverlapPx ?? 1) : 1, + filter: markerOverlapPx ? 
undefined : 'drop-shadow(0px 1px 2.5px rgb(0 0 0 / 0.1))', + size: shapeSize, + ...shape.extraProps, + }; + const barProps: CandleBarProps = { + height: candleHeight, + width: barWidth, + x: leftOffset - barWidth / 2, + y: yOffset, + fill: color, + stroke: 'white', + strokeWidth: (markerOverlapPx ?? 1) > 1 ? 1 / (markerOverlapPx ?? 1) : 1, + ...extraBarProps, + }; + + const candleGroup = ( + + + {shape.type === 'diamond' ? : } + + ); + return wrapper ? wrapper(candleGroup) : candleGroup; +}; diff --git a/datahub-web-react/src/app/dataviz/components.ts b/datahub-web-react/src/app/dataviz/components.ts new file mode 100644 index 00000000000000..9be7476522ce17 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/components.ts @@ -0,0 +1,24 @@ +import styled from 'styled-components'; + +export const ChartWrapper = styled.div` + width: 100%; + height: 100%; + position: relative; + + .horizontalBarChartTick { + foreignObject { + text-align: right; + } + } + + .horizontalBarChartInlineLabel { + fill: #fff; + font-weight: 600; + font-family: 'Manrope', sans-serif; + } + + .visx-axis-label { + font-weight: 600 !important; + font-family: 'Manrope', sans-serif !important; + } +`; diff --git a/datahub-web-react/src/app/dataviz/constants.ts b/datahub-web-react/src/app/dataviz/constants.ts new file mode 100644 index 00000000000000..ba8b2d642ca68e --- /dev/null +++ b/datahub-web-react/src/app/dataviz/constants.ts @@ -0,0 +1,5 @@ +export type STATUS = 'Completed' | 'In Progress' | 'Not Started'; + +export const COMPLETED_COLOR = '#20D3BD'; +export const IN_PROGRESS_COLOR = '#7532A4'; +export const NOT_STARTED_COLOR = '#1677FF'; diff --git a/datahub-web-react/src/app/dataviz/index.ts b/datahub-web-react/src/app/dataviz/index.ts new file mode 100644 index 00000000000000..070c56bb50351e --- /dev/null +++ b/datahub-web-react/src/app/dataviz/index.ts @@ -0,0 +1,6 @@ +export * from './ChartCard'; +export * from './ChartLoading'; +export * from './bar/BarChart'; +export * from 
'./bar/HorizontalBarChart'; +export * from './bar/HorizontalFullBarChart'; +export * from './pie/PieChart'; diff --git a/datahub-web-react/src/app/dataviz/line/SimpleLineChart.tsx b/datahub-web-react/src/app/dataviz/line/SimpleLineChart.tsx new file mode 100644 index 00000000000000..aa8831078ce643 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/line/SimpleLineChart.tsx @@ -0,0 +1,69 @@ +import React from 'react'; + +import { curveCatmullRom } from '@visx/curve'; +import { LinePath } from '@visx/shape'; +import { ParentSize } from '@visx/responsive'; +import { MarkerCircle } from '@visx/marker'; +import { extent, max } from '@visx/vendor/d3-array'; +import { scaleTime, scaleLinear } from '@visx/scale'; + +import { ChartWrapper } from '../components'; + +interface Props { + data: any; +} + +type Data = { + date: string; + value: number; +}; + +export const SimpleLineChart = ({ data }: Props) => { + const getDate = (d: Data) => new Date(d.date); + const getValue = (d: Data) => d.value; + + const markerEnd = 'url(#marker-circle)'; + + return ( + + + {({ width }) => { + if (!width) return null; + + const height = 20; + + const xScale = scaleTime({ + range: [0, width - 10], + domain: extent(data, getDate) as [Date, Date], + }); + + const yScale = scaleLinear({ + range: [height / 2, 5], + domain: [0, (max(data, getValue) || 0) as number], + nice: true, + }); + + return ( + + + + + + + + + xScale(getDate(d))} + y={(d: Data) => yScale(getValue(d))} + curve={curveCatmullRom} + markerEnd={markerEnd} + stroke="#9F33CC" + /> + + ); + }} + + + ); +}; diff --git a/datahub-web-react/src/app/dataviz/pie/PieChart.tsx b/datahub-web-react/src/app/dataviz/pie/PieChart.tsx new file mode 100644 index 00000000000000..6925e7b59ad69d --- /dev/null +++ b/datahub-web-react/src/app/dataviz/pie/PieChart.tsx @@ -0,0 +1,90 @@ +import React from 'react'; + +import { Pie } from '@visx/shape'; +import { PieArcDatum } from '@visx/shape/lib/shapes/Pie'; +import { Annotation, Label, Connector } 
from '@visx/annotation'; + +import { useDataAnnotationPosition } from './usePieDataAnnotation'; + +const PieDataAnnotation = ({ + title, + arc, + path, + subtitle, +}: { + title: string; + path: any; + arc: PieArcDatum<{ [x: string]: string }>; + subtitle?: string; +}) => { + const { surfaceX, surfaceY, labelX, labelY } = useDataAnnotationPosition({ arc, path }); + + return ( + + + + ); +}; + +export const PieChart = ({ data }: any) => { + const width = 380; + const height = 280; + const margin = { top: 50, right: 50, bottom: 50, left: 50 }; + + const innerWidth = width - margin.left - margin.right; + const innerHeight = height - margin.top - margin.bottom; + const radius = Math.min(innerWidth, innerHeight) * 0.45; + const centerY = innerHeight / 2; + const centerX = innerWidth / 2; + + const pieSortValues = (a, b) => b - a; + const value = (d) => d.value; + + const createValueLabel = (count: number) => { + let total = 0; + data.forEach((d) => { + total += d.value; + }); + const percent = Math.round((count / total) * 100); + return `${percent}% (${count.toLocaleString()})`; + }; + + return ( + + + + {(pie) => { + return pie.arcs.map((arc) => { + const { name } = arc.data; + const { color } = arc.data; + const arcPath = pie.path(arc); + return ( + + + {arc.endAngle - arc.startAngle !== 0 ? 
( + + ) : null} + + ); + }); + }} + + + + ); +}; diff --git a/datahub-web-react/src/app/dataviz/pie/usePieDataAnnotation.ts b/datahub-web-react/src/app/dataviz/pie/usePieDataAnnotation.ts new file mode 100644 index 00000000000000..1aeed5f169a848 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/pie/usePieDataAnnotation.ts @@ -0,0 +1,36 @@ +import { PieArcDatum } from '@visx/shape/lib/shapes/Pie'; + +const useDataAnnotationPosition = ({ + arc, + path, +}: { + arc: PieArcDatum<{ [x: string]: string }>; + path: any; +}): { + labelX: number; + labelY: number; + surfaceX: number; + surfaceY: number; +} => { + const middleAngle = Math.PI / 2 - (arc.startAngle + (arc.endAngle - arc.startAngle) / 2); + + const outerRadius: number = path.outerRadius()(arc); + + const normalX = Math.cos(middleAngle); + const normalY = Math.sin(-middleAngle); + + const labelX = normalX * outerRadius * 0.1 * (middleAngle < Math.PI ? 1 : -1); + const labelY = normalY * outerRadius * 0.1; + + const surfaceX = normalX * outerRadius; + const surfaceY = normalY * outerRadius; + + return { + labelX, + labelY, + surfaceX, + surfaceY, + }; +}; + +export { useDataAnnotationPosition }; diff --git a/datahub-web-react/src/app/dataviz/stat/SingleStat.tsx b/datahub-web-react/src/app/dataviz/stat/SingleStat.tsx new file mode 100644 index 00000000000000..bce1553c2d7f57 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/stat/SingleStat.tsx @@ -0,0 +1,5 @@ +import React from 'react'; + +export const SingleStat = () => { + return <>stat; +}; diff --git a/datahub-web-react/src/app/dataviz/utils.ts b/datahub-web-react/src/app/dataviz/utils.ts new file mode 100644 index 00000000000000..c09e903ee47352 --- /dev/null +++ b/datahub-web-react/src/app/dataviz/utils.ts @@ -0,0 +1,130 @@ +import dayjs from 'dayjs'; +import { scaleOrdinal } from '@visx/scale'; + +import { COMPLETED_COLOR, NOT_STARTED_COLOR, IN_PROGRESS_COLOR } from './constants'; + +// Mock Data Util +export const generateDateSeries = (numOfDays) => 
+ Array(numOfDays) + .fill(0) + .map((d, i) => ({ + date: dayjs(new Date(Date.now() - 24 * 60 * 60 * 1000 * i)).format(), + value: Math.round(Math.max(10, Math.random() * 100 || 0)), + })); + +// Status Ordinal Scale +export const statusOrdinalScale = scaleOrdinal({ + domain: ['Not Started', 'In Progress', 'Completed'], + range: [NOT_STARTED_COLOR, IN_PROGRESS_COLOR, COMPLETED_COLOR], +}); + +// private utils to help with rounding y axis numbers +const NUMERICAL_ABBREVIATIONS = ['k', 'm', 'b', 't']; +function roundToPrecision(n: number, precision: number) { + const prec = 10 ** precision; + return Math.round(n * prec) / prec; +} + +/** + * ie. 24044 -> 24k + * @param n + */ +export const truncateNumberForDisplay = (n: number, skipRounding?: boolean): string => { + let base = Math.floor(Math.log(Math.abs(n)) / Math.log(1000)); + const suffix = NUMERICAL_ABBREVIATIONS[Math.min(NUMERICAL_ABBREVIATIONS.length - 1, base - 1)]; + base = NUMERICAL_ABBREVIATIONS.indexOf(suffix) + 1; + const roundedNumber = skipRounding ? n : Math.round(n); + return suffix ? 
roundToPrecision(n / 1000 ** base, 0) + suffix : `${roundedNumber}`; +}; + +// Number Abbreviations +export const abbreviateNumber = (str) => { + const number = parseFloat(str); + if (Number.isNaN(number)) return str; + if (number < 1000) return number; + const abbreviations = ['K', 'M', 'B', 'T']; + const index = Math.floor(Math.log10(number) / 3); + const suffix = abbreviations[index - 1]; + const shortNumber = number / 10 ** (index * 3); + return `${shortNumber}${suffix}`; +}; + +// Byte Abbreviations +export const abbreviateBytes = (str): string => { + const bytes = parseFloat(str); + if (Number.isNaN(bytes)) return str; + if (bytes < 1024) return `${bytes} B`; + const units = ['B', 'KB', 'MB', 'GB', 'TB']; + const index = Math.floor(Math.log(bytes) / Math.log(1024)); + const shortBytes = bytes / 1024 ** index; + + return `${abbreviateNumber(shortBytes.toFixed())} ${units[index]}`; +}; + +type CalculateYScaleExtentForChartOptions = { + defaultYValue: number; + // Between 0-1, represents what % of the chart's height should be empty above and below. + // ie. if this is .1, then 10% of the chart's height will be empty. + yScaleBufferFactor?: number; +}; + +/** + * Creates a yscale range for charts, with optional buffers + * @param yValues + * @param options + */ +export const calculateYScaleExtentForChart = ( + yValues: number[], + options: CalculateYScaleExtentForChartOptions = { defaultYValue: 0 }, +): { min: number; max: number } => { + let min = yValues.length ? Math.min(...yValues) : options.defaultYValue; + let max = yValues.length ? Math.max(...yValues) : options.defaultYValue; + + // Add some extra range above and below so things aren't pushed to the edge + const { yScaleBufferFactor } = options; + if (yScaleBufferFactor) { + let yScaleBuffer = 0; + // Edge case: if max and min are the same, add some buffer above and below so the points are nicely centered + if (max === min) { + // Ie. Let's say max/min=1.5B. 
In this case we want to add ~100M buffer and and below + // This will make the y-axis display 1.4B at the bottom and 1.6B at the top, + // While nicely centering the data points. + const decimalPlaceValue = max.toString().length; + yScaleBuffer = 10 ** (decimalPlaceValue - 1); + } else { + // By default, the chart will put the min at the bottom edge and the max at the top edge. + // So if yScaleBufferFactor=0.1, then we want 10% of the chart at the top and bottom to be empty + const distance = max - min; + const newDistance = distance / (1 - yScaleBufferFactor); + yScaleBuffer = newDistance * yScaleBufferFactor; + } + + min -= yScaleBuffer; + max += yScaleBuffer; + } + + return { min, max }; +}; + +/** + * Gets the px overlap between two markers + * @param marker1 + * @param marker2 + * @returns {number | undefined} undefined if no overlap + */ +export function calculateOverlapBetweenTwoMarkers( + marker1: { xOffset: number; width: number }, + marker2: { xOffset: number; width: number }, +): undefined | number { + let markerOverlapPx: number | undefined; + + // Take width of the left and right half of the two markers (where they will collide) + const netWidth = marker1.width / 2 + marker2.width / 2; + + // Calculate distance and potential overlap + const distance = Math.abs(marker1.xOffset - marker2.xOffset); + if (distance < netWidth) { + markerOverlapPx = netWidth - distance; + } + return markerOverlapPx; +} diff --git a/datahub-web-react/src/app/domainV2/CreateDomainModal.tsx b/datahub-web-react/src/app/domainV2/CreateDomainModal.tsx new file mode 100644 index 00000000000000..056e0847c71d61 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/CreateDomainModal.tsx @@ -0,0 +1,231 @@ +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { message, Button, Input, Modal, Typography, Form, Collapse, Tag } from 'antd'; +import { useCreateDomainMutation } from '../../graphql/domain.generated'; +import { useEnterKeyListener } from 
'../shared/useEnterKeyListener'; +import { validateCustomUrnId } from '../shared/textUtil'; +import analytics, { EventType } from '../analytics'; +import DomainParentSelect from '../entityV2/shared/EntityDropdown/DomainParentSelect'; +import { useIsNestedDomainsEnabled } from '../useAppConfig'; +import { useDomainsContext as useDomainsContextV2 } from './DomainsContext'; + +const SuggestedNamesGroup = styled.div` + margin-top: 8px; +`; + +const ClickableTag = styled(Tag)` + :hover { + cursor: pointer; + } +`; + +const FormItem = styled(Form.Item)` + .ant-form-item-label { + padding-bottom: 2px; + } +`; + +const FormItemWithMargin = styled(FormItem)` + margin-bottom: 16px; +`; + +const FormItemNoMargin = styled(FormItem)` + margin-bottom: 0; +`; + +const FormItemLabel = styled(Typography.Text)` + font-weight: 600; + color: #373d44; +`; + +const AdvancedLabel = styled(Typography.Text)` + color: #373d44; +`; + +type Props = { + onClose: () => void; + onCreate: ( + urn: string, + id: string | undefined, + name: string, + description: string | undefined, + parentDomain?: string, + ) => void; +}; + +const SUGGESTED_DOMAIN_NAMES = ['Engineering', 'Marketing', 'Sales', 'Product']; + +const ID_FIELD_NAME = 'id'; +const NAME_FIELD_NAME = 'name'; +const DESCRIPTION_FIELD_NAME = 'description'; + +export default function CreateDomainModal({ onClose, onCreate }: Props) { + const isNestedDomainsEnabled = useIsNestedDomainsEnabled(); + const [createDomainMutation] = useCreateDomainMutation(); + const { entityData } = useDomainsContextV2(); + const [selectedParentUrn, setSelectedParentUrn] = useState( + (isNestedDomainsEnabled && entityData?.urn) || '', + ); + const [createButtonEnabled, setCreateButtonEnabled] = useState(false); + const [form] = Form.useForm(); + + const onCreateDomain = () => { + createDomainMutation({ + variables: { + input: { + id: form.getFieldValue(ID_FIELD_NAME), + name: form.getFieldValue(NAME_FIELD_NAME), + description: 
form.getFieldValue(DESCRIPTION_FIELD_NAME), + parentDomain: selectedParentUrn || undefined, + }, + }, + }) + .then(({ data, errors }) => { + if (!errors) { + analytics.event({ + type: EventType.CreateDomainEvent, + parentDomainUrn: selectedParentUrn || undefined, + }); + message.success({ + content: `Created domain!`, + duration: 3, + }); + onCreate( + data?.createDomain || '', + form.getFieldValue(ID_FIELD_NAME), + form.getFieldValue(NAME_FIELD_NAME), + form.getFieldValue(DESCRIPTION_FIELD_NAME), + selectedParentUrn || undefined, + ); + form.resetFields(); + } + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to create Domain!: \n ${e.message || ''}`, duration: 3 }); + }); + onClose(); + }; + + // Handle the Enter press + useEnterKeyListener({ + querySelectorToExecuteClick: '#createDomainButton', + }); + + return ( + + + + + } + > +
{ + setCreateButtonEnabled(!form.getFieldsError().some((field) => field.errors.length > 0)); + }} + > + {isNestedDomainsEnabled && ( + Parent (optional)}> + + + )} + Name}> + + + + + {SUGGESTED_DOMAIN_NAMES.map((name) => { + return ( + { + form.setFieldsValue({ + name, + }); + setCreateButtonEnabled(true); + }} + > + {name} + + ); + })} + + + Description} + help="You can always change the description later." + > + + + + + + Advanced Options} key="1"> + Domain Id} + help="By default, a random UUID will be generated to uniquely identify this domain. If + you'd like to provide a custom id instead to more easily keep track of this domain, + you may provide it here. Be careful, you cannot easily change the domain id after + creation." + > + ({ + validator(_, value) { + if (value && validateCustomUrnId(value)) { + return Promise.resolve(); + } + return Promise.reject(new Error('Please enter a valid Domain id')); + }, + }), + ]} + > + + + + + + +
+ ); +} diff --git a/datahub-web-react/src/app/domainV2/DomainAutocompleteOptions.tsx b/datahub-web-react/src/app/domainV2/DomainAutocompleteOptions.tsx new file mode 100644 index 00000000000000..7f392b6b8a57d0 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainAutocompleteOptions.tsx @@ -0,0 +1,53 @@ +import { LoadingOutlined } from '@ant-design/icons'; +import React from 'react'; +import styled from 'styled-components'; +import { Entity } from '../../types.generated'; +import { getParentDomains } from '../domain/utils'; +import EntityRegistry from '../entity/EntityRegistry'; +import { ANTD_GRAY } from '../entityV2/shared/constants'; +import ParentEntities from '../search/filters/ParentEntities'; + +const LoadingWrapper = styled.div` + padding: 8px; + display: flex; + justify-content: center; + + svg { + height: 15px; + width: 15px; + color: ${ANTD_GRAY[8]}; + } +`; + +interface AntOption { + label: JSX.Element; + value: string; +} + +export default function domainAutocompleteOptions( + entities: Entity[], + loading: boolean, + entityRegistry: EntityRegistry, +): AntOption[] { + if (loading) { + return [ + { + label: ( + + + + ), + value: 'loading', + }, + ]; + } + return entities.map((entity) => ({ + label: ( + <> + + {entityRegistry.getDisplayName(entity.type, entity)} + + ), + value: entity.urn, + })); +} diff --git a/datahub-web-react/src/app/domainV2/DomainIcon.tsx b/datahub-web-react/src/app/domainV2/DomainIcon.tsx new file mode 100644 index 00000000000000..76326f89bd70d2 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainIcon.tsx @@ -0,0 +1,11 @@ +import React from 'react'; +import { Globe } from '@phosphor-icons/react'; +import { TYPE_ICON_CLASS_NAME } from '../entityV2/shared/components/subtypes'; + +type Props = { + style?: React.CSSProperties; +}; + +export default function DomainIcon({ style }: Props) { + return ; +} diff --git a/datahub-web-react/src/app/domainV2/DomainItemMenu.tsx 
b/datahub-web-react/src/app/domainV2/DomainItemMenu.tsx new file mode 100644 index 00000000000000..a0007b90435d74 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainItemMenu.tsx @@ -0,0 +1,65 @@ +import React from 'react'; +import { DeleteOutlined } from '@ant-design/icons'; +import { Dropdown, Menu, message, Modal } from 'antd'; +import { EntityType } from '../../types.generated'; +import { useEntityRegistry } from '../useEntityRegistry'; +import { useDeleteDomainMutation } from '../../graphql/domain.generated'; +import { MenuIcon } from '../entity/shared/EntityDropdown/EntityDropdown'; + +type Props = { + urn: string; + name: string; + onDelete?: () => void; +}; + +export default function DomainItemMenu({ name, urn, onDelete }: Props) { + const entityRegistry = useEntityRegistry(); + const [deleteDomainMutation] = useDeleteDomainMutation(); + + const deleteDomain = () => { + deleteDomainMutation({ + variables: { + urn, + }, + }) + .then(({ errors }) => { + if (!errors) { + message.success('Deleted Domain!'); + onDelete?.(); + } + }) + .catch(() => { + message.destroy(); + message.error({ content: `Failed to delete Domain!: An unknown error occurred.`, duration: 3 }); + }); + }; + + const onConfirmDelete = () => { + Modal.confirm({ + title: `Delete Domain '${name}'`, + content: `Are you sure you want to remove this ${entityRegistry.getEntityName(EntityType.Domain)}?`, + onOk() { + deleteDomain(); + }, + onCancel() {}, + okText: 'Yes', + maskClosable: true, + closable: true, + }); + }; + + return ( + + +  Delete + + + } + > + + + ); +} diff --git a/datahub-web-react/src/app/domainV2/DomainListColumns.tsx b/datahub-web-react/src/app/domainV2/DomainListColumns.tsx new file mode 100644 index 00000000000000..646056adcf57ea --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainListColumns.tsx @@ -0,0 +1,68 @@ +import React from 'react'; +import { Tag, Typography } from 'antd'; +import { Tooltip } from '@components'; +import { Link } from 
'react-router-dom'; +import styled from 'styled-components'; +import { Maybe, Ownership } from '../../types.generated'; +import { useEntityRegistry } from '../useEntityRegistry'; +import AvatarsGroup from '../shared/avatar/AvatarsGroup'; +import DomainItemMenu from './DomainItemMenu'; + +interface DomainEntry { + name: string; + entities: string; + urn: string; + ownership?: Maybe; + url: string; +} + +const AvatarGroupWrapper = styled.div` + margin-right: 10px; + display: inline-block; +`; + +const DomainNameContainer = styled.div` + margin-left: 16px; + margin-right: 16px; + display: inline; +`; + +export function DomainListMenuColumn(handleDelete: (urn: string) => void) { + return (record: DomainEntry) => ( + handleDelete(record.urn)} /> + ); +} + +export function DomainNameColumn(logoIcon: JSX.Element) { + return (record: DomainEntry) => ( + + + {logoIcon} + + {record.name} + + + {record.entities} entities + + + + ); +} + +export function DomainOwnersColumn(ownership: Maybe) { + const entityRegistry = useEntityRegistry(); + + if (!ownership) { + return null; + } + + const { owners } = ownership; + if (!owners || owners.length === 0) { + return null; + } + return ( + + + + ); +} diff --git a/datahub-web-react/src/app/domainV2/DomainRoutes.tsx b/datahub-web-react/src/app/domainV2/DomainRoutes.tsx new file mode 100644 index 00000000000000..4910bb807aef31 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainRoutes.tsx @@ -0,0 +1,58 @@ +import React, { useState } from 'react'; +import { matchPath, Route, Switch, useLocation } from 'react-router-dom'; +import styled from 'styled-components/macro'; +import { PageRoutes } from '../../conf/Global'; +import { EntityType } from '../../types.generated'; +import { EntityPage } from '../entity/EntityPage'; +import { GenericEntityProperties } from '../entity/shared/types'; +import EntitySidebarContext from '../sharedV2/EntitySidebarContext'; +import useSidebarWidth from '../sharedV2/sidebar/useSidebarWidth'; +import { 
useEntityRegistry } from '../useEntityRegistry'; +import { DomainsContext } from './DomainsContext'; +import ManageDomainsPageV2 from './nestedDomains/ManageDomainsPageV2'; +import ManageDomainsSidebar from './nestedDomains/ManageDomainsSidebar'; +import { useShowNavBarRedesign } from '../useShowNavBarRedesign'; + +const ContentWrapper = styled.div<{ $isShowNavBarRedesign?: boolean; $isEntityProfile?: boolean }>` + display: flex; + overflow: hidden; + border-radius: 8px; + flex: 1; + ${(props) => !props.$isEntityProfile && props.$isShowNavBarRedesign && 'padding: 5px;'} +`; + +export default function DomainRoutes() { + const entityRegistry = useEntityRegistry(); + const [entityData, setEntityData] = useState(null); + const [isSidebarClosed, setIsSidebarClosed] = useState(true); + const entitySidebarWidth = useSidebarWidth(); + const isShowNavBarRedesign = useShowNavBarRedesign(); + + const location = useLocation(); + const isEntityProfile = + matchPath(location.pathname, `/${entityRegistry.getPathName(EntityType.Domain)}/:urn`) !== null; + + return ( + + + + + + } + /> + } /> + + + + + ); +} diff --git a/datahub-web-react/src/app/domainV2/DomainSearch.tsx b/datahub-web-react/src/app/domainV2/DomainSearch.tsx new file mode 100644 index 00000000000000..73552060cb5e88 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainSearch.tsx @@ -0,0 +1,123 @@ +import React, { useRef, useState } from 'react'; +import { LoadingOutlined, SearchOutlined } from '@ant-design/icons'; +import styled from 'styled-components/macro'; +import { useGetAutoCompleteResultsQuery } from '../../graphql/search.generated'; +import { EntityType } from '../../types.generated'; +import { SearchBar } from '../searchV2/SearchBar'; +import ClickOutside from '../shared/ClickOutside'; +import { useEntityRegistry } from '../useEntityRegistry'; +import DomainSearchResultItem from './DomainSearchResultItem'; +import { ANTD_GRAY, REDESIGN_COLORS } from '../entityV2/shared/constants'; + +const 
DomainSearchWrapper = styled.div` + flex-shrink: 0; + position: relative; +`; + +const ResultsWrapper = styled.div` + background-color: white; + border-radius: 5px; + box-shadow: 0 3px 6px -4px rgb(0 0 0 / 12%), 0 6px 16px 0 rgb(0 0 0 / 8%), 0 9px 28px 8px rgb(0 0 0 / 5%); + padding: 8px; + position: absolute; + max-height: 210px; + overflow: auto; + width: calc(100% - 32px); + left: 16px; + top: 55px; + z-index: 1; +`; + +const LoadingWrapper = styled(ResultsWrapper)` + display: flex; + justify-content: center; + padding: 16px 0; + font-size: 16px; +`; + +const SearchIcon = styled(SearchOutlined)` + color: ${REDESIGN_COLORS.TEXT_HEADING_SUB_LINK}; + padding: 16px; + width: 100%; + font-size: 20px; +`; + +type Props = { + isCollapsed?: boolean; + unhideSidebar?: () => void; +}; + +function DomainSearch({ isCollapsed, unhideSidebar }: Props) { + const [query, setQuery] = useState(''); + const [isSearchBarFocused, setIsSearchBarFocused] = useState(false); + const entityRegistry = useEntityRegistry(); + const { data, loading } = useGetAutoCompleteResultsQuery({ + variables: { + input: { + type: EntityType.Domain, + query, + }, + }, + skip: !query, + }); + + const entities = data?.autoComplete?.entities || []; + const timerRef = useRef(-1); + + const handleQueryChange = (q: string) => { + window.clearTimeout(timerRef.current); + timerRef.current = window.setTimeout(() => { + setQuery(q); + }, 250); + }; + + return ( + + {isCollapsed && unhideSidebar ? 
( + + ) : ( + setIsSearchBarFocused(false)}> + null} + onQueryChange={(q) => handleQueryChange(q)} + entityRegistry={entityRegistry} + onFocus={() => setIsSearchBarFocused(true)} + /> + {loading && isSearchBarFocused && ( + + + + )} + {!loading && isSearchBarFocused && !!entities?.length && ( + + {entities?.map((entity) => ( + setIsSearchBarFocused(false)} + /> + ))} + + )} + + )} + + ); +} + +export default DomainSearch; diff --git a/datahub-web-react/src/app/domainV2/DomainSearchResultItem.tsx b/datahub-web-react/src/app/domainV2/DomainSearchResultItem.tsx new file mode 100644 index 00000000000000..dc33ea173e0aeb --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainSearchResultItem.tsx @@ -0,0 +1,68 @@ +// Create a new component called SearchResultItem.js +import React from 'react'; +import { Link } from 'react-router-dom'; +import Highlight from 'react-highlighter'; +import styled from 'styled-components/macro'; +import { Entity, EntityType } from '../../types.generated'; +import { IconStyleType } from '../entity/Entity'; +import { ANTD_GRAY } from '../entity/shared/constants'; +import DomainIcon from './DomainIcon'; +import ParentEntities from '../search/filters/ParentEntities'; +import { getParentDomains } from './utils'; +import EntityRegistry from '../entity/EntityRegistry'; + +type Props = { + entity: Entity; + entityRegistry: EntityRegistry; + query: string; + onResultClick: () => void; +}; + +const SearchResult = styled(Link)` + color: #262626; + display: flex; + align-items: center; + gap: 8px; + height: 100%; + padding: 6px 8px; + width: 100%; + &:hover { + background-color: ${ANTD_GRAY[3]}; + color: #262626; + } +`; + +const IconWrapper = styled.span``; + +const highlightMatchStyle = { + fontWeight: 'bold', + background: 'none', + padding: 0, +}; + +function DomainSearchResultItem({ entity, entityRegistry, query, onResultClick }: Props) { + return ( + + + {entity.type === EntityType.Domain ? 
( + + ) : ( + entityRegistry.getIcon(entity.type, 12, IconStyleType.ACCENT) + )} + +
+ + + {entityRegistry.getDisplayName(entity.type, entity)} + +
+
+ ); +} + +export default DomainSearchResultItem; diff --git a/datahub-web-react/src/app/domainV2/DomainsContext.tsx b/datahub-web-react/src/app/domainV2/DomainsContext.tsx new file mode 100644 index 00000000000000..3a6523e31a001f --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainsContext.tsx @@ -0,0 +1,17 @@ +import React, { useContext } from 'react'; +import { GenericEntityProperties } from '../entity/shared/types'; + +export interface DomainsContextType { + entityData: GenericEntityProperties | null; + setEntityData: (entityData: GenericEntityProperties | null) => void; +} + +export const DomainsContext = React.createContext({ + entityData: null, + setEntityData: () => {}, +}); + +export const useDomainsContext = () => { + const { entityData, setEntityData } = useContext(DomainsContext); + return { entityData, setEntityData }; +}; diff --git a/datahub-web-react/src/app/domainV2/DomainsList.tsx b/datahub-web-react/src/app/domainV2/DomainsList.tsx new file mode 100644 index 00000000000000..7966cdc671c3e2 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/DomainsList.tsx @@ -0,0 +1,208 @@ +import React, { useEffect, useState } from 'react'; +import { Button, Empty, Pagination, Typography } from 'antd'; +import { useLocation } from 'react-router'; +import styled from 'styled-components'; +import * as QueryString from 'query-string'; +import { PlusOutlined } from '@ant-design/icons'; +import { AlignType } from 'rc-table/lib/interface'; +import { EntityType } from '../../types.generated'; +import { useListDomainsQuery } from '../../graphql/domain.generated'; +import CreateDomainModal from './CreateDomainModal'; +import { Message } from '../shared/Message'; +import TabToolbar from '../entity/shared/components/styled/TabToolbar'; +import { SearchBar } from '../search/SearchBar'; +import { useEntityRegistry } from '../useEntityRegistry'; +import { scrollToTop } from '../shared/searchUtils'; +import { addToListDomainsCache, removeFromListDomainsCache } from 
'./utils'; +import { OnboardingTour } from '../onboarding/OnboardingTour'; +import { DOMAINS_INTRO_ID, DOMAINS_CREATE_DOMAIN_ID } from '../onboarding/config/DomainsOnboardingConfig'; +import { getElasticCappedTotalValueText } from '../entity/shared/constants'; +import { StyledTable } from '../entity/shared/components/styled/StyledTable'; +import { DomainOwnersColumn, DomainListMenuColumn, DomainNameColumn } from './DomainListColumns'; +import DomainIcon from './DomainIcon'; + +const DomainsContainer = styled.div``; + +export const DomainsPaginationContainer = styled.div` + display: flex; + justify-content: center; + padding: 12px; + padding-left: 16px; + border-bottom: 1px solid; + display: flex; + justify-content: space-between; + align-items: center; +`; + +const PaginationInfo = styled(Typography.Text)` + padding: 0px; +`; + +const DEFAULT_PAGE_SIZE = 25; + +export const DomainsList = () => { + const entityRegistry = useEntityRegistry(); + const location = useLocation(); + const params = QueryString.parse(location.search, { arrayFormat: 'comma' }); + const paramsQuery = (params?.query as string) || undefined; + const [query, setQuery] = useState(undefined); + useEffect(() => setQuery(paramsQuery), [paramsQuery]); + + const [page, setPage] = useState(1); + const [isCreatingDomain, setIsCreatingDomain] = useState(false); + + const pageSize = DEFAULT_PAGE_SIZE; + const start = (page - 1) * pageSize; + + const { loading, error, data, client, refetch } = useListDomainsQuery({ + variables: { + input: { + start, + count: pageSize, + query, + }, + }, + fetchPolicy: query && query.length > 0 ? 'no-cache' : 'cache-first', + }); + + const totalDomains = data?.listDomains?.total || 0; + const lastResultIndex = start + pageSize > totalDomains ? 
totalDomains : start + pageSize; + const domains = data?.listDomains?.domains || []; + + const onChangePage = (newPage: number) => { + scrollToTop(); + setPage(newPage); + }; + + const handleDelete = (urn: string) => { + removeFromListDomainsCache(client, urn, page, pageSize); + setTimeout(() => { + refetch?.(); + }, 2000); + }; + + const allColumns = [ + { + title: 'Name', + dataIndex: '', + key: 'name', + sorter: (sourceA, sourceB) => { + return sourceA.name.localeCompare(sourceB.name); + }, + render: DomainNameColumn( + , + ), + }, + { + title: 'Owners', + dataIndex: 'ownership', + width: '10%', + key: 'ownership', + render: DomainOwnersColumn, + }, + { + title: '', + dataIndex: '', + width: '5%', + align: 'right' as AlignType, + key: 'menu', + render: DomainListMenuColumn(handleDelete), + }, + ]; + + const tableData = domains.map((domain) => { + const displayName = entityRegistry.getDisplayName(EntityType.Domain, domain); + const totalEntitiesText = getElasticCappedTotalValueText(domain.entities?.total || 0); + const url = entityRegistry.getEntityUrl(EntityType.Domain, domain.urn); + + return { + urn: domain.urn, + name: displayName, + entities: totalEntitiesText, + ownership: domain.ownership, + url, + }; + }); + + return ( + <> + {!data && loading && } + {error && } + + + + + null} + onQueryChange={(q) => setQuery(q && q.length > 0 ? q : undefined)} + entityRegistry={entityRegistry} + hideRecommendations + /> + + }} + /> + + + + {lastResultIndex > 0 ? 
(page - 1) * pageSize + 1 : 0} - {lastResultIndex} + + of {totalDomains} + + + + + {isCreatingDomain && ( + setIsCreatingDomain(false)} + onCreate={(urn, _, name, description) => { + addToListDomainsCache( + client, + { + urn, + properties: { + name, + description: description || null, + }, + ownership: null, + entities: null, + }, + pageSize, + ); + setTimeout(() => refetch(), 2000); + }} + /> + )} + + + ); +}; diff --git a/datahub-web-react/src/app/domainV2/EmptyDomainDescription.tsx b/datahub-web-react/src/app/domainV2/EmptyDomainDescription.tsx new file mode 100644 index 00000000000000..6a5f304e565be2 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/EmptyDomainDescription.tsx @@ -0,0 +1,39 @@ +import { Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components/macro'; +import { ANTD_GRAY } from '../entity/shared/constants'; + +const StyledParagraph = styled(Typography.Paragraph)` + text-align: justify; + text-justify: inter-word; + margin: 40px 0; + font-size: 15px; +`; + +function EmptyDomainDescription() { + return ( + <> + + Welcome to your Data Domains! It looks like this space + is ready to be transformed into a well-organized data universe. Start by creating your first domain - a + high-level category for your data assets. + + + Create Nested Domains: Want to dive deeper? You can + also create nested domains to add granularity and structure. Just like nesting Russian dolls, its all + about refining your organization. + + + Build Data Products: Once your domains are set, go a + step further! Organize your data assets into data products to realize a data mesh architecture. Data + products empower you to treat data as a product, making it more accessible and manageable. + + + Ready to embark on this data adventure? Click the Create Domain button to begin shaping your data + landscape! 
+ + + ); +} + +export default EmptyDomainDescription; diff --git a/datahub-web-react/src/app/domainV2/EmptyDomainsSection.tsx b/datahub-web-react/src/app/domainV2/EmptyDomainsSection.tsx new file mode 100644 index 00000000000000..6ec44999978df1 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/EmptyDomainsSection.tsx @@ -0,0 +1,69 @@ +import { PlusOutlined } from '@ant-design/icons'; +import { Button, Empty, Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components/macro'; +import { ANTD_GRAY } from '../entity/shared/constants'; + +const EmptyDomainContainer = styled.div` + display: flex; + justify-content: center; + overflow-y: auto; +`; + +const StyledEmpty = styled(Empty)` + width: 35vw; + @media screen and (max-width: 1300px) { + width: 50vw; + } + @media screen and (max-width: 896px) { + overflow-y: auto; + max-height: 75vh; + &::-webkit-scrollbar { + width: 5px; + background: #d6d6d6; + } + } + padding: 20px; + .ant-empty-image { + display: none; + } +`; + +const StyledButton = styled(Button)` + margin: 18px 8px 0 0; +`; + +const IconContainer = styled.span` + color: ${ANTD_GRAY[7]}; + font-size: 40px; +`; + +interface Props { + title?: string; + setIsCreatingDomain: React.Dispatch>; + description?: React.ReactNode; + icon?: React.ReactNode; +} + +function EmptyDomainsSection(props: Props) { + const { title, description, setIsCreatingDomain, icon } = props; + return ( + + + {icon} + {title} + {description} + + } + > + setIsCreatingDomain(true)}> + Create Domain + + + + ); +} + +export default EmptyDomainsSection; diff --git a/datahub-web-react/src/app/domainV2/ManageDomainsPage.tsx b/datahub-web-react/src/app/domainV2/ManageDomainsPage.tsx new file mode 100644 index 00000000000000..98a312854deec2 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/ManageDomainsPage.tsx @@ -0,0 +1,44 @@ +import { Typography } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { 
DomainsList } from './DomainsList'; +import { DomainsContext } from './DomainsContext'; +import { GenericEntityProperties } from '../entity/shared/types'; + +const PageContainer = styled.div` + padding-top: 20px; +`; + +const PageHeaderContainer = styled.div` + && { + padding-left: 24px; + } +`; + +const PageTitle = styled(Typography.Title)` + && { + margin-bottom: 12px; + } +`; + +const ListContainer = styled.div``; + +export const ManageDomainsPage = () => { + const [entityData, setEntityData] = useState(null); + + return ( + + + + Domains + + View your DataHub Domains. Take administrative actions. + + + + + + + + ); +}; diff --git a/datahub-web-react/src/app/domainV2/__tests__/utils.test.ts b/datahub-web-react/src/app/domainV2/__tests__/utils.test.ts new file mode 100644 index 00000000000000..5a1b920f35bd04 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/__tests__/utils.test.ts @@ -0,0 +1,179 @@ +import { renderHook } from '@testing-library/react-hooks'; +import { ApolloClient, InMemoryCache } from '@apollo/client'; +import { DomainMock1, DomainMock3, expectedResult } from '../../../Mocks'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { + addToListDomainsCache, + removeFromListDomainsCache, + updateListDomainsCache, + useSortedDomains, + getParentDomains, +} from '../utils'; +import { Entity, EntityType } from '../../../types.generated'; + +const apolloClient = new ApolloClient({ + cache: new InMemoryCache(), +}); + +describe('Domain V2 utils tests', () => { + test('addToListDomainsCache -> should add a new domain to the cache list', async () => { + const newDomain = 'new-domain.com'; + const pageSize = 10; + + const mockWriteQuery = vi.fn(); + ApolloClient.prototype.writeQuery = mockWriteQuery; + + addToListDomainsCache(apolloClient, newDomain, pageSize, 'example.com'); + expect(mockWriteQuery).toHaveBeenCalled(); + const args = mockWriteQuery.mock.calls; + const expectedValue = { + query: { + kind: 'Document', + definitions: 
expect.anything(), + loc: expect.anything(), + }, + + variables: { input: { start: 0, count: 10, parentDomain: 'example.com' } }, + data: { listDomains: { start: 0, count: 1, total: 1, domains: ['new-domain.com'] } }, + }; + + expect(args[0][0]).toMatchObject(expectedValue); + }); + test('updateListDomainsCache -> should update and add a domain to the cache list', async () => { + const urn = 'urn123'; + const id = 'domain123'; + const name = 'New Domain'; + const description = 'A test domain'; + const parentDomain = 'example.com'; + + const initialDomains = [ + { + id: 'domain1', + name: 'Domain 1', + description: 'Description for Domain 1', + parentDomain: 'example.com', + }, + { + id: 'domain2', + name: 'Domain 2', + description: 'Description for Domain 2', + parentDomain: 'example.com', + }, + ]; + + const mockReadQuery = vi.fn().mockReturnValue({ + listDomains: { + start: 0, + count: initialDomains.length, + total: initialDomains.length, + domains: initialDomains, + }, + }); + + const mockWriteQuery = vi.fn(); + ApolloClient.prototype.readQuery = mockReadQuery; + ApolloClient.prototype.writeQuery = mockWriteQuery; + + updateListDomainsCache(apolloClient, urn, id, name, description, parentDomain); + expect(mockReadQuery).toHaveBeenCalled(); + expect(mockWriteQuery).toHaveBeenCalled(); + const args = mockWriteQuery.mock.calls; + + const expectedResponseAfterUpdate = [ + { + urn, + id, + type: EntityType.Domain, + properties: { + name, + description: description || null, + }, + ownership: null, + entities: null, + children: null, + dataProducts: null, + parentDomains: null, + displayProperties: null, + }, + ...initialDomains, + ]; + + expect(args[0][0]?.data?.listDomains?.domains).toMatchObject(expectedResponseAfterUpdate); + }); + test('removeFromListDomainsCache -> should remove a domain from the cache list', async () => { + const urn = 'urn1'; + const pageSize = 1000; + const page = 1; + + const initialDomains = [ + { + urn: 'urn1', + id: 'domain1', + name: 
'Domain 1', + description: 'Description for Domain 1', + parentDomain: 'example.com', + }, + { + urn: 'urn2', + id: 'domain2', + name: 'Domain 2', + description: 'Description for Domain 2', + parentDomain: 'example.com', + }, + ]; + const mockReadQuery = vi.fn().mockReturnValue({ + listDomains: { + start: 0, + count: initialDomains.length, + total: initialDomains.length, + domains: initialDomains, + }, + }); + + const mockWriteQuery = vi.fn(); + ApolloClient.prototype.readQuery = mockReadQuery; + ApolloClient.prototype.writeQuery = mockWriteQuery; + + removeFromListDomainsCache(apolloClient, urn, page, pageSize); + expect(mockReadQuery).toHaveBeenCalled(); + expect(mockWriteQuery).toHaveBeenCalled(); + const args = mockWriteQuery.mock.calls; + const expectedResultAfterDelete = [ + { + urn: 'urn2', + id: 'domain2', + name: 'Domain 2', + description: 'Description for Domain 2', + parentDomain: 'example.com', + }, + ]; + expect(args[0][0]?.data?.listDomains?.domains).toMatchObject(expectedResultAfterDelete); + }); + test('useSortedDomains -> should return all domains in an unsorted format if sortBy by is not provided', () => { + const unsortedDomains = [DomainMock3[1], DomainMock3[0]]; + + const { result } = renderHook(() => { + const entityRegistry = useEntityRegistry(); + entityRegistry.register(DomainMock3[0]); + entityRegistry.register(DomainMock3[1]); + + return useSortedDomains(unsortedDomains as unknown as Entity[]); + }); + expect(result.current).toStrictEqual(unsortedDomains); + }); + test('useSortedDomains -> should return all domains in a sorted format', () => { + const { result } = renderHook(() => { + return useSortedDomains(DomainMock3 as unknown as Entity[], 'displayName'); + }); + expect(result.current).toStrictEqual(DomainMock3); + }); + test('getParentDomains -> should get all parent domains', () => { + const { result } = renderHook(() => { + const entityRegistry = useEntityRegistry(); + entityRegistry.register(DomainMock1); + + return 
getParentDomains(DomainMock3[0] as unknown as Entity, entityRegistry); + }); + expect(result.current).toStrictEqual(expectedResult); + }); +}); diff --git a/datahub-web-react/src/app/domainV2/nestedDomains/DomainsSidebarHeader.tsx b/datahub-web-react/src/app/domainV2/nestedDomains/DomainsSidebarHeader.tsx new file mode 100644 index 00000000000000..23a5747e0c0024 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/nestedDomains/DomainsSidebarHeader.tsx @@ -0,0 +1,56 @@ +import { useApolloClient } from '@apollo/client'; +import { PlusCircleOutlined } from '@ant-design/icons'; +import { Button } from 'antd'; +import { Tooltip } from '@components'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import CreateDomainModal from '../CreateDomainModal'; +import { updateListDomainsCache } from '../utils'; +import { REDESIGN_COLORS } from '../../entityV2/shared/constants'; + +const Wrapper = styled.div` + color: ${REDESIGN_COLORS.TITLE_PURPLE}; + font-size: 20px; + display: flex; + align-items: center; + justify-content: space-between; + width: 100%; +`; + +const StyledButton = styled(Button)` + padding: 0px 8px; + border: none; + box-shadow: none; + color: inherit; + font-size: inherit; +`; + +const DomainTitle = styled.div` + font-size: 16px; + font-weight: bold; + color: #374066; +`; + +export default function DomainsSidebarHeader() { + const [isCreatingDomain, setIsCreatingDomain] = useState(false); + const client = useApolloClient(); + + return ( + + Domains + + setIsCreatingDomain(true)}> + + + + {isCreatingDomain && ( + setIsCreatingDomain(false)} + onCreate={(urn, id, name, description, parentDomain) => { + updateListDomainsCache(client, urn, id, name, description, parentDomain); + }} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsPageV2.tsx b/datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsPageV2.tsx new file mode 100644 index 00000000000000..9735136972d12e --- 
/dev/null +++ b/datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsPageV2.tsx @@ -0,0 +1,68 @@ +import React, { useEffect, useState } from 'react'; +import { useShowNavBarRedesign } from '@src/app/useShowNavBarRedesign'; +import { Button } from '@src/alchemy-components'; +import styled from 'styled-components/macro'; +import { PageTitle } from '@src/alchemy-components/components/PageTitle'; +import { useApolloClient } from '@apollo/client'; +import RootDomains from './RootDomains'; +import { DOMAINS_CREATE_DOMAIN_ID, DOMAINS_INTRO_ID } from '../../onboarding/config/DomainsOnboardingConfig'; +import { OnboardingTour } from '../../onboarding/OnboardingTour'; +import CreateDomainModal from '../CreateDomainModal'; +import { updateListDomainsCache } from '../utils'; +import { useDomainsContext as useDomainsContextV2 } from '../DomainsContext'; + +const PageWrapper = styled.div<{ $isShowNavBarRedesign?: boolean }>` + background-color: #ffffff; + flex: 1; + display: flex; + flex-direction: column; + overflow: hidden; + border-radius: ${(props) => + props.$isShowNavBarRedesign ? props.theme.styles['border-radius-navbar-redesign'] : '8px'}; + margin-left: ${(props) => (props.$isShowNavBarRedesign ? '0' : '12px')}; + ${(props) => props.$isShowNavBarRedesign && `box-shadow: ${props.theme.styles['box-shadow-navbar-redesign']};`} +`; + +const Header = styled.div` + display: flex; + justify-content: space-between; + padding: 16px 20px 20px 20px; + align-items: center; +`; + +export default function ManageDomainsPageV2() { + const { setEntityData } = useDomainsContextV2(); + const [isCreatingDomain, setIsCreatingDomain] = useState(false); + const client = useApolloClient(); + const isShowNavBarRedesign = useShowNavBarRedesign(); + + useEffect(() => { + setEntityData(null); + }, [setEntityData]); + + return ( + + +
+ + +
+ + {isCreatingDomain && ( + setIsCreatingDomain(false)} + onCreate={(urn, id, name, description, parentDomain) => + updateListDomainsCache(client, urn, id, name, description, parentDomain) + } + /> + )} +
+ ); +} diff --git a/datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsSidebar.tsx b/datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsSidebar.tsx new file mode 100644 index 00000000000000..ca192a1db1ee81 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/nestedDomains/ManageDomainsSidebar.tsx @@ -0,0 +1,123 @@ +import React, { useCallback, useState } from 'react'; +import { Button, Divider } from 'antd'; +import { Tooltip } from '@components'; +import styled from 'styled-components'; +import { useShowNavBarRedesign } from '@src/app/useShowNavBarRedesign'; +import useSidebarWidth from '../../sharedV2/sidebar/useSidebarWidth'; +import DomainsSidebarHeader from './DomainsSidebarHeader'; +import DomainNavigator from './domainNavigator/DomainNavigator'; +import DomainSearch from '../DomainSearch'; +import { ANTD_GRAY } from '../../entity/shared/constants'; +import SidebarBackArrow from '../../../images/sidebarBackArrow.svg?react'; + +const PLATFORM_BROWSE_TRANSITION_MS = 300; + +// TODO: Clean up how we do expand / collapse +const StyledEntitySidebarContainer = styled.div<{ + isCollapsed: boolean; + $width?: number; + backgroundColor?: string; + $isShowNavBarRedesign?: boolean; + $isEntityProfile?: boolean; +}>` + flex-shrink: 0; + max-height: 100%; + + width: ${(props) => (props.isCollapsed ? '63px' : `${props.$width}px`)}; + margin-bottom: ${(props) => (props.$isShowNavBarRedesign ? '0' : '12px')}; + transition: width ${PLATFORM_BROWSE_TRANSITION_MS}ms ease-in-out; + + background-color: #ffffff; + border-radius: ${(props) => + props.$isShowNavBarRedesign ? props.theme.styles['border-radius-navbar-redesign'] : '8px'}; + display: flex; + flex-direction: column; + ${(props) => + props.$isShowNavBarRedesign && + ` + margin: ${props.$isEntityProfile ? 
'5px 12px 5px 5px' : '0 16px 0 0'}; + box-shadow: ${props.theme.styles['box-shadow-navbar-redesign']}; + `} +`; + +const Controls = styled.div<{ isCollapsed: boolean }>` + display: flex; + align-items: center; + justify-content: ${(props) => (props.isCollapsed ? 'center' : 'space-between')}; + padding: 15px 16px 10px 12px; + overflow: hidden; + height: 50px; +`; + +const CloseButton = styled(Button)<{ $isActive }>` + margin: 0px; + padding: 2px 0px; + display: flex; + align-items: center; + transition: transform ${PLATFORM_BROWSE_TRANSITION_MS}ms ease; + && { + color: ${(props) => (props.$isActive ? ANTD_GRAY[9] : '#8088a3')}; + } +`; + +const ThinDivider = styled(Divider)` + margin: 0px; + padding: 0px; +`; + +const StyledSidebarBackArrow = styled(SidebarBackArrow)<{ direction: 'left' | 'right' }>` + cursor: pointer; + ${(props) => (props.direction === 'right' && 'transform: scaleX(-1);') || undefined} +`; + +const StyledSidebar = styled.div` + overflow: auto; + height: 100%; + display: flex; + flex-direction: column; +`; + +type Props = { + isEntityProfile?: boolean; +}; + +export default function ManageDomainsSidebarV2({ isEntityProfile }: Props) { + const width = useSidebarWidth(0.2); + const [isClosed, setIsClosed] = useState(false); + const isShowNavBarRedesign = useShowNavBarRedesign(); + + const unhideSidebar = useCallback(() => { + setIsClosed(false); + }, []); + + return ( + + + {!isClosed && } + + setIsClosed(!isClosed)}> + + + + + + + + + + + + ); +} diff --git a/datahub-web-react/src/app/domainV2/nestedDomains/RootDomains.tsx b/datahub-web-react/src/app/domainV2/nestedDomains/RootDomains.tsx new file mode 100644 index 00000000000000..da76310b4394e2 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/nestedDomains/RootDomains.tsx @@ -0,0 +1,57 @@ +import React from 'react'; +import styled from 'styled-components'; +import { ReadOutlined } from '@ant-design/icons'; +import { Message } from '../../shared/Message'; +import { useEntityRegistry } from 
'../../useEntityRegistry'; +import { EntityType } from '../../../types.generated'; +import useListDomains from '../useListDomains'; +import EmptyDomainsSection from '../EmptyDomainsSection'; +import EmptyDomainDescription from '../EmptyDomainDescription'; + +const DomainsWrapper = styled.div` + overflow: auto; + padding: 0 20px 12px 20px; +`; + +const ResultWrapper = styled.div` + padding: 16px; + margin: 0px; + margin-bottom: 12px; + display: flex; + align-items: center; + background-color: #ffffff; + border-radius: 12px; + overflow: hidden; + box-shadow: 0px 1px 2px 0px rgba(33, 23, 95, 0.07); + border: 1px solid #ebecf0; +`; + +interface Props { + setIsCreatingDomain: React.Dispatch>; +} +export default function RootDomains({ setIsCreatingDomain }: Props) { + const entityRegistry = useEntityRegistry(); + const { loading, error, data, sortedDomains } = useListDomains({}); + + return ( + <> + {!data && loading && } + {error && } + {!loading && (!data || !data?.listDomains?.domains?.length) && ( + } + title="Organize your data" + description={} + setIsCreatingDomain={setIsCreatingDomain} + /> + )} + + {sortedDomains?.map((domain) => ( + + {entityRegistry.renderSearchResult(EntityType.Domain, { entity: domain, matchedFields: [] })} + + ))} + + + ); +} diff --git a/datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNavigator.tsx b/datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNavigator.tsx new file mode 100644 index 00000000000000..aac78e744cdcc4 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNavigator.tsx @@ -0,0 +1,50 @@ +import { Alert, Empty } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import useListDomains from '../../useListDomains'; +import DomainNode from './DomainNode'; +import { Domain } from '../../../../types.generated'; +import { ANTD_GRAY } from '../../../entity/shared/constants'; + +const NavigatorWrapper = styled.div` + 
font-size: 14px; + max-height: calc(100% - 65px); + overflow: auto; +`; + +interface Props { + domainUrnToHide?: string; + selectDomainOverride?: (domain: Domain) => void; + isCollapsed: boolean; + unhideSidebar?: () => void; +} + +export default function DomainNavigator({ domainUrnToHide, isCollapsed, selectDomainOverride, unhideSidebar }: Props) { + const { sortedDomains, error, loading } = useListDomains({}); + const noDomainsFound: boolean = !sortedDomains || sortedDomains.length === 0; + + return ( + + {error && } + {!loading && noDomainsFound && ( + + )} + {!noDomainsFound && + sortedDomains?.map((domain) => ( + + ))} + + ); +} diff --git a/datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNode.tsx b/datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNode.tsx new file mode 100644 index 00000000000000..c973783ccdc963 --- /dev/null +++ b/datahub-web-react/src/app/domainV2/nestedDomains/domainNavigator/DomainNode.tsx @@ -0,0 +1,234 @@ +import { Typography } from 'antd'; +import { Tooltip } from '@components'; +import React, { useEffect, useMemo } from 'react'; +import { useHistory } from 'react-router'; +import styled from 'styled-components'; +import { Domain } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { RotatingTriangle } from '../../../sharedV2/sidebar/components'; +import useListDomains from '../../useListDomains'; +import useToggle from '../../../shared/useToggle'; +import { BodyContainer, BodyGridExpander } from '../../../shared/components'; +import { useDomainsContext as useDomainsContextV2 } from '../../DomainsContext'; +import { DomainColoredIcon } from '../../../entityV2/shared/links/DomainColoredIcon'; +import { REDESIGN_COLORS, SEARCH_COLORS } from '../../../entityV2/shared/constants'; + +const Count = styled.div` + color: ${REDESIGN_COLORS.BLACK}; + font-size: 12px; + padding-left: 8px; + padding-right: 8px; + margin-left: 8px; + 
border-radius: 11px; + background-color: ${REDESIGN_COLORS.SIDE_BAR}; + width: 20%; + height: 22px; + display: flex; + align-items: center; + justify-content: center; + max-width: 32px; + transition: opacity 0.3s ease; /* add a smooth transition effect */ +`; + +const NameWrapper = styled(Typography.Text)<{ $isSelected: boolean; $addLeftPadding: boolean }>` + flex: 1; + padding: 2px; + ${(props) => props.$isSelected && `color: ${SEARCH_COLORS.TITLE_PURPLE};`} + ${(props) => props.$addLeftPadding && 'padding-left: 20px;'} + + &:hover { + cursor: pointer; + } + display: flex !important; + align-items: center; + justify-content: space-between; + transition: font-weight 0.3s ease-out; + width: 100%; +`; + +const DisplayName = styled.span<{ $isSelected: boolean }>` + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + color: ${(props) => (props.$isSelected ? REDESIGN_COLORS.TITLE_PURPLE : REDESIGN_COLORS.BODY_TEXT_GREY)}; +`; + +const ButtonWrapper = styled.span<{ $addLeftPadding: boolean; $isSelected: boolean }>` + margin-right: 4px; + font-size: 16px; + + svg { + font-size: 16px !important; + color: ${(props) => + props.$isSelected ? 
REDESIGN_COLORS.TITLE_PURPLE : REDESIGN_COLORS.BODY_TEXT_GREY} !important; + } + + .ant-btn { + height: 16px; + width: 16px; + } +`; + +const RowWrapper = styled.div<{ $isSelected: boolean; isOpen?: boolean }>` + align-items: center; + display: flex; + width: 100%; + border-bottom: 1px solid ${REDESIGN_COLORS.COLD_GREY_TEXT_BLUE_1}; + padding: 12px; + ${(props) => props.isOpen && `background-color: ${REDESIGN_COLORS.SECTION_BACKGROUND};`} + ${(props) => props.$isSelected && `background-color: ${REDESIGN_COLORS.LIGHT_TEXT_DARK_BACKGROUND};`} + &:hover { + background-color: ${REDESIGN_COLORS.COLD_GREY_TEXT_BLUE_1}; + ${ButtonWrapper} { + svg { + color: ${REDESIGN_COLORS.TITLE_PURPLE} !important; + } + } + ${DisplayName} { + color: ${REDESIGN_COLORS.TITLE_PURPLE}; + } + } +`; + +const StyledExpander = styled(BodyGridExpander)<{ paddingLeft: number }>` + padding-left: 0px; + background: ${REDESIGN_COLORS.SECTION_BACKGROUND}; + display: flex; + width: 100%; + overflow: auto; + ${RowWrapper} { + padding-left: ${(props) => props.paddingLeft + 12}px; + } +`; + +const Text = styled.div` + display: flex; + gap: 9px; + align-items: center; + font-size: 14px; + width: 80%; +`; + +interface Props { + domain: Domain; + numDomainChildren: number; + isCollapsed?: boolean; + domainUrnToHide?: string; + selectDomainOverride?: (domain: Domain) => void; + unhideSidebar?: () => void; + $paddingLeft?: number; +} + +export default function DomainNode({ + domain, + numDomainChildren, + domainUrnToHide, + isCollapsed, + selectDomainOverride, + unhideSidebar, + $paddingLeft = 0, +}: Props) { + const shouldHideDomain = domainUrnToHide === domain.urn; + const history = useHistory(); + const entityRegistry = useEntityRegistry(); + const { entityData } = useDomainsContextV2(); + const { isOpen, isClosing, toggle, toggleOpen, toggleClose } = useToggle({ + initialValue: false, + closeDelay: 250, + }); + const { sortedDomains } = useListDomains({ parentDomain: domain.urn, skip: !isOpen || 
shouldHideDomain }); + const isOnEntityPage = entityData && entityData.urn === domain.urn; + const displayName = entityRegistry.getDisplayName(domain.type, isOnEntityPage ? entityData : domain); + const isInSelectMode = !!selectDomainOverride; + const isDomainNodeSelected = !!isOnEntityPage && !isInSelectMode; + const shouldAutoOpen = useMemo( + () => !isInSelectMode && entityData?.parentDomains?.domains.some((parent) => parent.urn === domain.urn), + [isInSelectMode, entityData, domain.urn], + ); + const paddingLeft = $paddingLeft + 16; + + useEffect(() => { + if (shouldAutoOpen) toggleOpen(); + }, [shouldAutoOpen, toggleOpen]); + + useEffect(() => { + if (isCollapsed) { + toggleClose(); + } + }, [isCollapsed, toggleClose]); + + function handleSelectDomain() { + if (selectDomainOverride && !isCollapsed) { + selectDomainOverride(domain); + } else if (unhideSidebar && isCollapsed) { + unhideSidebar(); + } else { + history.push(entityRegistry.getEntityUrl(domain.type, domain.urn)); + } + } + + if (shouldHideDomain) return null; + + const finalNumChildren = sortedDomains?.length ?? 
numDomainChildren; + const hasDomainChildren = !!finalNumChildren; + + return ( + <> + + {!isCollapsed && hasDomainChildren && ( + + + + )} + + + + + + + + {!isCollapsed && displayName} + + + {!isCollapsed && hasDomainChildren && {finalNumChildren}} + + + + + + {sortedDomains?.map((childDomain) => ( + + ))} + + + + ); +} diff --git a/datahub-web-react/src/app/domainV2/useListDomains.tsx b/datahub-web-react/src/app/domainV2/useListDomains.tsx new file mode 100644 index 00000000000000..74f6b454f11d4b --- /dev/null +++ b/datahub-web-react/src/app/domainV2/useListDomains.tsx @@ -0,0 +1,27 @@ +import { useListDomainsQuery } from '../../graphql/domain.generated'; +import { useSortedDomains } from './utils'; + +interface Props { + parentDomain?: string; + skip?: boolean; + sortBy?: 'displayName'; +} + +export default function useListDomains({ parentDomain, skip, sortBy = 'displayName' }: Props) { + const { data, error, loading, refetch } = useListDomainsQuery({ + skip, + variables: { + input: { + start: 0, + count: 1000, // don't paginate the home page, get all root level domains + parentDomain, + }, + }, + fetchPolicy: 'network-only', // always use network request first to populate cache + nextFetchPolicy: 'cache-first', // then use cache after that so we can manipulate it + }); + + const sortedDomains = useSortedDomains(data?.listDomains?.domains, sortBy); + + return { data, sortedDomains, error, loading, refetch }; +} diff --git a/datahub-web-react/src/app/domainV2/utils.ts b/datahub-web-react/src/app/domainV2/utils.ts new file mode 100644 index 00000000000000..2e294189dfadda --- /dev/null +++ b/datahub-web-react/src/app/domainV2/utils.ts @@ -0,0 +1,146 @@ +import { ApolloClient } from '@apollo/client'; +import { useEffect } from 'react'; +import { isEqual } from 'lodash'; +import { ListDomainsDocument, ListDomainsQuery } from '../../graphql/domain.generated'; +import { Entity, EntityType } from '../../types.generated'; +import { GenericEntityProperties } from 
'../entity/shared/types'; +import usePrevious from '../shared/usePrevious'; +import { useDomainsContext } from './DomainsContext'; +import { useEntityRegistry } from '../useEntityRegistry'; +import EntityRegistry from '../entity/EntityRegistry'; + +/** + * Add an entry to the list domains cache. + */ +export const addToListDomainsCache = (client: ApolloClient, newDomain, pageSize, parentDomain?: string) => { + // Read the data from our cache for this query. + const currData: ListDomainsQuery | null = client.readQuery({ + query: ListDomainsDocument, + variables: { + input: { + start: 0, + count: pageSize, + parentDomain, + }, + }, + }); + + // Add our new domain into the existing list. + const newDomains = [newDomain, ...(currData?.listDomains?.domains || [])]; + + // Write our data back to the cache. + client.writeQuery({ + query: ListDomainsDocument, + variables: { + input: { + start: 0, + count: pageSize, + parentDomain, + }, + }, + data: { + listDomains: { + start: 0, + count: (currData?.listDomains?.count || 0) + 1, + total: (currData?.listDomains?.total || 0) + 1, + domains: newDomains, + }, + }, + }); +}; + +export const updateListDomainsCache = ( + client: ApolloClient, + urn: string, + id: string | undefined, + name: string, + description: string | undefined, + parentDomain?: string, +) => { + addToListDomainsCache( + client, + { + urn, + id: id || '', + type: EntityType.Domain, + properties: { + name, + description: description || null, + }, + ownership: null, + entities: null, + children: null, + dataProducts: null, + parentDomains: null, + displayProperties: null, + }, + 1000, + parentDomain, + ); +}; + +/** + * Remove an entry from the list domains cache. + */ +export const removeFromListDomainsCache = (client, urn, page, pageSize, parentDomain?: string) => { + // Read the data from our cache for this query. 
+ const currData: ListDomainsQuery | null = client.readQuery({ + query: ListDomainsDocument, + variables: { + input: { + start: (page - 1) * pageSize, + count: pageSize, + parentDomain, + }, + }, + }); + + // Remove the domain from the existing domain set. + const newDomains = [...(currData?.listDomains?.domains || []).filter((domain) => domain.urn !== urn)]; + + // Write our data back to the cache. + client.writeQuery({ + query: ListDomainsDocument, + variables: { + input: { + start: (page - 1) * pageSize, + count: pageSize, + parentDomain, + }, + }, + data: { + listDomains: { + start: currData?.listDomains?.start || 0, + count: (currData?.listDomains?.count || 1) - 1, + total: (currData?.listDomains?.total || 1) - 1, + domains: newDomains, + }, + }, + }); +}; + +export function useUpdateDomainEntityDataOnChange(entityData: GenericEntityProperties | null, entityType: EntityType) { + const { setEntityData } = useDomainsContext(); + const previousEntityData = usePrevious(entityData); + + useEffect(() => { + if (EntityType.Domain === entityType && !isEqual(entityData, previousEntityData)) { + setEntityData(entityData); + } + }); +} + +export function useSortedDomains(domains?: Array, sortBy?: 'displayName') { + const entityRegistry = useEntityRegistry(); + if (!domains || !sortBy) return domains; + return [...domains].sort((a, b) => { + const nameA = entityRegistry.getDisplayName(EntityType.Domain, a) || ''; + const nameB = entityRegistry.getDisplayName(EntityType.Domain, b) || ''; + return nameA.localeCompare(nameB); + }); +} + +export function getParentDomains(domain: T, entityRegistry: EntityRegistry) { + const props = entityRegistry.getGenericEntityProperties(EntityType.Domain, domain); + return props?.parentDomains?.domains ?? 
[]; +} diff --git a/datahub-web-react/src/app/entity/Entity.tsx b/datahub-web-react/src/app/entity/Entity.tsx index c56c97454a1d57..37a63e9971ae00 100644 --- a/datahub-web-react/src/app/entity/Entity.tsx +++ b/datahub-web-react/src/app/entity/Entity.tsx @@ -47,6 +47,7 @@ export enum IconStyleType { /** * A standard set of Entity Capabilities that span across entity types. + * Note: Must be kept in sync with V2 EntityCapabilityType. */ export enum EntityCapabilityType { /** @@ -74,13 +75,25 @@ export enum EntityCapabilityType { */ SOFT_DELETE, /** - * Assigning a role to an entity. Currently only supported for users. + * Run tests against an entity + */ + TEST, + /** + * Add roles to the entity */ ROLES, /** * Assigning the entity to a data product */ DATA_PRODUCTS, + /** + * Health status of an entity + */ + HEALTH, + /** + * Lineage information of an entity + */ + LINEAGE, /** * Assigning Business Attribute to a entity */ @@ -153,7 +166,7 @@ export interface Entity { * * TODO: Explore using getGenericEntityProperties for rendering profiles. 
*/ - renderSearch: (result: SearchResult) => JSX.Element; + renderSearch: (result: SearchResult, previewType?: PreviewType, onCardClick?: (any: any) => any) => JSX.Element; /** * Constructs config to add entity to lineage viz diff --git a/datahub-web-react/src/app/entity/EntityRegistry.tsx b/datahub-web-react/src/app/entity/EntityRegistry.tsx index 827f0e6692442e..7aed83440f3ab3 100644 --- a/datahub-web-react/src/app/entity/EntityRegistry.tsx +++ b/datahub-web-react/src/app/entity/EntityRegistry.tsx @@ -151,14 +151,19 @@ export default class EntityRegistry { ); } - renderSearchResult(type: EntityType, searchResult: SearchResult): JSX.Element { + renderSearchResult( + type: EntityType, + searchResult: SearchResult, + previewType?: PreviewType, + onCardClick?: (any: any) => any, + ): JSX.Element { const entity = validatedGet(type, this.entityTypeToEntity); const genericEntityData = entity.getGenericEntityProperties(searchResult.entity); return ( - {entity.renderSearch(searchResult)} + {entity.renderSearch(searchResult, previewType, onCardClick)} ); @@ -214,6 +219,7 @@ export default class EntityRegistry { siblingPlatforms: genericEntityProperties?.siblingPlatforms, fineGrainedLineages, siblings: genericEntityProperties?.siblings, + siblingsSearch: genericEntityProperties?.siblingsSearch, schemaMetadata: genericEntityProperties?.schemaMetadata, inputFields: genericEntityProperties?.inputFields, canEditLineage: genericEntityProperties?.privileges?.canEditLineage, diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx index abf8e732c47d61..cca6b0e60d22dd 100644 --- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx +++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx @@ -240,7 +240,7 @@ export class DatasetEntity implements Entity { { component: SidebarSiblingsSection, display: { - visible: (_, dataset: GetDatasetQuery) => (dataset?.dataset?.siblings?.siblings?.length || 
0) > 0, + visible: (_, dataset: GetDatasetQuery) => (dataset?.dataset?.siblingsSearch?.total || 0) > 0, }, }, { diff --git a/datahub-web-react/src/app/entity/dataset/profile/OperationsTab.tsx b/datahub-web-react/src/app/entity/dataset/profile/OperationsTab.tsx index d3371d3790d028..8896167469f9bd 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/OperationsTab.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/OperationsTab.tsx @@ -137,7 +137,10 @@ export const OperationsTab = () => { const [page, setPage] = useState(1); // Fetch data across all siblings. - const allUrns = [urn, ...(entityData?.siblings?.siblings || []).map((sibling) => sibling?.urn).filter(notEmpty)]; + const allUrns = [ + urn, + ...(entityData?.siblingsSearch?.searchResults || []).map((sibling) => sibling.entity.urn).filter(notEmpty), + ]; const loadings: boolean[] = []; const datas: GetDatasetRunsQuery[] = []; allUrns.forEach((entityUrn) => { diff --git a/datahub-web-react/src/app/entity/dataset/profile/__tests__/Schema.test.tsx b/datahub-web-react/src/app/entity/dataset/profile/__tests__/Schema.test.tsx index 2cb155d43d302b..19917d23b9eccd 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/__tests__/Schema.test.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/__tests__/Schema.test.tsx @@ -11,7 +11,7 @@ import { } from '../stories/sampleSchema'; import { mocks } from '../../../../../Mocks'; import { SchemaTab } from '../../../shared/tabs/Dataset/Schema/SchemaTab'; -import EntityContext from '../../../shared/EntityContext'; +import { EntityContext } from '../../../shared/EntityContext'; import { EntityType, SchemaMetadata } from '../../../../../types.generated'; import { SchemaRow } from '../../../shared/tabs/Dataset/Schema/components/SchemaRow'; diff --git a/datahub-web-react/src/app/entity/dataset/profile/stats/historical/charts/StatChart.tsx b/datahub-web-react/src/app/entity/dataset/profile/stats/historical/charts/StatChart.tsx index 
ac9d9f77cce2a3..a6d2d2b4c855c6 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/stats/historical/charts/StatChart.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/stats/historical/charts/StatChart.tsx @@ -73,7 +73,7 @@ export default function StatChart({ title, values, tickInterval: interval, dateR return ( <>
- + {chartData.title} diff --git a/datahub-web-react/src/app/entity/domain/preview/DomainEntitiesSnippet.tsx b/datahub-web-react/src/app/entity/domain/preview/DomainEntitiesSnippet.tsx index 6d36964004d64f..a730d7ed0d233b 100644 --- a/datahub-web-react/src/app/entity/domain/preview/DomainEntitiesSnippet.tsx +++ b/datahub-web-react/src/app/entity/domain/preview/DomainEntitiesSnippet.tsx @@ -2,6 +2,7 @@ import { DatabaseOutlined, FileDoneOutlined } from '@ant-design/icons'; import { VerticalDivider } from '@remirror/react'; import React from 'react'; import styled from 'styled-components'; +import { useHoverEntityTooltipContext } from '@src/app/recommendations/HoverEntityTooltipContext'; import { SearchResultFields_Domain_Fragment } from '../../../../graphql/search.generated'; import { ANTD_GRAY_V2 } from '../../shared/constants'; import DomainIcon from '../../../domain/DomainIcon'; @@ -12,6 +13,7 @@ const Wrapper = styled.div` font-size: 12px; display: flex; align-items: center; + line-height: 20px; svg { margin-right: 4px; @@ -29,14 +31,18 @@ interface Props { } export default function DomainEntitiesSnippet({ domain }: Props) { - const entityCount = domain.entities?.total || 0; + const { entityCount } = useHoverEntityTooltipContext(); const subDomainCount = domain.children?.total || 0; const dataProductCount = domain.dataProducts?.total || 0; return ( - {entityCount} {entityCount === 1 ? 'entity' : 'entities'} - + {!!entityCount && ( + <> + {entityCount} {entityCount === 1 ? 
'entity' : 'entities'} + + + )} {subDomainCount} {pluralize(subDomainCount, 'sub-domain')} {dataProductCount} {pluralize(dataProductCount, 'data product')} diff --git a/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx b/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx index 7666eb04612e5c..db1ec3355e726d 100644 --- a/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx +++ b/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx @@ -36,7 +36,7 @@ export const AddGroupMembersModal = ({ urn, open, onCloseModal, onSubmit }: Prop const [addGroupMembersMutation] = useAddGroupMembersMutation(); const [userSearch, { data: userSearchData }] = useGetSearchResultsLazyQuery(); const searchResults = userSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; - const [recommendedData] = useGetRecommendations([EntityType.CorpUser]); + const { recommendedData } = useGetRecommendations([EntityType.CorpUser]); const inputEl = useRef(null); const handleUserSearch = (text: string) => { diff --git a/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx b/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx index de139d37caeedd..95ac2e67872ac7 100644 --- a/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx +++ b/datahub-web-react/src/app/entity/group/GroupOwnerSideBarSection.tsx @@ -11,6 +11,13 @@ const TITLE = 'Owners'; const SectionWrapper = styled.div``; +const OwnersWrapper = styled.div` + display: flex; + gap: 6px; + flex-wrap: wrap; + margin-bottom: 8px; +`; + const AddOwnerButton = styled(Button)``; type Props = { @@ -30,10 +37,12 @@ export default function GroupOwnerSideBarSection({ urn, ownership, refetch }: Pr {ownership?.owners?.length || ''} - {ownership && - ownership?.owners?.map((owner) => ( - - ))} + + {ownership && + ownership?.owners?.map((owner) => ( + + ))} + {ownersEmpty && ( No group owners added yet. 
)} diff --git a/datahub-web-react/src/app/entity/query/QueryEntity.tsx b/datahub-web-react/src/app/entity/query/QueryEntity.tsx new file mode 100644 index 00000000000000..aa97fe26fd63b2 --- /dev/null +++ b/datahub-web-react/src/app/entity/query/QueryEntity.tsx @@ -0,0 +1,97 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import { TYPE_ICON_CLASS_NAME } from '@app/entityV2/shared/components/subtypes'; +import { ConsoleSqlOutlined } from '@ant-design/icons'; +import { useGetQueryQuery } from '@graphql/query.generated'; +import { DataPlatform, EntityType, QueryEntity as Query } from '@types'; +import * as React from 'react'; +import { Entity, IconStyleType } from '../Entity'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; + +/** + * Definition of the DataHub DataPlatformInstance entity. + * Most of this still needs to be filled out. + */ +export class QueryEntity implements Entity { + type: EntityType = EntityType.Query; + + icon = (fontSize?: number, _styleType?: IconStyleType, color?: string) => { + return ( + + ); + }; + + isSearchEnabled = () => false; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'query'; + + getEntityName = () => 'Query'; + + getCollectionName = () => 'Queries'; + + useEntityQuery = useGetQueryQuery; + + renderProfile = (_urn: string) => { + return <>; + }; + + getOverridePropertiesFromEntity = (query?: Query | null): GenericEntityProperties => { + return { + name: query && this.displayName(query), + platform: query?.platform, + }; + }; + + renderEmbeddedProfile = (_: string) => <>; + + renderPreview = () => { + return <>; + }; + + renderSearch = () => { + return <>; + }; + + getLineageVizConfig = (query: Query) => { + // TODO: Set up types better here + const platform: DataPlatform | undefined = (query as any)?.queryPlatform; + return { + urn: query.urn, + name: query.properties?.name || query.urn, 
+ type: EntityType.Query, + icon: platform?.properties?.logoUrl || undefined, + platform: platform || undefined, + }; + }; + + displayName = (data: Query) => { + return data?.properties?.name || (data?.properties?.source === 'SYSTEM' && 'System Query') || data?.urn; + }; + + getGenericEntityProperties = (data: Query) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([]); + }; + + getGraphName = () => { + return 'query'; + }; +} diff --git a/datahub-web-react/src/app/entity/shared/EntityContext.ts b/datahub-web-react/src/app/entity/shared/EntityContext.ts index abc7fcfa6cced0..74b92e17a2531b 100644 --- a/datahub-web-react/src/app/entity/shared/EntityContext.ts +++ b/datahub-web-react/src/app/entity/shared/EntityContext.ts @@ -1,9 +1,9 @@ import React, { useContext } from 'react'; import { EntityType } from '../../../types.generated'; -import { useIsSeparateSiblingsMode } from './siblingUtils'; +import { shouldEntityBeTreatedAsPrimary, useIsSeparateSiblingsMode } from './siblingUtils'; import { EntityContextType, UpdateEntityType } from './types'; -const EntityContext = React.createContext({ +export const EntityContext = React.createContext({ urn: '', entityType: EntityType.Dataset, entityData: null, @@ -14,6 +14,7 @@ const EntityContext = React.createContext({ refetch: () => Promise.resolve({}), lineage: undefined, dataNotCombinedWithSiblings: null, + entityState: { shouldRefetchContents: false, setShouldRefetchContents: () => {} }, }); export default EntityContext; @@ -60,8 +61,8 @@ export const useLineageData = () => { export const useMutationUrn = () => { const { urn, entityData } = useContext(EntityContext); const isHideSiblingMode = useIsSeparateSiblingsMode(); - if (!entityData?.siblings || entityData?.siblings?.isPrimary || isHideSiblingMode) { + if (!entityData?.siblingsSearch?.searchResults || 
shouldEntityBeTreatedAsPrimary(entityData) || isHideSiblingMode) { return urn; } - return entityData?.siblings?.siblings?.[0]?.urn || urn; + return entityData?.siblingsSearch?.searchResults?.[0].entity.urn || urn; }; diff --git a/datahub-web-react/src/app/entity/shared/__tests__/siblingsUtils.test.ts b/datahub-web-react/src/app/entity/shared/__tests__/siblingsUtils.test.ts index f3b857ab2ef3be..3fd9dd32454c3c 100644 --- a/datahub-web-react/src/app/entity/shared/__tests__/siblingsUtils.test.ts +++ b/datahub-web-react/src/app/entity/shared/__tests__/siblingsUtils.test.ts @@ -169,6 +169,11 @@ const datasetPrimaryWithSiblings = { isPrimary: true, siblings: [datasetUnprimary], }, + siblingsSearch: { + count: 1, + total: 1, + searchResults: [{ entity: datasetUnprimary, matchedFields: [] }], + }, }; const datasetUnprimaryWithPrimarySiblings = { @@ -177,6 +182,11 @@ const datasetUnprimaryWithPrimarySiblings = { isPrimary: false, siblings: [datasetPrimary], }, + siblingsSearch: { + count: 1, + total: 1, + searchResults: [{ entity: datasetPrimary, matchedFields: [] }], + }, }; const datasetUnprimaryWithNoPrimarySiblings = { @@ -185,6 +195,11 @@ const datasetUnprimaryWithNoPrimarySiblings = { isPrimary: false, siblings: [datasetUnprimary], }, + siblingsSearch: { + count: 1, + total: 1, + searchResults: [{ entity: datasetUnprimary, matchedFields: [] }], + }, }; describe('siblingUtils', () => { diff --git a/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner/ExpandedOwner.tsx b/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner/ExpandedOwner.tsx index 9bd4edd0d19d3a..dac382ab8403ef 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner/ExpandedOwner.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner/ExpandedOwner.tsx @@ -9,11 +9,12 @@ import { useEntityRegistry } from '../../../../../useEntityRegistry'; import analytics, { EventType, EntityActionType } from 
'../../../../../analytics'; import { useEntityData } from '../../../EntityContext'; import OwnerContent from './OwnerContent'; +import { useEmbeddedProfileLinkProps } from '../../../../../shared/useEmbeddedProfileLinkProps'; const OwnerTag = styled(Tag)` + margin: 0; padding: 2px; padding-right: 6px; - margin-bottom: 8px; display: inline-flex; align-items: center; `; @@ -30,6 +31,7 @@ type Props = { export const ExpandedOwner = ({ entityUrn, owner, hidePopOver, refetch, readOnly, fontSize }: Props) => { const entityRegistry = useEntityRegistry(); const { entityType } = useEntityData(); + const linkProps = useEmbeddedProfileLinkProps(); const [removeOwnerMutation] = useRemoveOwnerMutation(); let name = ''; let ownershipTypeName = ''; @@ -94,7 +96,7 @@ export const ExpandedOwner = ({ entityUrn, owner, hidePopOver, refetch, readOnly {readOnly && } {!readOnly && ( - + (null); const dataProductRelationships = entityData?.dataProduct?.relationships; - const siblingUrns: string[] = entityData?.siblings?.siblings?.map((sibling) => sibling?.urn || '') || []; + const siblingUrns: string[] = + entityData?.siblingsSearch?.searchResults?.map((sibling) => sibling.entity.urn || '') || []; useEffect(() => { if (dataProductRelationships && dataProductRelationships.length > 0) { diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx index e57666471df1a6..2e2962ed865edc 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx @@ -148,10 +148,12 @@ export const EditOwnersModal = ({ // User and group dropdown search results! 
const [userSearch, { data: userSearchData }] = useGetSearchResultsLazyQuery(); const [groupSearch, { data: groupSearchData }] = useGetSearchResultsLazyQuery(); - const userSearchResults = userSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; - const groupSearchResults = groupSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; + const userSearchResults: Array = + userSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; + const groupSearchResults: Array = + groupSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; const combinedSearchResults = [...userSearchResults, ...groupSearchResults]; - const [recommendedData] = useGetRecommendations([EntityType.CorpGroup, EntityType.CorpUser]); + const { recommendedData } = useGetRecommendations([EntityType.CorpGroup, EntityType.CorpUser]); const inputEl = useRef(null); // Invokes the search API as the owner types diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/OwnershipTypeSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/OwnershipTypeSection.tsx index 10e5f4d87764b3..03c25193d862ee 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/OwnershipTypeSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/OwnershipTypeSection.tsx @@ -25,6 +25,7 @@ const OwnersContainer = styled.div` flex-direction: row; flex-wrap: wrap; margin-top: 8px; + gap: 6px; `; interface Props { diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx index 17e09045196a43..29073d6164f827 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx +++ 
b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx @@ -36,7 +36,7 @@ export const SidebarSiblingsSection = () => { ); } - const siblingEntities = entityData?.siblings?.siblings || []; + const siblingEntities = entityData?.siblingsSearch?.searchResults?.map((r) => r.entity) || []; const entityDataWithoutSiblings = stripSiblingsFromEntity(dataNotCombinedWithSiblings.dataset); const allSiblingsInGroup = showSeparateSiblings diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts b/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts index bb35755826a899..ac074cc54ae348 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts +++ b/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts @@ -60,9 +60,12 @@ export function getDataForEntityType({ }; } - if (anyEntityData?.siblings?.siblings?.filter((sibling) => sibling.exists).length > 0 && !isHideSiblingMode) { - const genericSiblingProperties: GenericEntityProperties[] = anyEntityData?.siblings?.siblings?.map((sibling) => - getDataForEntityType({ data: sibling, getOverrideProperties: () => ({}) }), + if ( + anyEntityData?.siblingsSearch?.searchResults?.filter((sibling) => sibling.entity.exists).length > 0 && + !isHideSiblingMode + ) { + const genericSiblingProperties: GenericEntityProperties[] = anyEntityData?.siblingsSearch?.searchResults?.map( + (sibling) => getDataForEntityType({ data: sibling.entity, getOverrideProperties: () => ({}) }), ); const allPlatforms = anyEntityData.siblings.isPrimary diff --git a/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx b/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx index df928fc408de60..fc3f015ec4d938 100644 --- a/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx +++ b/datahub-web-react/src/app/entity/shared/embed/EmbeddedProfile.tsx @@ -5,7 +5,7 @@ import React from 'react'; import styled from 
'styled-components'; import { EntityType, Exact } from '../../../../types.generated'; import useGetDataForProfile from '../containers/profile/useGetDataForProfile'; -import EntityContext from '../EntityContext'; +import { EntityContext } from '../EntityContext'; import { GenericEntityProperties } from '../types'; import EmbeddedHeader from './EmbeddedHeader'; import { SidebarAboutSection } from '../containers/profile/sidebar/AboutSection/SidebarAboutSection'; diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx index b20aa26930fe92..61a70c145bd115 100644 --- a/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx @@ -4,11 +4,13 @@ import Form from './Form'; import { ANTD_GRAY_V2 } from '../constants'; import ProfileSidebar from '../containers/profile/sidebar/ProfileSidebar'; import { useEntityRegistry } from '../../../useEntityRegistry'; -import EntityContext, { useEntityContext } from '../EntityContext'; +import { EntityContext, useEntityContext } from '../EntityContext'; import EntityInfo from '../containers/profile/sidebar/EntityInfo/EntityInfo'; import { useEntityFormContext } from './EntityFormContext'; import ProgressBar from './ProgressBar'; +import { useIsThemeV2 } from '../../../useIsThemeV2'; + const ContentWrapper = styled.div` background-color: ${ANTD_GRAY_V2[1]}; max-height: 100%; @@ -34,6 +36,15 @@ export default function FormByEntity({ formUrn }: Props) { const { entityType } = useEntityContext(); const entityRegistry = useEntityRegistry(); const sidebarSections = entityRegistry.getSidebarSections(selectedEntity?.type || entityType); + const isV2 = useIsThemeV2(); + + // Used for v2 - removes repeated entity header (we use EntityInfo in this component) + // SidebarEntityHeader is always the first index in sidebarSections, so remove it here + // TODO (OBS-677): remove 
this logic once we get form info into V2 sidebar + const cleanedSidebarSections = sidebarSections.slice(1); + + // Conditional sections based on theme version + const sections = isV2 ? cleanedSidebarSections : sidebarSections; return ( }} backgroundColor="white" alignLeft diff --git a/datahub-web-react/src/app/entity/shared/entityForm/__tests__/Form.test.tsx b/datahub-web-react/src/app/entity/shared/entityForm/__tests__/Form.test.tsx index 750ef71acbd120..88507b95513fd2 100644 --- a/datahub-web-react/src/app/entity/shared/entityForm/__tests__/Form.test.tsx +++ b/datahub-web-react/src/app/entity/shared/entityForm/__tests__/Form.test.tsx @@ -1,7 +1,7 @@ import { MockedProvider } from '@apollo/client/testing'; import React from 'react'; import { render, waitFor } from '@testing-library/react'; -import EntityContext from '../../EntityContext'; +import { EntityContext } from '../../EntityContext'; import { mockEntityDataWithFieldPrompts, mockEntityData } from '../mocks'; import { EntityType } from '../../../../../types.generated'; import Form from '../Form'; diff --git a/datahub-web-react/src/app/entity/shared/siblingUtils.ts b/datahub-web-react/src/app/entity/shared/siblingUtils.ts index aa9e4bcb5e46e1..5633da034a829f 100644 --- a/datahub-web-react/src/app/entity/shared/siblingUtils.ts +++ b/datahub-web-react/src/app/entity/shared/siblingUtils.ts @@ -1,8 +1,19 @@ +import { useEntityData } from '@app/entity/shared/EntityContext'; import merge from 'deepmerge'; -import { unionBy, keyBy, values } from 'lodash'; -import { useLocation } from 'react-router-dom'; +import { keyBy, unionBy, values } from 'lodash'; import * as QueryString from 'query-string'; -import { Dataset, Entity, Maybe, SiblingProperties } from '../../../types.generated'; +import { useLocation } from 'react-router-dom'; +import { + Dataset, + Entity, + Health, + HealthStatus, + HealthStatusType, + Maybe, + ScrollResults, + Operation, + SiblingProperties, +} from '../../../types.generated'; import { 
GenericEntityProperties } from './types'; import { useIsShowSeparateSiblingsEnabled } from '../../useAppConfig'; import { downgradeV2FieldPath } from '../dataset/profile/schema/utils/utils'; @@ -11,9 +22,11 @@ export function stripSiblingsFromEntity(entity: any) { return { ...entity, siblings: null, + siblingsSearch: null, siblingPlatforms: null, }; } + function cleanHelper(obj, visited) { if (visited.has(obj)) return obj; visited.add(obj); @@ -25,11 +38,15 @@ function cleanHelper(obj, visited) { } if ((v && typeof v === 'object' && !Object.keys(v).length) || v === null || v === undefined || v === '') { if (Array.isArray(object)) { - object.splice(Number(k), 1); + // do nothing } else if (Object.getOwnPropertyDescriptor(object, k)?.configurable) { // TODO(hsheth2): Not sure why we needed to add the above "configurable" check. // However, I was getting errors when it was not present in dev mode (but not in prod mode). - delete object[k]; + try { + delete object[k]; + } catch (e) { + console.warn('error deleting key', k, 'from object', object, e); + } } } }); @@ -94,10 +111,18 @@ const mergeAssertions = (destinationArray, sourceArray, _options) => { return unionBy(destinationArray, sourceArray, 'urn'); }; +const mergeIncidents = (destinationArray, sourceArray, _options) => { + return unionBy(destinationArray, sourceArray, 'urn'); +}; + const mergeProperties = (destinationArray, sourceArray, _options) => { return unionBy(destinationArray, sourceArray, 'key'); }; +const mergeStructuredProperties = (destinationArray, sourceArray, _options) => { + return unionBy(sourceArray, destinationArray, 'structuredProperty.urn'); +}; + const mergeOwners = (destinationArray, sourceArray, _options) => { return unionBy(destinationArray, sourceArray, 'owner.urn'); }; @@ -106,6 +131,112 @@ const mergeFields = (destinationArray, sourceArray, _options) => { return mergeArrayOfObjectsByKey(destinationArray, sourceArray, 'fieldPath', true); }; +const mergeForms = (destinationArray, 
sourceArray, _options) => { + return unionBy(sourceArray, destinationArray, 'form.urn'); +}; + +const mergeLastOperations = ( + destinationArray: Pick[], + sourceArray: Pick[], + _options, +) => { + // return whichever operation is more recent + // const lastUpdated = (operations?.length && operations[0].lastUpdatedTimestamp) || 0; + const destinationLastUpdated = (destinationArray?.length && destinationArray[0]?.lastUpdatedTimestamp) || 0; + const sourceLastUpdated = (sourceArray?.length && sourceArray[0]?.lastUpdatedTimestamp) || 0; + // return whichever operation is more recent + return destinationLastUpdated > sourceLastUpdated ? destinationArray : sourceArray; +}; + +const mergeSubtypes = (destinationArray: string[], sourceArray: string[], _options) => { + const seen = new Set(); + const result: string[] = []; + [...sourceArray, ...destinationArray].forEach((subtype) => { + if (!seen.has(subtype)) { + seen.add(subtype); + result.push(subtype); + } + }); + return result; +}; + +const mergeHealthStatus = (destStatus?: HealthStatus, sourceStatus?: HealthStatus): HealthStatus => { + if (destStatus === HealthStatus.Fail || sourceStatus === HealthStatus.Fail) { + return HealthStatus.Fail; + } + if (destStatus === HealthStatus.Warn || sourceStatus === HealthStatus.Warn) { + return HealthStatus.Warn; + } + return HealthStatus.Pass; +}; + +const mergeHealthMessage = (type: HealthStatusType, mergedStatus: HealthStatus): string => { + if (mergedStatus === HealthStatus.Fail) { + switch (type) { + case HealthStatusType.Assertions: + return 'See failing assertions →'; + case HealthStatusType.Incidents: + return 'See active incidents →'; + default: + return 'See failed checks →'; + } + } + if (mergedStatus === HealthStatus.Warn) { + switch (type) { + case HealthStatusType.Assertions: + return 'Some assertions have problems.'; + default: + return 'Some checks have problems.'; + } + } + if (mergedStatus === HealthStatus.Pass) { + switch (type) { + case 
HealthStatusType.Assertions: + return 'All assertions are passing'; + case HealthStatusType.Incidents: + return 'No active incidents'; + default: + return 'All checks are passing'; + } + } + return 'All checks are passing'; +}; + +// Merge entity health across siblings. +const mergeHealth = ( + destinationArray: Maybe | undefined, + sourceArray: Maybe | undefined, + _options, +) => { + const viewedHealthType = new Set(); + return [...(sourceArray || []), ...(destinationArray || [])] + .map((source) => { + if (viewedHealthType.has(source.type)) { + return null; + } + + viewedHealthType.add(source.type); + + const { type, status, causes } = source; + + const destHealth = destinationArray?.find((dest) => dest.type === type); + const destStatus = destHealth?.status; + const destCauses = destHealth?.causes; + + const finalStatus = mergeHealthStatus(destStatus, status); + const finalMessage = mergeHealthMessage(type, finalStatus); + const finalCauses = [...(causes || []), ...(destCauses || [])]; + + return { + type, + status: finalStatus, + message: finalMessage, + causes: finalCauses, + }; + }) + .filter((health) => health !== null); +}; + function getArrayMergeFunction(key) { switch (key) { case 'tags': @@ -118,15 +249,45 @@ function getArrayMergeFunction(key) { return mergeProperties; case 'owners': return mergeOwners; + case 'incidents': + return mergeIncidents; case 'fields': return mergeFields; case 'editableSchemaFieldInfo': return mergeFields; + case 'health': + return mergeHealth; + case 'typeNames': + return mergeSubtypes; + case 'lastOperation': + return mergeLastOperations; + case 'completedForms': + case 'incompleteForms': + case 'verifications': + return mergeForms; default: return undefined; } } +// needs its own merge function because "properties" exists as a key elsewhere +function structuredPropertiesMerge(isPrimary, key) { + if (key === 'properties') { + return (secondary, primary) => { + return merge(secondary, primary, { + arrayMerge: 
mergeStructuredProperties, + customMerge: customMerge.bind({}, isPrimary), + }); + }; + } + return (secondary, primary) => { + return merge(secondary, primary, { + arrayMerge: combineMerge, + customMerge: customMerge.bind({}, isPrimary), + }); + }; +} + function customMerge(isPrimary, key) { if (key === 'upstream' || key === 'downstream') { return (_secondary, primary) => primary; @@ -135,17 +296,56 @@ function customMerge(isPrimary, key) { if (key === 'platform' || key === 'siblings') { return (secondary, primary) => (isPrimary ? primary : secondary); } - if (key === 'forms') { + if (key === 'testResults') { return (_secondary, primary) => primary; } + if (key === 'activeIncidents') { + return (secondary, primary) => ({ ...primary, total: primary.total + secondary.total }); + } + if (key === 'lastModified') { + return (secondary, primary) => (secondary?.time || primary?.time < 0 || 0 ? secondary : primary); + } + if (key === 'statsSummary') { + return (secondary, primary) => { + if (!primary) { + return secondary; + } + if (!secondary) { + return primary; + } + return { + ...primary, + queryCountLast30Days: primary?.queryCountLast30Days || secondary?.queryCountLast30Days, + queryCountPercentileLast30Days: + primary?.queryCountPercentileLast30Days || secondary?.queryCountPercentileLast30Days, + uniqueUserCountLast30Days: primary?.uniqueUserCountLast30Days || secondary?.uniqueUserCountLast30Days, + uniqueUserPercentileLast30Days: + primary?.uniqueUserPercentileLast30Days || secondary?.uniqueUserPercentileLast30Days, + }; + }; + } + if (key === 'structuredProperties') { + return (secondary, primary) => { + return merge(secondary, primary, { + arrayMerge: combineMerge, + customMerge: structuredPropertiesMerge.bind({}, isPrimary), + }); + }; + } if ( key === 'tags' || key === 'terms' || key === 'assertions' || key === 'customProperties' || key === 'owners' || + key === 'incidents' || key === 'fields' || - key === 'editableSchemaFieldInfo' + key === 
'editableSchemaFieldInfo' || + key === 'health' || + key === 'typeNames' || + key === 'completedForms' || + key === 'incompleteForms' || + key === 'verifications' ) { return (secondary, primary) => { return merge(secondary, primary, { @@ -174,13 +374,14 @@ export const getEntitySiblingData = (baseEntity: T): Maybe }; // should the entity's metadata win out against its siblings? -export const shouldEntityBeTreatedAsPrimary = (extractedBaseEntity: { siblings?: SiblingProperties | null }) => { - const siblingAspect = extractedBaseEntity?.siblings; - - const siblingsList = siblingAspect?.siblings || []; +export const shouldEntityBeTreatedAsPrimary = (extractedBaseEntity: { + siblings?: SiblingProperties | null; + siblingsSearch?: ScrollResults | null; +}) => { + const siblingsList = extractedBaseEntity?.siblingsSearch?.searchResults?.map((r) => r.entity) || []; // if the entity is marked as primary, take its metadata first - const isPrimarySibling = !!siblingAspect?.isPrimary; + const isPrimarySibling = !!extractedBaseEntity?.siblings?.isPrimary; // if no entity in the cohort is primary, just have the entity whos urn is navigated // to be primary @@ -192,15 +393,12 @@ export const shouldEntityBeTreatedAsPrimary = (extractedBaseEntity: { siblings?: }; const combineEntityWithSiblings = (entity: GenericEntityProperties) => { - // eslint-disable-next-line @typescript-eslint/dot-notation - const siblingAspect = entity.siblings; - if ((siblingAspect?.siblings || []).length === 0) { + const siblings = entity.siblingsSearch?.searchResults?.map((r) => r.entity) || []; + + if (!entity?.siblingsSearch?.count || !siblings.length) { return entity; } - // eslint-disable-next-line @typescript-eslint/dot-notation - const siblings = siblingAspect?.siblings || []; - const isPrimary = shouldEntityBeTreatedAsPrimary(entity); const combinedBaseEntity: any = siblings.reduce( @@ -212,12 +410,35 @@ const combineEntityWithSiblings = (entity: GenericEntityProperties) => { entity, ); + // if a 
key is null in the primary sibling, it will not merge with the secondary even if the secondary is not null + const secondarySibling = isPrimary ? siblings[0] : entity; + Object.keys(secondarySibling).forEach((key) => { + if (combinedBaseEntity[key] === null && secondarySibling[key] !== null) { + combinedBaseEntity[key] = secondarySibling[key]; + } + }); + // Force the urn of the combined entity to the current entity urn. combinedBaseEntity.urn = entity.urn; + combinedBaseEntity.properties = { + ...combinedBaseEntity.properties, + externalUrl: entity?.properties?.externalUrl, + }; + return combinedBaseEntity; }; +export function combineEntityData(entityValue: T, siblingValue: T, isPrimary: boolean) { + if (!entityValue) return siblingValue; + if (!siblingValue) return entityValue; + + return merge(clean(isPrimary ? siblingValue : entityValue), clean(isPrimary ? entityValue : siblingValue), { + arrayMerge: combineMerge, + customMerge: customMerge.bind({}, isPrimary), + }); +} + export const combineEntityDataWithSiblings = (baseEntity: T): T => { if (!baseEntity) { return baseEntity; @@ -225,9 +446,7 @@ export const combineEntityDataWithSiblings = (baseEntity: T): T => { const baseEntityKey = Object.keys(baseEntity)[0]; const extractedBaseEntity = baseEntity[baseEntityKey]; - // eslint-disable-next-line @typescript-eslint/dot-notation - const siblingAspect = extractedBaseEntity.siblings; - if ((siblingAspect?.siblings || []).length === 0) { + if (!extractedBaseEntity?.siblingsSearch?.count) { return baseEntity; } @@ -254,7 +473,8 @@ export function combineSiblingsForEntity(entity: Entity, visitedSiblingUrns: Set if (visitedSiblingUrns.has(entity.urn)) return { skipped: true }; const combinedEntity: CombinedEntity = { entity: combineEntityWithSiblings({ ...entity }) }; - const siblings = (combinedEntity.entity as GenericEntityProperties).siblings?.siblings ?? 
[]; + const siblings = + (combinedEntity.entity as GenericEntityProperties).siblingsSearch?.searchResults.map((r) => r.entity) ?? []; const isPrimary = (combinedEntity.entity as GenericEntityProperties).siblings?.isPrimary; const siblingUrns = siblings.map((sibling) => sibling?.urn); @@ -289,3 +509,16 @@ export function useIsSeparateSiblingsMode() { return showSeparateSiblings || params[SEPARATE_SIBLINGS_URL_PARAM] === 'true'; } + +/** + * `siblingPlatforms` in GenericEntityProperties always puts the primary first. + * This method allows getting sibling platforms without considering the primary. + */ +export function useGetSiblingPlatforms() { + const { entityData } = useEntityData(); + const isPrimary = entityData?.siblings?.isPrimary ?? false; + return { + entityPlatform: isPrimary ? entityData?.siblingPlatforms?.[0] : entityData?.siblingPlatforms?.[1], + siblingPlatform: isPrimary ? entityData?.siblingPlatforms?.[1] : entityData?.siblingPlatforms?.[0], + }; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertyTypeLabel.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertyTypeLabel.tsx index b4ffce294d136a..775680cb1eb294 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertyTypeLabel.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertyTypeLabel.tsx @@ -10,22 +10,29 @@ import { TypeData } from '../../../Properties/types'; type Props = { type: TypeData; dataType?: DataTypeEntity; + displayTransparent?: boolean; }; -export const PropertyTypeBadge = styled(Badge)` +export const PropertyTypeBadge = styled(Badge)<{ displayTransparent?: boolean }>` margin: 4px 0 4px 8px; &&& .ant-badge-count { + ${(props) => + props.displayTransparent + ? 
` + background-color: transparent; + ` + : ` background-color: ${ANTD_GRAY[1]}; color: ${ANTD_GRAY_V2[8]}; border: 1px solid ${ANTD_GRAY_V2[6]}; + `} font-size: 12px; font-weight: 500; - height: 22px; font-family: 'Manrope'; } `; -export default function PropertyTypeLabel({ type, dataType }: Props) { +export default function PropertyTypeLabel({ type, dataType, displayTransparent }: Props) { // if unable to match type to DataHub, display native type info by default const { nativeDataType } = type; const nativeFallback = type.type === SchemaFieldDataType.Null; @@ -35,5 +42,5 @@ export default function PropertyTypeLabel({ type, dataType }: Props) { dataType?.info?.type || (nativeFallback ? truncate(250, nativeDataType) : type.type); - return ; + return ; } diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/acrylTypes.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/acrylTypes.tsx new file mode 100644 index 00000000000000..8a70a3d87c1478 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/acrylTypes.tsx @@ -0,0 +1,21 @@ +import { Assertion, AssertionType } from '../../../../../../types.generated'; + +export type AssertionStatusSummary = { + passing: number; + failing: number; + erroring: number; + total: number; // Total assertions with at least 1 run. + totalAssertions: number; +}; + +/** + * A group of assertions related by their logical type or category. 
+ */ +export type AssertionGroup = { + name: string; + icon: React.ReactNode; + description?: string; + assertions: Assertion[]; + summary: AssertionStatusSummary; + type: AssertionType; +}; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx index 054331721df364..ea8afce4e99033 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/__tests__/DocumentationTab.test.tsx @@ -5,7 +5,7 @@ import React from 'react'; import { mocks } from '../../../../../../Mocks'; import { EntityType } from '../../../../../../types.generated'; import TestPageContainer from '../../../../../../utils/test-utils/TestPageContainer'; -import EntityContext from '../../../EntityContext'; +import { EntityContext } from '../../../EntityContext'; import { DocumentationTab } from '../DocumentationTab'; describe('SchemaDescriptionField', () => { diff --git a/datahub-web-react/src/app/entity/shared/tabs/Entity/__tests__/DataJobFlowTab.test.tsx b/datahub-web-react/src/app/entity/shared/tabs/Entity/__tests__/DataJobFlowTab.test.tsx index 765e9e98232c24..2e90dcedd116f5 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Entity/__tests__/DataJobFlowTab.test.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Entity/__tests__/DataJobFlowTab.test.tsx @@ -5,7 +5,7 @@ import { dataJob1, mocks } from '../../../../../../Mocks'; import { EntityType } from '../../../../../../types.generated'; import TestPageContainer from '../../../../../../utils/test-utils/TestPageContainer'; import { getDataForEntityType } from '../../../containers/profile/utils'; -import EntityContext from '../../../EntityContext'; +import { EntityContext } from '../../../EntityContext'; import { DataJobFlowTab } from '../DataJobFlowTab'; describe('DataJobFlowTab', () 
=> { diff --git a/datahub-web-react/src/app/entity/shared/tabs/Incident/incidentUtils.ts b/datahub-web-react/src/app/entity/shared/tabs/Incident/incidentUtils.ts index d7826bdc8907a1..32e15b4cded8e4 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Incident/incidentUtils.ts +++ b/datahub-web-react/src/app/entity/shared/tabs/Incident/incidentUtils.ts @@ -100,6 +100,7 @@ export const updateListIncidentsCache = (client, urn, incident, pageSize) => { }, // Add the missing 'siblings' field with the appropriate data siblings: currData?.entity?.siblings || null, + siblingsSearch: currData?.entity?.siblingsSearch || null, }, }, }); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/AddPropertyButton.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/AddPropertyButton.tsx index cac3e268c1df5e..50e6c64c7463b8 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/AddPropertyButton.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/AddPropertyButton.tsx @@ -1,8 +1,10 @@ import { LoadingOutlined } from '@ant-design/icons'; import { colors, Icon, Input as InputComponent, Text } from '@src/alchemy-components'; import { useUserContext } from '@src/app/context/useUserContext'; +import { REDESIGN_COLORS } from '@src/app/entityV2/shared/constants'; import { getEntityTypesPropertyFilter, getNotHiddenPropertyFilter } from '@src/app/govern/structuredProperties/utils'; import { useEntityRegistry } from '@src/app/useEntityRegistry'; +import { useIsThemeV2 } from '@src/app/useIsThemeV2'; import { PageRoutes } from '@src/conf/Global'; import { useGetSearchResultsForMultipleQuery } from '@src/graphql/search.generated'; import { Dropdown } from 'antd'; @@ -14,9 +16,9 @@ import styled from 'styled-components'; import { useEntityData } from '../../EntityContext'; import EditStructuredPropertyModal from './Edit/EditStructuredPropertyModal'; -const AddButton = styled.div<{ isV1Drawer?: boolean }>` +const AddButton = styled.div<{ 
isThemeV2: boolean; isV1Drawer?: boolean }>` border-radius: 200px; - background-color: #5280e2; + background-color: ${(props) => (props.isThemeV2 ? colors.violet[500] : REDESIGN_COLORS.LINK_HOVER_BLUE)}; width: ${(props) => (props.isV1Drawer ? '24px' : '32px')}; height: ${(props) => (props.isV1Drawer ? '24px' : '32px')}; display: flex; @@ -78,6 +80,7 @@ interface Props { const AddPropertyButton = ({ fieldUrn, refetch, fieldProperties, isV1Drawer }: Props) => { const [searchQuery, setSearchQuery] = useState(''); const { entityData, entityType } = useEntityData(); + const isThemeV2 = useIsThemeV2(); const me = useUserContext(); const entityRegistry = useEntityRegistry(); const [isEditModalVisible, setIsEditModalVisible] = useState(false); @@ -131,19 +134,20 @@ const AddPropertyButton = ({ fieldUrn, refetch, fieldProperties, isV1Drawer }: P ) .map((prop) => { const entity = prop.entity as StructuredPropertyEntity; + const name = entityRegistry.getDisplayName(entity.type, entity); return { label: ( ), key: entity.urn, - name: entity.definition?.displayName || entity.urn, + name: name || entity.urn, }; }), - [data, fieldUrn, fieldPropertiesUrns, entityPropertiesUrns], + [data, fieldUrn, fieldPropertiesUrns, entityPropertiesUrns, entityRegistry], ); const canEditProperties = @@ -206,7 +210,7 @@ const AddPropertyButton = ({ fieldUrn, refetch, fieldProperties, isV1Drawer }: P )} > - + diff --git a/datahub-web-react/src/app/entity/shared/types.ts b/datahub-web-react/src/app/entity/shared/types.ts index ceba5b4bf30eb0..8ef1dbac678ab2 100644 --- a/datahub-web-react/src/app/entity/shared/types.ts +++ b/datahub-web-react/src/app/entity/shared/types.ts @@ -40,6 +40,9 @@ import { ParentDomainsResult, StructuredProperties, Forms, + ScrollResults, + Documentation, + DisplayProperties, } from '../../../types.generated'; import { FetchedEntity } from '../../lineage/types'; @@ -78,6 +81,7 @@ export type GenericEntityProperties = { sourceUrl?: Maybe; sourceRef?: Maybe; 
businessAttributeDataType?: Maybe; + externalUrl?: Maybe; }>; globalTags?: Maybe; glossaryTerms?: Maybe; @@ -91,12 +95,13 @@ export type GenericEntityProperties = { institutionalMemory?: Maybe; schemaMetadata?: Maybe; externalUrl?: Maybe; - // to indicate something is a Stream, View instead of Dataset... etc - entityTypeOverride?: Maybe; + entityTypeOverride?: Maybe; // to indicate something is a Stream, View instead of Dataset... etc /** Dataset specific- TODO, migrate these out */ editableSchemaMetadata?: Maybe; editableProperties?: Maybe; autoRenderAspects?: Maybe>; + lineageUrn?: string; // If set, render this urn's lineage instead if not in separate siblings mode + lineageSiblingIcon?: string; // If set, render this entity in lineage along with the sibling icon and do not separate siblings in the sidebar upstream?: Maybe; downstream?: Maybe; subTypes?: Maybe; @@ -105,12 +110,13 @@ export type GenericEntityProperties = { health?: Maybe>; status?: Maybe; deprecation?: Maybe; + siblings?: Maybe; + siblingsSearch?: Maybe; parentContainers?: Maybe; parentDomains?: Maybe; children?: Maybe; parentNodes?: Maybe; isAChildren?: Maybe; - siblings?: Maybe; siblingPlatforms?: Maybe; lastIngested?: Maybe; inputFields?: Maybe; @@ -119,10 +125,13 @@ export type GenericEntityProperties = { embed?: Maybe; exists?: boolean; origin?: Maybe; + documentation?: Maybe; browsePathV2?: Maybe; inputOutput?: Maybe; forms?: Maybe; parent?: Maybe; + displayProperties?: Maybe; + notes?: Maybe; }; export type GenericEntityUpdate = { @@ -146,6 +155,11 @@ export type UpdateEntityType = ( | undefined, ) => Promise, Record>>; +interface EntityState { + shouldRefetchContents: boolean; + setShouldRefetchContents: (shouldRefetch: boolean) => void; +} + export type EntityContextType = { urn: string; entityType: EntityType; @@ -156,9 +170,10 @@ export type EntityContextType = { updateEntity?: UpdateEntityType | null; routeToTab: (params: { tabName: string; tabParams?: Record; method?: 'push' | 
'replace' }) => void; refetch: () => Promise; - lineage: FetchedEntity | undefined; + lineage?: FetchedEntity | undefined; shouldRefetchEmbeddedListSearch?: boolean; setShouldRefetchEmbeddedListSearch?: React.Dispatch>; + entityState?: EntityState; }; export type SchemaContextType = { diff --git a/datahub-web-react/src/app/entity/shared/useEntityState.ts b/datahub-web-react/src/app/entity/shared/useEntityState.ts new file mode 100644 index 00000000000000..3d891f33dd500f --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/useEntityState.ts @@ -0,0 +1,7 @@ +import { useState } from 'react'; + +export default function useEntityState() { + const [shouldRefetchContents, setShouldRefetchContents] = useState(false); + + return { shouldRefetchContents, setShouldRefetchContents }; +} diff --git a/datahub-web-react/src/app/entity/shared/utils.ts b/datahub-web-react/src/app/entity/shared/utils.ts index 217aaaaf9dde85..db0d7bc4b65660 100644 --- a/datahub-web-react/src/app/entity/shared/utils.ts +++ b/datahub-web-react/src/app/entity/shared/utils.ts @@ -114,9 +114,9 @@ export function getFineGrainedLineageWithSiblings( const fineGrainedLineages = [ ...(entityData?.fineGrainedLineages || entityData?.inputOutput?.fineGrainedLineages || []), ]; - entityData?.siblings?.siblings?.forEach((sibling) => { - if (sibling) { - const genericSiblingProps = getGenericEntityProperties(sibling.type, sibling); + entityData?.siblingsSearch?.searchResults?.forEach((sibling) => { + if (sibling.entity) { + const genericSiblingProps = getGenericEntityProperties(sibling.entity.type, sibling.entity); if (genericSiblingProps && genericSiblingProps.fineGrainedLineages) { fineGrainedLineages.push(...genericSiblingProps.fineGrainedLineages); } diff --git a/datahub-web-react/src/app/entityV2/Access/RoleEntity.tsx b/datahub-web-react/src/app/entityV2/Access/RoleEntity.tsx new file mode 100644 index 00000000000000..6ec79ebfe4b96f --- /dev/null +++ 
b/datahub-web-react/src/app/entityV2/Access/RoleEntity.tsx @@ -0,0 +1,99 @@ +import { TagOutlined, TagFilled } from '@ant-design/icons'; +import * as React from 'react'; +import styled from 'styled-components'; +import { Role, EntityType, SearchResult } from '../../../types.generated'; +import DefaultPreviewCard from '../../previewV2/DefaultPreviewCard'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { urlEncodeUrn } from '../shared/utils'; +import RoleEntityProfile from './RoleEntityProfile'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; + +const PreviewTagIcon = styled(TagOutlined)` + font-size: 20px; +`; +// /** +// * Definition of the DataHub Access Role entity. +// */ +export class RoleEntity implements Entity { + type: EntityType = EntityType.Role; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getPathName: () => string = () => 'role'; + + getCollectionName: () => string = () => 'Roles'; + + getEntityName: () => string = () => 'Role'; + + renderProfile: (urn: string) => JSX.Element = (_) => ; + + renderPreview = (previewType: PreviewType, data: Role) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + } + entityType={EntityType.Role} + typeIcon={this.icon(14, IconStyleType.ACCENT)} + previewType={previewType} + /> + ); + }; + + renderSearch = (result: SearchResult) => { + return this.renderPreview(PreviewType.SEARCH, result.entity as Role); + }; + + displayName = (data: Role) => { + return data.properties?.name || data.urn; + }; + + 
getOverridePropertiesFromEntity = (data: Role) => { + return { + name: data.properties?.name, + }; + }; + + getGenericEntityProperties = (role: Role) => { + return getDataForEntityType({ data: role, entityType: this.type, getOverrideProperties: (data) => data }); + }; + + supportedCapabilities = () => { + return new Set([EntityCapabilityType.OWNERS]); + }; + + getGraphName = () => { + return 'roleEntity'; + }; +} diff --git a/datahub-web-react/src/app/entityV2/Access/RoleEntityProfile.tsx b/datahub-web-react/src/app/entityV2/Access/RoleEntityProfile.tsx new file mode 100644 index 00000000000000..d8a31700fb918e --- /dev/null +++ b/datahub-web-react/src/app/entityV2/Access/RoleEntityProfile.tsx @@ -0,0 +1,75 @@ +import React from 'react'; + +import { useParams } from 'react-router'; +import { Divider, Typography } from 'antd'; +import { grey } from '@ant-design/colors'; +import styled from 'styled-components'; + +import { Message } from '../../shared/Message'; +import { decodeUrn } from '../shared/utils'; +import { useGetExternalRoleQuery } from '../../../graphql/accessrole.generated'; + +const PageContainer = styled.div` + padding: 32px 100px; +`; + +const LoadingMessage = styled(Message)` + margin-top: 10%; +`; + +type RolePageParams = { + urn: string; +}; + +const TitleLabel = styled(Typography.Text)` + &&& { + color: ${grey[2]}; + font-size: 12px; + display: block; + line-height: 20px; + font-weight: 700; + } +`; + +const DescriptionLabel = styled(Typography.Text)` + &&& { + text-align: left; + font-weight: bold; + font-size: 14px; + line-height: 28px; + color: rgb(38, 38, 38); + } +`; + +const TitleText = styled(Typography.Text)` + &&& { + color: ${grey[10]}; + font-weight: 700; + font-size: 20px; + line-height: 28px; + display: inline-block; + margin: 0px 7px; + } +`; + +const { Paragraph } = Typography; + +export default function RoleEntityProfile() { + const { urn: encodedUrn } = useParams(); + const urn = decodeUrn(encodedUrn); + const { data, loading } = 
useGetExternalRoleQuery({ variables: { urn } }); + + return ( + + {loading && } + Role + {data?.role?.properties?.name} + + {/* Role Description */} + About + + {data?.role?.properties?.description} + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/DefaultEntity.tsx b/datahub-web-react/src/app/entityV2/DefaultEntity.tsx new file mode 100644 index 00000000000000..01c496348beccf --- /dev/null +++ b/datahub-web-react/src/app/entityV2/DefaultEntity.tsx @@ -0,0 +1,37 @@ +import React from 'react'; +import { EntityType, SearchResult } from '../../types.generated'; +import { Entity, EntityMenuActions, IconStyleType, PreviewType } from './Entity'; + +class DefaultEntity implements Entity { + type: EntityType = EntityType.Other; + + icon = (_fontSize?: number | undefined, _styleType?: IconStyleType | undefined, _color?: string | undefined) => ( + <> + ); + + isSearchEnabled = () => false; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getPathName = () => ''; + + getCollectionName = () => ''; + + renderProfile = (_urn: string) => <>; + + renderPreview = (_type: PreviewType, _data: null, _actions?: EntityMenuActions | undefined) => <>; + + renderSearch = (_result: SearchResult) => <>; + + displayName = (_data: null) => ''; + + getGenericEntityProperties = (_data: null) => null; + + supportedCapabilities = () => new Set([]); + + getGraphName = () => ''; +} + +export default new DefaultEntity(); diff --git a/datahub-web-react/src/app/entityV2/Entity.tsx b/datahub-web-react/src/app/entityV2/Entity.tsx new file mode 100644 index 00000000000000..7df9deabf64033 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/Entity.tsx @@ -0,0 +1,240 @@ +import { QueryHookOptions, QueryResult } from '@apollo/client'; +import { EntityType, Exact, FeatureFlagsConfig, SearchResult } from '../../types.generated'; +import { GenericEntityProperties } from '../entity/shared/types'; +import { FetchedEntity } from '../lineage/types'; +import { 
EntitySidebarSection, EntitySidebarTab } from './shared/types'; + +export enum PreviewType { + /** + * A preview shown within the search experience + */ + SEARCH, + /** + * A preview shown within the browse experience + */ + BROWSE, + /** + * A generic preview shown within other entity pages, etc. + */ + PREVIEW, + /** + * A tiny search preview for text-box search. + */ + MINI_SEARCH, + /** + * Previews rendered when hovering over the entity in a compact list + */ + HOVER_CARD, +} + +export enum IconStyleType { + /** + * Colored Icon + */ + HIGHLIGHT, + /** + * Grayed out icon + */ + ACCENT, + /** + * Rendered in a Tab pane header + */ + TAB_VIEW, + /** + * Rendered in Lineage as default + */ + SVG, +} + +/** + * A standard set of Entity Capabilities that span across entity types. + */ +export enum EntityCapabilityType { + /** + * Ownership of an entity + */ + OWNERS, + /** + * Adding a glossary term to the entity + */ + GLOSSARY_TERMS, + /** + * Adding a tag to an entity + */ + TAGS, + /** + * Assigning the entity to a domain + */ + DOMAINS, + /** + * Deprecating an entity + */ + DEPRECATION, + /** + * Soft deleting an entity + */ + SOFT_DELETE, + /** + * Run tests against an entity + */ + TEST, + /** + * Add roles to the entity + */ + ROLES, + /** + * Assigning the entity to a data product + */ + DATA_PRODUCTS, + /** + * Health status of an entity + */ + HEALTH, + /** + * Lineage information of an entity + */ + LINEAGE, +} + +export interface EntityMenuActions { + onDelete?: () => void; + onEdit?: () => void; +} + +/** + * Base interface used for authoring DataHub Entities on the client side. + * + * the generated GraphQL data type associated with the entity. + */ +export interface Entity { + /** + * Corresponding GQL EntityType. + */ + type: EntityType; + + /** + * Ant-design icon associated with the Entity. 
For a list of all candidate icons, see + * https://ant.design/components/icon/ + */ + icon: (fontSize?: number, styleType?: IconStyleType, color?: string) => JSX.Element; + + /** + * Returns whether the entity search is enabled + */ + isSearchEnabled: () => boolean; + + /** + * Returns whether the entity browse is enabled + */ + isBrowseEnabled: () => boolean; + + /** + * Returns whether the entity browse is enabled + */ + isLineageEnabled: () => boolean; + + /** + * Returns the name of the entity as it appears in a URL, e.g. '/dataset/:urn'. + */ + getPathName: () => string; + + /** + * Returns the plural name of the entity used when displaying collections (search, browse results), e.g. 'Datasets'. + */ + getCollectionName: () => string; + + /** + * Returns the singular name of the entity used when referring to an individual + */ + getEntityName?: () => string; + + /** + * Renders the 'profile' of the entity on an entity details page. + * + * TODO: Explore using getGenericEntityProperties for rendering profiles. + */ + renderProfile: (urn: string) => JSX.Element; + + /** + * Renders a preview of the entity across different use cases like search, browse, etc. + * + * TODO: Explore using getGenericEntityProperties for rendering previews. + */ + renderPreview: (type: PreviewType, data: T, actions?: EntityMenuActions) => JSX.Element; + + /** + * Renders a search result + * + * TODO: Explore using getGenericEntityProperties for rendering profiles. + */ + renderSearch: (result: SearchResult) => JSX.Element; + + /** + * Renders search + */ + renderSearchMatches?: (result: SearchResult) => JSX.Element; + + /** + * Constructs config to add entity to lineage viz + */ + getLineageVizConfig?: (entity: T) => FetchedEntity; + + /** + * Returns a display name for the entity + * + * TODO: Migrate to using getGenericEntityProperties for display name retrieval. 
+ */ + displayName: (data: T) => string; + + /** + * Returns generic entity properties for the entity + */ + getGenericEntityProperties: (data: T, flags?: FeatureFlagsConfig) => GenericEntityProperties | null; + + /** + * Returns the supported features for the entity + */ + supportedCapabilities: () => Set; + + /** + * Returns the graph name of the entity, as it appears in the GMS entity registry + */ + getGraphName: () => string; + + /** + * Returns the profile component to be displayed in our Chrome extension + */ + renderEmbeddedProfile?: (urn: string) => JSX.Element; + + /** + * Returns the entity profile sidebar sections for an entity type. Only implemented on Datasets for now. + */ + getSidebarSections?: () => EntitySidebarSection[]; + + /** + * Returns the entity profile sidebar tabs for an entity type. + */ + getSidebarTabs?: () => EntitySidebarTab[]; + + /** + * Get the query necessary for refetching data on an entity profile page + */ + useEntityQuery?: ( + baseOptions: QueryHookOptions< + any, + Exact<{ + urn: string; + }> + >, + ) => QueryResult< + any, + Exact<{ + urn: string; + }> + >; + + /** + * Returns the url to be navigated to when clicked on Cards + */ + getCustomCardUrlPath?: () => string | undefined; +} diff --git a/datahub-web-react/src/app/entityV2/EntityPage.tsx b/datahub-web-react/src/app/entityV2/EntityPage.tsx new file mode 100644 index 00000000000000..84c74b8304a150 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/EntityPage.tsx @@ -0,0 +1,124 @@ +import React, { useEffect, useState } from 'react'; +import { useParams } from 'react-router-dom'; +import { EntityType } from '../../types.generated'; +import { BrowsableEntityPage } from '../browse/BrowsableEntityPage'; +import LineageExplorer from '../lineage/LineageExplorer'; +import useIsLineageMode from '../lineage/utils/useIsLineageMode'; +import { useLineageV2 } from '../lineageV2/useLineageV2'; +import useSidebarWidth from '../sharedV2/sidebar/useSidebarWidth'; +import { 
useEntityRegistry } from '../useEntityRegistry'; +import analytics, { EventType } from '../analytics'; +import { decodeUrn } from './shared/utils'; +import { useGetGrantedPrivilegesQuery } from '../../graphql/policy.generated'; +import { UnauthorizedPage } from '../authorization/UnauthorizedPage'; +import { ErrorSection } from '../shared/error/ErrorSection'; +import { VIEW_ENTITY_PAGE } from './shared/constants'; +import { useUserContext } from '../context/useUserContext'; +import EntitySidebarContext from '../sharedV2/EntitySidebarContext'; +import TabFullSizedContext from '../shared/TabFullsizedContext'; + +interface RouteParams { + urn: string; +} + +interface Props { + entityType: EntityType; +} + +const ALLOWED_ENTITY_TYPES = [ + EntityType.Dataset, + EntityType.Dashboard, + EntityType.Chart, + EntityType.DataFlow, + EntityType.DataJob, + EntityType.Mlmodel, + EntityType.Mlfeature, + EntityType.MlprimaryKey, + EntityType.MlfeatureTable, + EntityType.MlmodelGroup, + EntityType.GlossaryTerm, + EntityType.GlossaryNode, + EntityType.SchemaField, +]; + +/** + * Responsible for rendering an Entity Profile + */ +export const EntityPage = ({ entityType }: Props) => { + const { urn: encodedUrn } = useParams(); + const urn = decodeUrn(encodedUrn); + const entityRegistry = useEntityRegistry(); + const entity = entityRegistry.getEntity(entityType); + const isBrowsable = entity.isBrowseEnabled(); + const isLineageSupported = entity.isLineageEnabled(); + const isLineageMode = useIsLineageMode(); + const authenticatedUserUrn = useUserContext()?.user?.urn; + const { error, data } = useGetGrantedPrivilegesQuery({ + variables: { + input: { + actorUrn: authenticatedUserUrn as string, + resourceSpec: { + resourceType: entityType, + resourceUrn: urn, + }, + }, + }, + skip: !authenticatedUserUrn, + fetchPolicy: 'cache-first', + }); + const privileges = data?.getGrantedPrivileges?.privileges || []; + + useEffect(() => { + analytics.event({ + type: EventType.EntityViewEvent, + 
entityType, + entityUrn: urn, + }); + }, [entityType, urn]); + + const canViewEntityPage = privileges.find((privilege) => privilege === VIEW_ENTITY_PAGE); + const showNewPage = ALLOWED_ENTITY_TYPES.includes(entityType); + + const isLineageV2 = useLineageV2(); + const showLineage = isLineageMode && isLineageSupported; + const [isSidebarClosed, setIsSidebarClosed] = useState(false); + const [isTabFullsize, setTabFullsize] = useState(false); + const sidebarWidth = useSidebarWidth(); + + return ( + <> + {error && } + {data && !canViewEntityPage && } + {canViewEntityPage && ( + + + {showNewPage && entityRegistry.renderProfile(entityType, urn)} + {!showNewPage && ( + + {showLineage && !isLineageV2 && } + {(!showLineage || isLineageV2) && entityRegistry.renderProfile(entityType, urn)} + + )} + + + )} + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/EntityRegistry.tsx b/datahub-web-react/src/app/entityV2/EntityRegistry.tsx new file mode 100644 index 00000000000000..94b54bf45ec201 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/EntityRegistry.tsx @@ -0,0 +1,369 @@ +import { QueryHookOptions, QueryResult } from '@apollo/client'; +import { downgradeV2FieldPath } from '@app/lineageV2/lineageUtils'; +import React from 'react'; +import { EntityLineageV2Fragment, LineageSchemaFieldFragment } from '@graphql/lineage.generated'; +import { Entity as EntityInterface, EntityType, Exact, FeatureFlagsConfig, SearchResult } from '../../types.generated'; +import { GenericEntityProperties } from '../entity/shared/types'; +import { FetchedEntity } from '../lineage/types'; +import { FetchedEntityV2, FetchedEntityV2Relationship, LineageAsset, LineageAssetType } from '../lineageV2/types'; +import { SearchResultProvider } from '../search/context/SearchResultContext'; +import DefaultEntity from './DefaultEntity'; +import { Entity, EntityCapabilityType, EntityMenuActions, IconStyleType, PreviewType } from './Entity'; +import PreviewContext from './shared/PreviewContext'; +import { 
GLOSSARY_ENTITY_TYPES } from './shared/constants'; +import { EntitySidebarSection, EntitySidebarTab } from './shared/types'; +import { dictToQueryStringParams, getFineGrainedLineageWithSiblings, urlEncodeUrn } from './shared/utils'; + +function validatedGet(key: K, map: Map, def: V): V { + if (map.has(key)) { + return map.get(key) as V; + } + return def; +} + +/** + * Serves as a singleton registry for all DataHub entities to appear on the frontend. + */ +export default class EntityRegistry { + entities: Array> = new Array>(); + + entityTypeToEntity: Map> = new Map>(); + + collectionNameToEntityType: Map = new Map(); + + pathNameToEntityType: Map = new Map(); + + graphNameToEntityType: Map = new Map(); + + register(entity: Entity) { + this.entities.push(entity); + this.entityTypeToEntity.set(entity.type, entity); + this.collectionNameToEntityType.set(entity.getCollectionName(), entity.type); + this.pathNameToEntityType.set(entity.getPathName(), entity.type); + this.graphNameToEntityType.set(entity.getGraphName(), entity.type); + } + + getEntity(type: EntityType): Entity { + return validatedGet(type, this.entityTypeToEntity, DefaultEntity); + } + + hasEntity(type: EntityType): boolean { + return this.entityTypeToEntity.has(type); + } + + getEntities(): Array> { + return this.entities; + } + + getEntitiesForSearchRoutes(): Array> { + return this.entities.filter( + (entity) => !GLOSSARY_ENTITY_TYPES.includes(entity.type) && entity.type !== EntityType.Domain, + ); + } + + getNonGlossaryEntities(): Array> { + return this.entities.filter((entity) => !GLOSSARY_ENTITY_TYPES.includes(entity.type)); + } + + getGlossaryEntities(): Array> { + return this.entities.filter((entity) => GLOSSARY_ENTITY_TYPES.includes(entity.type)); + } + + getSearchEntityTypes(): Array { + return this.entities.filter((entity) => entity.isSearchEnabled()).map((entity) => entity.type); + } + + getDefaultSearchEntityType(): EntityType { + return this.entities[0].type; + } + + getBrowseEntityTypes(): 
Array { + return this.entities.filter((entity) => entity.isBrowseEnabled()).map((entity) => entity.type); + } + + getLineageEntityTypes(): Array { + return this.entities.filter((entity) => entity.isLineageEnabled()).map((entity) => entity.type); + } + + getIcon(type: EntityType, fontSize?: number, styleType?: IconStyleType, color?: string): JSX.Element { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.icon(fontSize, styleType || IconStyleType.TAB_VIEW, color); + } + + getCollectionName(type: EntityType): string { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.getCollectionName(); + } + + getEntityName(type: EntityType): string | undefined { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.getEntityName?.(); + } + + getTypeFromCollectionName(name: string): EntityType { + return validatedGet(name, this.collectionNameToEntityType, DefaultEntity.type); + } + + getPathName(type: EntityType): string { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.getPathName(); + } + + getEntityUrl(type: EntityType, urn: string, params?: Record): string { + return `/${this.getPathName(type)}/${urlEncodeUrn(urn)}${params ? 
`?${dictToQueryStringParams(params)}` : ''}`; + } + + getTypeFromPathName(pathName: string): EntityType { + return validatedGet(pathName, this.pathNameToEntityType, DefaultEntity.type); + } + + getTypeOrDefaultFromPathName(pathName: string, def?: EntityType): EntityType | undefined { + return validatedGet(pathName, this.pathNameToEntityType, def); + } + + renderProfile(type: EntityType, urn: string): JSX.Element { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.renderProfile(urn); + } + + renderPreview(entityType: EntityType, type: PreviewType, data: T, actions?: EntityMenuActions): JSX.Element { + const entity = validatedGet(entityType, this.entityTypeToEntity, DefaultEntity); + const genericEntityData = entity.getGenericEntityProperties(data); + return ( + + {entity.renderPreview(type, data, actions)} + + ); + } + + renderSearchResult(type: EntityType, searchResult: SearchResult): JSX.Element { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + const genericEntityData = entity.getGenericEntityProperties(searchResult.entity); + return ( + + + {entity.renderSearch(searchResult)} + + + ); + } + + renderSearchMatches(type: EntityType, searchResult: SearchResult): JSX.Element { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return ( + + {entity?.renderSearchMatches?.(searchResult) || <>} + + ); + } + + renderBrowse(type: EntityType, data: T): JSX.Element { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.renderPreview(PreviewType.BROWSE, data); + } + + // render the regular profile if embedded profile doesn't exist. Compact context should be set to true. + renderEmbeddedProfile(type: EntityType, urn: string): JSX.Element { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.renderEmbeddedProfile ? 
entity.renderEmbeddedProfile(urn) : entity.renderProfile(urn); + } + + getLineageVizConfig(type: EntityType, data: T): FetchedEntity { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + const genericEntityProperties = this.getGenericEntityProperties(type, data); + // combine fineGrainedLineages from this node as well as its siblings + const fineGrainedLineages = getFineGrainedLineageWithSiblings( + genericEntityProperties, + (t: EntityType, d: EntityInterface) => this.getGenericEntityProperties(t, d), + ); + return { + ...entity.getLineageVizConfig?.(data), + downstreamChildren: genericEntityProperties?.downstream?.relationships + ?.filter((relationship) => relationship.entity) + ?.map((relationship) => ({ + entity: relationship.entity as EntityInterface, + type: (relationship.entity as EntityInterface).type, + })), + downstreamRelationships: genericEntityProperties?.downstream?.relationships?.filter( + (relationship) => relationship.entity, + ), + numDownstreamChildren: + (genericEntityProperties?.downstream?.total || 0) - + (genericEntityProperties?.downstream?.filtered || 0), + upstreamChildren: genericEntityProperties?.upstream?.relationships + ?.filter((relationship) => relationship.entity) + ?.map((relationship) => ({ + entity: relationship.entity as EntityInterface, + type: (relationship.entity as EntityInterface).type, + })), + upstreamRelationships: genericEntityProperties?.upstream?.relationships?.filter( + (relationship) => relationship.entity, + ), + numUpstreamChildren: + (genericEntityProperties?.upstream?.total || 0) - (genericEntityProperties?.upstream?.filtered || 0), + status: genericEntityProperties?.status, + siblingPlatforms: genericEntityProperties?.siblingPlatforms, + fineGrainedLineages, + siblings: genericEntityProperties?.siblings, + schemaMetadata: genericEntityProperties?.schemaMetadata, + inputFields: genericEntityProperties?.inputFields, + canEditLineage: genericEntityProperties?.privileges?.canEditLineage, 
+ } as FetchedEntity; + } + + getLineageVizConfigV2(type: EntityType, data: T, flags?: FeatureFlagsConfig): FetchedEntityV2 | null { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + const genericEntityProperties = this.getGenericEntityProperties(type, data, flags); + if (!genericEntityProperties || !entity.getLineageVizConfig) return null; + + const reversedBrowsePath = genericEntityProperties.browsePathV2?.path?.slice(); + reversedBrowsePath?.reverse(); + const containers = genericEntityProperties?.parentContainers?.containers?.length + ? genericEntityProperties?.parentContainers?.containers + ?.map((p) => this.getGenericEntityProperties(p.type, p)) + .filter((p): p is GenericEntityProperties => !!p) + : reversedBrowsePath + ?.map((p) => (p.entity ? this.getGenericEntityProperties(p.entity.type, p.entity) : { name: p.name })) + .filter((p): p is GenericEntityProperties => !!p); + + return { + ...entity.getLineageVizConfig(data), + containers, + fineGrainedLineages: + genericEntityProperties?.fineGrainedLineages || + genericEntityProperties?.inputOutput?.fineGrainedLineages || + [], + numDownstreamChildren: + (genericEntityProperties.downstream?.total || 0) - (genericEntityProperties.downstream?.filtered || 0), + numUpstreamChildren: + (genericEntityProperties.upstream?.total || 0) - (genericEntityProperties.upstream?.filtered || 0), + downstreamRelationships: genericEntityProperties.downstream?.relationships + ?.map((r) => ({ ...r, urn: r.entity?.urn })) + .filter((r): r is FetchedEntityV2Relationship => !!r.urn), + upstreamRelationships: genericEntityProperties.upstream?.relationships + ?.map((r) => ({ ...r, urn: r.entity?.urn })) + .filter((r): r is FetchedEntityV2Relationship => !!r.urn), + exists: genericEntityProperties.exists, + health: genericEntityProperties.health ?? undefined, + status: genericEntityProperties.status ?? undefined, + schemaMetadata: genericEntityProperties.schemaMetadata ?? 
undefined, + inputFields: genericEntityProperties.inputFields ?? undefined, + canEditLineage: genericEntityProperties.privileges?.canEditLineage ?? undefined, + lineageSiblingIcon: genericEntityProperties?.lineageSiblingIcon, + structuredProperties: genericEntityProperties.structuredProperties ?? undefined, + }; + } + + getLineageAssets(type: EntityType, data: EntityLineageV2Fragment): Map | undefined { + // TODO: Fold into entity registry? + if (data?.__typename === 'Domain') { + return data?.dataProducts?.searchResults.reduce((obj, r) => { + if (r.entity.__typename === 'DataProduct') { + const name = this.getDisplayName(r.entity.type, r.entity); + obj.set(name, { name, type: LineageAssetType.DataProduct, size: r.entity.entities?.total }); + } + return obj; + }, new Map()); + } + const fields = getSchemaFields(data, this.getGenericEntityProperties(type, data)); + if (fields) { + return new Map( + fields.map((field) => { + const name = downgradeV2FieldPath(field.fieldPath); + const value: LineageAsset = { + name, + type: LineageAssetType.Column, + dataType: field.type, + nativeDataType: field.nativeDataType, + }; + return [name, value]; + }), + ); + } + return undefined; + } + + getDisplayName(type: EntityType, data: T): string { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.displayName(data); + } + + getSidebarTabs(type: EntityType): EntitySidebarTab[] { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.getSidebarTabs ? entity.getSidebarTabs() : []; + } + + getSidebarSections(type: EntityType): EntitySidebarSection[] { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.getSidebarSections ? 
entity.getSidebarSections() : []; + } + + getGenericEntityProperties( + type: EntityType, + data: T, + flags?: FeatureFlagsConfig, + ): GenericEntityProperties | null { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.getGenericEntityProperties(data, flags); + } + + getSupportedEntityCapabilities(type: EntityType): Set { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.supportedCapabilities(); + } + + getTypesWithSupportedCapabilities(capability: EntityCapabilityType): Set { + return new Set( + this.getEntities() + .filter((entity) => entity.supportedCapabilities().has(capability)) + .map((entity) => entity.type), + ); + } + + getCustomCardUrlPath(type: EntityType): string | undefined { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.getCustomCardUrlPath?.() as string | undefined; + } + + getTypeFromGraphName(name: string): EntityType | undefined { + return this.graphNameToEntityType.get(name); + } + + getGraphNameFromType(type: EntityType): string { + return validatedGet(type, this.entityTypeToEntity, DefaultEntity).getGraphName(); + } + + getEntityQuery(type: EntityType): + | (( + baseOptions: QueryHookOptions< + any, + Exact<{ + urn: string; + }> + >, + ) => QueryResult< + any, + Exact<{ + urn: string; + }> + >) + | undefined { + const entity = validatedGet(type, this.entityTypeToEntity, DefaultEntity); + return entity.useEntityQuery; + } +} + +function getSchemaFields( + data: EntityLineageV2Fragment, + genericEntityProperties: GenericEntityProperties | null, +): LineageSchemaFieldFragment[] | undefined { + if (data?.__typename === 'Dataset') { + return data?.schemaMetadata?.fields; + } + if (data?.__typename === 'Chart') { + return data?.inputFields?.fields + ?.map((field) => field?.schemaField) + .filter((field): field is LineageSchemaFieldFragment => !!field); + } + return genericEntityProperties?.schemaMetadata?.fields; +} 
diff --git a/datahub-web-react/src/app/entityV2/businessAttribute/BusinessAttributeEntity.tsx b/datahub-web-react/src/app/entityV2/businessAttribute/BusinessAttributeEntity.tsx new file mode 100644 index 00000000000000..218402c753448a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/businessAttribute/BusinessAttributeEntity.tsx @@ -0,0 +1,157 @@ +import * as React from 'react'; +import { GlobalOutlined } from '@ant-design/icons'; +import { BusinessAttribute, EntityType, SearchResult } from '../../../types.generated'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { useGetBusinessAttributeQuery } from '../../../graphql/businessAttribute.generated'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import { Preview } from './preview/Preview'; +import { PageRoutes } from '../../../conf/Global'; +import BusinessAttributeRelatedEntity from './profile/BusinessAttributeRelatedEntity'; +import { BusinessAttributeDataTypeSection } from './profile/BusinessAttributeDataTypeSection'; + +/** + * Definition of datahub Business Attribute Entity + */ +/* eslint-disable @typescript-eslint/no-unused-vars */ +export class BusinessAttributeEntity implements Entity { + type: EntityType = EntityType.BusinessAttribute; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: 
string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + if (styleType === IconStyleType.SVG) { + // TODO: Update the returned path value to the correct svg icon path + return ( + + ); + } + + return ( + + ); + }; + + displayName = (data: BusinessAttribute) => { + return data?.properties?.name || data?.urn; + }; + + getPathName = () => 'business-attribute'; + + getEntityName = () => 'Business Attribute'; + + getCollectionName = () => 'Business Attributes'; + + getGraphName = () => 'businessAttribute'; + + getCustomCardUrlPath = () => PageRoutes.BUSINESS_ATTRIBUTE; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + isSearchEnabled = () => true; + + getOverridePropertiesFromEntity = (data: BusinessAttribute) => { + return { + name: data.properties?.name, + }; + }; + + getGenericEntityProperties = (data: BusinessAttribute) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + renderPreview = (previewType: PreviewType, data: BusinessAttribute) => { + return ( + + ); + }; + + renderProfile = (urn: string) => { + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + return this.renderPreview(PreviewType.SEARCH, result.entity as BusinessAttribute); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.TAGS, + EntityCapabilityType.GLOSSARY_TERMS, + // EntityCapabilityType.BUSINESS_ATTRIBUTES, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/businessAttribute/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/businessAttribute/preview/Preview.tsx new file mode 100644 index 00000000000000..323c287a0acd78 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/businessAttribute/preview/Preview.tsx @@ -0,0 +1,40 @@ +import React from 'react'; +import { GlobalOutlined } from 
'@ant-design/icons'; +import { EntityType, Owner } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import UrlButton from '../../shared/UrlButton'; +import { getRelatedEntitiesUrl } from '../../../businessAttribute/businessAttributeUtils'; + +export const Preview = ({ + urn, + name, + description, + owners, + previewType, +}: { + urn: string; + name: string; + description?: string | null; + owners?: Array | null; + previewType: PreviewType; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + } + type="Business Attribute" + typeIcon={entityRegistry.getIcon(EntityType.BusinessAttribute, 14, IconStyleType.ACCENT)} + entityTitleSuffix={ + View Related Entities + } + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/businessAttribute/preview/_tests_/Preview.test.tsx b/datahub-web-react/src/app/entityV2/businessAttribute/preview/_tests_/Preview.test.tsx new file mode 100644 index 00000000000000..bca32de985d377 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/businessAttribute/preview/_tests_/Preview.test.tsx @@ -0,0 +1,26 @@ +import { MockedProvider } from '@apollo/client/testing'; +import { render } from '@testing-library/react'; +import React from 'react'; +import { mocks } from '../../../../../Mocks'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import { Preview } from '../Preview'; +import { PreviewType } from '../../../Entity'; + +describe('Preview', () => { + it('renders', () => { + const { getByText } = render( + + + + + , + ); + expect(getByText('definition')).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeDataTypeSection.tsx b/datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeDataTypeSection.tsx 
new file mode 100644 index 00000000000000..05b649be554bfd --- /dev/null +++ b/datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeDataTypeSection.tsx @@ -0,0 +1,98 @@ +import { Button, message, Select } from 'antd'; +import { EditOutlined } from '@ant-design/icons'; +import React, { useEffect, useState } from 'react'; +import styled from 'styled-components'; +import { useEntityData, useRefetch } from '@src/app/entity/shared/EntityContext'; +import { SidebarHeader } from '../../shared/containers/profile/sidebar/SidebarHeader'; +import { useUpdateBusinessAttributeMutation } from '../../../../graphql/businessAttribute.generated'; +import { SchemaFieldDataType } from '../../../businessAttribute/businessAttributeUtils'; + +interface Props { + readOnly?: boolean; +} + +const DataTypeSelect = styled(Select)` + && { + width: 100%; + margin-top: 1em; + margin-bottom: 1em; + } +`; +// Ensures that any newly added datatype is automatically included in the user dropdown. +const DATA_TYPES = Object.values(SchemaFieldDataType); +export const BusinessAttributeDataTypeSection = ({ readOnly }: Props) => { + const { urn, entityData } = useEntityData(); + const [originalDescription, setOriginalDescription] = useState(null); + const [isEditing, setEditing] = useState(false); + const refetch = useRefetch(); + + useEffect(() => { + if (entityData?.properties?.businessAttributeDataType) { + setOriginalDescription(entityData?.properties?.businessAttributeDataType); + } + }, [entityData]); + + const [updateBusinessAttribute] = useUpdateBusinessAttributeMutation(); + + const handleChange = (value) => { + if (value === originalDescription) { + setEditing(false); + return; + } + + updateBusinessAttribute({ variables: { urn, input: { type: value } } }) + .then(() => { + setEditing(false); + setOriginalDescription(value); + message.success({ content: 'Data Type Updated', duration: 2 }); + refetch(); + }) + .catch((e: unknown) => { + message.destroy(); + if (e 
instanceof Error) { + message.error({ content: `Failed to update Data Type: \n ${e.message || ''}`, duration: 3 }); + } + }); + }; + + // Toggle editing mode + const handleEditClick = () => { + setEditing(!isEditing); + }; + + return ( +
+ + + + ) + } + /> + {originalDescription} + {isEditing && ( + + {DATA_TYPES.map((dataType: SchemaFieldDataType) => ( + + {dataType} + + ))} + + )} +
+ ); +}; + +export default BusinessAttributeDataTypeSection; diff --git a/datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeRelatedEntity.tsx b/datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeRelatedEntity.tsx new file mode 100644 index 00000000000000..e7c46021f430a6 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/businessAttribute/profile/BusinessAttributeRelatedEntity.tsx @@ -0,0 +1,43 @@ +import * as React from 'react'; +import { useEntityData } from '@src/app/entity/shared/EntityContext'; +import { UnionType } from '../../../search/utils/constants'; +import { EmbeddedListSearchSection } from '../../shared/components/styled/search/EmbeddedListSearchSection'; + +export default function BusinessAttributeRelatedEntity() { + const { entityData } = useEntityData(); + + const entityUrn = entityData?.urn; + + const fixedOrFilters = + (entityUrn && [ + { + field: 'businessAttribute', + values: [entityUrn], + }, + ]) || + []; + + entityData?.isAChildren?.relationships?.forEach((businessAttribute) => { + const childUrn = businessAttribute.entity?.urn; + + if (childUrn) { + fixedOrFilters.push({ + field: 'businessAttributes', + values: [childUrn], + }); + } + }); + + return ( + + ); +} diff --git a/datahub-web-react/src/app/entityV2/chart/ChartEntity.tsx b/datahub-web-react/src/app/entityV2/chart/ChartEntity.tsx new file mode 100644 index 00000000000000..36777846155cf5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/ChartEntity.tsx @@ -0,0 +1,391 @@ +import { + DashboardOutlined, + EyeOutlined, + FileOutlined, + LayoutOutlined, + LineChartOutlined, + PartitionOutlined, + UnorderedListOutlined, + WarningOutlined, +} from '@ant-design/icons'; +import * as React from 'react'; +import { GetChartQuery, useGetChartQuery, useUpdateChartMutation } from '../../../graphql/chart.generated'; +import { Chart, EntityType, LineageDirection, SearchResult } from '../../../types.generated'; +import { 
GenericEntityProperties } from '../../entity/shared/types'; +import { LOOKER_URN, MODE, MODE_URN } from '../../ingest/source/builder/constants'; +import { MatchedFieldList } from '../../searchV2/matches/MatchedFieldList'; +import { matchedInputFieldRenderer } from '../../searchV2/matches/matchedInputFieldRenderer'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { SubType, TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import SidebarChartHeaderSection from '../shared/containers/profile/sidebar/Chart/Header/SidebarChartHeaderSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import SidebarLineageSection from '../shared/containers/profile/sidebar/Lineage/SidebarLineageSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import EmbeddedProfile from '../shared/embed/EmbeddedProfile'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import 
{ SUMMARY_TAB_ICON } from '../shared/summary/HeaderComponents'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { EmbedTab } from '../shared/tabs/Embed/EmbedTab'; +import { ChartDashboardsTab } from '../shared/tabs/Entity/ChartDashboardsTab'; +import { InputFieldsTab } from '../shared/tabs/Entity/InputFieldsTab'; +import TabNameWithCount from '../shared/tabs/Entity/TabNameWithCount'; +import { IncidentTab } from '../shared/tabs/Incident/IncidentTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { SidebarTitleActionType, getDashboardLastUpdatedMs, getDataProduct, isOutputPort } from '../shared/utils'; +import { ChartPreview } from './preview/ChartPreview'; +import { ChartStatsSummarySubHeader } from './profile/stats/ChartStatsSummarySubHeader'; +import ChartSummaryTab from './summary/ChartSummaryTab'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const PREVIEW_SUPPORTED_PLATFORMS = [LOOKER_URN, MODE_URN]; + +const headerDropdownItems = new Set([ + EntityMenuItems.EXTERNAL_URL, + EntityMenuItems.SHARE, + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub Chart entity. 
+ */ +export class ChartEntity implements Entity { + type: EntityType = EntityType.Chart; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + if (styleType === IconStyleType.SVG) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'title'; + + getGraphName = () => 'chart'; + + getPathName = () => this.getGraphName(); + + getEntityName = () => 'Chart'; + + getCollectionName = () => 'Charts'; + + useEntityQuery = useGetChartQuery; + + renderProfile = (urn: string) => ( + + !!chart?.chart?.subTypes?.typeNames?.includes(SubType.TableauWorksheet) || + !!chart?.chart?.subTypes?.typeNames?.includes(SubType.Looker) || + chart?.chart?.platform.name === MODE, + enabled: () => true, + }, + }, + { + name: 'Documentation', + component: DocumentationTab, + icon: FileOutlined, + }, + { + name: 'Fields', + component: InputFieldsTab, + icon: LayoutOutlined, + display: { + visible: (_, chart: GetChartQuery) => (chart?.chart?.inputFields?.fields?.length || 0) > 0, + enabled: (_, chart: GetChartQuery) => (chart?.chart?.inputFields?.fields?.length || 0) > 0, + }, + }, + { + name: 'Preview', + component: EmbedTab, + icon: EyeOutlined, + display: { + visible: (_, chart: GetChartQuery) => + !!chart?.chart?.embed?.renderUrl && + PREVIEW_SUPPORTED_PLATFORMS.includes(chart?.chart?.platform.urn), + enabled: (_, chart: GetChartQuery) => + !!chart?.chart?.embed?.renderUrl && + PREVIEW_SUPPORTED_PLATFORMS.includes(chart?.chart?.platform.urn), + }, + }, + { + name: 'Lineage', + component: LineageTab, + icon: PartitionOutlined, + properties: { + defaultDirection: LineageDirection.Upstream, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + }, + { + name: 
'Dashboards', + component: ChartDashboardsTab, + icon: DashboardOutlined, + display: { + visible: (_, _1) => true, + enabled: (_, chart: GetChartQuery) => (chart?.chart?.dashboards?.total || 0) > 0, + }, + }, + { + name: 'Incidents', + getDynamicName: (_, chart, loading) => { + const activeIncidentCount = chart?.chart?.activeIncidents?.total; + return ; + }, + icon: WarningOutlined, + component: IncidentTab, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarChartHeaderSection, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarLineageSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + { + component: SidebarTagsSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Lineage', + component: LineageTab, + description: "View this data asset's upstream and downstream dependencies", + icon: PartitionOutlined, + properties: { + actionType: SidebarTitleActionType.LineageExplore, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + getOverridePropertiesFromEntity = (chart?: Chart | null): GenericEntityProperties => { + // TODO: Get rid of this once we have correctly formed platform coming back. + const name = chart?.properties?.name; + const subTypes = chart?.subTypes; + const externalUrl = chart?.properties?.externalUrl; + return { + name, + externalUrl, + entityTypeOverride: subTypes ? 
capitalizeFirstLetterOnly(subTypes.typeNames?.[0]) : '', + }; + }; + + renderPreview = (_: PreviewType, data: Chart) => { + const genericProperties = this.getGenericEntityProperties(data); + + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as Chart; + const genericProperties = this.getGenericEntityProperties(data); + return ( + matchedInputFieldRenderer(matchedField, data)} + /> + } + degree={(result as any).degree} + paths={(result as any).paths} + isOutputPort={isOutputPort(result)} + headerDropdownItems={headerDropdownItems} + browsePaths={data.browsePathV2 || undefined} + /> + ); + }; + + renderSearchMatches = (result: SearchResult) => { + const data = result.entity as Chart; + return ( + matchedInputFieldRenderer(matchedField, data)} /> + ); + }; + + getLineageVizConfig = (entity: Chart) => { + return { + urn: entity.urn, + name: entity.properties?.name || entity.urn, + type: EntityType.Chart, + icon: entity?.platform?.properties?.logoUrl || undefined, + platform: entity?.platform, + subtype: entity?.subTypes?.typeNames?.[0] || undefined, + deprecation: entity?.deprecation, + }; + }; + + displayName = (data: Chart) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: Chart) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.TEST, + EntityCapabilityType.LINEAGE, + EntityCapabilityType.HEALTH, + ]); + }; + + renderEmbeddedProfile = (urn: string) => ( + + ); +} diff --git a/datahub-web-react/src/app/entityV2/chart/preview/ChartPreview.tsx 
b/datahub-web-react/src/app/entityV2/chart/preview/ChartPreview.tsx new file mode 100644 index 00000000000000..933fc50bb7d019 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/preview/ChartPreview.tsx @@ -0,0 +1,137 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { + AccessLevel, + ChartStatsSummary, + Container, + DataProduct, + Deprecation, + Domain, + EntityPath, + EntityType, + GlobalTags, + GlossaryTerms, + Owner, + ParentContainersResult, + SearchInsight, + BrowsePathV2, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType } from '../../Entity'; +import { PopularityTier } from '../../shared/containers/profile/sidebar/shared/utils'; +import { summaryHasStats, DashboardLastUpdatedMs } from '../../shared/utils'; +import { ChartStatsSummary as ChartStatsSummaryView } from '../shared/ChartStatsSummary'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const ChartPreview = ({ + urn, + data, + name, + description, + platform, + platformInstanceId, + access, + owners, + tags, + glossaryTerms, + domain, + dataProduct, + container, + insights, + logoUrl, + deprecation, + statsSummary, + lastUpdatedMs, + createdMs, + externalUrl, + parentContainers, + snippet, + degree, + paths, + subType, + isOutputPort, + tier, + headerDropdownItems, + browsePaths, +}: { + urn: string; + data: GenericEntityProperties | null; + platform?: string; + platformInstanceId?: string; + name?: string; + description?: string | null; + access?: AccessLevel | null; + owners?: Array | null; + tags?: GlobalTags; + glossaryTerms?: GlossaryTerms | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + container?: Container | null; + insights?: Array | null; + logoUrl?: string | null; + deprecation?: Deprecation | null; + 
statsSummary?: ChartStatsSummary | null; + lastUpdatedMs?: DashboardLastUpdatedMs; + createdMs?: number | null; + externalUrl?: string | null; + parentContainers?: ParentContainersResult | null; + snippet?: React.ReactNode | null; + degree?: number; + paths?: EntityPath[]; + subType?: string | null; + isOutputPort?: boolean; + tier?: PopularityTier; + headerDropdownItems?: Set; + browsePaths?: BrowsePathV2 | undefined; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + const hasStats = summaryHasStats(statsSummary); + + return ( + + ) + } + degree={degree} + paths={paths} + lastUpdatedMs={lastUpdatedMs} + isOutputPort={isOutputPort} + tier={tier} + headerDropdownItems={headerDropdownItems} + statsSummary={statsSummary} + browsePaths={browsePaths} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/chart/profile/stats/ChartStatsSummarySubHeader.tsx b/datahub-web-react/src/app/entityV2/chart/profile/stats/ChartStatsSummarySubHeader.tsx new file mode 100644 index 00000000000000..31326f8e0bab1d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/profile/stats/ChartStatsSummarySubHeader.tsx @@ -0,0 +1,26 @@ +import React from 'react'; +import { ChartStatsSummary as ChartStatsSummaryObj } from '../../../../../types.generated'; +import { useBaseEntity } from '../../../../entity/shared/EntityContext'; +import { GetChartQuery } from '../../../../../graphql/chart.generated'; +import { ChartStatsSummary } from '../../shared/ChartStatsSummary'; + +export const ChartStatsSummarySubHeader = () => { + const result = useBaseEntity(); + const chart = result?.chart; + const maybeStatsSummary = chart?.statsSummary as ChartStatsSummaryObj; + const viewCount = maybeStatsSummary?.viewCount; + const uniqueUserCountLast30Days = maybeStatsSummary?.uniqueUserCountLast30Days; + const lastUpdatedMs = chart?.properties?.lastModified?.time; + const createdMs = chart?.properties?.created?.time; + const viewCountLast30Days = 
maybeStatsSummary?.viewCountLast30Days; + + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/chart/shared/ChartStatsSummary.tsx b/datahub-web-react/src/app/entityV2/chart/shared/ChartStatsSummary.tsx new file mode 100644 index 00000000000000..87e5d9a4130379 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/shared/ChartStatsSummary.tsx @@ -0,0 +1,113 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Typography } from 'antd'; +import { Popover, Tooltip } from '@components'; +import { ClockCircleOutlined, EyeOutlined, TeamOutlined, QuestionCircleOutlined } from '@ant-design/icons'; +import { formatNumber, formatNumberWithoutAbbreviation } from '../../../shared/formatNumber'; +import { ANTD_GRAY } from '../../shared/constants'; +import { toLocalDateTimeString, toRelativeTimeString } from '../../../shared/time/timeUtils'; +import { StatsSummary } from '../../shared/components/styled/StatsSummary'; +import { PercentileLabel } from '../../shared/stats/PercentileLabel'; +import { countFormatter, needsFormatting } from '../../../../utils/formatter'; +import ExpandingStat from '../../dataset/shared/ExpandingStat'; + +const StatText = styled.span` + color: ${ANTD_GRAY[8]}; +`; + +const HelpIcon = styled(QuestionCircleOutlined)` + color: ${ANTD_GRAY[7]}; + padding-left: 4px; +`; + +type Props = { + chartCount?: number | null; + viewCount?: number | null; + viewCountLast30Days?: number | null; + viewCountPercentileLast30Days?: number | null; + uniqueUserCountLast30Days?: number | null; + uniqueUserPercentileLast30Days?: number | null; + lastUpdatedMs?: number | null; + createdMs?: number | null; +}; + +export const ChartStatsSummary = ({ + chartCount, + viewCount, + viewCountLast30Days, + viewCountPercentileLast30Days, + uniqueUserCountLast30Days, + uniqueUserPercentileLast30Days, + lastUpdatedMs, + createdMs, +}: Props) => { + // acryl-main only. 
+ const effectiveViewCount = (!!viewCountLast30Days && viewCountLast30Days) || viewCount; + const effectiveViewCountText = (!!viewCountLast30Days && 'views last month') || 'views'; + + const statsViews = [ + (!!chartCount && ( + ( + + {isExpanded ? formatNumberWithoutAbbreviation(chartCount) : countFormatter(chartCount)}{' '} + charts + + )} + /> + )) || + undefined, + (!!effectiveViewCount && ( + + + {formatNumber(effectiveViewCount)} {effectiveViewCountText} + {!!viewCountPercentileLast30Days && ( + + )} + + )) || + undefined, + (!!uniqueUserCountLast30Days && ( + + + {formatNumber(uniqueUserCountLast30Days)} users + {!!uniqueUserPercentileLast30Days && ( + + + + )} + + )) || + undefined, + (!!lastUpdatedMs && ( + + {createdMs &&
Created on {toLocalDateTimeString(createdMs)}.
} +
+ Changed on {toLocalDateTimeString(lastUpdatedMs)}.{' '} + + + +
+ + } + > + + + Changed {toRelativeTimeString(lastUpdatedMs)} + +
+ )) || + undefined, + ].filter((stat) => stat !== undefined); + + return <>{statsViews.length > 0 && }; +}; diff --git a/datahub-web-react/src/app/entityV2/chart/summary/ChartFieldsTable.tsx b/datahub-web-react/src/app/entityV2/chart/summary/ChartFieldsTable.tsx new file mode 100644 index 00000000000000..e7e5971e3cbfcf --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/ChartFieldsTable.tsx @@ -0,0 +1,121 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Link } from 'react-router-dom'; +import { Table, Typography } from 'antd'; +import { EntityType, SchemaField } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { CompactFieldIconWithTooltip } from '../../../sharedV2/icons/CompactFieldIcon'; +import { REDESIGN_COLORS } from '../../shared/constants'; + +const MAX_ROWS = 5; + +const TableContainer = styled.div` + display: flex; + flex-direction: column; + gap: 10px; + .ant-table-thead > tr > th { + background-color: transparent; + font-weight: 700; + font-size: 14px; + color: ${REDESIGN_COLORS.SUBTITLE}; + } + && .ant-table-tbody > tr > td { + padding: 8px 5px; + border-bottom: none; + border-right: 1px solid ${REDESIGN_COLORS.COLD_GREY_TEXT_BLUE_1}; + } +`; + +const SeeMoreLink = styled(Link)` + color: ${REDESIGN_COLORS.SUBTITLE}; + font-size: 12px; + font-weight: 600; +`; + +interface Props { + urn: string; + rows: SchemaField[]; +} + +export default function ChartFieldsTable({ urn, rows }: Props) { + const entityRegistry = useEntityRegistry(); + const hasSeeMore = rows.length > MAX_ROWS; + + const nameColumn = { + ellipsis: true, + width: '45%', + title: 'Name', + dataIndex: 'fieldPath', + key: 'fieldPath', + filtered: true, + render: nameRender, + }; + + const descriptionColumn = { + ellipsis: true, + width: '45%', + title: 'Description', + dataIndex: 'description', + key: 'description', + render: descriptionRender, + }; + + return ( + +
({ + style: { + padding: '0px', + maxWidth: '300px', + minWidth: '300px', + }, + id: `column-${record.fieldPath}`, + })} + /> + {hasSeeMore && ( + + View {rows.length - MAX_ROWS} More + + )} + + ); +} + +const TypeWrapper = styled.span` + color: ${REDESIGN_COLORS.SUBTITLE}; + margin-right: 4px; + width: 11px; +`; + +const FieldPathText = styled(Typography.Text)` + font-size: 12px; + font-weight: 500; + color: ${REDESIGN_COLORS.SUBTITLE}; +`; + +const Description = styled(Typography.Text)` + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + color: ${REDESIGN_COLORS.SUBTITLE}; +`; + +function nameRender(fieldPath: string, row: SchemaField) { + return ( + + + + + {fieldPath} + + ); +} + +function descriptionRender(description: string) { + return {description}; +} diff --git a/datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryOverview.tsx b/datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryOverview.tsx new file mode 100644 index 00000000000000..bbd55e8f61d5d9 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryOverview.tsx @@ -0,0 +1,118 @@ +import React from 'react'; +import styled from 'styled-components'; +import { GetChartQuery } from '../../../../graphql/chart.generated'; +import { Entity, EntityType } from '../../../../types.generated'; +import { useBaseEntity, useEntityData } from '../../../entity/shared/EntityContext'; +import Loading from '../../../shared/Loading'; +import SummaryEntityCard from '../../../sharedV2/cards/SummaryEntityCard'; +import { useEntityRegistryV2 } from '../../../useEntityRegistry'; +import { SubType } from '../../shared/components/subtypes'; +import { HorizontalList, SummaryColumns } from '../../shared/summary/ListComponents'; +import SummaryCreatedBySection from '../../shared/summary/SummaryCreatedBySection'; +import SummaryQuerySection from './SummaryQuerySection'; +import { MainSection, StyledTitle, SummaryHeader, VerticalDivider } from './styledComponents'; + 
+const Count = styled.div` + padding: 1px 8px; + display: flex; + justify-content: center; + border-radius: 10px; + background-color: #e5ece9; + font-size: 10px; + font-weight: 400; + margin-left: 8px; +`; + +const FirstRow = styled.div` + display: flex; +`; + +const SectionContainer = styled.div` + display: flex; + flex-direction: column; +`; + +export default function ChartSummaryOverview() { + const { loading } = useEntityData(); + const chart = useBaseEntity()?.chart; + const entityRegistry = useEntityRegistryV2(); + + // TODO: Fix casting + // TODO: Check workbook + data source platform actually matches this entity's platform + const workbook = chart?.parentContainers?.containers?.find((c) => + c.subTypes?.typeNames?.includes(SubType.TableauWorkbook), + ) as Entity; + + // TODO: Calculate this better? + const dataSources = (chart?.inputs?.relationships + ?.map((r) => r.entity) + ?.filter((e) => e?.__typename === 'Dataset') || []) as Entity[]; + + const dashboards = (chart?.dashboards?.relationships?.map((r) => r.entity) || []) as Entity[]; + + const owner = chart?.ownership?.owners && chart?.ownership?.owners[0]?.owner; + + const query = chart?.query?.rawQuery || ''; + + if (loading) { + return ; + } + + return ( + + + General Info + + {!!owner && } + + {!!dataSources?.length && ( + <> + + + + + Data Sources + {dataSources.length} + + + {dataSources.map((dataSource) => ( + + ))} + + + + )} + {!!query && ( + <> + + + + Query + + + + )} + + {workbook && ( + <> + {SubType.TableauWorkbook} + + + )} + + {!!dashboards?.length && ( + <> + + {entityRegistry.getEntityName(EntityType.Dashboard)} + {dashboards?.length} + + + {dashboards.map((dashboard) => ( + + ))} + + + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryTab.tsx b/datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryTab.tsx new file mode 100644 index 00000000000000..a5977ae2e41f3a --- /dev/null +++ 
b/datahub-web-react/src/app/entityV2/chart/summary/ChartSummaryTab.tsx @@ -0,0 +1,67 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Divider } from 'antd'; +import { SummaryTabWrapper } from '../../shared/summary/HeaderComponents'; +import TableauEmbed from './TableauEmbed'; +import ChartSummaryOverview from './ChartSummaryOverview'; +import { TABLEAU_URN, LOOKER_URN, MODE_URN } from '../../../ingest/source/builder/constants'; +import SummaryAboutSection from '../../shared/summary/SummaryAboutSection'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import { useGetTagFields } from './useGetTagFields'; +import { SummaryColumns } from '../../shared/summary/ListComponents'; +import FieldTableByTag from './FieldTableByTag'; +import EmbedPreview from './EmbedPreview'; + +const StyledDivider = styled(Divider)` + width: 100%; + border-top-width: 2px; + margin: 10px 0; +`; + +const MEASURE_TAG = 'urn:li:tag:Measure'; +const DIMENSION_TAG = 'urn:li:tag:Dimension'; +const TEMPORAL_TAG = 'urn:li:tag:Temporal'; + +export default function ChartSummaryTab(): JSX.Element | null { + const { entityData } = useEntityData(); + + const measureFields = useGetTagFields(MEASURE_TAG); + const dimensionFields = useGetTagFields(DIMENSION_TAG); + const temporalFields = useGetTagFields(TEMPORAL_TAG); + + const areTagFieldsPresent = !!(measureFields?.length || dimensionFields?.length || temporalFields?.length); + + return ( + + + + {(entityData?.platform?.urn === TABLEAU_URN || + entityData?.platform?.urn === LOOKER_URN || + entityData?.platform?.urn === MODE_URN) && + areTagFieldsPresent && ( + <> + + + + {measureFields?.length && } + {dimensionFields?.length && } + {temporalFields?.length && } + + + )} + + + + + {entityData?.platform?.urn === TABLEAU_URN && entityData?.externalUrl && ( + <> + + {entityData?.platform?.urn === TABLEAU_URN && } + + )} + {entityData?.platform?.urn === LOOKER_URN && entityData.embed?.renderUrl && ( + + )} 
+ + ); +} diff --git a/datahub-web-react/src/app/entityV2/chart/summary/EmbedPreview.tsx b/datahub-web-react/src/app/entityV2/chart/summary/EmbedPreview.tsx new file mode 100644 index 00000000000000..ceacfd37791342 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/EmbedPreview.tsx @@ -0,0 +1,28 @@ +import React from 'react'; +import styled from 'styled-components'; +import HeaderIcon from '@mui/icons-material/VisibilityOutlined'; +import { SummaryTabHeaderTitle, SummaryTabHeaderWrapper } from '../../shared/summary/HeaderComponents'; + +const Wrapper = styled.div` + height: fit-content; +`; + +const StyledIframe = styled.iframe` + width: 100%; + height: 70vh; +`; + +interface Props { + embedUrl: string; +} + +export default function EmbedPreview({ embedUrl }: Props) { + return ( + + + } title="Preview" /> + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/chart/summary/FieldTableByTag.tsx b/datahub-web-react/src/app/entityV2/chart/summary/FieldTableByTag.tsx new file mode 100644 index 00000000000000..bae4ef40312053 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/FieldTableByTag.tsx @@ -0,0 +1,33 @@ +import React from 'react'; +import styled from 'styled-components'; +import ChartFieldsTable from './ChartFieldsTable'; +import { SchemaField } from '../../../../types.generated'; +import { SummaryTabHeaderTitle } from '../../shared/summary/HeaderComponents'; +import { useEntityData } from '../../../entity/shared/EntityContext'; + +const ColumnWrapper = styled.div` + display: flex; + flex-direction: column; +`; + +interface Props { + title: string; + fields: SchemaField[]; +} + +export default function FieldTableByTag({ title, fields }: Props) { + const { urn } = useEntityData(); + + if (!fields?.length) { + return null; + } + + return ( + <> + + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/chart/summary/SummaryQuerySection.tsx b/datahub-web-react/src/app/entityV2/chart/summary/SummaryQuerySection.tsx 
new file mode 100644 index 00000000000000..ea0b14e86cfc1d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/SummaryQuerySection.tsx @@ -0,0 +1,75 @@ +import React, { useState } from 'react'; +import styled from 'styled-components/macro'; +import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; +import { Button, Modal } from 'antd'; +import { REDESIGN_COLORS } from '../../shared/constants'; + +const PreviewSyntax = styled(SyntaxHighlighter)` + max-height: 68px; + overflow: hidden !important; + border-radius: 12px; + max-width: 100%; + background: #fafafc !important; + + span { + font-family: 'Roboto Mono', monospace; + } +`; + +const ModalSyntaxContainer = styled.div` + margin: 20px; + overflow: auto; +`; + +const Container = styled.div` + display: flex; + flex-direction: column; + flex-wrap: wrap; + max-width: 400px; +`; + +const StyledButton = styled(Button)` + color: ${REDESIGN_COLORS.TITLE_PURPLE}; + display: flex; + width: fit-content; + + :hover { + color: ${REDESIGN_COLORS.HOVER_PURPLE}; + background: transparent; + } +`; + +interface Props { + query: string; +} + +const SummaryQuerySection = ({ query }: Props) => { + const [showFullContentModal, setShowFullContentModal] = useState(false); + + return ( + + setShowFullContentModal(false)}>Dismiss} + open={showFullContentModal} + onCancel={() => setShowFullContentModal(false)} + > + + + {query} + + + + + + {query} + + setShowFullContentModal(true)}> + Read More + + + ); +}; + +export default SummaryQuerySection; diff --git a/datahub-web-react/src/app/entityV2/chart/summary/TableauEmbed.tsx b/datahub-web-react/src/app/entityV2/chart/summary/TableauEmbed.tsx new file mode 100644 index 00000000000000..d217dcdf955c92 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/TableauEmbed.tsx @@ -0,0 +1,40 @@ +import React, { useEffect } from 'react'; +import styled from 'styled-components'; +import HeaderIcon from '@mui/icons-material/VisibilityOutlined'; +import { 
SummaryTabHeaderTitle, SummaryTabHeaderWrapper } from '../../shared/summary/HeaderComponents'; + +const Wrapper = styled.div` + height: fit-content; +`; + +interface Props { + externalUrl: string; +} + +export default function TableauEmbed({ externalUrl }: Props) { + useEffect(() => { + const script = document.createElement('script'); + script.type = 'module'; + script.src = 'https://public.tableau.com/javascripts/api/tableau.embedding.3.latest.min.js'; + script.async = true; + + document.head.appendChild(script); + return () => { + document.head.removeChild(script); + }; + }, []); + + // TODO: Calculate height better (desired: height of tab content) + return ( + + + } title="Preview" /> + + { + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + + } + + ); +} diff --git a/datahub-web-react/src/app/entityV2/chart/summary/styledComponents.ts b/datahub-web-react/src/app/entityV2/chart/summary/styledComponents.ts new file mode 100644 index 00000000000000..1ead96bbefa7db --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/styledComponents.ts @@ -0,0 +1,32 @@ +import { Typography } from 'antd'; +import styled from 'styled-components'; +import { REDESIGN_COLORS } from '../../shared/constants'; +import { HeaderTitle } from '../../shared/summary/HeaderComponents'; + +export const MainSection = styled.div` + display: flex; + flex-direction: column; +`; + +export const SummaryHeader = styled(Typography.Text)` + margin-bottom: 20px; + font-size: 18px; + color: ${REDESIGN_COLORS.TEXT_HEADING}; + font-weight: 500; +`; + +export const VerticalDivider = styled.hr` + align-self: stretch; + height: auto; + margin: 0 20px; + color: ${REDESIGN_COLORS.COLD_GREY_TEXT_BLUE_1}; + border-width: 1px; + opacity: 0.2; +`; + +export const StyledTitle = styled(HeaderTitle)` + margin-bottom: 12px; + font-size: 14px; + color: ${REDESIGN_COLORS.TEXT_HEADING}; + font-weight: 700; +`; diff --git 
a/datahub-web-react/src/app/entityV2/chart/summary/useGetTagFields.ts b/datahub-web-react/src/app/entityV2/chart/summary/useGetTagFields.ts new file mode 100644 index 00000000000000..8495351f37f56e --- /dev/null +++ b/datahub-web-react/src/app/entityV2/chart/summary/useGetTagFields.ts @@ -0,0 +1,16 @@ +import { GetChartQuery } from '../../../../graphql/chart.generated'; +import { SchemaField } from '../../../../types.generated'; +import { useBaseEntity } from '../../../entity/shared/EntityContext'; + +export function useGetTagFields(tag: string): SchemaField[] | undefined { + const chart = useBaseEntity()?.chart; + + // Have to type cast because of line `businessAttributeDataType: type` in `businessAttribute` fragment + // Can we get rid of this? Why is this renamed? + return chart?.inputFields?.fields + ?.filter((f) => + f?.schemaField?.globalTags?.tags?.map((t) => t.tag.urn.toLowerCase()).includes(tag.toLowerCase()), + ) + .map((f) => f?.schemaField) + .filter((f) => !!f) as SchemaField[] | undefined; +} diff --git a/datahub-web-react/src/app/entityV2/container/ContainerEntitiesTab.tsx b/datahub-web-react/src/app/entityV2/container/ContainerEntitiesTab.tsx new file mode 100644 index 00000000000000..0f760e3ccca3d2 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/container/ContainerEntitiesTab.tsx @@ -0,0 +1,25 @@ +import React from 'react'; +import { useEntityData } from '../../entity/shared/EntityContext'; +import { EmbeddedListSearchSection } from '../shared/components/styled/search/EmbeddedListSearchSection'; +import { UnionType } from '../../search/utils/constants'; + +export const ContainerEntitiesTab = () => { + const { urn } = useEntityData(); + + const fixedFilter = { + field: 'container', + values: [urn], + }; + + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/container/ContainerEntity.tsx b/datahub-web-react/src/app/entityV2/container/ContainerEntity.tsx new file mode 100644 index 00000000000000..a23a1c2e017f62 --- /dev/null 
+++ b/datahub-web-react/src/app/entityV2/container/ContainerEntity.tsx @@ -0,0 +1,279 @@ +import { AppstoreOutlined, FileOutlined, FolderOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { GetContainerQuery, useGetContainerQuery } from '../../../graphql/container.generated'; +import { Container, EntityType, SearchResult } from '../../../types.generated'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { SubType, TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import SidebarContentsSection from '../shared/containers/profile/sidebar/Container/SidebarContentsSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import EmbeddedProfile from '../shared/embed/EmbeddedProfile'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { SUMMARY_TAB_ICON } 
from '../shared/summary/HeaderComponents'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { getDataProduct, isOutputPort } from '../shared/utils'; +import { ContainerEntitiesTab } from './ContainerEntitiesTab'; +import ContainerSummaryTab from './ContainerSummaryTab'; +import { Preview } from './preview/Preview'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([EntityMenuItems.EXTERNAL_URL, EntityMenuItems.SHARE, EntityMenuItems.ANNOUNCE]); + +/** + * Definition of the DataHub Container entity. + */ +export class ContainerEntity implements Entity { + type: EntityType = EntityType.Container; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + if (styleType === IconStyleType.SVG) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'container'; + + getPathName = () => this.getGraphName(); + + getEntityName = () => 'Container'; + + getCollectionName = () => 'Containers'; + + useEntityQuery = useGetContainerQuery; + + renderProfile = (urn: string) => ( + + !!container?.container?.subTypes?.typeNames?.includes(SubType.TableauWorkbook), + enabled: () => true, + }, + }, + { + name: 'Contents', + component: ContainerEntitiesTab, + icon: AppstoreOutlined, + }, + { + name: 'Documentation', + component: DocumentationTab, + icon: FileOutlined, + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { 
+ component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarContentsSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + { + component: SidebarTagsSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + // TODO: Add back once entity-level recommendations are complete. + // { + // component: SidebarRecommendationsSection, + // }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (_: PreviewType, data: Container) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as Container; + const genericProperties = this.getGenericEntityProperties(data); + + return ( + + ); + }; + + getLineageVizConfig(entity: Container) { + return { + urn: entity.urn, + name: this.displayName(entity), + type: this.type, + icon: entity?.platform?.properties?.logoUrl || undefined, + platform: entity?.platform, + subtype: entity?.subTypes?.typeNames?.[0] || undefined, + }; + } + + displayName = (data: Container) => { + return data?.properties?.name || data?.properties?.qualifiedName || data?.urn; + }; + + getOverridePropertiesFromEntity = (data: Container) => { + return { + name: this.displayName(data), + externalUrl: data.properties?.externalUrl, + entityCount: data.entities?.total, + }; + }; + + getGenericEntityProperties = (data: Container) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + 
EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.TEST, + ]); + }; + + renderEmbeddedProfile = (urn: string) => ( + + ); +} diff --git a/datahub-web-react/src/app/entityV2/container/ContainerSummaryTab.tsx b/datahub-web-react/src/app/entityV2/container/ContainerSummaryTab.tsx new file mode 100644 index 00000000000000..74c9049365c7e5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/container/ContainerSummaryTab.tsx @@ -0,0 +1,15 @@ +import React from 'react'; +import { useEntityData } from '../../entity/shared/EntityContext'; +import { SubType } from '../shared/components/subtypes'; +import TableauWorkbookSummaryTab from './tableau/TableauWorkbookSummaryTab'; + +export default function ContainerSummaryTab() { + const { entityData } = useEntityData(); + const subtype = entityData?.subTypes?.typeNames?.[0]; + switch (subtype) { + case SubType.TableauWorkbook: + return ; + default: + return null; + } +} diff --git a/datahub-web-react/src/app/entityV2/container/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/container/preview/Preview.tsx new file mode 100644 index 00000000000000..7d6f6176e9efd5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/container/preview/Preview.tsx @@ -0,0 +1,110 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { + Container, + EntityType, + Owner, + SearchInsight, + SubTypes, + Domain, + ParentContainersResult, + GlobalTags, + Deprecation, + GlossaryTerms, + DataProduct, + EntityPath, + BrowsePathV2, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import ContainerIcon from '../../shared/containers/profile/header/PlatformContent/ContainerIcon'; +import EntityCount 
from '../../shared/containers/profile/header/EntityCount'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const Preview = ({ + urn, + data, + name, + platformName, + platformLogo, + platformInstanceId, + description, + owners, + tags, + glossaryTerms, + insights, + subTypes, + logoComponent, + container, + domain, + dataProduct, + parentContainers, + externalUrl, + deprecation, + degree, + paths, + entityCount, + isOutputPort, + headerDropdownItems, + browsePaths, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + platformName?: string; + platformLogo?: string | null; + platformInstanceId?: string; + description?: string | null; + owners?: Array | null; + tags?: GlobalTags | null; + glossaryTerms?: GlossaryTerms | null; + insights?: Array | null; + subTypes?: SubTypes | null; + logoComponent?: JSX.Element; + container?: Container | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + deprecation?: Deprecation | null; + parentContainers?: ParentContainersResult | null; + externalUrl?: string | null; + degree?: number; + paths?: EntityPath[]; + entityCount?: number; + isOutputPort?: boolean; + headerDropdownItems?: Set; + browsePaths?: BrowsePathV2; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + } + domain={domain || undefined} + dataProduct={dataProduct} + parentEntities={parentContainers?.containers} + tags={tags || undefined} + glossaryTerms={glossaryTerms || undefined} + externalUrl={externalUrl} + degree={degree} + paths={paths} + subHeader={} + isOutputPort={isOutputPort} + headerDropdownItems={headerDropdownItems} + browsePaths={browsePaths} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/container/tableau/TableauDataSourcesSection.tsx b/datahub-web-react/src/app/entityV2/container/tableau/TableauDataSourcesSection.tsx new file mode 100644 index 00000000000000..7d03c40034153c --- /dev/null +++ 
b/datahub-web-react/src/app/entityV2/container/tableau/TableauDataSourcesSection.tsx @@ -0,0 +1,56 @@ +import React from 'react'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import { useGetSearchResultsForMultipleQuery } from '../../../../graphql/search.generated'; +import { EntityType, FilterOperator } from '../../../../types.generated'; +import { HorizontalList } from '../../shared/summary/ListComponents'; +import { HeaderTitle } from '../../shared/summary/HeaderComponents'; +import { getSubTypeIcon, SubType } from '../../shared/components/subtypes'; +import { CONTAINER_FILTER_NAME, TYPE_NAMES_FILTER_NAME } from '../../../searchV2/utils/constants'; +import SummaryEntityCard from '../../../sharedV2/cards/SummaryEntityCard'; + +export default function TableauDataSourcesSection() { + const { urn } = useEntityData(); + + const { data: searchData } = useGetSearchResultsForMultipleQuery({ + skip: !urn, + variables: { + input: { + types: [EntityType.Dataset], + query: '', + count: 10, + orFilters: [ + { + and: [ + { field: CONTAINER_FILTER_NAME, values: [urn] }, + { + field: TYPE_NAMES_FILTER_NAME, + values: [SubType.TableauEmbeddedDataSource, SubType.TableauPublishedDataSource], + condition: FilterOperator.Contain, + }, + ], + }, + ], + }, + }, + fetchPolicy: 'cache-first', + }); + + const dataSources = searchData?.searchAcrossEntities?.searchResults.map((r) => r.entity); + + if (!dataSources?.length) { + return null; + } + return ( + <> + + {getSubTypeIcon(SubType.TableauPublishedDataSource)} + Data Sources ({searchData?.searchAcrossEntities?.total}) + + + {dataSources.map((dataSource) => ( + + ))} + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/container/tableau/TableauViewsSection.tsx b/datahub-web-react/src/app/entityV2/container/tableau/TableauViewsSection.tsx new file mode 100644 index 00000000000000..d71b13c2f26b63 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/container/tableau/TableauViewsSection.tsx @@ -0,0 
+1,55 @@ +import React from 'react'; +import Icon from '@ant-design/icons'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import { useGetSearchResultsForMultipleQuery } from '../../../../graphql/search.generated'; +import { EntityType } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { HorizontalList } from '../../shared/summary/ListComponents'; +import { HeaderTitle } from '../../shared/summary/HeaderComponents'; +import TableauViewIcon from '../../../../images/tableau-view.svg?react'; +import { CONTAINER_FILTER_NAME } from '../../../searchV2/utils/constants'; +import SummaryEntityCard from '../../../sharedV2/cards/SummaryEntityCard'; + +const viewPattern = /.*tableau.com.*\/#(\/site\/[^/]*)?\/views\/(.*)/; + +export default function TableauViewsSection() { + const { urn } = useEntityData(); + + const { data } = useGetSearchResultsForMultipleQuery({ + skip: !urn, + variables: { + input: { + types: [EntityType.Chart, EntityType.Dashboard], + query: '', + orFilters: [{ and: [{ field: CONTAINER_FILTER_NAME, values: [urn] }] }], + // TODO: Store this information in a filterable property + count: 1000, + }, + }, + fetchPolicy: 'cache-first', + }); + + const entityRegistry = useEntityRegistry(); + const views = data?.searchAcrossEntities?.searchResults + .filter((r) => + entityRegistry.getGenericEntityProperties(r.entity.type, r.entity)?.externalUrl?.match(viewPattern), + ) + .map((r) => r.entity); + + if (!views?.length) { + return null; + } + return ( + <> + + + Views ({views.length}) + + + {views.map((view) => ( + + ))} + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/container/tableau/TableauWorkbookSummaryTab.tsx b/datahub-web-react/src/app/entityV2/container/tableau/TableauWorkbookSummaryTab.tsx new file mode 100644 index 00000000000000..4e0142c76a2fe2 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/container/tableau/TableauWorkbookSummaryTab.tsx @@ -0,0 +1,15 
@@ +import React from 'react'; +import SummaryAboutSection from '../../shared/summary/SummaryAboutSection'; +import { SummaryTabWrapper } from '../../shared/summary/HeaderComponents'; +import TableauViewsSection from './TableauViewsSection'; +import TableauDataSourcesSection from './TableauDataSourcesSection'; + +export default function TableauWorkbookSummaryTab() { + return ( + + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dashboard/DashboardEntity.tsx b/datahub-web-react/src/app/entityV2/dashboard/DashboardEntity.tsx new file mode 100644 index 00000000000000..88494f8f2b75b5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dashboard/DashboardEntity.tsx @@ -0,0 +1,404 @@ +import { + AppstoreOutlined, + DashboardFilled, + DashboardOutlined, + EyeOutlined, + FileOutlined, + PartitionOutlined, + TableOutlined, + UnorderedListOutlined, + WarningOutlined, +} from '@ant-design/icons'; +import * as React from 'react'; +import { + GetDashboardQuery, + useGetDashboardQuery, + useUpdateDashboardMutation, +} from '../../../graphql/dashboard.generated'; +import { Dashboard, EntityType, LineageDirection, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { LOOKER_URN, MODE_URN } from '../../ingest/source/builder/constants'; +import { matchedInputFieldRenderer } from '../../search/matches/matchedInputFieldRenderer'; +import { MatchedFieldList } from '../../searchV2/matches/MatchedFieldList'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import 
SidebarDashboardHeaderSection from '../shared/containers/profile/sidebar/Dashboard/Header/SidebarDashboardHeaderSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import SidebarLineageSection from '../shared/containers/profile/sidebar/Lineage/SidebarLineageSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import EmbeddedProfile from '../shared/embed/EmbeddedProfile'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { SUMMARY_TAB_ICON } from '../shared/summary/HeaderComponents'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { EmbedTab } from '../shared/tabs/Embed/EmbedTab'; +import { DashboardChartsTab } from '../shared/tabs/Entity/DashboardChartsTab'; +import { DashboardDatasetsTab } from '../shared/tabs/Entity/DashboardDatasetsTab'; +import TabNameWithCount from '../shared/tabs/Entity/TabNameWithCount'; +import { IncidentTab } from '../shared/tabs/Incident/IncidentTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { SidebarTitleActionType, getDashboardLastUpdatedMs, getDataProduct, isOutputPort } from '../shared/utils'; +import { DashboardPreview } from 
'./preview/DashboardPreview'; +import { DashboardStatsSummarySubHeader } from './profile/DashboardStatsSummarySubHeader'; +import DashboardSummaryTab from './summary/DashboardSummaryTab'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const PREVIEW_SUPPORTED_PLATFORMS = [LOOKER_URN, MODE_URN]; + +/** + * Definition of the DataHub Dashboard entity. + */ + +const headerDropdownItems = new Set([ + EntityMenuItems.EXTERNAL_URL, + EntityMenuItems.SHARE, + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.ANNOUNCE, +]); + +export class DashboardEntity implements Entity { + type: EntityType = EntityType.Dashboard; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + if (styleType === IconStyleType.SVG) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'title'; + + getPathName = () => 'dashboard'; + + getEntityName = () => 'Dashboard'; + + getCollectionName = () => 'Dashboards'; + + useEntityQuery = useGetDashboardQuery; + + renderProfile = (urn: string) => ( + + (dashboard?.dashboard?.charts?.total || 0) > 0 || + (dashboard?.dashboard?.datasets?.total || 0) === 0, + enabled: (_, dashboard: GetDashboardQuery) => (dashboard?.dashboard?.charts?.total || 0) > 0, + }, + }, + { + name: 'Datasets', + component: DashboardDatasetsTab, + icon: TableOutlined, + display: { + visible: (_, dashboard: GetDashboardQuery) => (dashboard?.dashboard?.datasets?.total || 0) > 0, + enabled: (_, dashboard: GetDashboardQuery) => (dashboard?.dashboard?.datasets?.total || 0) > 0, + }, + }, + { + name: 'Documentation', + component: DocumentationTab, + icon: FileOutlined, + }, + { + name: 'Preview', + component: EmbedTab, + icon: EyeOutlined, + display: { + 
visible: (_, dashboard: GetDashboardQuery) => + !!dashboard?.dashboard?.embed?.renderUrl && + PREVIEW_SUPPORTED_PLATFORMS.includes(dashboard?.dashboard?.platform.urn), + enabled: (_, dashboard: GetDashboardQuery) => + !!dashboard?.dashboard?.embed?.renderUrl && + PREVIEW_SUPPORTED_PLATFORMS.includes(dashboard?.dashboard?.platform.urn), + }, + }, + { + name: 'Lineage', + component: LineageTab, + icon: PartitionOutlined, + properties: { + defaultDirection: LineageDirection.Upstream, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + }, + { + name: 'Incidents', + icon: WarningOutlined, + component: IncidentTab, + getDynamicName: (_, dashboard, loading) => { + const activeIncidentCount = dashboard?.dashboard?.activeIncidents?.total; + return ; + }, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarDashboardHeaderSection, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarLineageSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: SidebarTagsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Lineage', + component: LineageTab, + description: "View this data asset's upstream and downstream dependencies", + icon: PartitionOutlined, + properties: { + defaultDirection: LineageDirection.Upstream, + actionType: SidebarTitleActionType.LineageExplore, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + getOverridePropertiesFromEntity = (dashboard?: 
Dashboard | null): GenericEntityProperties => { + // TODO: Get rid of this once we have correctly formed platform coming back. + const name = dashboard?.properties?.name; + const externalUrl = dashboard?.properties?.externalUrl; + const subTypes = dashboard?.subTypes; + return { + name, + externalUrl, + entityTypeOverride: subTypes ? capitalizeFirstLetterOnly(subTypes.typeNames?.[0]) : '', + }; + }; + + renderPreview = (previewType: PreviewType, data: Dashboard) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as Dashboard; + const genericProperties = this.getGenericEntityProperties(data); + + return ( + matchedInputFieldRenderer(matchedField, data)} + matchSuffix="on a contained chart" + /> + } + subtype={data.subTypes?.typeNames?.[0]} + degree={(result as any).degree} + paths={(result as any).paths} + isOutputPort={isOutputPort(result)} + headerDropdownItems={headerDropdownItems} + browsePaths={data.browsePathV2 || undefined} + /> + ); + }; + + renderSearchMatches = (result: SearchResult) => { + const data = result.entity as Dashboard; + return ( + matchedInputFieldRenderer(matchedField, data)} + matchSuffix="on a contained chart" + /> + ); + }; + + getLineageVizConfig = (entity: Dashboard) => { + return { + urn: entity.urn, + name: entity.properties?.name || entity.urn, + type: EntityType.Dashboard, + subtype: entity?.subTypes?.typeNames?.[0] || undefined, + icon: entity?.platform?.properties?.logoUrl || undefined, + platform: entity?.platform, + deprecation: entity?.deprecation, + }; + }; + + displayName = (data: Dashboard) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: Dashboard) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + 
EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.TEST, + EntityCapabilityType.LINEAGE, + EntityCapabilityType.HEALTH, + ]); + }; + + getGraphName = () => this.getPathName(); + + renderEmbeddedProfile = (urn: string) => ( + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dashboard/preview/DashboardPreview.tsx b/datahub-web-react/src/app/entityV2/dashboard/preview/DashboardPreview.tsx new file mode 100644 index 00000000000000..c1800d38a12275 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dashboard/preview/DashboardPreview.tsx @@ -0,0 +1,143 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { + AccessLevel, + Container, + DashboardStatsSummary, + DataProduct, + Deprecation, + Domain, + EntityPath, + EntityType, + GlobalTags, + GlossaryTerms, + Owner, + ParentContainersResult, + SearchInsight, + BrowsePathV2, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import { PopularityTier } from '../../shared/containers/profile/sidebar/shared/utils'; +import { summaryHasStats, DashboardLastUpdatedMs } from '../../shared/utils'; +import { DashboardStatsSummary as DashboardStatsSummaryView } from '../shared/DashboardStatsSummary'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const DashboardPreview = ({ + urn, + data, + platform, + platformInstanceId, + name, + subtype, + description, + access, + owners, + tags, + glossaryTerms, + domain, + dataProduct, + container, + insights, + logoUrl, + chartCount, + statsSummary, + lastUpdatedMs, + createdMs, + externalUrl, + 
parentContainers, + deprecation, + snippet, + degree, + paths, + isOutputPort, + tier, + headerDropdownItems, + previewType, + browsePaths, +}: { + urn: string; + data: GenericEntityProperties | null; + platform?: string; + platformInstanceId?: string; + name?: string; + subtype?: string | null; + description?: string | null; + access?: AccessLevel | null; + owners?: Array | null; + tags?: GlobalTags; + glossaryTerms?: GlossaryTerms | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + container?: Container | null; + deprecation?: Deprecation | null; + insights?: Array | null; + logoUrl?: string | null; + chartCount?: number | null; + statsSummary?: DashboardStatsSummary | null; + lastUpdatedMs?: DashboardLastUpdatedMs; + createdMs?: number | null; + externalUrl?: string | null; + parentContainers?: ParentContainersResult | null; + snippet?: React.ReactNode | null; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + tier?: PopularityTier; + headerDropdownItems?: Set; + previewType?: PreviewType; + browsePaths?: BrowsePathV2; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + const hasStats = summaryHasStats(statsSummary); + + return ( + + ) + } + degree={degree} + paths={paths} + lastUpdatedMs={lastUpdatedMs} + isOutputPort={isOutputPort} + tier={tier} + headerDropdownItems={headerDropdownItems} + statsSummary={statsSummary} + previewType={previewType} + browsePaths={browsePaths} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dashboard/profile/DashboardStatsSummarySubHeader.tsx b/datahub-web-react/src/app/entityV2/dashboard/profile/DashboardStatsSummarySubHeader.tsx new file mode 100644 index 00000000000000..ceaf5d9c971f44 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dashboard/profile/DashboardStatsSummarySubHeader.tsx @@ -0,0 +1,28 @@ +import React from 'react'; +import { DashboardStatsSummary as DashboardStatsSummaryObj } from '../../../../types.generated'; +import { useBaseEntity } 
from '../../../entity/shared/EntityContext'; +import { GetDashboardQuery } from '../../../../graphql/dashboard.generated'; +import { DashboardStatsSummary } from '../shared/DashboardStatsSummary'; + +export const DashboardStatsSummarySubHeader = () => { + const result = useBaseEntity(); + const dashboard = result?.dashboard; + const maybeStatsSummary = dashboard?.statsSummary as DashboardStatsSummaryObj; + const chartCount = dashboard?.charts?.total; + const viewCount = maybeStatsSummary?.viewCount; + const viewCountLast30Days = maybeStatsSummary?.viewCountLast30Days; + const uniqueUserCountLast30Days = maybeStatsSummary?.uniqueUserCountLast30Days; + const lastUpdatedMs = dashboard?.properties?.lastModified?.time; + const createdMs = dashboard?.properties?.created?.time; + + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dashboard/shared/DashboardStatsSummary.tsx b/datahub-web-react/src/app/entityV2/dashboard/shared/DashboardStatsSummary.tsx new file mode 100644 index 00000000000000..8e04f08860f7f4 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dashboard/shared/DashboardStatsSummary.tsx @@ -0,0 +1,117 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Typography } from 'antd'; +import { Popover, Tooltip } from '@components'; +import { ClockCircleOutlined, EyeOutlined, TeamOutlined, QuestionCircleOutlined } from '@ant-design/icons'; +import { formatNumber, formatNumberWithoutAbbreviation } from '../../../shared/formatNumber'; +import { ANTD_GRAY } from '../../shared/constants'; +import { toLocalDateTimeString, toRelativeTimeString } from '../../../shared/time/timeUtils'; +import { StatsSummary } from '../../shared/components/styled/StatsSummary'; +import { PercentileLabel } from '../../shared/stats/PercentileLabel'; +import { countFormatter, needsFormatting } from '../../../../utils/formatter'; +import ExpandingStat from '../../dataset/shared/ExpandingStat'; + +const StatText = styled.span` + color: 
${ANTD_GRAY[8]}; + @media (min-width: 1024px) { + white-space: nowrap; +`; + +const HelpIcon = styled(QuestionCircleOutlined)` + color: ${ANTD_GRAY[7]}; + padding-left: 4px; +`; + +type Props = { + chartCount?: number | null; + viewCount?: number | null; + viewCountLast30Days?: number | null; + viewCountPercentileLast30Days?: number | null; + uniqueUserCountLast30Days?: number | null; + uniqueUserPercentileLast30Days?: number | null; + lastUpdatedMs?: number | null; + createdMs?: number | null; +}; + +export const DashboardStatsSummary = ({ + chartCount, + viewCount, + viewCountLast30Days, + viewCountPercentileLast30Days, + uniqueUserCountLast30Days, + uniqueUserPercentileLast30Days, + lastUpdatedMs, + createdMs, +}: Props) => { + // acryl-main only. + const effectiveViewCount = (!!viewCountLast30Days && viewCountLast30Days) || viewCount; + const effectiveViewCountText = (!!viewCountLast30Days && 'views last month') || 'views'; + + const statsViews = [ + (!!chartCount && ( + ( + + {isExpanded ? formatNumberWithoutAbbreviation(chartCount) : countFormatter(chartCount)}{' '} + charts + + )} + /> + )) || + undefined, + (!!effectiveViewCount && ( + + + {formatNumber(effectiveViewCount)} {effectiveViewCountText} + {!!viewCountPercentileLast30Days && ( + + + + )} + + )) || + undefined, + (!!uniqueUserCountLast30Days && ( + + + {formatNumber(uniqueUserCountLast30Days)} users + {!!uniqueUserPercentileLast30Days && ( + + + + )} + + )) || + undefined, + (!!lastUpdatedMs && ( + + {createdMs &&
Created on {toLocalDateTimeString(createdMs)}.
} +
+ Changed on {toLocalDateTimeString(lastUpdatedMs)}.{' '} + + + +
+ + } + > + + + Changed {toRelativeTimeString(lastUpdatedMs)} + +
+ )) || + undefined, + ].filter((stat) => stat !== undefined); + + return <>{statsViews.length > 0 && }; +}; diff --git a/datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryOverview.tsx b/datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryOverview.tsx new file mode 100644 index 00000000000000..b4f8c74c308b8f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryOverview.tsx @@ -0,0 +1,157 @@ +import React from 'react'; +import { Link } from 'react-router-dom'; +import styled from 'styled-components'; +import { GetDashboardQuery } from '../../../../graphql/dashboard.generated'; +import { Entity, EntityType } from '../../../../types.generated'; +import { useBaseEntity, useEntityData } from '../../../entity/shared/EntityContext'; +import { GenericEntityProperties } from '../../../entity/shared/types'; +import { HoverEntityTooltip } from '../../../recommendations/renderer/component/HoverEntityTooltip'; +import PlatformIcon from '../../../sharedV2/icons/PlatformIcon'; +import { useEntityRegistryV2 } from '../../../useEntityRegistry'; +import { MainSection, StyledTitle, SummaryHeader, VerticalDivider } from '../../chart/summary/styledComponents'; +import { REDESIGN_COLORS } from '../../shared/constants'; +import { SummaryColumns } from '../../shared/summary/ListComponents'; +import SummaryCreatedBySection from '../../shared/summary/SummaryCreatedBySection'; + +import { useGetSearchResultsQuery } from '../../../../graphql/search.generated'; +import Loading from '../../../shared/Loading'; + +const Count = styled.div` + padding: 1px 8px; + display: flex; + justify-content: center; + border-radius: 10px; + background-color: #e5ece9; + font-size: 10px; + font-weight: 400; + margin-left: 8px; +`; + +const EntityItem = styled.div` + display: flex; + align-items: center; + padding: 6px 40px 6px 0; + gap: 8px; + font-size: 14px; + font-weight: 500; + color: ${REDESIGN_COLORS.SUBTITLE}; +`; + +const 
AssetSections = styled.div` + display: flex; +`; + +const EntitiesList = styled.div` + display: flex; + flex-direction: column; + overflow: auto; + max-height: 220px; +`; + +export default function DashboardSummaryOverview() { + const { loading } = useEntityData(); + const dashboard = useBaseEntity()?.dashboard; + const entityRegistry = useEntityRegistryV2(); + + const charts = (dashboard?.charts?.relationships?.map((r) => r.entity) || []) as Entity[]; + + const sources = charts + .flatMap((chart: any) => chart?.upstream?.relationships?.map((r) => r.entity)) + .filter((e) => e.type === EntityType.Dataset) + .map((dataSource) => dataSource.urn); + + const { data: dataSourcesData } = useGetSearchResultsQuery({ + variables: { + input: { + type: EntityType.Dataset, + query: '', + filters: [ + { + field: 'urn', + values: sources, + }, + ], + }, + }, + }); + + if (loading) { + return ; + } + + const dataSources = (dataSourcesData?.search?.searchResults?.map((result) => result.entity) || []) as Entity[]; + + const owner = dashboard?.ownership?.owners && dashboard?.ownership?.owners[0]?.owner; + const displayName = entityRegistry.getDisplayName(EntityType.Dashboard, dashboard); + + return ( + + + General Info + + {!!owner && } + + + + Related Assets + + {!!dataSources?.length && ( + + + Data Sources + {dataSources.length} + + + {dataSources.map((dataSource) => ( + + + + + {entityRegistry.getDisplayName( + dataSource?.type as EntityType, + dataSource, + )} + + + + ))} + + + )} + + + {!!charts?.length && ( + + + Contents + {charts.length} + + + {charts.map((chart) => ( + + + + + {entityRegistry.getDisplayName(chart?.type as EntityType, chart)} + + + + ))} + + + )} + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryTab.tsx b/datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryTab.tsx new file mode 100644 index 00000000000000..b8474f5a08515a --- /dev/null +++ 
b/datahub-web-react/src/app/entityV2/dashboard/summary/DashboardSummaryTab.tsx @@ -0,0 +1,33 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Divider } from 'antd'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import { SummaryTabWrapper } from '../../shared/summary/HeaderComponents'; +import SummaryAboutSection from '../../shared/summary/SummaryAboutSection'; +import EmbedPreview from '../../chart/summary/EmbedPreview'; +import DashboardSummaryOverview from './DashboardSummaryOverview'; + +const StyledDivider = styled(Divider)` + width: 100%; + border-top-width: 2px; + margin: 10px 0; +`; + +export default function DashboardSummaryTab(): JSX.Element | null { + const { entityData } = useEntityData(); + + return ( + + + + + + {entityData?.embed?.renderUrl && ( + <> + + + + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataFlow/DataFlowEntity.tsx b/datahub-web-react/src/app/entityV2/dataFlow/DataFlowEntity.tsx new file mode 100644 index 00000000000000..2f6f217d7e648f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataFlow/DataFlowEntity.tsx @@ -0,0 +1,263 @@ +import { + ConsoleSqlOutlined, + FileOutlined, + ShareAltOutlined, + UnorderedListOutlined, + WarningOutlined, +} from '@ant-design/icons'; +import * as React from 'react'; +import { useGetDataFlowQuery, useUpdateDataFlowMutation } from '../../../graphql/dataFlow.generated'; +import { DataFlow, EntityType, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; 
+import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { DataFlowJobsTab } from '../shared/tabs/Entity/DataFlowJobsTab'; +import TabNameWithCount from '../shared/tabs/Entity/TabNameWithCount'; +import { IncidentTab } from '../shared/tabs/Incident/IncidentTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { getDataProduct, isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([ + EntityMenuItems.EXTERNAL_URL, + EntityMenuItems.SHARE, + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub DataFlow entity. 
+ */ +export class DataFlowEntity implements Entity { + type: EntityType = EntityType.DataFlow; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'dataFlow'; + + getPathName = () => 'pipelines'; + + getEntityName = () => 'Pipeline'; + + getCollectionName = () => 'Pipelines'; + + useEntityQuery = useGetDataFlowQuery; + + renderProfile = (urn: string) => ( + { + const activeIncidentCount = dataFlow?.dataFlow?.activeIncidents?.total; + return ; + }, + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: SidebarTagsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + getOverridePropertiesFromEntity = (dataFlow?: DataFlow | null): GenericEntityProperties => { + // TODO: Get rid of this once we have correctly formed platform coming back. 
+ const name = dataFlow?.properties?.name; + const externalUrl = dataFlow?.properties?.externalUrl; + return { + name, + externalUrl, + }; + }; + + renderPreview = (previewType: PreviewType, data: DataFlow) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as DataFlow; + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + displayName = (data: DataFlow) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: DataFlow) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.TEST, + EntityCapabilityType.LINEAGE, + EntityCapabilityType.HEALTH, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataFlow/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/dataFlow/preview/Preview.tsx new file mode 100644 index 00000000000000..af1997bfae6f1a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataFlow/preview/Preview.tsx @@ -0,0 +1,110 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { Typography } from 'antd'; +import styled from 'styled-components'; +import { + DataProduct, + Deprecation, + Domain, + EntityPath, + EntityType, + GlobalTags, + Owner, + ParentContainersResult, + SearchInsight, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, 
PreviewType } from '../../Entity'; +import { ANTD_GRAY } from '../../shared/constants'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +const StatText = styled(Typography.Text)` + color: ${ANTD_GRAY[8]}; +`; + +export const Preview = ({ + urn, + data, + name, + platformInstanceId, + description, + platformName, + platformLogo, + owners, + globalTags, + domain, + dataProduct, + externalUrl, + snippet, + insights, + jobCount, + deprecation, + degree, + paths, + isOutputPort, + headerDropdownItems, + previewType, + parentContainers, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + platformInstanceId?: string; + description?: string | null; + platformName?: string; + platformLogo?: string | null; + owners?: Array | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + globalTags?: GlobalTags | null; + deprecation?: Deprecation | null; + externalUrl?: string | null; + snippet?: React.ReactNode | null; + insights?: Array | null; + jobCount?: number | null; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + headerDropdownItems?: Set; + previewType?: PreviewType; + parentContainers?: ParentContainersResult | null; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + {jobCount} {entityRegistry.getCollectionName(EntityType.DataJob)} + , + ]) || + undefined + } + degree={degree} + paths={paths} + isOutputPort={isOutputPort} + headerDropdownItems={headerDropdownItems} + previewType={previewType} + parentEntities={parentContainers?.containers} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataJob/DataJobEntity.tsx b/datahub-web-react/src/app/entityV2/dataJob/DataJobEntity.tsx new file mode 100644 index 00000000000000..7fd7d21314ec81 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataJob/DataJobEntity.tsx @@ -0,0 +1,329 @@ +import { + ConsoleSqlOutlined, + FileOutlined, + PartitionOutlined, + ShareAltOutlined, + 
SyncOutlined, + UnorderedListOutlined, + WarningOutlined, +} from '@ant-design/icons'; +import * as React from 'react'; +import { GetDataJobQuery, useGetDataJobQuery, useUpdateDataJobMutation } from '../../../graphql/dataJob.generated'; +import { DataJob, EntityType, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { EntityAndType } from '../../lineage/types'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { DataFlowEntity } from '../dataFlow/DataFlowEntity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import SidebarLineageSection from '../shared/containers/profile/sidebar/Lineage/SidebarLineageSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarQueryOperationsSection from '../shared/containers/profile/sidebar/Query/SidebarQueryOperationsSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from 
'../shared/containers/profile/utils'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { DataJobFlowTab } from '../shared/tabs/Entity/DataJobFlowTab'; +import TabNameWithCount from '../shared/tabs/Entity/TabNameWithCount'; +import { IncidentTab } from '../shared/tabs/Incident/IncidentTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { SidebarTitleActionType, getDataProduct, isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; +import { RunsTab } from './tabs/RunsTab'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const getDataJobPlatformName = (data?: DataJob): string => { + return ( + data?.dataFlow?.platform?.properties?.displayName || + capitalizeFirstLetterOnly(data?.dataFlow?.platform?.name) || + '' + ); +}; + +const headerDropdownItems = new Set([ + EntityMenuItems.EXTERNAL_URL, + EntityMenuItems.SHARE, + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub DataJob entity. 
+ */ +export class DataJobEntity implements Entity { + type: EntityType = EntityType.DataJob; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'dataJob'; + + getPathName = () => 'tasks'; + + getEntityName = () => 'Task'; + + getCollectionName = () => 'Tasks'; + + useEntityQuery = useGetDataJobQuery; + + renderProfile = (urn: string) => ( + true, + enabled: (_, dataJob: GetDataJobQuery) => (dataJob?.dataJob?.runs?.total || 0) !== 0, + }, + }, + { + name: 'Incidents', + icon: WarningOutlined, + component: IncidentTab, + getDynamicName: (_, dataJob, loading) => { + const activeIncidentCount = dataJob?.dataJob?.activeIncidents?.total; + return ; + }, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { component: SidebarEntityHeader }, + { component: SidebarQueryOperationsSection }, + { component: SidebarAboutSection }, + { component: SidebarNotesSection }, + { component: SidebarLineageSection }, + { component: SidebarOwnerSection }, + { component: SidebarDomainSection }, + { component: DataProductSection }, + { component: SidebarGlossaryTermsSection }, + { component: SidebarTagsSection }, + { component: StatusSection }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Lineage', + component: LineageTab, + description: "View this data asset's upstream and downstream dependencies", + icon: PartitionOutlined, + properties: { + actionType: SidebarTitleActionType.LineageExplore, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties 
about this asset', + icon: UnorderedListOutlined, + }, + ]; + + getOverridePropertiesFromEntity = (dataJob?: DataJob | null): GenericEntityProperties => { + // TODO: Get rid of this once we have correctly formed platform coming back. + const name = dataJob?.properties?.name; + const externalUrl = dataJob?.properties?.externalUrl; + return { + name, + externalUrl, + platform: dataJob?.dataFlow?.platform, + }; + }; + + renderPreview = (previewType: PreviewType, data: DataJob) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as DataJob; + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + getExpandedNameForDataJob = (entity: DataJob): string => { + const name = this.displayName(entity); + const flowName = entity?.dataFlow ? new DataFlowEntity().displayName(entity?.dataFlow) : undefined; + + // if we have no name, just return blank. 
this should not happen, so dont try & construct a name + if (!name) { + return ''; + } + + // if we have a flow name, return the full name of flow.task + if (flowName) { + return `${flowName}.${name}`; + } + + // otherwise, just return the task name (same as non-expanded) + return name; + }; + + getLineageVizConfig = (entity: DataJob) => { + return { + urn: entity?.urn, + name: this.displayName(entity), + expandedName: this.getExpandedNameForDataJob(entity), + type: EntityType.DataJob, + icon: entity?.dataFlow?.platform?.properties?.logoUrl || undefined, // eslint-disable-next-line @typescript-eslint/dot-notation + downstreamChildren: entity?.['downstream']?.relationships?.map( + (relationship) => + ({ + entity: relationship.entity, + type: relationship.entity.type, + } as EntityAndType), + ), // eslint-disable-next-line @typescript-eslint/dot-notation + upstreamChildren: entity?.['upstream']?.relationships?.map( + (relationship) => + ({ + entity: relationship.entity, + type: relationship.entity.type, + } as EntityAndType), + ), + platform: entity?.dataFlow?.platform, + }; + }; + + displayName = (data: DataJob) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: DataJob) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.TEST, + EntityCapabilityType.LINEAGE, + EntityCapabilityType.HEALTH, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataJob/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/dataJob/preview/Preview.tsx new file mode 100644 index 00000000000000..c8485016712cd5 --- /dev/null +++ 
b/datahub-web-react/src/app/entityV2/dataJob/preview/Preview.tsx @@ -0,0 +1,122 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import styled from 'styled-components'; +import { Typography } from 'antd'; +import { ClockCircleOutlined } from '@ant-design/icons'; + +import { + BrowsePathV2, + DataProduct, + Deprecation, + Domain, + EntityPath, + EntityType, + GlobalTags, + Owner, + ParentContainersResult, + SearchInsight, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import { ANTD_GRAY } from '../../shared/constants'; +import { toRelativeTimeString } from '../../../shared/time/timeUtils'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +const StatText = styled(Typography.Text)` + color: ${ANTD_GRAY[8]}; +`; + +export const Preview = ({ + urn, + data, + name, + subtype, + description, + platformName, + platformLogo, + platformInstanceId, + owners, + domain, + dataProduct, + deprecation, + globalTags, + snippet, + insights, + lastRunTimeMs, + externalUrl, + degree, + paths, + isOutputPort, + headerDropdownItems, + previewType, + browsePaths, + parentContainers, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + subtype?: string | null; + description?: string | null; + platformName: string; + platformLogo?: string | null; + platformInstanceId?: string; + owners?: Array | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + deprecation?: Deprecation | null; + globalTags?: GlobalTags | null; + snippet?: React.ReactNode | null; + insights?: Array | null; + lastRunTimeMs?: number | null; + externalUrl?: string | null; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + headerDropdownItems?: Set; + previewType?: PreviewType; + 
browsePaths?: BrowsePathV2; + parentContainers?: ParentContainersResult | null; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + + Last run {toRelativeTimeString(lastRunTimeMs)} + , + ]) || + undefined + } + degree={degree} + paths={paths} + isOutputPort={isOutputPort} + headerDropdownItems={headerDropdownItems} + previewType={previewType} + browsePaths={browsePaths} + parentEntities={parentContainers?.containers} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataJob/tabs/RunsTab.tsx b/datahub-web-react/src/app/entityV2/dataJob/tabs/RunsTab.tsx new file mode 100644 index 00000000000000..dffc8d35ea9ad6 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataJob/tabs/RunsTab.tsx @@ -0,0 +1,168 @@ +import { DeliveredProcedureOutlined } from '@ant-design/icons'; +import { Pagination, Table, Typography } from 'antd'; +import { Tooltip } from '@components'; +import React, { useState } from 'react'; +import styled from 'styled-components'; + +import { useGetDataJobRunsQuery } from '../../../../graphql/dataJob.generated'; +import { DataProcessInstanceRunResultType, DataProcessRunStatus } from '../../../../types.generated'; +import { + getExecutionRequestStatusDisplayColor, + getExecutionRequestStatusDisplayText, + getExecutionRequestStatusIcon, +} from '../../../ingest/source/utils'; +import { CompactEntityNameList } from '../../../recommendations/renderer/component/CompactEntityNameList'; +import { ANTD_GRAY } from '../../shared/constants'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import LoadingSvg from '../../../../images/datahub-logo-color-loading_pendulum.svg?react'; +import { scrollToTop } from '../../../shared/searchUtils'; + +const ExternalUrlLink = styled.a` + font-size: 16px; + color: ${ANTD_GRAY[8]}; +`; + +const PaginationControlContainer = styled.div` + padding-top: 16px; + padding-bottom: 16px; + text-align: center; +`; + +const LoadingText = styled.div` + margin-top: 18px; + 
font-size: 12px; +`; + +const LoadingContainer = styled.div` + padding-top: 40px; + padding-bottom: 40px; + width: 100%; + text-align: center; +`; + +function getStatusForStyling(status: DataProcessRunStatus, resultType: DataProcessInstanceRunResultType) { + if (status === 'COMPLETE') { + if (resultType === 'SKIPPED') { + return 'CANCELLED'; + } + return resultType; + } + return 'RUNNING'; +} + +const columns = [ + { + title: 'Time', + dataIndex: 'time', + key: 'time', + render: (value) => ( + {new Date(Number(value)).toLocaleString()} + ), + }, + { + title: 'Run ID', + dataIndex: 'name', + key: 'name', + }, + { + title: 'Status', + dataIndex: 'status', + key: 'status', + render: (status: any, row) => { + const statusForStyling = getStatusForStyling(status, row?.resultType); + const Icon = getExecutionRequestStatusIcon(statusForStyling); + const text = getExecutionRequestStatusDisplayText(statusForStyling); + const color = getExecutionRequestStatusDisplayColor(statusForStyling); + return ( + <> +
+ {Icon && } + + {text || 'N/A'} + +
+ + ); + }, + }, + { + title: 'Inputs', + dataIndex: 'inputs', + key: 'inputs', + render: (inputs) => , + width: 150, + }, + { + title: 'Outputs', + dataIndex: 'outputs', + key: 'outputs', + render: (outputs) => , + width: 150, + }, + { + title: '', + dataIndex: 'externalUrl', + key: 'externalUrl', + render: (externalUrl) => + externalUrl && ( + + + + + + ), + }, +]; + +const PAGE_SIZE = 20; + +export const RunsTab = () => { + const { urn } = useEntityData(); + const [page, setPage] = useState(1); + + const { loading, data } = useGetDataJobRunsQuery({ + variables: { urn, start: (page - 1) * PAGE_SIZE, count: PAGE_SIZE }, + }); + const runs = data && data?.dataJob?.runs?.runs; + + const tableData = runs + ?.filter((run) => run?.state?.length) + .map((run) => ({ + time: run?.created?.time, + name: run?.name, + status: run?.state?.[0]?.status, + resultType: run?.state?.[0]?.result?.resultType, + inputs: run?.inputs?.relationships.map((relationship) => relationship.entity), + outputs: run?.outputs?.relationships.map((relationship) => relationship.entity), + externalUrl: run?.externalUrl, + })); + if (loading) { + return ( + + + Fetching runs... + + ); + } + + const onChangePage = (newPage: number) => { + scrollToTop(); + setPage(newPage); + }; + + return ( + <> +
+ + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataPlatform/DataPlatformEntity.tsx b/datahub-web-react/src/app/entityV2/dataPlatform/DataPlatformEntity.tsx new file mode 100644 index 00000000000000..f7c2256ca55d9f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataPlatform/DataPlatformEntity.tsx @@ -0,0 +1,77 @@ +import * as React from 'react'; +import { DatabaseOutlined } from '@ant-design/icons'; +import { DataPlatform, EntityType, SearchResult } from '../../../types.generated'; +import { Entity, IconStyleType, PreviewType } from '../Entity'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; + +const getDisplayName = (data?: DataPlatform): string => { + return data?.properties?.displayName || data?.name || ''; +}; + +/** + * Definition of the DataHub DataJob entity. + */ +export class DataPlatformEntity implements Entity { + type: EntityType = EntityType.DataPlatform; + + icon = (fontSize?: number, _styleType?: IconStyleType, color?: string) => { + return ( + + ); + }; + + isSearchEnabled = () => false; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + // Currently unused. + getAutoCompleteFieldName = () => 'name'; + + // Currently unused. + getPathName = () => 'platform'; + + // Currently unused. + getEntityName = () => 'Data Platform'; + + // Currently unused. + getCollectionName = () => 'Data Platforms'; + + // Currently unused. + renderProfile = (_: string) => <>; + + // Currently unused. + renderPreview = (_: PreviewType, _1: DataPlatform) => <>; + + // Currently unused. 
+ renderSearch = (_: SearchResult) => <>; + + displayName = (data: DataPlatform) => { + return getDisplayName(data); + }; + + getGenericEntityProperties = (data: DataPlatform) => { + return { + ...data, + entityType: this.type, + name: getDisplayName(data), + platform: data, + } as GenericEntityProperties; + }; + + supportedCapabilities = () => { + return new Set([]); + }; + + getGraphName = () => { + return 'dataPlatform'; + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataPlatformInstance/DataPlatformInstanceEntity.tsx b/datahub-web-react/src/app/entityV2/dataPlatformInstance/DataPlatformInstanceEntity.tsx new file mode 100644 index 00000000000000..f76c94782e9735 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataPlatformInstance/DataPlatformInstanceEntity.tsx @@ -0,0 +1,65 @@ +import * as React from 'react'; +import { DataPlatformInstance, EntityType } from '../../../types.generated'; +import { Entity } from '../Entity'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; + +/** + * Definition of the DataHub DataPlatformInstance entity. + * Most of this still needs to be filled out. 
+ */ +export class DataPlatformInstanceEntity implements Entity { + type: EntityType = EntityType.DataPlatformInstance; + + icon = () => { + return <>; + }; + + isSearchEnabled = () => false; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'dataPlatformInstance'; + + getEntityName = () => 'Data Platform Instance'; + + getCollectionName = () => 'Data Platform Instances'; + + renderProfile = () => <>; + + getOverridePropertiesFromEntity = (): GenericEntityProperties => { + return {}; + }; + + renderPreview = () => { + return <>; + }; + + renderSearch = () => { + return <>; + }; + + displayName = (data: DataPlatformInstance) => { + return data?.instanceId || data.urn; + }; + + getGenericEntityProperties = (data: DataPlatformInstance) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([]); + }; + + getGraphName = () => { + return 'dataPlatformInstance'; + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataProcessInstance/DataProcessInstanceEntity.tsx b/datahub-web-react/src/app/entityV2/dataProcessInstance/DataProcessInstanceEntity.tsx new file mode 100644 index 00000000000000..3ab8c2b268aac3 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProcessInstance/DataProcessInstanceEntity.tsx @@ -0,0 +1,265 @@ +import React from 'react'; +import { ApiOutlined } from '@ant-design/icons'; +import { GenericEntityProperties } from '@src/app/entity/shared/types'; +import { + DataProcessInstance, + Entity as GeneratedEntity, + EntityType, + OwnershipType, + SearchResult, +} from '../../../types.generated'; +import { Preview } from './preview/Preview'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { 
useGetDataProcessInstanceQuery } from '../../../graphql/dataProcessInstance.generated'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { getDataProduct } from '../shared/utils'; +// import SummaryTab from './profile/DataProcessInstaceSummary'; + +// const getProcessPlatformName = (data?: DataProcessInstance): string => { +// return ( +// data?.dataPlatformInstance?.platform?.properties?.displayName || +// capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) || +// '' +// ); +// }; + +const getParentEntities = (data: DataProcessInstance): GeneratedEntity[] => { + const parentEntity = data?.relationships?.relationships?.find( + (rel) => rel.type === 'InstanceOf' && rel.entity?.type === EntityType.DataJob, + ); + + if (!parentEntity?.entity) return []; + + // Convert to GeneratedEntity + return [ + { + type: parentEntity.entity.type, + urn: (parentEntity.entity as any).urn, // Make sure urn exists + relationships: (parentEntity.entity as any).relationships, + }, + ]; +}; +/** + * Definition of the DataHub DataProcessInstance entity. 
+ */ +export class DataProcessInstanceEntity implements Entity { + type: EntityType = EntityType.DataProcessInstance; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'dataProcessInstance'; + + getEntityName = () => 'Process Instance'; + + getGraphName = () => 'dataProcessInstance'; + + getCollectionName = () => 'Process Instances'; + + useEntityQuery = useGetDataProcessInstanceQuery; + + renderProfile = (urn: string) => ( + { + // const activeIncidentCount = processInstance?.dataProcessInstance?.activeIncidents.total; + // return `Incidents${(activeIncidentCount && ` (${activeIncidentCount})`) || ''}`; + // }, + // }, + ]} + sidebarSections={this.getSidebarSections()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarAboutSection, + }, + { + component: SidebarOwnerSection, + properties: { + defaultOwnerType: OwnershipType.TechnicalOwner, + }, + }, + { + component: SidebarTagsSection, + properties: { + hasTags: true, + hasTerms: true, + }, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + ]; + + getOverridePropertiesFromEntity = (processInstance?: DataProcessInstance | null): GenericEntityProperties => { + const name = processInstance?.name; + const externalUrl = processInstance?.externalUrl; + return { + name, + externalUrl, + platform: processInstance?.dataPlatformInstance?.platform, + }; + }; + + renderPreview = (_: PreviewType, data: DataProcessInstance) => { + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = getParentEntities(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = 
result.entity as DataProcessInstance; + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = getParentEntities(data); + return ( + + ); + }; + + getLineageVizConfig = (entity: DataProcessInstance) => { + return { + urn: entity?.urn, + name: this.displayName(entity), + type: EntityType.DataProcessInstance, + subtype: entity?.subTypes?.typeNames?.[0], + icon: entity?.dataPlatformInstance?.platform?.properties?.logoUrl || undefined, + platform: entity?.dataPlatformInstance?.platform, + container: entity?.container, + // health: entity?.health || undefined, + }; + }; + + displayName = (data: DataProcessInstance) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: DataProcessInstance) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataProcessInstance/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/dataProcessInstance/preview/Preview.tsx new file mode 100644 index 00000000000000..3a3b0340695d96 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProcessInstance/preview/Preview.tsx @@ -0,0 +1,103 @@ +import React from 'react'; +import { + DataProduct, + Deprecation, + Domain, + Entity as GeneratedEntity, + EntityPath, + EntityType, + GlobalTags, + Health, + Owner, + SearchInsight, + Container, + ParentContainersResult, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType } from 
'../../Entity'; + +export const Preview = ({ + urn, + name, + subType, + description, + platformName, + platformLogo, + platformInstanceId, + container, + owners, + domain, + dataProduct, + deprecation, + globalTags, + snippet, + insights, + externalUrl, + degree, + paths, + health, + parentEntities, + parentContainers, +}: // duration, +// status, +// startTime, +{ + urn: string; + name: string; + subType?: string | null; + description?: string | null; + platformName?: string; + platformLogo?: string | null; + platformInstanceId?: string; + container?: Container; + owners?: Array | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + deprecation?: Deprecation | null; + globalTags?: GlobalTags | null; + snippet?: React.ReactNode | null; + insights?: Array | null; + externalUrl?: string | null; + degree?: number; + paths?: EntityPath[]; + health?: Health[] | null; + parentEntities?: Array | null; + parentContainers?: ParentContainersResult | null; + // duration?: number | null; + // status?: string | null; + // startTime?: number | null; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataProduct/AddOutputPortCard.tsx b/datahub-web-react/src/app/entityV2/dataProduct/AddOutputPortCard.tsx new file mode 100644 index 00000000000000..e6e3af90227b36 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/AddOutputPortCard.tsx @@ -0,0 +1,23 @@ +import { PlusOutlined } from '@ant-design/icons'; +import React from 'react'; +import styled from 'styled-components'; +import { Card } from '../../sharedV2/cards/components'; +import { REDESIGN_COLORS } from '../shared/constants'; + +const DataProductTitle = styled.div` + font-size: 16px; + font-weight: 400; + color: ${REDESIGN_COLORS.BLUE}; + padding: 10px 14px; +`; + +export default function AddOutputPortCard() { + return ( + + + + Add Output Port + + + ); +} diff --git 
a/datahub-web-react/src/app/entityV2/dataProduct/AssetsSections.tsx b/datahub-web-react/src/app/entityV2/dataProduct/AssetsSections.tsx new file mode 100644 index 00000000000000..b2ac5adeac5816 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/AssetsSections.tsx @@ -0,0 +1,95 @@ +import { Button } from 'antd'; +import React from 'react'; +import { useHistory } from 'react-router'; +import { AppstoreOutlined } from '@ant-design/icons'; +import styled from 'styled-components'; +import { useListDataProductAssetsQuery } from '../../../graphql/search.generated'; +import { pluralize } from '../../shared/textUtil'; +import { EntityCountCard } from '../../sharedV2/cards/EntityCountCard'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import ContentSectionLoading from '../domain/summary/ContentSectionLoading'; +import { useEntityData } from '../../entity/shared/EntityContext'; +import { + getContentsSummary, + getDomainEntitiesFilterUrl, + navigateToDomainEntities, +} from '../shared/containers/profile/sidebar/Domain/utils'; +import { SummaryTabHeaderTitle, SummaryTabHeaderWrapper } from '../shared/summary/HeaderComponents'; +import { HorizontalList } from '../shared/summary/ListComponents'; +import { getContentTypeIcon } from '../shared/summary/IconComponents'; + +const AssetsSectionWrapper = styled.div` + flex: 1; + min-width: 100px; +`; + +export const StyledHeaderWrapper = styled(SummaryTabHeaderWrapper)` + margin-bottom: 8px; +`; + +export const AssetsSection = () => { + const history = useHistory(); + const entityRegistry = useEntityRegistry(); + const { urn, entityType } = useEntityData(); + const { data, loading } = useListDataProductAssetsQuery({ + variables: { + urn, + input: { + query: '*', + start: 0, + count: 0, + filters: [], + }, + }, + }); + + const contentsSummary = data?.listDataProductAssets && getContentsSummary(data.listDataProductAssets); + const contentsCount = contentsSummary?.total || 0; + const hasContents = 
contentsCount > 0; + + if (!hasContents) { + return null; + } + + return ( + + + } title={`Assets (${contentsCount})`} /> + + + {loading && } + + + {!loading && + contentsSummary?.types.map((summary) => { + const { type, count, entityType: summaryEntityType } = summary; + const typeName = ( + type || + entityRegistry.getEntityName(summaryEntityType) || + summaryEntityType + ).toLocaleLowerCase(); + const link = getDomainEntitiesFilterUrl( + urn, + entityType, + entityRegistry, + [summary.entityType], + summary.type ? [summary.type] : undefined, + ); + return ( + + ); + })} + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataProduct/DataProductEntitiesTab.tsx b/datahub-web-react/src/app/entityV2/dataProduct/DataProductEntitiesTab.tsx new file mode 100644 index 00000000000000..2b7d32c0d0662f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/DataProductEntitiesTab.tsx @@ -0,0 +1,23 @@ +import React from 'react'; +import { useEntityData } from '../../entity/shared/EntityContext'; +import { EmbeddedListSearchSection } from '../shared/components/styled/search/EmbeddedListSearchSection'; +import generateUseListDataProductAssets from './generateUseListDataProductAssets'; +import { SearchCardContext } from '../shared/SearchCardContext'; +import { generateUseListDataProductAssetsCount } from './generateUseListDataProductAssetsCount'; + +export function DataProductEntitiesTab() { + const { urn } = useEntityData(); + + return ( + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataProduct/DataProductEntity.tsx b/datahub-web-react/src/app/entityV2/dataProduct/DataProductEntity.tsx new file mode 100644 index 00000000000000..a8d904e476dda6 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/DataProductEntity.tsx @@ -0,0 +1,268 @@ +import { + AppstoreOutlined, + FileDoneOutlined, + FileOutlined, + ReadOutlined, + UnorderedListOutlined, +} from '@ant-design/icons'; +import * as React from 'react'; +import { 
useGetDataProductQuery } from '../../../graphql/dataProduct.generated'; +import { GetDatasetQuery } from '../../../graphql/dataset.generated'; +import { DataProduct, EntityType, SearchResult } from '../../../types.generated'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfileTab } from '../shared/constants'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarViewDefinitionSection } from '../shared/containers/profile/sidebar/Dataset/View/SidebarViewDefinitionSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { EntityActionItem } from '../shared/entity/EntityActions'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import TabNameWithCount from '../shared/tabs/Entity/TabNameWithCount'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { DataProductEntitiesTab } from './DataProductEntitiesTab'; +import { 
DataProductSummaryTab } from './DataProductSummaryTab'; +import { Preview } from './preview/Preview'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([ + EntityMenuItems.SHARE, + EntityMenuItems.DELETE, + EntityMenuItems.EDIT, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub Data Product entity. + */ +export class DataProductEntity implements Entity { + type: EntityType = EntityType.DataProduct; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + if (styleType === IconStyleType.SVG) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'dataProduct'; + + getEntityName = () => 'Data Product'; + + getCollectionName = () => 'Data Products'; + + useEntityQuery = useGetDataProductQuery; + + renderProfile = (urn: string) => ( + { + const assetCount = entityData?.entities?.total; + return ; + }, + component: DataProductEntitiesTab, + icon: AppstoreOutlined, + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + properties: { + updateOnly: true, + }, + }, + // TODO: Is someone actually using the below code? 
+ { + component: SidebarViewDefinitionSection, + display: { + // to do - change when we have a GetDataProductQuery + visible: (_, dataset: GetDatasetQuery) => (dataset?.dataset?.viewProperties?.logic && true) || false, + }, + }, + { + component: SidebarTagsSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (previewType: PreviewType, data: DataProduct, actions) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as DataProduct; + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + displayName = (data: DataProduct) => { + return data?.properties?.name || data.urn; + }; + + getOverridePropertiesFromEntity = (data: DataProduct) => { + const name = data?.properties?.name; + const externalUrl = data?.properties?.externalUrl; + const entityCount = data?.entities?.total || undefined; + const parentDomains = { + domains: (data?.domain && [data?.domain?.domain]) || [], + count: (data?.domain && 1) || 0, + }; + return { + name, + externalUrl, + entityCount, + parentDomains, + }; + }; + + getGenericEntityProperties = (data: DataProduct) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + ]); + }; + + getGraphName = () => { + return 'dataProduct'; + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataProduct/DataProductSummaryTab.tsx 
b/datahub-web-react/src/app/entityV2/dataProduct/DataProductSummaryTab.tsx new file mode 100644 index 00000000000000..b35b8ce99c80c7 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/DataProductSummaryTab.tsx @@ -0,0 +1,15 @@ +import React from 'react'; +import { SummaryTabWrapper } from '../shared/summary/HeaderComponents'; +import SummaryAboutSection from '../shared/summary/SummaryAboutSection'; +import { AssetsSection } from './AssetsSections'; +import { OutputPortsSection } from './OutputPortsSection'; + +export const DataProductSummaryTab = () => { + return ( + + + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataProduct/OutputPortsSection.tsx b/datahub-web-react/src/app/entityV2/dataProduct/OutputPortsSection.tsx new file mode 100644 index 00000000000000..99362ea4bd2f45 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/OutputPortsSection.tsx @@ -0,0 +1,112 @@ +import React, { useEffect, useState } from 'react'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import OutputIcon from '@mui/icons-material/Output'; +import styled from 'styled-components'; +import { useListDataProductAssetsLazyQuery, useListDataProductAssetsQuery } from '../../../graphql/search.generated'; +import { useEntityData } from '../../entity/shared/EntityContext'; +import { SummaryTabHeaderTitle } from '../shared/summary/HeaderComponents'; +import { HorizontalList } from '../shared/summary/ListComponents'; +import { SCREEN_WIDTH_BREAK_POINT } from './constants'; +import { Card } from '../../sharedV2/cards/components'; +// import AddOutputPortCard from './AddOutputPortCard'; +import { StyledHeaderWrapper } from './AssetsSections'; +import { SearchResult } from '../../../types.generated'; +import { ANTD_GRAY } from '../shared/constants'; +import { OUTPUT_PORTS_FIELD } from '../../search/utils/constants'; +import SummaryEntityCard from '../../sharedV2/cards/SummaryEntityCard'; + +const OutputPortsWrapper = styled.div` + display: flex; 
+ flex-direction: column; + flex: 1; + min-width: 100px; + @media (max-width: ${SCREEN_WIDTH_BREAK_POINT}px) { + margin: 16px 0 0 0; + } +`; + +const StyledHorizontalList = styled(HorizontalList)` + flex: 1; +`; + +const LoadMoreButton = styled(Card)` + font-size: 16px; + font-weight: 400; + font-family: Mulish; + padding: 10px 14px; + color: ${ANTD_GRAY[8]}; +`; + +const COUNT = 10; + +export const OutputPortsSection = () => { + const [additionalResults, setAdditionalResults] = useState([]); + const [hasFetchedNewData, setHasFetchedNewData] = useState(false); + const [start, setStart] = useState(0); + const { urn } = useEntityData(); + const [listDataProductAssets, { data: additionalData }] = useListDataProductAssetsLazyQuery(); + const variables = { + urn, + input: { + query: '*', + start: 0, + count: COUNT, + filters: [{ field: OUTPUT_PORTS_FIELD, value: 'true' }], // we use this filter hardcoded in list data product assets resolver + }, + }; + + const { data, loading } = useListDataProductAssetsQuery({ variables }); + const numResults = data?.listDataProductAssets?.total; + const showLoadMoreButton = (numResults || 0) > start + COUNT; + const finalResults = [...(data?.listDataProductAssets?.searchResults || []), ...additionalResults]; + + function loadMore() { + const newStart = start + COUNT; + listDataProductAssets({ + variables: { + ...variables, + input: { + ...variables.input, + start: newStart, + }, + }, + }); + setStart(newStart); + setHasFetchedNewData(true); + } + + useEffect(() => { + if (additionalData && additionalData.listDataProductAssets?.searchResults && hasFetchedNewData) { + setAdditionalResults([...additionalResults, ...additionalData.listDataProductAssets.searchResults]); + setHasFetchedNewData(false); + } + }, [additionalData, additionalResults, hasFetchedNewData]); + + if (!data || !finalResults?.length) return null; + + return ( + + + } + title={`Output Ports (${numResults})`} + /> + + + {!loading && + finalResults.map((searchResult) => { 
+ const { entity } = searchResult; + return ; + })} + {showLoadMoreButton && ( + + + Load more + + )} + {/* KEEPING THIS COMMENTED UNTIL DESIGN IS READY FOR OUTPUT PORT */} + {/* */} + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataProduct/constants.ts b/datahub-web-react/src/app/entityV2/dataProduct/constants.ts new file mode 100644 index 00000000000000..821271a7c01f19 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/constants.ts @@ -0,0 +1 @@ +export const SCREEN_WIDTH_BREAK_POINT = 1300; diff --git a/datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssets.ts b/datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssets.ts new file mode 100644 index 00000000000000..9063c3de1933b1 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssets.ts @@ -0,0 +1,23 @@ +import { useListDataProductAssetsQuery } from '../../../graphql/search.generated'; +import { GetSearchResultsParams } from '../shared/components/styled/search/types'; + +export default function generateUseListDataProductAssets({ urn }: { urn: string }) { + return (params: GetSearchResultsParams) => { + const { + variables: { input }, + } = params; + + const { data, loading, error, refetch } = useListDataProductAssetsQuery({ + variables: { urn, input }, + }); + + return { + data: data?.listDataProductAssets, + loading, + error, + refetch: (refetchParams: GetSearchResultsParams['variables']) => { + return refetch({ urn, input: refetchParams.input }).then((res) => res.data.listDataProductAssets); + }, + }; + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssetsCount.ts b/datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssetsCount.ts new file mode 100644 index 00000000000000..4790a2d437ad45 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/generateUseListDataProductAssetsCount.ts @@ -0,0 +1,16 @@ +import { 
useListDataProductAssetsQuery } from '@src/graphql/search.generated'; +import { GetSearchResultsParams } from '@src/app/entity/shared/components/styled/search/types'; + +export function generateUseListDataProductAssetsCount({ urn }: { urn: string }) { + return function useListDataProductAssetsCount({ variables: { input } }: GetSearchResultsParams) { + const { data, loading, error } = useListDataProductAssetsQuery({ + variables: { + urn, + input: { ...input, count: 0 }, + }, + fetchPolicy: 'cache-first', + }); + + return { total: data?.listDataProductAssets?.total, loading, error }; + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataProduct/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/dataProduct/preview/Preview.tsx new file mode 100644 index 00000000000000..d29e4bb6baaa26 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataProduct/preview/Preview.tsx @@ -0,0 +1,70 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { EntityType, Owner, GlobalTags, GlossaryTerms, Domain, EntityPath } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { EntityMenuActions, IconStyleType, PreviewType } from '../../Entity'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +interface Props { + urn: string; + data: GenericEntityProperties | null; + name: string; + description?: string | null; + owners?: Array | null; + domain?: Domain | null; + globalTags?: GlobalTags | null; + glossaryTerms?: GlossaryTerms | null; + entityCount?: number; + externalUrl?: string | null; + degree?: number; + paths?: EntityPath[]; + headerDropdownItems?: Set; + previewType?: PreviewType; + actions?: EntityMenuActions; +} + +export const Preview = ({ + urn, + data, + name, + description, + owners, + globalTags, + domain, + glossaryTerms, + entityCount, + 
externalUrl, + degree, + paths, + headerDropdownItems, + previewType, + actions, +}: Props): JSX.Element => { + const entityRegistry = useEntityRegistry(); + + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entityV2/dataset/DatasetEntity.tsx new file mode 100644 index 00000000000000..2257940158afa1 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/DatasetEntity.tsx @@ -0,0 +1,517 @@ +import { + CheckCircleOutlined, + CodeOutlined, + ConsoleSqlOutlined, + EyeOutlined, + FileOutlined, + FundOutlined, + LayoutOutlined, + PartitionOutlined, + UnlockOutlined, + UnorderedListOutlined, + WarningOutlined, +} from '@ant-design/icons'; +import { DBT_URN } from '@app/ingest/source/builder/constants'; +import ViewComfyOutlinedIcon from '@mui/icons-material/ViewComfyOutlined'; +import { GovernanceTab } from '@src/app/entity/shared/tabs/Dataset/Governance/GovernanceTab'; +import * as React from 'react'; +import { GetDatasetQuery, useGetDatasetQuery, useUpdateDatasetMutation } from '../../../graphql/dataset.generated'; +import GovernMenuIcon from '../../../images/governMenuIcon.svg?react'; +import { Dataset, DatasetProperties, EntityType, FeatureFlagsConfig, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { MatchedFieldList } from '../../searchV2/matches/MatchedFieldList'; +import { matchedFieldPathsRenderer } from '../../searchV2/matches/matchedFieldPathsRenderer'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import { useAppConfig } from '../../useAppConfig'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { SubType, TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { 
SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import SidebarDatasetHeaderSection from '../shared/containers/profile/sidebar/Dataset/Header/SidebarDatasetHeaderSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import SidebarLineageSection from '../shared/containers/profile/sidebar/Lineage/SidebarLineageSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarQueryOperationsSection from '../shared/containers/profile/sidebar/Query/SidebarQueryOperationsSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarDatasetViewDefinitionSection } from '../shared/containers/profile/sidebar/SidebarLogicSection'; +import { SidebarSiblingsSection } from '../shared/containers/profile/sidebar/SidebarSiblingsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import EmbeddedProfile from '../shared/embed/EmbeddedProfile'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import AccessManagement from '../shared/tabs/Dataset/AccessManagement/AccessManagement'; +import QueriesTab from '../shared/tabs/Dataset/Queries/QueriesTab'; +import { SchemaTab } from '../shared/tabs/Dataset/Schema/SchemaTab'; +import { AcrylValidationsTab } from 
'../shared/tabs/Dataset/Validations/AcrylValidationsTab'; +import ViewDefinitionTab from '../shared/tabs/Dataset/View/ViewDefinitionTab'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { EmbedTab } from '../shared/tabs/Embed/EmbedTab'; +import ColumnTabNameHeader from '../shared/tabs/Entity/ColumnTabNameHeader'; +import TabNameWithCount from '../shared/tabs/Entity/TabNameWithCount'; +import { IncidentTab } from '../shared/tabs/Incident/IncidentTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { SidebarTitleActionType, getDataProduct, getDatasetLastUpdatedMs, isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; +import { OperationsTab } from './profile/OperationsTab'; +import { DatasetStatsSummarySubHeader } from './profile/stats/stats/DatasetStatsSummarySubHeader'; +import StatsTab from '../shared/tabs/Dataset/Stats/StatsTab'; + +const SUBTYPES = { + VIEW: 'view', +}; + +const headerDropdownItems = new Set([ + EntityMenuItems.EXTERNAL_URL, + EntityMenuItems.SHARE, + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.RAISE_INCIDENT, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub Dataset entity. 
+ */ +export class DatasetEntity implements Entity { + type: EntityType = EntityType.Dataset; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + if (styleType === IconStyleType.SVG) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + appconfig = useAppConfig; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'dataset'; + + getPathName = () => this.getGraphName(); + + getEntityName = () => 'Dataset'; + + getCollectionName = () => 'Datasets'; + + useEntityQuery = useGetDatasetQuery; + + renderProfile = (urn: string) => ( + + !!dataset?.dataset?.viewProperties?.logic || + !!dataset?.dataset?.subTypes?.typeNames + ?.map((t) => t.toLocaleLowerCase()) + .includes(SUBTYPES.VIEW.toLocaleLowerCase()), + enabled: (_, dataset: GetDatasetQuery) => !!dataset?.dataset?.viewProperties?.logic, + }, + }, + { + name: 'Documentation', + component: DocumentationTab, + icon: FileOutlined, + }, + { + name: 'Preview', + component: EmbedTab, + icon: EyeOutlined, + display: { + visible: (_, dataset: GetDatasetQuery) => !!dataset?.dataset?.embed?.renderUrl, + enabled: (_, dataset: GetDatasetQuery) => !!dataset?.dataset?.embed?.renderUrl, + }, + }, + { + name: 'Lineage', + component: LineageTab, + icon: PartitionOutlined, + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + getDynamicName: (_, dataset: GetDatasetQuery, loading) => { + const customPropertiesCount = dataset?.dataset?.properties?.customProperties?.length || 0; + const structuredPropertiesCount = + dataset?.dataset?.structuredProperties?.properties?.length || 0; + const propertiesCount = customPropertiesCount + structuredPropertiesCount; + return ; + }, + }, + { + name: 'Queries', + component: QueriesTab, + icon: 
ConsoleSqlOutlined, + display: { + visible: (_, _1) => true, + enabled: (_, _2) => true, + }, + }, + { + name: 'Stats', + component: StatsTab, + icon: FundOutlined, + display: { + visible: (_, _1) => true, + enabled: (_, dataset: GetDatasetQuery) => + (dataset?.dataset?.latestFullTableProfile?.length || 0) > 0 || + (dataset?.dataset?.latestPartitionProfile?.length || 0) > 0 || + (dataset?.dataset?.usageStats?.buckets?.length || 0) > 0 || + (dataset?.dataset?.operations?.length || 0) > 0, + }, + }, + { + name: 'Quality', + component: AcrylValidationsTab, // Use SaaS specific Validations Tab. + icon: CheckCircleOutlined, + }, + { + name: 'Governance', + icon: () => ( + + + + ), + component: GovernanceTab, + }, + { + name: 'Runs', // TODO: Rename this to DatasetRunsTab. + component: OperationsTab, + display: { + visible: (_, dataset: GetDatasetQuery) => { + return (dataset?.dataset?.runs?.total || 0) > 0; + }, + enabled: (_, dataset: GetDatasetQuery) => { + return (dataset?.dataset?.runs?.total || 0) > 0; + }, + }, + }, + { + name: 'Access Management', + component: AccessManagement, + icon: UnlockOutlined, + display: { + visible: (_, _1) => this.appconfig().config.featureFlags.showAccessManagement, + enabled: (_, _2) => true, + }, + }, + { + name: 'Incidents', + icon: WarningOutlined, + component: IncidentTab, + getDynamicName: (_, dataset, loading) => { + const activeIncidentCount = dataset?.dataset?.activeIncidents?.total; + return ; + }, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { component: SidebarEntityHeader }, + { component: SidebarDatasetHeaderSection }, + { component: SidebarAboutSection }, + { component: SidebarNotesSection }, + { component: SidebarLineageSection }, + { component: SidebarOwnerSection }, + { component: SidebarDomainSection }, + { component: DataProductSection }, + { component: SidebarTagsSection }, + { component: SidebarGlossaryTermsSection }, + { 
+ component: SidebarSiblingsSection, + display: { + visible: (_, dataset: GetDatasetQuery) => !!dataset?.dataset?.siblingsSearch?.total, + }, + }, + { component: SidebarDatasetViewDefinitionSection }, + { component: SidebarQueryOperationsSection }, + { component: StatusSection }, + { component: SidebarStructuredProperties }, + // { + // component: SidebarRecommendationsSection, + // }, + ]; + + getSidebarTabs = () => [ + { + name: 'Lineage', + component: LineageTab, + description: "View this data asset's upstream and downstream dependencies", + icon: PartitionOutlined, + properties: { + actionType: SidebarTitleActionType.LineageExplore, + }, + }, + { + name: 'Columns', + component: SchemaTab, + description: "View this data asset's columns", + icon: LayoutOutlined, + properties: { + fullHeight: true, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + #shouldMergeInLineage(dataset?: Dataset | null, flags?: FeatureFlagsConfig): boolean { + // Lineage query must include platform and typeNames on dataset and its sibling + return ( + !!flags?.hideDbtSourceInLineage && + dataset?.platform?.urn === DBT_URN && + !!dataset?.subTypes?.typeNames?.includes(SubType.DbtSource) + ); + } + + getOverridePropertiesFromEntity = ( + dataset?: Dataset | null, + flags?: FeatureFlagsConfig, + ): GenericEntityProperties => { + // if dataset has subTypes filled out, pick the most specific subtype and return it + const subTypes = dataset?.subTypes; + + const extendedProperties: DatasetProperties | undefined | null = dataset?.properties && { + ...dataset?.properties, + qualifiedName: dataset?.properties?.qualifiedName || this.displayName(dataset), + }; + + const firstSibling = dataset?.siblingsSearch?.searchResults?.[0]?.entity as Dataset | undefined; + const isReplacedBySibling = this.#shouldMergeInLineage(dataset, flags); + const isSiblingHidden = 
this.#shouldMergeInLineage(firstSibling, flags); + + const lineageUrn = isReplacedBySibling ? firstSibling?.urn : undefined; + let lineageSiblingIcon: string | undefined; + if (isReplacedBySibling) { + // Swap lineage urn and show as merged with sibling, extra icon is the original entity icon + lineageSiblingIcon = dataset?.platform?.properties?.logoUrl ?? undefined; + } else if (isSiblingHidden) { + // Same lineage urn but show as merged with sibling, extra icon is the sibling's icon + lineageSiblingIcon = firstSibling?.platform?.properties?.logoUrl ?? undefined; + } + return { + name: dataset && this.displayName(dataset), + externalUrl: dataset?.properties?.externalUrl, + entityTypeOverride: subTypes ? capitalizeFirstLetterOnly(subTypes.typeNames?.[0]) : '', + properties: extendedProperties, + lineageUrn, + lineageSiblingIcon, + }; + }; + + renderPreview = (previewType: PreviewType, data: Dataset) => { + const genericProperties = this.getGenericEntityProperties(data); + const platformNames = genericProperties?.siblingPlatforms?.map( + (platform) => platform.properties?.displayName || capitalizeFirstLetterOnly(platform.name), + ); + return ( + platform.properties?.logoUrl)} + platformInstanceId={data.dataPlatformInstance?.instanceId} + owners={data.ownership?.owners} + globalTags={data.globalTags} + glossaryTerms={data.glossaryTerms} + domain={data.domain?.domain} + dataProduct={getDataProduct(genericProperties?.dataProduct)} + container={data.container} + externalUrl={data.properties?.externalUrl} + health={data.health} + headerDropdownItems={headerDropdownItems} + previewType={previewType} + browsePaths={data.browsePathV2 || undefined} + /> + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as Dataset; + const genericProperties = this.getGenericEntityProperties(data); + const platformNames = genericProperties?.siblingPlatforms?.map( + (platform) => platform.properties?.displayName || capitalizeFirstLetterOnly(platform.name), + 
); + + return ( + platform.properties?.logoUrl)} + owners={data.ownership?.owners} + globalTags={data.globalTags} + domain={data.domain?.domain} + dataProduct={getDataProduct(genericProperties?.dataProduct)} + deprecation={data.deprecation} + glossaryTerms={data.glossaryTerms} + subtype={data.subTypes?.typeNames?.[0]} + container={data.container} + parentContainers={data.parentContainers} + snippet={} + insights={result.insights} + externalUrl={data.properties?.externalUrl} + statsSummary={data.statsSummary} + rowCount={(data as any).lastProfile?.length && (data as any).lastProfile[0].rowCount} + columnCount={(data as any).lastProfile?.length && (data as any).lastProfile[0].columnCount} + lastUpdatedMs={getDatasetLastUpdatedMs( + (data as any).properties, + (data as any).lastOperation?.length && (data as any).lastOperation[0], + )} + health={data.health} + degree={(result as any).degree} + paths={(result as any).paths} + isOutputPort={isOutputPort(result)} + headerDropdownItems={headerDropdownItems} + browsePaths={data.browsePathV2 || undefined} + /> + ); + }; + + renderSearchMatches = (_: SearchResult) => { + return ( + <> + + + ); + }; + + getLineageVizConfig = (entity: Dataset) => { + return { + urn: entity?.urn, + name: entity?.properties?.name || entity.name, + expandedName: entity?.properties?.qualifiedName || entity?.properties?.name || entity.name, + type: EntityType.Dataset, + subtype: entity?.subTypes?.typeNames?.[0] || undefined, + icon: entity?.platform?.properties?.logoUrl || undefined, + platform: entity?.platform, + health: entity?.health || undefined, + deprecation: entity?.deprecation, + }; + }; + + displayName = (data: Dataset) => { + return data?.properties?.name || data.name || data.urn; + }; + + platformLogoUrl = (data: Dataset) => { + return data.platform.properties?.logoUrl || undefined; + }; + + getGenericEntityProperties = (data: Dataset, flags?: FeatureFlagsConfig) => { + return getDataForEntityType({ + data, + entityType: this.type, + 
getOverrideProperties: this.getOverridePropertiesFromEntity, + flags, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.TEST, + EntityCapabilityType.LINEAGE, + EntityCapabilityType.HEALTH, + ]); + }; + + renderEmbeddedProfile = (urn: string) => ( + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/dataset/preview/Preview.tsx new file mode 100644 index 00000000000000..7d7b770e95ca5f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/preview/Preview.tsx @@ -0,0 +1,155 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { + Container, + DataProduct, + DatasetStatsSummary, + Deprecation, + Domain, + EntityPath, + EntityType, + FabricType, + GlobalTags, + GlossaryTerms, + Health, + Maybe, + Owner, + ParentContainersResult, + SearchInsight, + BrowsePathV2, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import { ANTD_GRAY } from '../../shared/constants'; +import { PopularityTier } from '../../shared/containers/profile/sidebar/shared/utils'; +import { summaryHasStats, DatasetLastUpdatedMs } from '../../shared/utils'; +import { DatasetStatsSummary as DatasetStatsSummaryView } from '../shared/DatasetStatsSummary'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const Preview = ({ + urn, + data, + name, + origin, + description, + platformName, + platformLogo, + platformNames, + platformLogos, + platformInstanceId, + owners, + 
globalTags, + domain, + dataProduct, + deprecation, + snippet, + insights, + glossaryTerms, + subtype, + externalUrl, + container, + parentContainers, + rowCount, + columnCount, + statsSummary, + lastUpdatedMs, + health, + degree, + paths, + isOutputPort, + tier, + headerDropdownItems, + previewType, + browsePaths, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + origin: FabricType; + description?: string | null; + platformName?: string; + platformLogo?: string | null; + platformNames?: (Maybe | undefined)[]; + platformLogos?: (Maybe | undefined)[]; + platformInstanceId?: string; + owners?: Array | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + deprecation?: Deprecation | null; + globalTags?: GlobalTags | null; + snippet?: React.ReactNode | null; + insights?: Array | null; + glossaryTerms?: GlossaryTerms | null; + subtype?: string | null; + externalUrl?: string | null; + container?: Container | null; + parentContainers?: ParentContainersResult | null; + rowCount?: number | null; + columnCount?: number | null; + statsSummary?: DatasetStatsSummary | null; + lastUpdatedMs?: DatasetLastUpdatedMs; + health?: Health[] | null; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + tier?: PopularityTier; + headerDropdownItems?: Set; + previewType?: Maybe; + browsePaths?: BrowsePathV2; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + const hasStats = !!columnCount || summaryHasStats(statsSummary); + + return ( + + ) + } + health={health || undefined} + degree={degree} + paths={paths} + isOutputPort={isOutputPort} + lastUpdatedMs={lastUpdatedMs} + tier={tier} + headerDropdownItems={headerDropdownItems} + statsSummary={statsSummary} + previewType={previewType} + browsePaths={browsePaths} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/Lineage.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/Lineage.tsx new file mode 100644 index 
00000000000000..419ce1c8381508 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/Lineage.tsx @@ -0,0 +1,62 @@ +import { Button, List, Space, Typography } from 'antd'; +import React from 'react'; +import { useHistory, useLocation } from 'react-router-dom'; +import styled from 'styled-components'; +import { DownstreamEntityRelationships, EntityType, UpstreamEntityRelationships } from '../../../../types.generated'; +import { navigateToLineageUrl } from '../../../lineage/utils/navigateToLineageUrl'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { PreviewType } from '../../Entity'; + +export type Props = { + upstreamLineage?: UpstreamEntityRelationships | null; + downstreamLineage?: DownstreamEntityRelationships | null; +}; + +const ViewRawButtonContainer = styled.div` + display: flex; + justify-content: flex-end; +`; + +export default function Lineage({ upstreamLineage, downstreamLineage }: Props) { + const entityRegistry = useEntityRegistry(); + const history = useHistory(); + const location = useLocation(); + const upstreamEntities = upstreamLineage?.entities?.map((entityRelationship) => entityRelationship?.entity); + const downstreamEntities = downstreamLineage?.entities?.map((entityRelationship) => entityRelationship?.entity); + + return ( + <> +
+ + + +
+ + Upstream} + renderItem={(item) => ( + + {entityRegistry.renderPreview(item?.type || EntityType.Dataset, PreviewType.PREVIEW, item)} + + )} + /> + Downstream} + renderItem={(item) => ( + + {entityRegistry.renderPreview(item?.type || EntityType.Dataset, PreviewType.PREVIEW, item)} + + )} + /> + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/OperationsTab.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/OperationsTab.tsx new file mode 100644 index 00000000000000..0f83c922624545 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/OperationsTab.tsx @@ -0,0 +1,247 @@ +import { DeliveredProcedureOutlined } from '@ant-design/icons'; +import { Pagination, Table, Typography } from 'antd'; +import { Tooltip } from '@components'; +import React, { useState } from 'react'; +import styled from 'styled-components'; + +import { GetDatasetRunsQuery, useGetDatasetRunsQuery } from '../../../../graphql/dataset.generated'; +import { + DataProcessInstanceRunResultType, + DataProcessRunStatus, + EntityType, + RelationshipDirection, +} from '../../../../types.generated'; +import { + getExecutionRequestStatusDisplayColor, + getExecutionRequestStatusDisplayText, + getExecutionRequestStatusIcon, +} from '../../../ingest/source/utils'; +import { CompactEntityNameList } from '../../../recommendations/renderer/component/CompactEntityNameList'; +import { ANTD_GRAY } from '../../shared/constants'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import LoadingSvg from '../../../../images/datahub-logo-color-loading_pendulum.svg?react'; +import { scrollToTop } from '../../../shared/searchUtils'; +import { formatDuration } from '../../../shared/formatDuration'; +import { notEmpty } from '../../shared/utils'; + +const ExternalUrlLink = styled.a` + font-size: 16px; + color: ${ANTD_GRAY[8]}; +`; + +const PaginationControlContainer = styled.div` + padding-top: 16px; + padding-bottom: 16px; + text-align: center; +`; + +const 
LoadingText = styled.div` + margin-top: 18px; + font-size: 12px; +`; + +const LoadingContainer = styled.div` + padding-top: 40px; + padding-bottom: 40px; + width: 100%; + text-align: center; +`; + +function getStatusForStyling(status: DataProcessRunStatus, resultType: DataProcessInstanceRunResultType) { + if (status === 'COMPLETE') { + if (resultType === 'SKIPPED') { + return 'CANCELLED'; + } + return resultType; + } + return 'RUNNING'; +} + +const columns = [ + { + title: 'Time', + dataIndex: 'time', + key: 'time', + render: (value) => ( + {new Date(Number(value)).toLocaleString()} + ), + }, + { + title: 'Duration', + dataIndex: 'duration', + key: 'duration', + render: (durationMs: number) => formatDuration(durationMs), + }, + { + title: 'Run ID', + dataIndex: 'name', + key: 'name', + }, + { + title: 'Task', + dataIndex: 'parentTemplate', + key: 'parentTemplate', + render: (parentTemplate) => , + }, + { + title: 'Status', + dataIndex: 'status', + key: 'status', + render: (status: any, row) => { + const statusForStyling = getStatusForStyling(status, row?.resultType); + const Icon = getExecutionRequestStatusIcon(statusForStyling); + const text = getExecutionRequestStatusDisplayText(statusForStyling); + const color = getExecutionRequestStatusDisplayColor(statusForStyling); + return ( + <> +
+ {Icon && } + + {text || 'N/A'} + +
+ + ); + }, + }, + { + title: 'Inputs', + dataIndex: 'inputs', + key: 'inputs', + render: (inputs) => , + }, + { + title: 'Outputs', + dataIndex: 'outputs', + key: 'outputs', + render: (outputs) => , + }, + { + title: '', + dataIndex: 'externalUrl', + key: 'externalUrl', + render: (externalUrl) => + externalUrl && ( + + + + + + ), + }, +]; + +const PAGE_SIZE = 20; + +export const OperationsTab = () => { + const { urn, entityData } = useEntityData(); + const [page, setPage] = useState(1); + + // Fetch data across all siblings. + const allUrns = [ + urn, + ...(entityData?.siblingsSearch?.searchResults || []).map((sibling) => sibling.entity.urn).filter(notEmpty), + ]; + const loadings: boolean[] = []; + const datas: GetDatasetRunsQuery[] = []; + allUrns.forEach((entityUrn) => { + // Because there's a consistent number and order of the urns, + // this usage of a hook within a loop should be safe. + // eslint-disable-next-line react-hooks/rules-of-hooks + const { loading, data } = useGetDatasetRunsQuery({ + variables: { + urn: entityUrn, + start: (page - 1) * PAGE_SIZE, + count: PAGE_SIZE, + direction: RelationshipDirection.Outgoing, + }, + }); + loadings.push(loading); + if (data) { + datas.push(data); + } + }); + + const loading = loadings.some((loadingEntry) => loadingEntry); + + // Merge the runs data from all entities. + // If there's more than one entity contributing to the data, then we can't do pagination. + let canPaginate = true; + let dataRuns: NonNullable['runs'] | undefined; + if (datas.length > 0) { + let numWithRuns = 0; + for (let i = 0; i < datas.length; i++) { + if (datas[i]?.dataset?.runs?.total) { + numWithRuns++; + } + + if (dataRuns && dataRuns.runs) { + dataRuns.runs.push(...(datas[i]?.dataset?.runs?.runs || [])); + dataRuns.total = (dataRuns.total ?? 0) + (datas[i]?.dataset?.runs?.total ?? 
0); + } else { + dataRuns = JSON.parse(JSON.stringify(datas[i]?.dataset?.runs)); + } + } + + if (numWithRuns > 1) { + canPaginate = false; + } + } + + // This also sorts the runs data across all entities. + const runs = dataRuns?.runs?.sort((a, b) => (b?.created?.time ?? 0) - (a?.created?.time ?? 0)); + + const tableData = runs + ?.filter((run) => run) + .map((run) => ({ + time: run?.created?.time, + name: run?.name, + status: run?.state?.[0]?.status, + resultType: run?.state?.[0]?.result?.resultType, + duration: run?.state?.[0]?.durationMillis, + inputs: run?.inputs?.relationships.map((relationship) => relationship.entity), + outputs: run?.outputs?.relationships.map((relationship) => relationship.entity), + externalUrl: run?.externalUrl, + parentTemplate: run?.parentTemplate?.relationships?.[0]?.entity, + })); + + // If the table contains jobs, we need to show the job-related columns. Otherwise we can simplify the table. + const containsJobs = tableData?.some((run) => run.parentTemplate?.type !== EntityType.Dataset); + const simplifiedColumns = containsJobs + ? columns + : columns.filter((column) => !['name', 'inputs', 'outputs'].includes(column.key)); + + const onChangePage = (newPage: number) => { + scrollToTop(); + setPage(newPage); + }; + + // TODO: Much of this file is duplicated from RunsTab.tsx. We should refactor this to share code. + return ( + <> + {loading && ( + + + Fetching runs... + + )} + {!loading && ( + <> +
+ {canPaginate && ( + + + + )} + + )} + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/UsageFacepile.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/UsageFacepile.tsx new file mode 100644 index 00000000000000..f2bdd4750e3ea5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/UsageFacepile.tsx @@ -0,0 +1,42 @@ +import React, { useMemo } from 'react'; +import { Tooltip } from '@components'; +import { EntityType, UserUsageCounts } from '../../../../types.generated'; +import { SpacedAvatarGroup } from '../../../shared/avatar/SpaceAvatarGroup'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import ActorAvatar from '../../shared/ActorAvatar'; + +export type Props = { + users?: (UserUsageCounts | null)[] | null; + maxNumberDisplayed?: number; +}; + +export default function UsageFacepile({ users, maxNumberDisplayed }: Props) { + const sortedUsers = useMemo(() => users?.slice().sort((a, b) => (b?.count || 0) - (a?.count || 0)), [users]); + let displayedUsers = sortedUsers; + if (maxNumberDisplayed) { + displayedUsers = displayedUsers?.slice(0, maxNumberDisplayed); + } + + const entityRegistry = useEntityRegistry(); + + return ( + + {displayedUsers?.map((displayedUser) => { + const user = displayedUser?.user; + const userName = entityRegistry.getDisplayName(EntityType.CorpUser, user); + return ( + + + + ); + })} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Lineage.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Lineage.test.tsx new file mode 100644 index 00000000000000..f7cca3addf283b --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Lineage.test.tsx @@ -0,0 +1,22 @@ +import React from 'react'; +import { render } from '@testing-library/react'; +import { MockedProvider } from '@apollo/client/testing'; + +import Lineage from '../Lineage'; +import { sampleDownstreamRelationship, sampleRelationship } from 
'../stories/lineageEntities'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import { mocks } from '../../../../../Mocks'; + +describe('Lineage', () => { + it('renders', () => { + const { getByText } = render( + + + , + + , + ); + expect(getByText('Upstream HiveDataset')).toBeInTheDocument(); + expect(getByText('Downstream HiveDataset')).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Properties.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Properties.test.tsx new file mode 100644 index 00000000000000..4787efcc08c287 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Properties.test.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import { render } from '@testing-library/react'; +import { MockedProvider } from '@apollo/client/testing'; +import { Properties } from '../../../shared/components/legacy/Properties'; +import { sampleProperties } from '../stories/properties'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import { mocks } from '../../../../../Mocks'; + +describe('Properties', () => { + it('renders', () => { + const { getByText } = render( + + + , + + , + ); + expect(getByText('Properties')).toBeInTheDocument(); + expect(getByText('Number of Partitions')).toBeInTheDocument(); + expect(getByText('18')).toBeInTheDocument(); + expect(getByText('Cluster Name')).toBeInTheDocument(); + expect(getByText('Testing')).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Schema.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Schema.test.tsx new file mode 100644 index 00000000000000..bc20887a1a34bc --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Schema.test.tsx @@ -0,0 +1,398 @@ +import { MockedProvider } from '@apollo/client/testing'; +import { fireEvent, render } from 
'@testing-library/react'; +import React from 'react'; +import { mocks } from '../../../../../Mocks'; +import { EntityType, SchemaMetadata } from '../../../../../types.generated'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import { EntityContext } from '../../../../entity/shared/EntityContext'; +import { SchemaTab } from '../../../shared/tabs/Dataset/Schema/SchemaTab'; +import { TabRenderType } from '../../../shared/types'; +import SchemaRow from '../schema/components/SchemaRow'; +import { + sampleSchema, + sampleSchemaWithKeyValueFields, + sampleSchemaWithoutFields, + sampleSchemaWithPkFk, + sampleSchemaWithTags, +} from '../stories/sampleSchema'; + +vi.mock('virtualizedtableforantd4', async () => { + return { + ...(await vi.importActual('virtualizedtableforantd4')), + useVT: () => [{ body: { row: SchemaRow } }, vi.fn()], + }; +}); + +describe('Schema', () => { + it('renders', () => { + const { getByText } = render( + + + + + + + , + ); + expect(getByText('name')).toBeInTheDocument(); + expect(getByText('the name of the order')).toBeInTheDocument(); + expect(getByText('shipping_address')).toBeInTheDocument(); + expect(getByText('the address the order ships to')).toBeInTheDocument(); + }); + + it('renders raw', () => { + const { getByText, queryAllByTestId } = render( + + + + + + + , + ); + + expect(queryAllByTestId('schema-raw-view')).toHaveLength(0); + + const rawButton = getByText('Raw'); + fireEvent.click(rawButton); + + expect(queryAllByTestId('schema-raw-view')).toHaveLength(1); + + const schemaButton = getByText('Tabular'); + fireEvent.click(schemaButton); + + expect(queryAllByTestId('schema-raw-view')).toHaveLength(0); + }); + + it('renders tags and terms', () => { + const { getByText } = render( + + + + + + + , + ); + expect(getByText('Legacy')).toBeInTheDocument(); + expect(getByText('sample-glossary-term')).toBeInTheDocument(); + }); + + it('renders description', () => { + const { getByText } = render( + + + + + + 
+ , + ); + expect(getByText('order id')).toBeInTheDocument(); + }); + + it('renders field', () => { + const { getByText } = render( + + + + + + + , + ); + expect(getByText('shipping_address')).toBeInTheDocument(); + }); + + it('renders primary keys', () => { + const { getByText } = render( + + + + + + + , + ); + expect(getByText('Primary Key')).toBeInTheDocument(); + }); + + it.skip('renders foreign keys', () => { + const { getByText, getAllByText } = render( + + + + + + + , + ); + expect(getByText('Foreign Key')).toBeInTheDocument(); + + const fkButton = getByText('Foreign Key'); + fireEvent.click(fkButton); + + expect(getByText('Foreign Key to')).toBeInTheDocument(); + expect(getAllByText('Yet Another Dataset')).toHaveLength(2); + }); + + it('renders key/value toggle', () => { + const { getByText, queryByText } = render( + + + + + + + , + ); + expect(getByText('Key')).toBeInTheDocument(); + expect(getByText('Value')).toBeInTheDocument(); + expect(getByText('count')).toBeInTheDocument(); + expect(getByText('cost')).toBeInTheDocument(); + expect(queryByText('id')).not.toBeInTheDocument(); + + const keyButton = getByText('Key'); + fireEvent.click(keyButton); + + expect(getByText('Key')).toBeInTheDocument(); + expect(getByText('Value')).toBeInTheDocument(); + expect(getByText('id')).toBeInTheDocument(); + expect(queryByText('count')).not.toBeInTheDocument(); + expect(queryByText('cost')).not.toBeInTheDocument(); + }); + + it('does not renders key/value toggle when no schema', () => { + const { queryByText } = render( + + + + + + + , + ); + expect(queryByText('Key')).not.toBeInTheDocument(); + expect(queryByText('Value')).not.toBeInTheDocument(); + }); + + it('renders usage column when usage is present', () => { + const usageStats = { + buckets: [ + { + bucket: Date.now(), + metrics: { + totalSqlQueries: 10, + }, + }, + ], + aggregations: { + uniqueUserCount: 2, + totalSqlQueries: 10, + fields: [ + { + fieldName: 'id', + count: 10, + }, + { + fieldName: 'name', + 
count: 24, + }, + ], + }, + }; + + const { queryByText } = render( + + + + + + + , + ); + expect(queryByText('Stats')).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/SchemaDescriptionField.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/SchemaDescriptionField.test.tsx new file mode 100644 index 00000000000000..4fc846409d963a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/SchemaDescriptionField.test.tsx @@ -0,0 +1,66 @@ +import { MockedProvider } from '@apollo/client/testing'; +import { fireEvent, render, waitFor } from '@testing-library/react'; +import React from 'react'; +import { mocks } from '../../../../../Mocks'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import SchemaDescriptionField from '../schema/components/SchemaDescriptionField'; + +describe('SchemaDescriptionField', () => { + it('renders editable description', async () => { + const { getByText, getByRole, queryByText } = render( + + + {}} + description="test description updated" + isEdited + onUpdate={async () => {}} + />{' '} + + , + ); + expect(getByRole('img')).toBeInTheDocument(); + expect(getByText('test description updated')).toBeInTheDocument(); + expect(queryByText('Update description')).not.toBeInTheDocument(); + }); + + it('renders update description modal', async () => { + const { getByText, getByRole, queryByText } = render( + + + {}} + description="test description" + original="test description" + isEdited + onUpdate={async () => {}} + /> + + , + ); + expect(queryByText('Update description')).not.toBeInTheDocument(); + fireEvent.click(getByRole('img')); + await waitFor(() => expect(getByText('Update description')).toBeInTheDocument()); + expect(getByText('Cancel')).toBeInTheDocument(); + expect(getByText('Publish')).toBeInTheDocument(); + expect(getByText('Original:')).toBeInTheDocument(); + fireEvent.click(getByText('Cancel')); + 
await waitFor(() => expect(queryByText('Update description')).not.toBeInTheDocument()); + }); + + it('renders short messages without show more / show less', () => { + const { getByText, queryByText } = render( + {}} + description="short description" + onUpdate={() => Promise.resolve()} + />, + ); + expect(getByText('short description')).toBeInTheDocument(); + expect(queryByText('Read Less')).not.toBeInTheDocument(); + expect(queryByText('Read More')).not.toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Stats.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Stats.test.tsx new file mode 100644 index 00000000000000..d5682a629b92b3 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/Stats.test.tsx @@ -0,0 +1,155 @@ +import React from 'react'; +import { render } from '@testing-library/react'; +import { MockedProvider } from '@apollo/client/testing'; +import SnapshotStatsView from '../stats/snapshot/SnapshotStatsView'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import { completeSampleProfile, missingFieldStatsProfile, missingTableStatsProfile } from '../stories/stats'; +import { mocks } from '../../../../../Mocks'; + +describe('SnapshotStatsView', () => { + it('renders complete profile', () => { + const { getByText } = render( + + + + + , + ); + + // Row Count + expect(getByText('1000')).toBeInTheDocument(); + expect(getByText('Rows')).toBeInTheDocument(); + + // Column Count + expect(getByText('2000')).toBeInTheDocument(); + expect(getByText('Columns')).toBeInTheDocument(); + + // Field Profiles + // First column + expect(getByText('testColumn')).toBeInTheDocument(); + expect(getByText('1')).toBeInTheDocument(); + expect(getByText('11.10%')).toBeInTheDocument(); + expect(getByText('2')).toBeInTheDocument(); + expect(getByText('22.20%')).toBeInTheDocument(); + expect(getByText('3')).toBeInTheDocument(); + 
expect(getByText('4')).toBeInTheDocument(); + expect(getByText('5')).toBeInTheDocument(); + expect(getByText('6')).toBeInTheDocument(); + expect(getByText('value1')).toBeInTheDocument(); + expect(getByText('value2')).toBeInTheDocument(); + expect(getByText('value3')).toBeInTheDocument(); + + // Second column + expect(getByText('testColumn2')).toBeInTheDocument(); + expect(getByText('8')).toBeInTheDocument(); + expect(getByText('33.30%')).toBeInTheDocument(); + expect(getByText('9')).toBeInTheDocument(); + expect(getByText('44.40%')).toBeInTheDocument(); + expect(getByText('10')).toBeInTheDocument(); + expect(getByText('11')).toBeInTheDocument(); + expect(getByText('12')).toBeInTheDocument(); + expect(getByText('13')).toBeInTheDocument(); + expect(getByText('14')).toBeInTheDocument(); + expect(getByText('value4')).toBeInTheDocument(); + expect(getByText('value5')).toBeInTheDocument(); + expect(getByText('value6')).toBeInTheDocument(); + }); + + it('renders profile without field stats', () => { + const { getByText, queryByText } = render( + + + + + , + ); + + // Row Count + expect(getByText('1000')).toBeInTheDocument(); + expect(getByText('Rows')).toBeInTheDocument(); + + // Column Count + expect(getByText('2000')).toBeInTheDocument(); + expect(getByText('Columns')).toBeInTheDocument(); + + // Field Profiles + // First column + expect(queryByText('testColumn')).toBeNull(); + expect(queryByText('1')).toBeNull(); + expect(queryByText('11.10%')).toBeNull(); + expect(queryByText('2')).toBeNull(); + expect(queryByText('22.20%')).toBeNull(); + expect(queryByText('3')).toBeNull(); + expect(queryByText('4')).toBeNull(); + expect(queryByText('5')).toBeNull(); + expect(queryByText('6')).toBeNull(); + expect(queryByText('value1')).toBeNull(); + expect(queryByText('value2')).toBeNull(); + expect(queryByText('value3')).toBeNull(); + + // Second column + expect(queryByText('testColumn2')).toBeNull(); + expect(queryByText('8')).toBeNull(); + 
expect(queryByText('33.30%')).toBeNull(); + expect(queryByText('9')).toBeNull(); + expect(queryByText('44.40%')).toBeNull(); + expect(queryByText('10')).toBeNull(); + expect(queryByText('11')).toBeNull(); + expect(queryByText('12')).toBeNull(); + expect(queryByText('13')).toBeNull(); + expect(queryByText('14')).toBeNull(); + expect(queryByText('value4')).toBeNull(); + expect(queryByText('value5')).toBeNull(); + expect(queryByText('value6')).toBeNull(); + }); + + it('renders profile without table stats', () => { + const { getByText, queryByText } = render( + + + + + , + ); + + // Row Count + expect(queryByText('1000')).toBeNull(); + expect(queryByText('Rows')).toBeNull(); + expect(queryByText('Row Count Unknown')).toBeInTheDocument(); + + // Column Count + expect(queryByText('2000')).toBeNull(); + expect(queryByText('Columns')).toBeNull(); + expect(queryByText('Column Count Unknown')).toBeInTheDocument(); + + // Field Profiles + // First column + expect(getByText('testColumn')).toBeInTheDocument(); + expect(getByText('1')).toBeInTheDocument(); + expect(getByText('11.10%')).toBeInTheDocument(); + expect(getByText('2')).toBeInTheDocument(); + expect(getByText('22.20%')).toBeInTheDocument(); + expect(getByText('3')).toBeInTheDocument(); + expect(getByText('4')).toBeInTheDocument(); + expect(getByText('5')).toBeInTheDocument(); + expect(getByText('6')).toBeInTheDocument(); + expect(getByText('value1')).toBeInTheDocument(); + expect(getByText('value2')).toBeInTheDocument(); + expect(getByText('value3')).toBeInTheDocument(); + + // Second column + expect(getByText('testColumn2')).toBeInTheDocument(); + expect(getByText('8')).toBeInTheDocument(); + expect(getByText('33.30%')).toBeInTheDocument(); + expect(getByText('9')).toBeInTheDocument(); + expect(getByText('44.40%')).toBeInTheDocument(); + expect(getByText('10')).toBeInTheDocument(); + expect(getByText('11')).toBeInTheDocument(); + expect(getByText('12')).toBeInTheDocument(); + 
expect(getByText('13')).toBeInTheDocument(); + expect(getByText('14')).toBeInTheDocument(); + expect(getByText('value4')).toBeInTheDocument(); + expect(getByText('value5')).toBeInTheDocument(); + expect(getByText('value6')).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPath.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPath.test.tsx new file mode 100644 index 00000000000000..bb2c0121ef251f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPath.test.tsx @@ -0,0 +1,33 @@ +import translateFieldPath from '../../schema/utils/translateFieldPath'; + +describe('translateFieldPath', () => { + it('translates qualified unions', () => { + expect(translateFieldPath('[type=union].[type=QualifyingStruct].struct.[type=long].field')).toEqual( + '(QualifyingStruct) struct.field', + ); + }); + + it('translates nested arrays', () => { + expect(translateFieldPath('[type=array].[type=array].my_array.[type=long].field')).toEqual( + 'my_array[][].field', + ); + }); + + it('removes non-qualifying structs', () => { + expect( + translateFieldPath('[type=array].[type=array].MyArray.[type=Struct].field.[type=long].nested_field'), + ).toEqual('MyArray[][].field.nested_field'); + }); + + it('cleans the [key=true] prefix', () => { + expect( + translateFieldPath( + '[key=True].[type=array].[type=array].MyArray.[type=Struct].field.[type=long].nested_field', + ), + ).toEqual('MyArray[][].field.nested_field'); + }); + + it('leaves old fieldpaths as is', () => { + expect(translateFieldPath('a.b.c')).toEqual('a.b.c'); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPathSegment.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPathSegment.test.tsx new file mode 100644 index 00000000000000..2ddfe7d19e2425 --- /dev/null +++ 
b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/translateFieldPathSegment.test.tsx @@ -0,0 +1,39 @@ +import translateFieldPathSegment from '../../schema/utils/translateFieldPathSegment'; + +describe('translateFieldPathSegment', () => { + it('translates unions', () => { + expect(translateFieldPathSegment('MyUnion', 1, ['[type=union]', 'MyUnion'])).toEqual('MyUnion.'); + }); + + it('translates arrays', () => { + expect(translateFieldPathSegment('MyArray', 1, ['[type=array]', 'MyArray'])).toEqual('MyArray[].'); + }); + + it('translates qualifying structs in the middle', () => { + expect( + translateFieldPathSegment('[type=QualifyingStruct]', 1, [ + '[type=union]', + '[type=QualifyingStruct]', + 'MyUnion', + ]), + ).toEqual('(QualifyingStruct) '); + }); + + it('translates qualifying structs in the end', () => { + expect( + translateFieldPathSegment('[type=QualifyingStruct]', 1, ['[type=union]', '[type=QualifyingStruct]']), + ).toEqual(' QualifyingStruct'); + }); + + it('translates primitives', () => { + expect( + translateFieldPathSegment('field', 4, [ + '[type=union]', + 'MyUnion', + '[type=QualifyingStruct]', + '[type=long]', + 'field', + ]), + ).toEqual('field.'); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/utils.test.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/utils.test.tsx new file mode 100644 index 00000000000000..8d41161380295b --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/__tests__/schema/utils.test.tsx @@ -0,0 +1,49 @@ +import { SchemaFieldDataType } from '../../../../../../types.generated'; +import { filterKeyFieldPath } from '../../schema/utils/utils'; + +describe('utils', () => { + describe('filterKeyFieldPath', () => { + it('allows keys when looking for keys', () => { + expect( + filterKeyFieldPath(true, { + fieldPath: '[version=2.0].[key=True].[type=long].field', + nullable: false, + type: SchemaFieldDataType.Number, + recursive: 
false, + }), + ).toEqual(true); + }); + it('blocks non-keys when looking for keys', () => { + expect( + filterKeyFieldPath(true, { + fieldPath: '[version=2.0].[type=long].field', + nullable: false, + type: SchemaFieldDataType.Number, + recursive: false, + }), + ).toEqual(false); + }); + + it('allows non-keys when looking for non-keys', () => { + expect( + filterKeyFieldPath(false, { + fieldPath: '[version=2.0].[type=long].field', + nullable: false, + type: SchemaFieldDataType.Number, + recursive: false, + }), + ).toEqual(true); + }); + + it('blocks keys when looking for non-keys', () => { + expect( + filterKeyFieldPath(false, { + fieldPath: '[version=2.0].[key=True].[type=long].field', + nullable: false, + type: SchemaFieldDataType.Number, + recursive: false, + }), + ).toEqual(false); + }); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/CustomPagination.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/CustomPagination.tsx new file mode 100644 index 00000000000000..8d270898a44566 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/CustomPagination.tsx @@ -0,0 +1,113 @@ +import React, { useState } from 'react'; +import { Button, Menu, Dropdown, Typography } from 'antd'; +import { LeftOutlined, RightOutlined } from '@ant-design/icons'; +import styled from 'styled-components'; + +const CustomPaginationContainer = styled.div` + display: flex; + flex-direction: row; + height: 32px; +`; +const NavButton = styled(Button)` + margin: 4px 6px; + cursor: pointer; +`; +const DescriptionText = styled(Typography.Text)` + line-height: 32px; +`; +const VersionText = styled(Typography.Text)` + padding: 0 4px; + line-height: 32px; + cursor: pointer; +`; +const VersionRightText = styled(Typography.Text)` + padding-left: 4px; + line-height: 32px; + cursor: pointer; +`; + +type Props = { + onChange: (version1: number, version2: number) => void; + maxVersion: number; +}; + +export 
default function CustomPagination({ onChange, maxVersion }: Props) { + const [version1, setVersion1] = useState(maxVersion || 1); // current version - first dropdown selected + const [version2, setVersion2] = useState(maxVersion ? maxVersion - 1 : 0); // past version comparing with current - second dropdown + + const onNextClick = () => { + setVersion1((v) => v - 1); + setVersion2(version1 - 2); + onChange(version1 - 1, version1 - 2); + }; + const onPrevClick = () => { + setVersion1((v) => v + 1); + setVersion2(version1); + onChange(version1 + 1, version1); + }; + const onVersion1Click = ({ key }) => { + const newVersion1 = parseInt(key, 10); + setVersion1(newVersion1); + if (version2 >= newVersion1) { + setVersion2(newVersion1 - 1); + onChange(newVersion1, newVersion1 - 1); + return; + } + onChange(newVersion1, version2); + }; + const onVersion2Click = ({ key }) => { + setVersion2(parseInt(key, 10)); + onChange(version1, parseInt(key, 10)); + }; + + const menu1 = ( + + {[...Array(maxVersion)].map((_, i) => ( + // eslint-disable-next-line react/no-array-index-key + + {i === 0 ? 'latest' : `version ${maxVersion + 1 - i}`} + + ))} + + ); + + const menu2 = ( + + {[...Array(version1)].map((_, i) => ( + // eslint-disable-next-line react/no-array-index-key + + {`version ${version1 - i}`} + + ))} + + ); + + return ( + + } + onClick={onPrevClick} + disabled={version1 >= maxVersion} + /> + Comparing + + + {version1 === maxVersion ? 
'latest' : `version ${version1 + 1}`} + + + to + + {`version ${version2 + 1}`} + + } + onClick={onNextClick} + disabled={version1 <= 1} + /> + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/InteriorTitleContent.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/InteriorTitleContent.tsx new file mode 100644 index 00000000000000..6e3cd3c95c3531 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/InteriorTitleContent.tsx @@ -0,0 +1,132 @@ +import { Tooltip, Typography } from 'antd'; +import React from 'react'; +import Highlight from 'react-highlighter'; +import styled from 'styled-components'; +import { DeprecationIcon } from '@src/app/entityV2/shared/components/styled/DeprecationIcon'; +import { SchemaMetadata, SubResourceType } from '../../../../../../types.generated'; +import { REDESIGN_COLORS } from '../../../../shared/constants'; +import NullableLabel, { + ForeignKeyLabel, + PartitioningKeyLabel, + PrimaryKeyLabel, +} from '../../../../shared/tabs/Dataset/Schema/components/ConstraintLabels'; +import translateFieldPath from '../utils/translateFieldPath'; +import { ExtendedSchemaFields } from '../utils/types'; + +const MAX_COMPACT_FIELD_PATH_LENGTH = 15; + +const FieldTitleWrapper = styled.div<{ $isCompact: boolean }>` + display: inline-flex; + flex-direction: ${(props) => (props.$isCompact ? 'column' : 'row')}; + align-items: ${(props) => (props.$isCompact ? 'start' : 'center')}; + justify-content: start; + gap: 10px; + width: 100%; + max-width: 100%; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +`; + +const FieldPathContainer = styled.div` + vertical-align: top; + display: inline-block; +`; + +const DeprecatedContainer = styled.div` + color: ${REDESIGN_COLORS.DARK_GREY}; +`; + +const FieldPathText = styled(Typography.Text)<{ $isCompact: boolean }>` + font-size: 12px; + line-height: ${(props) => (props.$isCompact ? 
'14px' : '24px')}; + font-weight: 600; + color: ${REDESIGN_COLORS.DARK_GREY}; + + display: flex; + align-items: center; + gap: 6px; +`; + +type InteriorTitleProps = { + parentUrn: string; + schemaMetadata: SchemaMetadata | undefined | null; + filterText: string; + fieldPath: string; + record: ExtendedSchemaFields; + isCompact?: boolean; +}; + +export const InteriorTitleContent = ({ + parentUrn, + schemaMetadata, + filterText, + fieldPath, + record, + isCompact, +}: InteriorTitleProps) => { + const fieldPathWithoutAnnotations = translateFieldPath(fieldPath); + const parentPathWithoutAnnotations = translateFieldPath(record.parent?.fieldPath || ''); + let pathToDisplay = fieldPathWithoutAnnotations; + + // if the parent path is a prefix of the field path, remove it for display purposes + if (parentPathWithoutAnnotations && fieldPathWithoutAnnotations.indexOf(parentPathWithoutAnnotations) === 0) { + // parent length + 1 because of the trailing `.` of the parent + pathToDisplay = fieldPathWithoutAnnotations.slice(parentPathWithoutAnnotations.length + 1); + } + + let compactPathToDisplay; + if (isCompact) { + compactPathToDisplay = + pathToDisplay.length > MAX_COMPACT_FIELD_PATH_LENGTH + ? `${pathToDisplay.substring(0, MAX_COMPACT_FIELD_PATH_LENGTH)}...` + : pathToDisplay; + } + + return ( + + {!!record.schemaFieldEntity?.deprecation?.deprecated && !isCompact && ( + + + + )} + + + {isCompact ? 
( + + {compactPathToDisplay} + + ) : ( + {pathToDisplay} + )} + + + {!isCompact && ( + <> + {(schemaMetadata?.primaryKeys?.includes(fieldPath) || record.isPartOfKey) && } + {record.isPartitioningKey && } + {record.nullable && } + {/* {record.nullable && } */} + {schemaMetadata?.foreignKeys + ?.filter( + (constraint) => + (constraint?.sourceFields?.filter( + (sourceField) => sourceField?.fieldPath.trim() === fieldPath.trim(), + ).length || 0) > 0, + ) + .map((constraint) => ( + + ))} + + )} + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaDescriptionField.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaDescriptionField.tsx new file mode 100644 index 00000000000000..d9d60894653d3b --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaDescriptionField.tsx @@ -0,0 +1,255 @@ +import { Typography, message, Button } from 'antd'; +import { EditOutlined } from '@ant-design/icons'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { FetchResult } from '@apollo/client'; + +import CompactMarkdownViewer from '@src/app/entityV2/shared/tabs/Documentation/components/CompactMarkdownViewer'; +import { UpdateDatasetMutation } from '../../../../../../graphql/dataset.generated'; +import UpdateDescriptionModal from '../../../../shared/components/legacy/DescriptionModal'; +import { removeMarkdown } from '../../../../shared/components/styled/StripMarkdownText'; +import SchemaEditableContext from '../../../../../shared/SchemaEditableContext'; +import { useEntityData } from '../../../../../entity/shared/EntityContext'; +import analytics, { EventType, EntityActionType } from '../../../../../analytics'; +import { Editor } from '../../../../shared/tabs/Documentation/components/editor/Editor'; +import { REDESIGN_COLORS } from '../../../../shared/constants'; +import { StringMapEntry } from '../../../../../../types.generated'; +import 
DocumentationPropagationDetails from '../../../../../sharedV2/propagation/DocumentationPropagationDetails'; + +const EditIcon = styled(EditOutlined)` + cursor: pointer; + display: none; +`; + +const AddNewDescription = styled(Button)` + display: flex; + width: 140px; + background-color: #fafafa; + border-radius: 4px; + align-items: center; + justify-content: center; +`; + +const ExpandedActions = styled.div` + height: 10px; +`; + +const DescriptionContainer = styled.div` + position: relative; + display: inline-block; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + width: 100%; + min-height: 22px; + font-size: 12px; + font-weight: 400; + line-height: 24px; + color: ${REDESIGN_COLORS.DARK_GREY}; + vertical-align: middle; + &:hover ${EditIcon} { + display: inline-block; + } + + & ins.diff { + background-color: #b7eb8f99; + text-decoration: none; + &:hover { + background-color: #b7eb8faa; + } + } + & del.diff { + background-color: #ffa39e99; + text-decoration: line-through; + &: hover { + background-color: #ffa39eaa; + } + } +`; +const EditedLabel = styled(Typography.Text)` + display: inline-block; + margin-left: 8px; + color: rgba(150, 150, 150, 0.5); + font-style: italic; + position: relative; + top: -2px; +`; + +const ReadLessText = styled(Typography.Link)` + margin-right: 4px; +`; + +const StyledViewer = styled(Editor)` + padding-right: 8px; + display: block; + + .remirror-editor.ProseMirror { + padding: 0; + font-size: 12px; + font-weight: 400; + line-height: 24px; + color: ${REDESIGN_COLORS.DARK_GREY}; + vertical-align: middle; + } +`; + +const DescriptionWrapper = styled.span` + display: inline-flex; + align-items: center; +`; + +const AddModalWrapper = styled.div``; + +type Props = { + onExpanded: (expanded: boolean) => void; + expanded: boolean; + description: string; + fieldPath?: string; + original?: string | null; + onUpdate: ( + description: string, + ) => Promise, Record> | void>; + handleShowMore?: (_: string) => void; + 
isEdited?: boolean; + isReadOnly?: boolean; + isPropagated?: boolean; + sourceDetail?: StringMapEntry[] | null; +}; + +export default function DescriptionField({ + expanded, + onExpanded: handleExpanded, + description, + fieldPath, + onUpdate, + handleShowMore, + isEdited = false, + original, + isReadOnly, + isPropagated, + sourceDetail, +}: Props) { + const [showAddModal, setShowAddModal] = useState(false); + + const overLimit = removeMarkdown(description).length > 40; + const isSchemaEditable = React.useContext(SchemaEditableContext); + const onCloseModal = () => { + setShowAddModal(false); + }; + const { urn, entityType } = useEntityData(); + + const sendAnalytics = () => { + analytics.event({ + type: EventType.EntityActionEvent, + actionType: EntityActionType.UpdateSchemaDescription, + entityType, + entityUrn: urn, + }); + }; + + const onUpdateModal = async (desc: string | null) => { + message.loading({ content: 'Updating...' }); + try { + await onUpdate(desc || ''); + message.destroy(); + message.success({ content: 'Updated!', duration: 2 }); + sendAnalytics(); + } catch (e: unknown) { + message.destroy(); + if (e instanceof Error) message.error({ content: `Update Failed! \n ${e.message || ''}`, duration: 2 }); + } + onCloseModal(); + }; + + const enableEdits = isSchemaEditable && !isReadOnly; + const EditButton = + (enableEdits && description && setShowAddModal(true)} />) || + undefined; + + const showAddButton = enableEdits && !description; + + return ( + + {/* {expanded || !overLimit ? ( */} + {expanded ? 
( + <> + {!!description && } + {!!description && (EditButton || overLimit) && ( + + {overLimit && ( + { + e.stopPropagation(); + handleExpanded(false); + }} + > + Read Less + + )} + {EditButton} + + )} + + ) : ( + description && ( + <> + {/* + // { + // e.stopPropagation(); + // handleExpanded(true); + // }} + // > + // Read More + // + // + // } + suffix={EditButton} + shouldWrap + > */} + + {isPropagated && } +   + handleShowMore && handleShowMore(fieldPath || '')} + fixedLineHeight + customStyle={{ fontSize: '12px' }} + scrollableY={false} + /> + + {/* */} + + ) + )} + {isSchemaEditable && isEdited && (edited)} + {showAddModal && ( + e.stopPropagation()}> + + + )} + {showAddButton && ( + { + setShowAddModal(true); + e.stopPropagation(); + }} + > + Add Description + + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaFilterSelectContent.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaFilterSelectContent.tsx new file mode 100644 index 00000000000000..c602cd7813650c --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaFilterSelectContent.tsx @@ -0,0 +1,71 @@ +import React, { useState } from 'react'; +import { Button, Checkbox } from 'antd'; +import styled from 'styled-components'; + +import { SchemaFilterType } from '../../../../shared/tabs/Dataset/Schema/utils/filterSchemaRows'; +import { ANTD_GRAY } from '../../../../shared/constants'; + +type Props = { + schemaFilterTypes: SchemaFilterType[]; + setSchemaFilterTypes: (filters: SchemaFilterType[]) => void; + close: () => void; +}; + +const UpdateButton = styled(Button)` + width: 100%; + text-align: center; + background-color: ${(props) => props.theme.styles['primary-color']}; + color: white; + border-radius: 0; + margin-top: 10px; +`; + +const StyledCheckbox = styled(Checkbox)` + font-size: 14px; + line-height: 22px; + padding-top: 5px; + padding-bottom: 5px; + margin-left: -16px; + 
padding-left: 16px; + :hover { + background-color: ${ANTD_GRAY[3]}; + } + width: 232px; +`; + +export default function SchemaFilterSelectContent({ schemaFilterTypes, setSchemaFilterTypes, close }: Props) { + const [stagedSchemaFilterTypes, setStagedSchemaFilterTypes] = useState(schemaFilterTypes); + + return ( +
+ setStagedSchemaFilterTypes(values as SchemaFilterType[])} + > + + Name + + + Documentation + + + Tags + + + Glossary Terms + + +
+ { + setSchemaFilterTypes(stagedSchemaFilterTypes); + close(); + }} + > + Update + +
+
+ ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaHeader.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaHeader.tsx new file mode 100644 index 00000000000000..d95ff2992c2b8e --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaHeader.tsx @@ -0,0 +1,210 @@ +import { FileTextOutlined, TableOutlined } from '@ant-design/icons'; +import VersionSelector from '@app/entityV2/dataset/profile/schema/components/VersionSelector'; +import HistoryIcon from '@mui/icons-material/History'; +import { Button, Typography } from 'antd'; +import { Tooltip } from '@components'; +import { debounce } from 'lodash'; +import React, { useState } from 'react'; +import styled from 'styled-components/macro'; +import { SemanticVersionStruct } from '../../../../../../types.generated'; +import TabToolbar from '../../../../shared/components/styled/TabToolbar'; +import { ANTD_GRAY, REDESIGN_COLORS } from '../../../../shared/constants'; +import { SchemaFilterType } from '../../../../shared/tabs/Dataset/Schema/utils/filterSchemaRows'; +import SchemaSearchInput from './SchemaSearchInput'; + +const SchemaHeaderContainer = styled.div` + display: flex; + justify-content: space-between; + width: 100%; + padding-bottom: 3px; +`; + +// Below styles are for buttons on the left side of the Schema Header +const LeftButtonsGroup = styled.div` + &&& { + display: flex; + justify-content: left; + width: 100%; + } +`; + +const RawButton = styled(Button)` + &&& { + display: flex; + margin-right: 10px; + justify-content: left; + align-items: center; + } +`; + +const RawButtonTitleContainer = styled.span` + display: flex; + align-items: center; +`; + +const RawButtonTitle = styled(Typography.Text)` + margin-left: 6px; +`; + +const KeyButton = styled(Button)<{ $highlighted: boolean }>` + border-radius: 8px 0px 0px 8px; + font-weight: ${(props) => (props.$highlighted ? 
'600' : '400')}; +`; + +const ValueButton = styled(Button)<{ $highlighted: boolean }>` + border-radius: 0px 8px 8px 0px; + font-weight: ${(props) => (props.$highlighted ? '600' : '400')}; +`; + +const KeyValueButtonGroup = styled.div` + margin-right: 10px; + display: flex; +`; + +// Below styles are for buttons on the right side of the Schema Header +const RightButtonsGroup = styled.div` + display: flex; + align-items: center; + justify-content: right; + gap: 15px; + + padding-left: 5px; +`; + +const SchemaAuditButton = styled(Button)` + display: flex; + align-items: center; + background: ${REDESIGN_COLORS.WHITE}; + padding: 0; + margin-right: 15px; + + svg { + background: ${REDESIGN_COLORS.TITLE_PURPLE}; + border-radius: 50%; + stroke: ${REDESIGN_COLORS.WHITE}; + color: ${REDESIGN_COLORS.WHITE}; + padding: 4px; + stroke-width: 0.5px; + } +`; + +const MAX_ROWS_BEFORE_DEBOUNCE = 50; + +type Props = { + hasRaw: boolean; + showRaw: boolean; + setShowRaw: (show: boolean) => void; + hasKeySchema: boolean; + showKeySchema: boolean; + setShowKeySchema: (show: boolean) => void; + selectedVersion: string; + versionList: Array; + showSchemaTimeline: boolean; + setShowSchemaTimeline: any; + setFilterText: (text: string) => void; + numRows: number; + schemaFilterTypes: SchemaFilterType[]; + setSchemaFilterTypes: (filters: SchemaFilterType[]) => void; + highlightedMatchIndex: number | null; + setHighlightedMatchIndex: (val: number | null) => void; + matches: { path: string; index: number }[]; + schemaFilter: string; +}; + +export default function SchemaHeader({ + hasRaw, + showRaw, + setShowRaw, + hasKeySchema, + showKeySchema, + setShowKeySchema, + selectedVersion, + versionList, + setShowSchemaTimeline, + showSchemaTimeline, + setFilterText, + numRows, + schemaFilterTypes, + setSchemaFilterTypes, + matches, + highlightedMatchIndex, + setHighlightedMatchIndex, + schemaFilter, +}: Props) { + const [schemaFilterSelectOpen, setSchemaFilterSelectOpen] = useState(false); + + const 
schemaAuditToggleText = showSchemaTimeline ? 'Close change history' : 'View change history'; + + const debouncedSetFilterText = debounce( + (e: React.ChangeEvent) => setFilterText(e.target.value), + numRows > MAX_ROWS_BEFORE_DEBOUNCE ? 100 : 0, + ); + + return ( + + + + {hasRaw && ( + setShowRaw(!showRaw)}> + {showRaw ? ( + + + Tabular + + ) : ( + + + Raw + + )} + + )} + {hasKeySchema && ( + + setShowKeySchema(true)}> + Key + + setShowKeySchema(false)}> + Value + + + )} + {!showRaw && ( + match.path)} + highlightedMatchIndex={highlightedMatchIndex} + setHighlightedMatchIndex={setHighlightedMatchIndex} + schemaFilterSelectOpen={schemaFilterSelectOpen} + setSchemaFilterSelectOpen={setSchemaFilterSelectOpen} + numRows={numRows} + /> + )} + + + {versionList.length > 1 && ( + + )} + + setShowSchemaTimeline(!showSchemaTimeline)} + style={{ color: showSchemaTimeline ? REDESIGN_COLORS.BLUE : ANTD_GRAY[7] }} + > + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRawView.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRawView.tsx new file mode 100644 index 00000000000000..07235a4e32cfb6 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRawView.tsx @@ -0,0 +1,36 @@ +import React from 'react'; +import { Typography } from 'antd'; +import styled from 'styled-components'; +import { Schema, SchemaMetadata } from '../../../../../../types.generated'; +import { diffJson, formatRawSchema, getRawSchema } from '../utils/utils'; + +type Props = { + schemaDiff: { + current?: SchemaMetadata | Schema | null; + previous?: SchemaMetadata | null; + }; + editMode: boolean; + showKeySchema: boolean; +}; + +const SchemaContainer = styled.div` + padding: 12px; +`; + +export default function SchemaRawView({ schemaDiff, editMode, showKeySchema }: Props) { + const currentSchemaRaw = formatRawSchema(getRawSchema(schemaDiff.current?.platformSchema, showKeySchema)); + + 
const schemaRawDiff = editMode + ? currentSchemaRaw + : diffJson(formatRawSchema(getRawSchema(schemaDiff.previous?.platformSchema, showKeySchema)), currentSchemaRaw); + + return ( + + +
+                    {schemaRawDiff}
+                
+
+
+ ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRow.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRow.tsx new file mode 100644 index 00000000000000..bbdd935cba411a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaRow.tsx @@ -0,0 +1,13 @@ +import React from 'react'; + +const SchemaRow = React.forwardRef((props, ref) => { + // eslint-disable-next-line react/prop-types + const { children, ...rest } = props; + return ( +
+ {children} + + ); +}); + +export default SchemaRow; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaSearchInput.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaSearchInput.tsx new file mode 100644 index 00000000000000..7c6333ab78b31d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaSearchInput.tsx @@ -0,0 +1,175 @@ +import { FilterOutlined, SearchOutlined } from '@ant-design/icons'; +import { Input } from 'antd'; +import { Popover } from '@components'; +import React from 'react'; +import styled from 'styled-components'; +import { pluralize } from '../../../../../shared/textUtil'; +import { REDESIGN_COLORS } from '../../../../shared/constants'; +import { SchemaFilterType } from '../../../../shared/tabs/Dataset/Schema/utils/filterSchemaRows'; +import SchemaFilterSelectContent from './SchemaFilterSelectContent'; + +const StyledInput = styled(Input)` + max-width: 300px; + background: ${REDESIGN_COLORS.LIGHT_GREY}; + margin-top: 5px; + font-size: 14px; + font-weight: 500; + line-height: 24px; + color: ${REDESIGN_COLORS.DARK_GREY}; +`; + +const MatchLabelText = styled.span` + font-size: 12px; + font-style: normal; + font-weight: 700; + color: ${REDESIGN_COLORS.DARK_GREY}; + padding-left: 10px; + margin-top: 5px; +`; + +const SearchContainer = styled.span` + --antd-wave-shadow-color: transparent; + flex: auto; + white-space: nowrap; + display: flex; + align-items: center; + + .ant-input-group-wrapper { + border-radius: 20px; + border: 1px solid ${REDESIGN_COLORS.GREY}; + background: #f3f5fa; + } + + .ant-input-group-wrapper { + background-color: #ffffff00 !important; + } + + .ant-input-wrapper { + background-color: #ffffff00 !important; + } + + .ant-input { + border-radius: 0; + } + + .ant-input-affix-wrapper { + border-radius: 20px; + border: none; + } + + .ant-input-group-addon { + border: none; + background-color: #ffffff00 !important; + left: 2px; + 
} + + .ant-input-affix-wrapper:focus { + border: none; + } + + .ant-input-affix-wrapper:not(.ant-input-affix-wrapper-disabled):hover { + border: none; + } + + .ant-input-affix-wrapper::selection { + background: transparent; + } +`; + +const StyledPopover = styled(Popover)` + border-radius: 50%; + background: ${REDESIGN_COLORS.GREY}; +`; + +const StyledFilterIcon = styled(FilterOutlined)<{ $hasFiltered: boolean }>` + cursor: pointer; + margin-left: -14px; + margin-right: -14px; + margin-top: -12px; + margin-bottom: -12px; + padding-left: 14px; + padding-right: 14px; + padding-top: 12px; + padding-bottom: 12px; + border-radius: 50%; + color: ${(props) => (props.$hasFiltered ? props.theme.styles['primary-color'] : 'inherit')}; + + :hover { + color: ${(props) => (!props.$hasFiltered ? props.theme.styles['primary-color'] : 'inherit')}; + } +`; + +interface SchemaSearchProps { + schemaFilterTypes: SchemaFilterType[]; + setSchemaFilterTypes: (filters: SchemaFilterType[]) => void; + schemaFilter: string; + debouncedSetFilterText: (event: React.ChangeEvent) => void; + matches: string[]; + highlightedMatchIndex: number | null; + setHighlightedMatchIndex: (val: number | null) => void; + schemaFilterSelectOpen: boolean; + setSchemaFilterSelectOpen: (val: boolean) => void; + numRows: number; +} + +const SchemaSearchInput: React.FC = ({ + schemaFilterTypes, + setSchemaFilterTypes, + schemaFilter, + debouncedSetFilterText, + matches, + highlightedMatchIndex, + setHighlightedMatchIndex, + schemaFilterSelectOpen, + setSchemaFilterSelectOpen, + numRows, +}: SchemaSearchProps) => { + const schemaFilterTypeSelectPrompt = ( + setSchemaFilterSelectOpen(val)} + content={ + setSchemaFilterSelectOpen(false)} + schemaFilterTypes={schemaFilterTypes} + setSchemaFilterTypes={setSchemaFilterTypes} + /> + } + trigger="click" + overlayInnerStyle={{ padding: 0 }} + > + + + ); + + return ( + + } + onKeyDown={(e) => { + if (e.code === 'Enter' && highlightedMatchIndex !== null && matches.length > 0) 
{ + setHighlightedMatchIndex((highlightedMatchIndex + 1) % matches.length); + } + }} + /> + {schemaFilter.length > 0 && ( + + Matched {matches.length} {pluralize(matches.length, 'column')} of {numRows} + + )} + + ); +}; + +export default SchemaSearchInput; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaVersionSummary.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaVersionSummary.tsx new file mode 100644 index 00000000000000..b0f07a59e29c0e --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/SchemaVersionSummary.tsx @@ -0,0 +1,52 @@ +import React from 'react'; +import { Typography } from 'antd'; +import styled from 'styled-components'; + +const SummaryContainer = styled.div` + margin-bottom: 16px; + padding-left: 10px; + & ul { + padding-inline-start: 30px; + margin-top: 5px; + } +`; + +export interface SchemaDiffSummary { + added: number; + removed: number; + updated: number; +} + +type Props = { + diffSummary: SchemaDiffSummary; +}; + +export default function SchemaVersionSummary({ diffSummary }: Props) { + return ( + +
    + {diffSummary.added ? ( +
  • + {`${diffSummary.added} column${ + diffSummary.added > 1 ? 's were' : ' was' + } added`} +
  • + ) : null} + {diffSummary.removed ? ( +
  • + {`${diffSummary.removed} column${ + diffSummary.removed > 1 ? 's were' : ' was' + } removed`} +
  • + ) : null} + {diffSummary.updated ? ( +
  • + {`${diffSummary.updated} description${ + diffSummary.updated > 1 ? 's were' : ' was' + } updated`} +
  • + ) : null} +
+
+ ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/StructuredPropValues.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/StructuredPropValues.tsx new file mode 100644 index 00000000000000..8d74ff748ab8d3 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/StructuredPropValues.tsx @@ -0,0 +1,69 @@ +import StructuredPropertyValue from '@src/app/entityV2/shared/tabs/Properties/StructuredPropertyValue'; +import { mapStructuredPropertyToPropertyRow } from '@src/app/entityV2/shared/tabs/Properties/useStructuredProperties'; +import { useEntityRegistry } from '@src/app/useEntityRegistry'; +import { SchemaFieldEntity, SearchResult, StdDataType } from '@src/types.generated'; +import { Tooltip } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; + +const ValuesContainer = styled.span` + max-width: 120px; + display: flex; +`; + +const MoreIndicator = styled.span` + float: right; +`; + +interface Props { + schemaFieldEntity: SchemaFieldEntity; + propColumn: SearchResult | undefined; +} + +const StructuredPropValues = ({ schemaFieldEntity, propColumn }: Props) => { + const entityRegistry = useEntityRegistry(); + + const property = schemaFieldEntity.structuredProperties?.properties?.find( + (prop) => prop.structuredProperty.urn === propColumn?.entity?.urn, + ); + const propRow = property ? mapStructuredPropertyToPropertyRow(property) : undefined; + const values = propRow?.values; + const isRichText = propRow?.dataType?.info.type === StdDataType.RichText; + + const hasMoreValues = values && values.length > 2; + const displayedValues = hasMoreValues ? values.slice(0, 1) : values; + const tooltipContent = values?.map((value) => { + const title = value.entity + ? entityRegistry.getDisplayName(value.entity.type, value.entity) + : value.value?.toString(); + return
{title}
; + }); + + return ( + <> + {values && ( + <> + {displayedValues?.map((val) => { + return ( + + + + ); + })} + {hasMoreValues && ( + + ... + + )} + + )} + + ); +}; + +export default StructuredPropValues; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/TypeIcon.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/TypeIcon.tsx new file mode 100644 index 00000000000000..4117bea0c53879 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/TypeIcon.tsx @@ -0,0 +1,113 @@ +import { + FieldBinaryOutlined, + NumberOutlined, + UnorderedListOutlined, + QuestionCircleOutlined, + UnderlineOutlined, + CalendarOutlined, + FieldTimeOutlined, +} from '@ant-design/icons'; +import { Typography } from 'antd'; +import { Tooltip } from '@components'; +import React, { FC } from 'react'; +import { VscSymbolString, VscFileBinary } from 'react-icons/vsc'; +import styled from 'styled-components'; +import { capitalizeFirstLetter } from '../../../../../shared/textUtil'; +import { SchemaFieldDataType } from '../../../../../../types.generated'; + +const TypeIconContainer = styled.div` + display: flex; + flex-direction: column; + justify-content: center; + text-align: center; + margin-top: 2.5px; + width: 40px; +`; + +const TypeSubtitle = styled(Typography.Text)<{ hasicon?: string }>` + font-size: 8px; + text-align: center; + ${(props) => (props.hasicon ? 
'' : 'margin-top: 4px;')} +`; + +const IconSpan = styled.span` + font-size: 18px; +`; + +const DATA_TYPE_ICON_MAP: Record | null; size: number; text: string }> = + { + [SchemaFieldDataType.Boolean]: { + icon: FieldBinaryOutlined, + size: 18, + text: 'Boolean', + }, + [SchemaFieldDataType.Fixed]: { icon: FieldBinaryOutlined, size: 18, text: 'Fixed' }, + [SchemaFieldDataType.String]: { + icon: () => ( + + + + ), + size: 20, + text: 'String', + }, + [SchemaFieldDataType.Bytes]: { + icon: () => ( + + + + ), + size: 18, + text: 'Bytes', + }, + [SchemaFieldDataType.Number]: { icon: NumberOutlined, size: 14, text: 'Number' }, + [SchemaFieldDataType.Date]: { icon: CalendarOutlined, size: 18, text: 'Date' }, + [SchemaFieldDataType.Time]: { icon: FieldTimeOutlined, size: 18, text: 'Time' }, + [SchemaFieldDataType.Enum]: { icon: UnorderedListOutlined, size: 18, text: 'Enum' }, + [SchemaFieldDataType.Null]: { icon: QuestionCircleOutlined, size: 16, text: '' }, + [SchemaFieldDataType.Map]: { icon: null, size: 0, text: 'Map' }, + [SchemaFieldDataType.Array]: { icon: UnorderedListOutlined, size: 14, text: 'Array' }, + [SchemaFieldDataType.Union]: { icon: UnderlineOutlined, size: 14, text: 'Union' }, + [SchemaFieldDataType.Struct]: { icon: null, size: 0, text: 'Struct' }, + }; + +const truncate = (length: number, input?: string | null) => { + if (!input) return ''; + if (input.length > length) { + return `${input.substring(0, length)}...`; + } + return input; +}; + +type Props = { + type: SchemaFieldDataType; + nativeDataType: string | null | undefined; +}; + +export default function TypeIcon({ type, nativeDataType }: Props) { + const { icon: Icon, size, text } = DATA_TYPE_ICON_MAP[type]; + + // if unable to match type to DataHub, display native type info by default + const nativeFallback = type === SchemaFieldDataType.Null; + + // eslint-disable-next-line react/prop-types + const NativeDataTypeTooltip = ({ children }) => + nativeDataType ? 
( + + {children} + + ) : ( + <>{children} + ); + + return ( + + + {Icon && } + + {nativeFallback ? truncate(250, nativeDataType) : text} + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/VersionSelector.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/VersionSelector.tsx new file mode 100644 index 00000000000000..2a3b255e31c770 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/components/VersionSelector.tsx @@ -0,0 +1,153 @@ +import { CaretDownOutlined } from '@ant-design/icons'; +import { REDESIGN_COLORS } from '@app/entityV2/shared/constants'; +import { toRelativeTimeString } from '@app/shared/time/timeUtils'; +import PlatformIcon from '@app/sharedV2/icons/PlatformIcon'; +import navigateToUrl from '@app/utils/navigateToUrl'; +import { DataPlatform, SemanticVersionStruct } from '@types'; +import { Select } from 'antd'; +import { Tooltip } from '@components'; +import React, { useEffect, useMemo } from 'react'; +import { useHistory, useLocation } from 'react-router-dom'; +import styled from 'styled-components/macro'; + +export const SEMANTIC_VERSION_PARAM = 'semantic_version'; +export const SIBLING_VERSION_PARAM = 'secondary_version'; // Note: Currently unused + +const Wrapper = styled.div` + display: flex; + align-items: center; + gap: 4px; +`; + +const SchemaBlameSelector = styled(Select)` + &&& .ant-select-selector { + background: ${REDESIGN_COLORS.LIGHT_GREY}; + font-size: 14px; + font-weight: 500; + line-height: 24px; + color: ${REDESIGN_COLORS.DARK_GREY}; + min-width: 30px; + margin-right: 10px; + border-radius: 20px; + } +`; + +const SchemaBlameSelectorOption = styled(Select.Option)` + &&& { + overflow: visible; + margin-top: 6px; + width: 100%; + } +`; + +interface VersionOption { + label: string; + timestamp: number; + disabled: boolean; +} + +interface Props { + versionList: Array; + selectedVersion: string; + platform?: DataPlatform; + isSibling: boolean; + 
isPrimary: boolean; + minTimestamp?: number; + maxTimestamp?: number; + primaryVersion?: string; +} + +/** + * Note: Configured to allow for displaying two version selectors, one for each sibling. + * Currently not used in that way. + */ +export default function VersionSelector({ + versionList, + selectedVersion, + platform, + isSibling, + isPrimary, + minTimestamp, + maxTimestamp, + primaryVersion, +}: Props) { + const location = useLocation(); + const history = useHistory(); + + const versionOptions = useMemo(() => { + return getVersionOptions( + versionList, + isPrimary ? undefined : minTimestamp, + isPrimary ? undefined : maxTimestamp, + ); + }, [versionList, minTimestamp, maxTimestamp, isPrimary]); + + useEffect(() => { + // If the selected version is disabled, navigate to the first available version + if (isPrimary || !selectedVersion) return; + const selectedIndex = versionOptions.findIndex((v) => v.label === selectedVersion); + const nextOption = versionOptions.find((v) => !v.disabled); + if (selectedIndex !== -1 && versionOptions[selectedIndex].disabled && nextOption?.label) { + navigateToUrl({ + location, + history, + urlParam: isSibling ? SIBLING_VERSION_PARAM : SEMANTIC_VERSION_PARAM, + value: nextOption.label, + }); + } + }, [versionOptions, selectedVersion, isSibling, isPrimary, history, location]); + + return ( + + {platform && } + { + navigateToUrl({ + location, + history, + urlParam: isSibling ? 
SIBLING_VERSION_PARAM : SEMANTIC_VERSION_PARAM, + value: e as string, + }); + }} + data-testid="schema-version-selector-dropdown" + suffixIcon={} + > + {versionOptions.map((v) => ( + + + {`${v.label} - ${toRelativeTimeString(v.timestamp) || 'unknown'}`} + + + ))} + + + ); +} + +function getVersionOptions( + versionList: Array, + minTimestamp?: number, + maxTimestamp?: number, +): VersionOption[] { + return versionList + .map( + (v) => + v?.semanticVersion && + v?.semanticVersionTimestamp && { + label: v.semanticVersion, + timestamp: v.semanticVersionTimestamp, + disabled: + v.semanticVersionTimestamp < (minTimestamp || 0) || + v.semanticVersionTimestamp > (maxTimestamp || Number.POSITIVE_INFINITY), + }, + ) + .filter((v): v is VersionOption => !!v); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/constants.ts b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/constants.ts new file mode 100644 index 00000000000000..ae842ab9558850 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/constants.ts @@ -0,0 +1,4 @@ +export const KEY_SCHEMA_PREFIX = '[key=True].'; +export const VERSION_PREFIX = '[version=2.0].'; +export const ARRAY_TOKEN = '[type=array]'; +export const UNION_TOKEN = '[type=union]'; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTitleRenderer.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTitleRenderer.tsx new file mode 100644 index 00000000000000..86f9166faab758 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTitleRenderer.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import { SchemaMetadata } from '../../../../../../types.generated'; +import { InteriorTitleContent } from '../components/InteriorTitleContent'; +import { ExtendedSchemaFields } from './types'; + +export default function useSchemaTitleRenderer( + parentUrn: string, + schemaMetadata: SchemaMetadata | 
undefined | null, + filterText: string, + isCompact?: boolean, +) { + return (fieldPath: string, record: ExtendedSchemaFields): JSX.Element => { + return ( + + ); + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTypeRenderer.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTypeRenderer.tsx new file mode 100644 index 00000000000000..ffc1b39072fb6b --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/schemaTypeRenderer.tsx @@ -0,0 +1,41 @@ +import { Popover } from '@components'; +import React from 'react'; +import styled from 'styled-components'; +import TypeLabel from '../../../../shared/tabs/Dataset/Schema/components/TypeLabel'; +import { ExtendedSchemaFields } from './types'; +import { REDESIGN_COLORS } from '../../../../shared/constants'; + +const FieldTypeWrapper = styled.div` + display: inline-flex; + align-items: center; +`; + +const FieldTypeContainer = styled.div` + vertical-align: top; + display: flex; + color: ${REDESIGN_COLORS.GREY_500}; +`; + +type InteriorTypeProps = { + record: ExtendedSchemaFields; +}; + +const InteriorTypeContent = ({ record }: InteriorTypeProps) => { + return ( + + + + + + ); +}; + +export default function useSchemaTypeRenderer() { + return (fieldPath: string, record: ExtendedSchemaFields): JSX.Element => { + return ( + }> + + + ); + }; +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPath.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPath.tsx new file mode 100644 index 00000000000000..ccb532bc2d4b34 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPath.tsx @@ -0,0 +1,24 @@ +import { KEY_SCHEMA_PREFIX, VERSION_PREFIX } from './constants'; +import translateFieldPathSegment from './translateFieldPathSegment'; + +export default function translateFieldPath(fieldPath: string) { + // fields that are part of a 
key schema are prefixed with [key=true] + // we don't want to display this + const cleanedFieldPath = fieldPath.replace(KEY_SCHEMA_PREFIX, '').replace(VERSION_PREFIX, ''); + const fieldPathParts = cleanedFieldPath.split('.'); + + // convert each fieldPathSegment into a human readable format + const adjustedFieldPathParts = fieldPathParts.map(translateFieldPathSegment); + + let fieldPathWithoutAnnotations = adjustedFieldPathParts.join(''); + + // clean up artifacts from unions and arrays nested within one another + fieldPathWithoutAnnotations = fieldPathWithoutAnnotations.replace(/\.\./g, '.').replace(/\. /g, ' '); + + // removing a hanging dot if present + if (fieldPathWithoutAnnotations.endsWith('.')) { + fieldPathWithoutAnnotations = fieldPathWithoutAnnotations.slice(0, -1); + } + + return fieldPathWithoutAnnotations; +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPathSegment.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPathSegment.tsx new file mode 100644 index 00000000000000..7153a38a32c3b5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/translateFieldPathSegment.tsx @@ -0,0 +1,53 @@ +import { ARRAY_TOKEN, UNION_TOKEN } from './constants'; + +export default function translateFieldPathSegment(fieldPathSegment, i, fieldPathParts) { + // for each segment, convert its fieldPath representation into a human readable version + // We leave the annotations present and strip them out in a second pass + const previousSegment = fieldPathParts[i - 1]; + + // we need to look back to see how many arrays there were previously to display the array indexing notation after the field + let previousArrayCount = 0; + for (let j = i - 1; j >= 0; j--) { + if (fieldPathParts[j] === ARRAY_TOKEN) { + previousArrayCount++; + } + if (fieldPathParts[j].indexOf('[') === -1) { + break; + } + } + + // strip out the version prefix + if ( + 
fieldPathSegment.startsWith('[version=') || + fieldPathSegment === ARRAY_TOKEN || + fieldPathSegment === UNION_TOKEN + ) { + return ''; + } + + // structs that qualify a union are represented as [union]union_field.[type=QualifiedStruct].qualified_struct_field + // we convert into union_field. (QualifiedStruct) qualified_struct_field + if (fieldPathSegment.startsWith('[type=') && fieldPathSegment.endsWith(']')) { + const typeName = fieldPathSegment.replace('[type=', '').replace(']', ''); + // if the qualified struct is the last element, just show the qualified struct + if (i === fieldPathParts.length - 1) { + return ` ${typeName}`; + } + + // if the qualified struct is not the last element, surround with parens + if (previousSegment === UNION_TOKEN) { + return `(${typeName}) `; + } + + // if the struct is not qualifying, ignore + return ''; + } + + // arrays are represented as [type=array]array_field_name + // we convert into array_field_name[] + if (previousArrayCount > 0) { + return `${fieldPathSegment}${'[]'.repeat(previousArrayCount)}.`; + } + + return `${fieldPathSegment}.`; +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/types.ts b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/types.ts new file mode 100644 index 00000000000000..b71b21112ddb58 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/types.ts @@ -0,0 +1,16 @@ +import { SchemaField, GlobalTags } from '../../../../../../types.generated'; + +export interface ExtendedSchemaFields extends SchemaField { + children?: Array; + depth?: number; + previousDescription?: string | null; + pastGlobalTags?: GlobalTags | null; + isNewRow?: boolean; + isDeletedRow?: boolean; + parent?: ExtendedSchemaFields; +} + +export enum SchemaViewType { + NORMAL, + BLAME, +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/utils.ts b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/utils.ts new file mode 
100644 index 00000000000000..f9afa21b670e3e --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/schema/utils/utils.ts @@ -0,0 +1,283 @@ +import { SorterResult } from 'antd/lib/table/interface'; +import * as diff from 'diff'; + +import { + EditableSchemaFieldInfo, + EditableSchemaMetadata, + EditableSchemaMetadataUpdate, + PlatformSchema, + SchemaField, +} from '../../../../../../types.generated'; +import { convertTagsForUpdate } from '../../../../../shared/tags/utils/convertTagsForUpdate'; +import { SchemaDiffSummary } from '../components/SchemaVersionSummary'; +import { KEY_SCHEMA_PREFIX, UNION_TOKEN, VERSION_PREFIX } from './constants'; +import { ExtendedSchemaFields } from './types'; + +export function convertEditableSchemaMeta( + editableSchemaMeta?: Array, + fields?: Array, +): Array { + const updatedFields = [...(fields || [])] as Array; + if (editableSchemaMeta && editableSchemaMeta.length > 0) { + editableSchemaMeta.forEach((updatedField) => { + const originalFieldIndex = updatedFields.findIndex((f) => f.fieldPath === updatedField.fieldPath); + if (originalFieldIndex > -1) { + updatedFields[originalFieldIndex] = { + ...updatedFields[originalFieldIndex], + description: updatedField.description, + globalTags: { ...updatedField.globalTags }, + }; + } + }); + } + return updatedFields; +} + +export function convertEditableSchemaMetadataForUpdate( + editableSchemaMetadata: EditableSchemaMetadata | null | undefined, +): EditableSchemaMetadataUpdate { + return { + editableSchemaFieldInfo: + editableSchemaMetadata?.editableSchemaFieldInfo.map((editableSchemaFieldInfo) => ({ + fieldPath: editableSchemaFieldInfo?.fieldPath, + description: editableSchemaFieldInfo?.description, + globalTags: { tags: convertTagsForUpdate(editableSchemaFieldInfo?.globalTags?.tags || []) }, + })) || [], + }; +} + +export function filterKeyFieldPath(showKeySchema: boolean, field: SchemaField) { + return field.fieldPath.indexOf(KEY_SCHEMA_PREFIX) > -1 ? 
showKeySchema : !showKeySchema; +} + +export function downgradeV2FieldPath(fieldPath?: string | null) { + if (!fieldPath) { + return fieldPath; + } + + const cleanedFieldPath = fieldPath.replace(KEY_SCHEMA_PREFIX, '').replace(VERSION_PREFIX, ''); + + // strip out all annotation segments + return cleanedFieldPath + .split('.') + .map((segment) => (segment.startsWith('[') ? null : segment)) + .filter(Boolean) + .join('.'); +} + +export function pathMatchesNewPath(fieldPathA?: string | null, fieldPathB?: string | null) { + return fieldPathA === fieldPathB || fieldPathA === downgradeV2FieldPath(fieldPathB); +} + +// group schema fields by fieldPath and grouping for hierarchy in schema table +export function groupByFieldPath( + schemaRows?: Array, + options: { + showKeySchema: boolean; + } = { showKeySchema: false }, +): Array { + const rows = [ + ...(schemaRows?.filter(filterKeyFieldPath.bind({}, options.showKeySchema)) || []), + ] as Array; + + const outputRows: Array = []; + const outputRowByPath = {}; + + for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) { + let parentRow: null | ExtendedSchemaFields = null; + const row = { children: undefined, ...rows[rowIndex], depth: 0 }; + + for (let j = rowIndex - 1; j >= 0; j--) { + const rowTokens = row.fieldPath.split('.'); + const isQualifyingUnionField = rowTokens[rowTokens.length - 3] === UNION_TOKEN; + if (isQualifyingUnionField) { + // in the case of unions, parent will not be a subset of the child + rowTokens.splice(rowTokens.length - 2, 1); + const parentPath = rowTokens.join('.'); + + if (rows[j].fieldPath === parentPath) { + parentRow = outputRowByPath[rows[j].fieldPath]; + break; + } + } else { + // In the case of structs, arrays, etc, parent will be the first token from + // the left of this field's name(last token of the path) that does not enclosed in []. 
+ let parentPath: null | string = null; + for ( + let lastParentTokenIndex = rowTokens.length - 2; + lastParentTokenIndex >= 0; + --lastParentTokenIndex + ) { + const lastParentToken: string = rowTokens[lastParentTokenIndex]; + if (lastParentToken && lastParentToken[0] !== '[') { + parentPath = rowTokens.slice(0, lastParentTokenIndex + 1).join('.'); + break; + } + } + if (parentPath && rows[j].fieldPath === parentPath) { + parentRow = outputRowByPath[rows[j].fieldPath]; + break; + } + } + } + + // if the parent field exists in the ouput, add the current row as a child + if (parentRow) { + row.depth = (parentRow.depth || 0) + 1; + row.parent = parentRow; + parentRow.children = [...(parentRow.children || []), row]; + } else { + outputRows.push(row); + } + outputRowByPath[row.fieldPath] = row; + } + return outputRows; +} + +export function diffMarkdown(oldStr: string, newStr: string) { + const diffArray = diff.diffChars(oldStr || '', newStr || ''); + return diffArray + .map((diffOne) => { + if (diffOne.added) { + return `${diffOne.value}`; + } + if (diffOne.removed) { + return `${diffOne.value}`; + } + return diffOne.value; + }) + .join(''); +} + +export function diffJson(oldStr: string, newStr: string) { + const diffArray = diff.diffJson(oldStr || '', newStr || ''); + return diffArray + .map((diffOne) => { + if (diffOne.added) { + return `+${diffOne.value}`; + } + if (diffOne.removed) { + return `-${diffOne.value}`; + } + return diffOne.value; + }) + .join(''); +} + +export function formatRawSchema(schemaValue?: string | null): string { + try { + if (!schemaValue) { + return schemaValue || ''; + } + return JSON.stringify(JSON.parse(schemaValue), null, 2); + } catch (e) { + return schemaValue || ''; + } +} + +export function getRawSchema(schema: PlatformSchema | undefined | null, showKeySchema: boolean): string { + if (!schema) { + return ''; + } + + if (schema.__typename === 'TableSchema') { + return schema.schema; + } + if (schema.__typename === 'KeyValueSchema') { 
+ return showKeySchema ? schema.keySchema : schema.valueSchema; + } + return ''; +} + +// Get diff summary between two versions and prepare to visualize description diff changes +export function getDiffSummary( + currentVersionRows?: Array, + previousVersionRows?: Array, + options: { showKeySchema: boolean } = { showKeySchema: false }, +): { + rows: Array; + diffSummary: SchemaDiffSummary; +} { + let rows = [ + ...(currentVersionRows?.filter(filterKeyFieldPath.bind({}, options.showKeySchema)) || []), + ] as Array; + const previousRows = [ + ...(previousVersionRows?.filter(filterKeyFieldPath.bind({}, options.showKeySchema)) || []), + ] as Array; + + const diffSummary: SchemaDiffSummary = { + added: 0, + removed: 0, + updated: 0, + }; + + if (previousVersionRows && previousVersionRows.length > 0) { + rows.forEach((field, rowIndex) => { + const relevantPastFieldIndex = previousRows.findIndex( + (pf) => pf.type === rows[rowIndex].type && pf.fieldPath === rows[rowIndex].fieldPath, + ); + if (relevantPastFieldIndex > -1) { + if (previousRows[relevantPastFieldIndex].description !== rows[rowIndex].description) { + rows[rowIndex] = { + ...rows[rowIndex], + previousDescription: previousRows[relevantPastFieldIndex].description, + }; + diffSummary.updated++; // Increase updated row number in diff summary + } + previousRows.splice(relevantPastFieldIndex, 1); + } else { + rows[rowIndex] = { ...rows[rowIndex], isNewRow: true }; + diffSummary.added++; // Increase added row number in diff summary + } + }); + rows = [...rows, ...previousRows.map((pf) => ({ ...pf, isDeletedRow: true }))]; + diffSummary.removed = previousRows.length; // removed row number in diff summary + } + + return { rows, diffSummary }; +} + +// we need to calculate excluding collapsed fields because Antd table expects +// an indexToScroll to only counting based on visible fields +export function findIndexOfFieldPathExcludingCollapsedFields( + fieldPath: string, + expandedRows: Set, + rows: Array, + sorter: 
SorterResult | undefined, + compareFn: ((a: any, b: any) => number) | undefined, +) { + let index = 0; // This will keep track of the index across recursive calls + + function search(shadowedRows) { + let sortedRows = shadowedRows; + if (sorter?.order === 'ascend') { + sortedRows = shadowedRows.toSorted(compareFn); + } else if (sorter?.order === 'descend') { + sortedRows = shadowedRows.toSorted(compareFn).toReversed(); + } + + // eslint-disable-next-line no-restricted-syntax + for (const row of sortedRows) { + // eslint-disable-next) { + // Check if the current row's ID matches the ID we're looking for + if (row.fieldPath === fieldPath) { + return index; + } + index++; // Increment index for the current row + + // Check if current row is expanded and has children + if (expandedRows.has(row.fieldPath) && row.children && row.children.length) { + const foundIndex = search(row.children); // Recursively search children + if (foundIndex !== -1) { + // If found in children, return the found index + return foundIndex; + } + } + } + // Return -1 if the ID was not found in this branch + return -1; + } + + // Start the recursive search + return search(rows); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/Stats.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/Stats.tsx new file mode 100644 index 00000000000000..f9f5dff5cce8fc --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/Stats.tsx @@ -0,0 +1,40 @@ +import React, { useState } from 'react'; +import { Radio } from 'antd'; +import { DatasetProfile } from '../../../../../types.generated'; +import LatestStatsView from './snapshot/LatestStatsView'; +import HistoricalStatsView from './historical/HistoricalStatsView'; + +export type Props = { + urn: string; + profile: DatasetProfile; +}; + +enum ViewType { + LATEST, + HISTORICAL, +} + +export default function Stats({ urn, profile }: Props) { + /** + * Determines which view should be visible: latest or historical. 
+ */ + const [view, setView] = useState(ViewType.LATEST); + + const onChangeView = (e) => { + setView(e.target.value); + }; + + const toggleView = ( + + Latest + Historical + + ); + + return ( + <> + {view === ViewType.LATEST && } + {view === ViewType.HISTORICAL && } + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/StatsSection.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/StatsSection.tsx new file mode 100644 index 00000000000000..c0a98e44f8c358 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/StatsSection.tsx @@ -0,0 +1,34 @@ +import { Divider, Row, Typography } from 'antd'; +import styled from 'styled-components'; +import React from 'react'; + +const Section = styled.div` + padding-top: 24px; + padding-bottom: 40px; + margin-bottom: 20px; + width: 100%; +`; + +const ThinDivider = styled(Divider)` + margin-top: 8px; + margin-bottom: 8px; +`; + +export type Props = { + children: React.ReactNode; + title: string; + rightFloatView?: React.ReactNode; +}; + +export default function StatsSection({ children, title, rightFloatView }: Props) { + return ( +
+ + {title} + {rightFloatView || } + + + {children} +
+ ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/HistoricalStatsView.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/HistoricalStatsView.tsx new file mode 100644 index 00000000000000..2b98a4d8f67544 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/HistoricalStatsView.tsx @@ -0,0 +1,230 @@ +import React, { ReactNode, useEffect, useState } from 'react'; +import styled from 'styled-components'; + +import { Affix, Row, Select, Typography } from 'antd'; +import { useGetDataProfilesLazyQuery } from '../../../../../../graphql/dataset.generated'; +import { DateInterval } from '../../../../../../types.generated'; +import { Message } from '../../../../../shared/Message'; +import { getFixedLookbackWindow } from '../../../../../shared/time/timeUtils'; + +import ProfilingRunsChart from './charts/ProfilingRunsChart'; +import StatsSection from '../StatsSection'; +import StatChart from './charts/StatChart'; +import { + computeAllFieldPaths, + computeChartTickInterval, + extractChartValuesFromFieldProfiles, + extractChartValuesFromTableProfiles, +} from '../../../../shared/utils'; + +const HeaderRow = styled(Row)` + padding-top: 24px; + padding-bottom: 28px; + background-color: white; +`; + +const SubHeaderText = styled(Typography.Text)` + color: gray; + font-size: 16px; +`; + +const EmbeddedSelect = styled(Select)` + padding-left: 8px; +`; + +/** + * Change this to add or modify the lookback windows that are selectable via the UI. 
+ */ +const LOOKBACK_WINDOWS = [ + { text: '1 day', windowSize: { interval: DateInterval.Day, count: 1 } }, + { text: '1 week', windowSize: { interval: DateInterval.Week, count: 1 } }, + { text: '1 month', windowSize: { interval: DateInterval.Month, count: 1 } }, + { text: '3 months', windowSize: { interval: DateInterval.Month, count: 3 } }, + { text: '1 year', windowSize: { interval: DateInterval.Year, count: 1 } }, +]; + +const DEFAULT_LOOKBACK_WINDOW = '3 months'; + +const getLookbackWindowSize = (text: string) => { + for (let i = 0; i < LOOKBACK_WINDOWS.length; i++) { + const window = LOOKBACK_WINDOWS[i]; + if (window.text === text) { + return window.windowSize; + } + } + throw new Error(`Unrecognized lookback window size ${text} provided`); +}; + +export type Props = { + urn: string; + toggleView: ReactNode; +}; + +export default function HistoricalStatsView({ urn, toggleView }: Props) { + const [getDataProfiles, { data: profilesData, loading: profilesLoading }] = useGetDataProfilesLazyQuery({ + fetchPolicy: 'cache-first', + }); + + /** + * Perform initial fetch of default lookback window stats. + */ + useEffect(() => { + getDataProfiles({ + variables: { urn, ...getFixedLookbackWindow(getLookbackWindowSize(DEFAULT_LOOKBACK_WINDOW)) }, + }); + }, [urn, getDataProfiles]); + + /** + * Determines which fixed lookback window is used to display historical statistics. See above for valid options. + */ + const [selectedLookbackWindow, setSelectedLookbackWindow] = useState(DEFAULT_LOOKBACK_WINDOW); + const selectedWindowSize = getLookbackWindowSize(selectedLookbackWindow); + const selectedWindow = getFixedLookbackWindow(selectedWindowSize); + + /** + * Determines which field path is highlighted in column stats. Defaults to none. + */ + const [selectedFieldPath, setSelectedFieldPath] = useState(''); + + /** + * Change handlers. 
+ */ + const onChangeSelectedLookbackWindow = (text) => { + const newWindowSize = getLookbackWindowSize(text); + const newTimeWindow = getFixedLookbackWindow(newWindowSize); + getDataProfiles({ + variables: { urn, ...newTimeWindow }, + }); + setSelectedLookbackWindow(text); + }; + + const onChangeSelectedFieldPath = (value) => { + setSelectedFieldPath(value); + }; + + const graphTickInterval = computeChartTickInterval(selectedWindowSize); + const graphDateRange = { + start: selectedWindow.startTime.toString(), + end: selectedWindow.endTime.toString(), + }; + + const profiles = profilesData?.dataset?.datasetProfiles || []; + const allFieldPaths = Array.from(computeAllFieldPaths(profiles)); + + if (selectedFieldPath === '' && allFieldPaths.length > 0) { + // Set initially selected field path. + setSelectedFieldPath(allFieldPaths[0]); + } + + const columnSelectView = ( + + Viewing stats for column + + {allFieldPaths.map((fieldPath) => ( + {fieldPath} + ))} + + + ); + + /** + * Compute Table Stat chart data. + */ + const rowCountChartValues = extractChartValuesFromTableProfiles(profiles, 'rowCount'); + const columnCountChartValues = extractChartValuesFromTableProfiles(profiles, 'columnCount'); + + /** + * Compute Column Stat chart data. + */ + const nullCountChartValues: Array = extractChartValuesFromFieldProfiles( + profiles, + selectedFieldPath, + 'nullCount', + ); + const nullPercentageChartValues: Array = extractChartValuesFromFieldProfiles( + profiles, + selectedFieldPath, + 'nullProportion', + ); + const distinctCountChartValues: Array = extractChartValuesFromFieldProfiles( + profiles, + selectedFieldPath, + 'uniqueCount', + ); + const distinctPercentageChartValues: Array = extractChartValuesFromFieldProfiles( + profiles, + selectedFieldPath, + 'uniqueProportion', + ); + + return ( + <> + {profilesLoading && } + + +
+ Profiling History + + Viewing profiling history for the past + + {LOOKBACK_WINDOWS.map((lookbackWindow) => ( + {lookbackWindow.text} + ))} + + +
+ {toggleView} +
+
+ + + + + + + + + + + + + + + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/ProfilingRunsChart.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/ProfilingRunsChart.tsx new file mode 100644 index 00000000000000..36f2b5e37902e2 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/ProfilingRunsChart.tsx @@ -0,0 +1,91 @@ +import { Button, Col, Modal, Table, Typography } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { DatasetProfile } from '../../../../../../../types.generated'; +import DataProfileView from '../../snapshot/SnapshotStatsView'; + +export const ChartTable = styled(Table)` + margin: 12px; + box-shadow: ${(props) => props.theme.styles['box-shadow']}; +`; + +export type Props = { + profiles: Array; +}; + +export default function ProfilingRunsChart({ profiles }: Props) { + const [showModal, setShowModal] = useState(false); + const [selectedProfileIndex, setSelectedProfileIndex] = useState(-1); + + const showProfileModal = (index: number) => { + setSelectedProfileIndex(index); + setShowModal(true); + }; + + const onClose = () => { + setShowModal(false); + setSelectedProfileIndex(-1); + }; + + const tableData = profiles.map((profile) => { + const profileDate = new Date(profile.timestampMillis); + return { + timestamp: `${profileDate.toLocaleDateString()} at ${profileDate.toLocaleTimeString()}`, + rowCount: profile.rowCount?.toString() || 'unknown', + columnCount: profile.columnCount?.toString() || 'unknown', + }; + }); + + const tableColumns = [ + { + title: 'Recent Profiles', + key: 'Recent Profiles', + dataIndex: 'timestamp', + render: (title, record, index) => { + return ( + + ); + }, + }, + { + title: 'Row Count', + key: 'Row Count', + dataIndex: 'rowCount', + }, + { + title: 'Column Count', + key: 'Column Count', + dataIndex: 'columnCount', + }, + ]; + + const 
selectedProfile = (selectedProfileIndex >= 0 && profiles[selectedProfileIndex]) || undefined; + const profileModalTitle = + selectedProfile && + `Showing profile from ${new Date(selectedProfile?.timestampMillis).toLocaleDateString()} at ${new Date( + selectedProfile?.timestampMillis, + ).toLocaleTimeString()}`; + + return ( + <> + {selectedProfile && ( + + + + )} +
+ + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/StatChart.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/StatChart.tsx new file mode 100644 index 00000000000000..a6d2d2b4c855c6 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/historical/charts/StatChart.tsx @@ -0,0 +1,96 @@ +import { Col, Divider, Typography } from 'antd'; +import React, { useMemo } from 'react'; +import styled from 'styled-components'; + +import { DateInterval, DateRange } from '../../../../../../../types.generated'; +import { ChartCard } from '../../../../../../analyticsDashboard/components/ChartCard'; +import { ChartContainer } from '../../../../../../analyticsDashboard/components/ChartContainer'; +import { TimeSeriesChart } from '../../../../../../analyticsDashboard/components/TimeSeriesChart'; + +const ChartTitle = styled(Typography.Title)` + && { + margin-bottom: 20px; + text-align: left; + width: 100%; + } +`; + +const ThinDivider = styled(Divider)` + margin: 0px; + padding: 0px; +`; + +type Point = { + timeMs: number; + value: number; +}; + +export type Props = { + title: string; + values: Array; + tickInterval: DateInterval; + dateRange: DateRange; +}; + +/** + * Change these to change the chart axis & line colors + * TODO: Add this to the theme config. + */ +const DEFAULT_LINE_COLOR = '#20d3bd'; +const DEFAULT_AXIS_COLOR = '#D8D8D8'; +const DEFAULT_AXIS_WIDTH = 2; + +/** + * Time Series Chart with a single line. 
+ */ +export default function StatChart({ title, values, tickInterval: interval, dateRange }: Props) { + const timeSeriesData = useMemo( + () => + values + .sort((a, b) => a.timeMs - b.timeMs) + .map((value) => { + const dateStr = new Date(value.timeMs).toISOString(); + return { + x: dateStr, + y: value.value, + }; + }), + [values], + ); + + const chartData = { + title, + lines: [ + { + name: 'line_1', + data: timeSeriesData, + }, + ], + interval, + dateRange, + }; + + return ( + <> + + + + {chartData.title} + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/LatestStatsView.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/LatestStatsView.tsx new file mode 100644 index 00000000000000..a94e6c4f6d98d2 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/LatestStatsView.tsx @@ -0,0 +1,36 @@ +import React, { ReactNode } from 'react'; +import styled from 'styled-components'; +import { Affix, Row, Typography } from 'antd'; +import { DatasetProfile } from '../../../../../../types.generated'; +import DataProfileView from './SnapshotStatsView'; + +const HeaderRow = styled(Row)` + padding-top: 24px; + padding-bottom: 28px; + background-color: white; +`; + +export type Props = { + profile: DatasetProfile; + toggleView: ReactNode; +}; + +export default function LatestStatsView({ profile, toggleView }: Props) { + const reportedAtDate = new Date(profile.timestampMillis); + return ( + <> + + +
+ Latest Stats + + Reported on {reportedAtDate.toLocaleDateString()} at {reportedAtDate.toLocaleTimeString()} + +
+ {toggleView} +
+
+ + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/SnapshotStatsView.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/SnapshotStatsView.tsx new file mode 100644 index 00000000000000..73c37fc3ac363f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/snapshot/SnapshotStatsView.tsx @@ -0,0 +1,180 @@ +import { Row, Table, Tag, Typography } from 'antd'; +import styled from 'styled-components'; + +import { ColumnsType, ColumnType } from 'antd/lib/table'; +import React, { useMemo } from 'react'; +import { DatasetProfile } from '../../../../../../types.generated'; +import { Highlight } from '../../../../../analyticsDashboard/components/Highlight'; +import StatsSection from '../StatsSection'; + +const ColumnStatsTable = styled(Table)` + margin-top: 24px; +`; + +const isPresent = (val: any) => { + return val !== undefined && val !== null; +}; + +const decimalToPercentStr = (decimal: number, precision: number): string => { + return `${(decimal * 100).toFixed(precision)}%`; +}; + +export type Props = { + profile: DatasetProfile; +}; + +export default function DataProfileView({ profile }: Props) { + const columnStatsTableData = useMemo( + () => + profile.fieldProfiles?.map((doc) => ({ + name: doc.fieldPath, + min: doc.min, + max: doc.max, + mean: doc.mean, + median: doc.median, + stdev: doc.stdev, + nullCount: isPresent(doc.nullCount) && doc.nullCount!.toString(), + nullPercentage: isPresent(doc.nullProportion) && decimalToPercentStr(doc.nullProportion!, 2), + distinctCount: isPresent(doc.uniqueCount) && doc.uniqueCount!.toString(), + distinctPercentage: isPresent(doc.uniqueProportion) && decimalToPercentStr(doc.uniqueProportion!, 2), + sampleValues: doc.sampleValues, + })) || [], + [profile], + ); + + /** + * Returns a placeholder value to show in the column data table when data is null. 
+ */ + const unknownValue = () => { + return unknown; + }; + + /** + * Computes a set of the object keys across all items in a given array. + */ + const getItemKeySet = (items: Array) => { + const keySet = new Set(); + items.forEach((item) => { + Object.keys(item).forEach((key) => { + keySet.add(key); + }); + }); + return keySet; + }; + + /** + * Dynamically builds column stat table columns based on the fields present in the dataset profile data. + */ + const buildColumnStatsColumns = (tableData: Array) => { + // Optional columns. Defines how to render a column given a value exists somewhere in the profile. + const optionalColumns: ColumnsType = [ + { + title: 'Min', + dataIndex: 'min', + render: (value) => value || unknownValue(), + }, + { + title: 'Max', + dataIndex: 'max', + render: (value) => value || unknownValue(), + }, + { + title: 'Mean', + dataIndex: 'mean', + render: (value) => value || unknownValue(), + }, + { + title: 'Median', + dataIndex: 'median', + render: (value) => value || unknownValue(), + }, + { + title: 'Null Count', + dataIndex: 'nullCount', + render: (value) => value || unknownValue(), + }, + { + title: 'Null %', + dataIndex: 'nullPercentage', + render: (value) => value || unknownValue(), + }, + { + title: 'Distinct Count', + dataIndex: 'distinctCount', + render: (value) => value || unknownValue(), + }, + { + title: 'Distinct %', + dataIndex: 'distinctPercentage', + render: (value) => value || unknownValue(), + }, + { + title: 'Std. Dev', + dataIndex: 'stdev', + render: (value) => value || unknownValue(), + }, + { + title: 'Sample Values', + dataIndex: 'sampleValues', + render: (sampleValues: Array) => { + return ( + (sampleValues && + sampleValues + .slice(0, sampleValues.length < 3 ? sampleValues?.length : 3) + .map((value) => {value})) || + unknownValue() + ); + }, + }, + ]; + + // Name column always required. 
+ const requiredColumns: ColumnsType = [ + { + title: 'Name', + dataIndex: 'name', + }, + ]; + + // Retrieves a set of names of columns that should be shown based on their presence in the data profile. + const columnsPresent: Set = getItemKeySet(tableData); + + // Compute the final columns to render. + const columns = [ + ...requiredColumns, + ...optionalColumns.filter((column: ColumnType) => columnsPresent.has(column.dataIndex as string)), + ]; + + // TODO: Support Quantiles && Distinct Value Frequencies. + return columns; + }; + + const columnStatsColumns = buildColumnStatsColumns(columnStatsTableData); + + const rowCount = (isPresent(profile?.rowCount) ? profile?.rowCount : -1) as number; + const rowCountTitle = (rowCount >= 0 && 'Rows') || 'Row Count Unknown'; + + const columnCount = (isPresent(profile?.columnCount) ? profile?.columnCount : -1) as number; + const columnCountTitle = (columnCount >= 0 && 'Columns') || 'Column Count Unknown'; + + return ( + <> + + + + + + + + record.name} + /> + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stats/stats/DatasetStatsSummarySubHeader.tsx b/datahub-web-react/src/app/entityV2/dataset/profile/stats/stats/DatasetStatsSummarySubHeader.tsx new file mode 100644 index 00000000000000..7337b4a4d76cd4 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stats/stats/DatasetStatsSummarySubHeader.tsx @@ -0,0 +1,48 @@ +import React from 'react'; +import { GetDatasetQuery } from '../../../../../../graphql/dataset.generated'; +import { DatasetStatsSummary as DatasetStatsSummaryObj, EntityType } from '../../../../../../types.generated'; +import { useBaseEntity } from '../../../../../entity/shared/EntityContext'; +import { useEntityRegistry } from '../../../../../useEntityRegistry'; +import { DatasetStatsSummary } from '../../../shared/DatasetStatsSummary'; + +export const DatasetStatsSummarySubHeader = () => { + const result = useBaseEntity(); + const dataset = result?.dataset; + + const 
maybeStatsSummary = dataset?.statsSummary as DatasetStatsSummaryObj; + + const latestFullTableProfile = dataset?.latestFullTableProfile?.[0]; + const latestPartitionProfile = dataset?.latestPartitionProfile?.[0]; + + const maybeLastProfile = latestFullTableProfile || latestPartitionProfile || undefined; + + const maybeLastOperation = dataset?.operations && dataset.operations.length ? dataset.operations[0] : undefined; + + const rowCount = maybeLastProfile?.rowCount; + const columnCount = maybeLastProfile?.columnCount; + const sizeInBytes = maybeLastProfile?.sizeInBytes; + const totalSqlQueries = dataset?.usageStats?.aggregations?.totalSqlQueries; + const queryCountLast30Days = maybeStatsSummary?.queryCountLast30Days; + const uniqueUserCountLast30Days = maybeStatsSummary?.uniqueUserCountLast30Days; + + const lastUpdatedMs = maybeLastOperation?.lastUpdatedTimestamp; + + const entityRegistry = useEntityRegistry(); + const platformName = dataset?.platform && entityRegistry.getDisplayName(EntityType.DataPlatform, dataset?.platform); + const platformLogoUrl = dataset?.platform?.properties?.logoUrl; + + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stories/documentation.ts b/datahub-web-react/src/app/entityV2/dataset/profile/stories/documentation.ts new file mode 100644 index 00000000000000..3b62c3e6cf0ec3 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stories/documentation.ts @@ -0,0 +1,13 @@ +import { EntityType } from '../../../../../types.generated'; + +export const sampleDocs = [ + { + url: 'https://www.google.com', + description: 'This doc spans the internet web', + author: { urn: 'urn:li:corpuser:1', username: '1', type: EntityType.CorpUser }, + created: { + time: 0, + actor: 'urn:li:corpuser:1', + }, + }, +]; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stories/lineageEntities.ts b/datahub-web-react/src/app/entityV2/dataset/profile/stories/lineageEntities.ts new file mode 100644 index 
00000000000000..15c04d9ebe2ae3 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stories/lineageEntities.ts @@ -0,0 +1,101 @@ +import { EntityType, FabricType, PlatformNativeType } from '../../../../../types.generated'; + +export const sampleUpstreamEntities = [ + { + name: 'Upstream HiveDataset', + type: EntityType.Dataset, + urn: 'abc', + platform: { + urn: 'urn:li:dataPlatform:hive', + name: 'Hive', + type: EntityType.DataPlatform, + }, + origin: FabricType.Prod, + description: 'this is a dataset', + platformNativeType: PlatformNativeType.Table, + tags: [], + created: { + time: 0, + }, + lastModified: { + time: 0, + }, + }, + { + name: 'Upstream KafkaDataset', + type: EntityType.Dataset, + urn: 'abc', + platform: { + urn: 'urn:li:dataPlatform:hive', + name: 'Hive', + type: EntityType.DataPlatform, + }, + origin: FabricType.Prod, + description: 'this is a dataset', + platformNativeType: PlatformNativeType.Table, + tags: [], + created: { + time: 0, + }, + lastModified: { + time: 0, + }, + }, +]; + +export const sampleDownstreamEntities = [ + { + name: 'Downstream HiveDataset', + type: EntityType.Dataset, + urn: 'abc', + platform: { + urn: 'urn:li:dataPlatform:hive', + name: 'Hive', + type: EntityType.DataPlatform, + }, + origin: FabricType.Prod, + description: 'this is a dataset', + platformNativeType: PlatformNativeType.Table, + tags: [], + created: { + time: 0, + }, + lastModified: { + time: 0, + }, + }, + { + name: 'Downstream KafkaDataset', + type: EntityType.Dataset, + urn: 'abc', + platform: { + urn: 'urn:li:dataPlatform:hive', + name: 'Hive', + type: EntityType.DataPlatform, + }, + origin: FabricType.Prod, + description: 'this is a dataset', + platformNativeType: PlatformNativeType.Table, + tags: [], + created: { + time: 0, + }, + lastModified: { + time: 0, + }, + }, +]; + +export const sampleRelationship = { + entities: sampleUpstreamEntities.map((entity) => ({ + entity, + created: { time: 0 }, + })), +}; + +export const 
sampleDownstreamRelationship = { + entities: sampleDownstreamEntities.map((entity) => ({ + entity, + created: { time: 0 }, + })), +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stories/properties.ts b/datahub-web-react/src/app/entityV2/dataset/profile/stories/properties.ts new file mode 100644 index 00000000000000..dc74984113df14 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stories/properties.ts @@ -0,0 +1,10 @@ +export const sampleProperties = [ + { + key: 'Number of Partitions', + value: '18', + }, + { + key: 'Cluster Name', + value: 'Testing', + }, +]; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stories/sampleSchema.ts b/datahub-web-react/src/app/entityV2/dataset/profile/stories/sampleSchema.ts new file mode 100644 index 00000000000000..2d7a226b948219 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stories/sampleSchema.ts @@ -0,0 +1,366 @@ +import { dataset3 } from '../../../../../Mocks'; +import { EntityType, Schema, SchemaMetadata, SchemaField, SchemaFieldDataType } from '../../../../../types.generated'; + +// Extending the schema type with an option for tags +export type TaggedSchemaField = { + tags: Tag[]; +} & SchemaField; + +export type Tag = { + name: string; + value?: string; + color: string; + category: string; + descriptor?: boolean; +}; + +export const sampleSchema: SchemaMetadata | Schema | null = { + name: 'MockSchema', + platformUrn: 'mock:urn', + version: 1, + hash: '', + fields: [ + { + fieldPath: 'id', + nullable: false, + description: 'order id', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + }, + { + fieldPath: 'name', + nullable: true, + description: 'the name of the order', + type: SchemaFieldDataType.String, + nativeDataType: 'string', + recursive: false, + }, + { + fieldPath: 'shipping_address', + nullable: true, + description: 'the address the order ships to', + type: SchemaFieldDataType.String, + 
nativeDataType: 'string', + recursive: false, + }, + { + fieldPath: 'count', + nullable: true, + description: 'the number of items in the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + }, + { + fieldPath: 'cost', + nullable: true, + description: 'the dollar value of the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + }, + { + fieldPath: 'was_returned', + nullable: true, + description: 'if the order was sent back', + type: SchemaFieldDataType.Boolean, + nativeDataType: 'boolean', + recursive: false, + }, + { + fieldPath: 'payload', + nullable: true, + description: 'payload attached to the order', + type: SchemaFieldDataType.Bytes, + nativeDataType: 'bytes', + recursive: false, + }, + { + fieldPath: 'payment_information', + nullable: true, + description: 'struct representing the payment information', + type: SchemaFieldDataType.Struct, + nativeDataType: 'struct', + recursive: false, + }, + ], + platformSchema: { + __typename: 'TableSchema', + schema: '{ "type": "record", "name": "SampleHdfsSchema", "namespace": "com.linkedin.dataset", "doc": "Sample HDFS dataset", "fields": [ { "name": "field_foo", "type": [ "string" ] }, { "name": "field_bar", "type": [ "boolean" ] } ] }', + }, +}; + +export const sampleSchemaWithTags: Schema = { + name: 'MockSchema', + platformUrn: 'mock:urn', + version: 1, + hash: '', + fields: [ + { + fieldPath: 'id', + nullable: false, + description: 'order id', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + globalTags: { + tags: [ + { + tag: { + urn: 'urn:li:tag:Legacy', + name: 'Legacy', + description: 'this is a legacy dataset', + type: EntityType.Tag, + }, + associatedUrn: 'mock:urn', + }, + ], + }, + glossaryTerms: { + terms: [ + { + term: { + type: EntityType.GlossaryTerm, + name: 'sample-glossary-term', + urn: 'urn:li:glossaryTerm:sample-glossary-term', + hierarchicalName: 'example.sample-glossary-term', + 
properties: { + name: 'sample-glossary-term', + description: 'sample definition', + definition: 'sample definition', + termSource: 'sample term source', + }, + }, + associatedUrn: 'mock:urn', + }, + ], + }, + }, + { + fieldPath: 'name', + nullable: true, + description: 'the name of the order', + type: SchemaFieldDataType.String, + nativeDataType: 'string', + recursive: false, + } as SchemaField, + { + fieldPath: 'shipping_address', + nullable: true, + description: 'the address the order ships to', + type: SchemaFieldDataType.String, + nativeDataType: 'string', + recursive: false, + } as SchemaField, + { + fieldPath: 'count', + nullable: true, + description: 'the number of items in the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + }, + { + fieldPath: 'cost', + nullable: true, + description: 'the dollar value of the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + } as SchemaField, + { + fieldPath: 'was_returned', + nullable: true, + description: 'if the order was sent back', + type: SchemaFieldDataType.Boolean, + nativeDataType: 'boolean', + recursive: false, + }, + { + fieldPath: 'payload', + nullable: true, + description: 'payload attached to the order', + type: SchemaFieldDataType.Bytes, + nativeDataType: 'bytes', + recursive: false, + }, + { + fieldPath: 'payment_information', + nullable: true, + description: 'struct representing the payment information', + type: SchemaFieldDataType.Struct, + nativeDataType: 'struct', + recursive: false, + } as SchemaField, + ], +}; + +export const sampleSchemaWithPkFk: SchemaMetadata = { + primaryKeys: ['name'], + foreignKeys: [ + { + name: 'constraint', + sourceFields: [ + { + urn: 'datasetUrn', + type: EntityType.Dataset, + parent: { urn: 'test', type: EntityType.Dataset }, + fieldPath: 'shipping_address', + }, + ], + foreignFields: [ + { + urn: dataset3.urn, + type: EntityType.Dataset, + parent: { urn: dataset3.name, type: 
EntityType.Dataset }, + fieldPath: 'address', + }, + ], + foreignDataset: dataset3, + }, + ], + name: 'MockSchema', + platformUrn: 'mock:urn', + version: 1, + hash: '', + fields: [ + { + fieldPath: 'id', + nullable: false, + description: 'order id', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + globalTags: { + tags: [ + { + tag: { + urn: 'urn:li:tag:Legacy', + name: 'Legacy', + description: 'this is a legacy dataset', + type: EntityType.Tag, + }, + associatedUrn: 'mock:urn', + }, + ], + }, + glossaryTerms: { + terms: [ + { + term: { + type: EntityType.GlossaryTerm, + urn: 'urn:li:glossaryTerm:sample-glossary-term', + name: 'sample-glossary-term', + hierarchicalName: 'example.sample-glossary-term', + properties: { + name: 'sample-glossary-term', + description: 'sample definition', + definition: 'sample definition', + termSource: 'sample term source', + }, + }, + associatedUrn: 'mock:urn', + }, + ], + }, + }, + { + fieldPath: 'name', + nullable: true, + description: 'the name of the order', + type: SchemaFieldDataType.String, + nativeDataType: 'string', + recursive: false, + } as SchemaField, + { + fieldPath: 'shipping_address', + nullable: true, + description: 'the address the order ships to', + type: SchemaFieldDataType.String, + nativeDataType: 'string', + recursive: false, + } as SchemaField, + { + fieldPath: 'count', + nullable: true, + description: 'the number of items in the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + }, + { + fieldPath: 'cost', + nullable: true, + description: 'the dollar value of the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + } as SchemaField, + { + fieldPath: 'was_returned', + nullable: true, + description: 'if the order was sent back', + type: SchemaFieldDataType.Boolean, + nativeDataType: 'boolean', + recursive: false, + }, + { + fieldPath: 'payload', + nullable: true, + description: 'payload 
attached to the order', + type: SchemaFieldDataType.Bytes, + nativeDataType: 'bytes', + recursive: false, + }, + { + fieldPath: 'payment_information', + nullable: true, + description: 'struct representing the payment information', + type: SchemaFieldDataType.Struct, + nativeDataType: 'struct', + recursive: false, + } as SchemaField, + ], +}; + +export const sampleSchemaWithoutFields: SchemaMetadata | Schema | null = { + name: 'MockSchema', + platformUrn: 'mock:urn', + version: 1, + hash: '', + fields: [], +}; + +export const sampleSchemaWithKeyValueFields: SchemaMetadata | Schema | null = { + name: 'MockSchema', + platformUrn: 'mock:urn', + version: 1, + hash: '', + fields: [ + { + fieldPath: '[key=True].[version=2.0].id', + nullable: true, + description: 'the number of items in the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + }, + { + fieldPath: 'count', + nullable: true, + description: 'the number of items in the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + }, + { + fieldPath: 'cost', + nullable: true, + description: 'the dollar value of the order', + type: SchemaFieldDataType.Number, + nativeDataType: 'number', + recursive: false, + } as SchemaField, + ], +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/profile/stories/stats.ts b/datahub-web-react/src/app/entityV2/dataset/profile/stories/stats.ts new file mode 100644 index 00000000000000..035acd5668e8ed --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/profile/stories/stats.ts @@ -0,0 +1,73 @@ +import { DatasetProfile } from '../../../../../types.generated'; + +export const completeSampleProfile: DatasetProfile = { + rowCount: 1000, + columnCount: 2000, + timestampMillis: 0, + fieldProfiles: [ + { + fieldPath: 'testColumn', + uniqueCount: 1, + uniqueProportion: 0.111, + nullCount: 2, + nullProportion: 0.222, + min: '3', + max: '4', + mean: '5', + median: '6', + stdev: '7', + sampleValues: 
['value1', 'value2', 'value3'], + }, + { + fieldPath: 'testColumn2', + uniqueCount: 8, + uniqueProportion: 0.333, + nullCount: 9, + nullProportion: 0.444, + min: '10', + max: '11', + mean: '12', + median: '13', + stdev: '14', + sampleValues: ['value4', 'value5', 'value6'], + }, + ], +}; + +export const missingFieldStatsProfile: DatasetProfile = { + rowCount: 1000, + columnCount: 2000, + timestampMillis: 0, +}; + +export const missingTableStatsProfile: DatasetProfile = { + timestampMillis: 0, + fieldProfiles: [ + { + fieldPath: 'testColumn', + uniqueCount: 1, + uniqueProportion: 0.111, + nullCount: 2, + nullProportion: 0.222, + min: '3', + max: '4', + mean: '5', + median: '6', + stdev: '7', + sampleValues: ['value1', 'value2', 'value3'], + }, + { + fieldPath: 'testColumn2', + uniqueCount: 8, + uniqueProportion: 0.333, + nullCount: 9, + nullProportion: 0.444, + min: '10', + max: '11', + mean: '12', + median: '13', + stdev: '14', + sampleValues: ['value4', 'value5', 'value6'], + }, + ], +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/shared/DatasetStatsSummary.tsx b/datahub-web-react/src/app/entityV2/dataset/shared/DatasetStatsSummary.tsx new file mode 100644 index 00000000000000..c94a6279acb633 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/shared/DatasetStatsSummary.tsx @@ -0,0 +1,122 @@ +import { Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components/macro'; +import { countFormatter, needsFormatting } from '../../../../utils/formatter'; +import LastUpdated from '../../../shared/LastUpdated'; +import { formatNumber, formatNumberWithoutAbbreviation } from '../../../shared/formatNumber'; +import { StatsSummary } from '../../shared/components/styled/StatsSummary'; +import { ANTD_GRAY } from '../../shared/constants'; +import { PercentileLabel } from '../../shared/stats/PercentileLabel'; +import ExpandingStat from './ExpandingStat'; + +const StatText = styled.span<{ color: string }>` + color: ${(props) => 
props.color}; + @media (min-width: 1160px) { + white-space: nowrap; + } + font-size: 12px; + font-family: 'Mulish', sans-serif; + color: #8894a9; +`; + +type Props = { + rowCount?: number | null; + columnCount?: number | null; + sizeInBytes?: number | null; + totalSqlQueries?: number | null; + queryCountLast30Days?: number | null; + queryCountPercentileLast30Days?: number | null; + uniqueUserCountLast30Days?: number | null; + uniqueUserPercentileLast30Days?: number | null; + lastUpdatedMs?: number | null; + color?: string; + platformName?: string; + platformLogoUrl?: string | null; + subTypes?: string[]; + mode?: 'normal' | 'tooltip-content'; +}; + +export const DatasetStatsSummary = ({ + rowCount, + columnCount, + // eslint-disable-next-line @typescript-eslint/no-unused-vars + sizeInBytes, + totalSqlQueries, + queryCountLast30Days, + queryCountPercentileLast30Days, + uniqueUserCountLast30Days, + uniqueUserPercentileLast30Days, + lastUpdatedMs, + color, + platformName, + platformLogoUrl, + subTypes, + mode = 'normal', +}: Props) => { + const isTooltipMode = mode === 'tooltip-content'; + const displayedColor = isTooltipMode ? '' : color ?? ANTD_GRAY[7]; + + const statsViews = [ + !!rowCount && ( + ( + + {/* */} + {isExpanded ? formatNumberWithoutAbbreviation(rowCount) : countFormatter(rowCount)} rows + {!!columnCount && ( + <> + ,{' '} + {isExpanded + ? formatNumberWithoutAbbreviation(columnCount) + : countFormatter(columnCount)}{' '} + columns + + )} + + )} + /> + ), + (!!queryCountLast30Days || !!totalSqlQueries) && ( + + {/* */} + {formatNumber(queryCountLast30Days || totalSqlQueries)}{' '} + {queryCountLast30Days ? 
<>queries : <>monthly queries} + {!!queryCountPercentileLast30Days && ( + + + + )} + + ), + !!uniqueUserCountLast30Days && ( + + {/* */} + {formatNumber(uniqueUserCountLast30Days)} users + {!!uniqueUserPercentileLast30Days && ( + + + + )} + + ), + !!lastUpdatedMs && ( + + + + ), + ].filter((stat) => stat); + + return <>{statsViews.length > 0 && }; +}; diff --git a/datahub-web-react/src/app/entityV2/dataset/shared/ExpandingStat.tsx b/datahub-web-react/src/app/entityV2/dataset/shared/ExpandingStat.tsx new file mode 100644 index 00000000000000..4e223b6e540588 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/shared/ExpandingStat.tsx @@ -0,0 +1,47 @@ +import React, { ReactNode, useEffect, useRef, useState } from 'react'; +import styled from 'styled-components'; + +const ExpandingStatContainer = styled.span<{ disabled: boolean; expanded: boolean; width: string }>` + max-width: 100%; + transition: width 250ms ease; +`; + +const ExpandingStat = ({ + disabled = false, + render, +}: { + disabled?: boolean; + + render: (isExpanded: boolean) => ReactNode; +}) => { + const contentRef = useRef(null); + const [width, setWidth] = useState('inherit'); + const [isExpanded, setIsExpanded] = useState(false); + + useEffect(() => { + if (!contentRef.current) return; + setWidth(`${contentRef.current.offsetWidth}px`); + }, [isExpanded]); + + const onMouseEnter = () => { + if (!disabled) setIsExpanded(true); + }; + + const onMouseLeave = () => { + if (!disabled) setIsExpanded(false); + }; + + return ( + + {render(isExpanded)} + + ); +}; + +export default ExpandingStat; diff --git a/datahub-web-react/src/app/entityV2/dataset/shared/FormattedBytesStat.tsx b/datahub-web-react/src/app/entityV2/dataset/shared/FormattedBytesStat.tsx new file mode 100644 index 00000000000000..5430f514ee9700 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/dataset/shared/FormattedBytesStat.tsx @@ -0,0 +1,12 @@ +import React from 'react'; +import { Tooltip } from '@components'; +import { 
formatBytes, formatNumberWithoutAbbreviation } from '../../../shared/formatNumber'; + +export const FormattedBytesStat = ({ bytes }: { bytes: number }) => { + const formattedBytes = formatBytes(bytes); + return ( + + {formattedBytes.number} {formattedBytes.unit} + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/domain/DataProductsTab/CreateDataProductModal.tsx b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/CreateDataProductModal.tsx new file mode 100644 index 00000000000000..d0e74f4e031c5a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/CreateDataProductModal.tsx @@ -0,0 +1,80 @@ +import { Button, Modal, message } from 'antd'; +import React, { useState } from 'react'; +import DataProductBuilderForm from './DataProductBuilderForm'; +import { DataProductBuilderState } from './types'; +import { useCreateDataProductMutation } from '../../../../graphql/dataProduct.generated'; +import { DataProduct, Domain } from '../../../../types.generated'; + +export const MODAL_WIDTH = '75vw'; + +export const MODAL_BODY_STYLE = { + overflow: 'auto', + width: '80vw', + maxWidth: 800, +}; + +const DEFAULT_STATE = { + name: '', +}; + +type Props = { + domain: Domain; + onClose: () => void; + onCreateDataProduct: (dataProduct: DataProduct) => void; +}; + +export default function CreateDataProductModal({ domain, onCreateDataProduct, onClose }: Props) { + const [builderState, updateBuilderState] = useState(DEFAULT_STATE); + const [createDataProductMutation] = useCreateDataProductMutation(); + + function createDataProduct() { + createDataProductMutation({ + variables: { + input: { + domainUrn: domain.urn, + properties: { + name: builderState.name, + description: builderState.description || undefined, + }, + }, + }, + }) + .then(({ data, errors }) => { + if (!errors) { + message.success('Created Data Product!'); + if (data?.createDataProduct) { + const updateDataProduct = { ...data.createDataProduct, domain: { domain } }; + 
onCreateDataProduct(updateDataProduct as DataProduct); + } + onClose(); + } + }) + .catch(() => { + onClose(); + message.destroy(); + message.error({ content: 'Failed to create Data Product. An unexpected error occurred' }); + }); + } + + return ( + + + + + } + > + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductBuilderForm.tsx b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductBuilderForm.tsx new file mode 100644 index 00000000000000..b5a27a6e1b8766 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductBuilderForm.tsx @@ -0,0 +1,52 @@ +import { Form, Input, Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { Editor as MarkdownEditor } from '../../shared/tabs/Documentation/components/editor/Editor'; +import { ANTD_GRAY } from '../../shared/constants'; +import { DataProductBuilderState } from './types'; + +const StyledEditor = styled(MarkdownEditor)` + border: 1px solid ${ANTD_GRAY[4]}; +`; + +type Props = { + builderState: DataProductBuilderState; + updateBuilderState: (newState: DataProductBuilderState) => void; +}; + +export default function DataProductBuilderForm({ builderState, updateBuilderState }: Props) { + function updateName(name: string) { + updateBuilderState({ + ...builderState, + name, + }); + } + + function updateDescription(description: string) { + updateBuilderState({ + ...builderState, + description, + }); + } + + return ( +
+ Name} + required + > + updateName(e.target.value)} + placeholder="Revenue Dashboards" + /> + + Description}> + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductResult.tsx b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductResult.tsx new file mode 100644 index 00000000000000..4036be8cca2e73 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductResult.tsx @@ -0,0 +1,91 @@ +import { useEntityContext } from '@src/app/entity/shared/EntityContext'; +import { Button } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { DataProduct, EntityType } from '../../../../types.generated'; +import { useEntityRegistryV2 } from '../../../useEntityRegistry'; +import { PreviewType } from '../../Entity'; +import EditDataProductModal from './EditDataProductModal'; +import { REDESIGN_COLORS } from '../../shared/constants'; +import useDeleteEntity from '../../shared/EntityDropdown/useDeleteEntity'; + +const TransparentButton = styled(Button)` + color: ${REDESIGN_COLORS.RED_ERROR}; + font-size: 12px; + box-shadow: none; + border: none; + display: none; + padding: unset; + align-items: center; + &&& span { + font-size: 12px; + } + + &:hover { + transition: 0.15s; + opacity: 0.9; + color: ${REDESIGN_COLORS.RED_ERROR}; + } +`; + +const ResultWrapper = styled.div` + padding: 20px; + display: flex; + align-items: center; + border: 1px solid #ebecf0; + background: ${REDESIGN_COLORS.WHITE}; + border-radius: 10px; + + &:hover ${TransparentButton} { + display: flex; + } +`; + +const PreviewWrapper = styled.div` + position: relative; + flex: 1; + max-width: 100%; +`; + +interface Props { + dataProduct: DataProduct; + onUpdateDataProduct: (dataProduct: DataProduct) => void; + setDeletedDataProductUrns: React.Dispatch>; +} + +export default function DataProductResult({ dataProduct, onUpdateDataProduct, setDeletedDataProductUrns }: Props) { + const 
entityRegistry = useEntityRegistryV2(); + const { refetch } = useEntityContext(); + const [isEditModalVisible, setIsEditModalVisible] = useState(false); + + function deleteDataProduct() { + setDeletedDataProductUrns((currentUrns) => [...currentUrns, dataProduct.urn]); + } + + const { onDeleteEntity } = useDeleteEntity(dataProduct.urn, dataProduct.type, dataProduct, deleteDataProduct); + + function onDeleteDataProduct() { + onDeleteEntity(); + setTimeout(() => refetch(), 3000); + } + + return ( + <> + + + {entityRegistry.renderPreview(EntityType.DataProduct, PreviewType.PREVIEW, dataProduct, { + onDelete: onDeleteDataProduct, + onEdit: () => setIsEditModalVisible(true), + })} + + + {isEditModalVisible && ( + setIsEditModalVisible(false)} + onUpdateDataProduct={onUpdateDataProduct} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductsTab.tsx b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductsTab.tsx new file mode 100644 index 00000000000000..f010b804488ebc --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/DataProductsTab.tsx @@ -0,0 +1,171 @@ +import { Button, Empty, Pagination } from 'antd'; +import { LoadingOutlined, PlusOutlined } from '@ant-design/icons'; +import React, { useState } from 'react'; +import * as QueryString from 'query-string'; +import { useLocation } from 'react-router'; +import styled from 'styled-components'; +import { useGetSearchResultsForMultipleQuery } from '../../../../graphql/search.generated'; +import { DataProduct, Domain, EntityType } from '../../../../types.generated'; +import TabToolbar from '../../shared/components/styled/TabToolbar'; +import { SearchBar } from '../../../search/SearchBar'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { scrollToTop } from '../../../shared/searchUtils'; +import { DomainsPaginationContainer } from '../../../domain/DomainsList'; +import { ANTD_GRAY, REDESIGN_COLORS } from 
'../../shared/constants'; +import { useEntityContext, useEntityData } from '../../../entity/shared/EntityContext'; +import { DOMAINS_FILTER_NAME } from '../../../search/utils/constants'; +import DataProductResult from './DataProductResult'; +import CreateDataProductModal from './CreateDataProductModal'; + +const DataProductsPaginationWrapper = styled(DomainsPaginationContainer)` + justify-content: center; +`; + +const ResultsWrapper = styled.div` + height: auto; + overflow: auto; + flex: 1; + position: relative; + width: 100%; + display: flex; + flex-direction: column; + padding: 16px; + gap: 12px; + background: ${REDESIGN_COLORS.BACKGROUND}; +`; + +const StyledLoading = styled(LoadingOutlined)` + font-size: 32px; +`; + +const LoadingWrapper = styled.div` + display: flex; + justify-content: center; + margin-top: 25%; +`; + +const DEFAULT_PAGE_SIZE = 10; + +export default function DataProductsTab() { + const { refetch } = useEntityContext(); + const { entityData } = useEntityData(); + const entityRegistry = useEntityRegistry(); + const location = useLocation(); + const params = QueryString.parse(location.search, { arrayFormat: 'comma' }); + const paramsQuery = (params?.query as string) || undefined; + const [query, setQuery] = useState(paramsQuery); + const [page, setPage] = useState(1); + const [isCreateModalVisible, setIsCreateModalVisible] = useState(params.createModal === 'true'); + const [createdDataProducts, setCreatedDataProducts] = useState([]); + const [editedDataProducts, setEditedDataProducts] = useState([]); + const [deletedDataProductUrns, setDeletedDataProductUrns] = useState([]); + + const start = (page - 1) * DEFAULT_PAGE_SIZE; + const domainUrn = entityData?.urn || ''; + + const { data, loading } = useGetSearchResultsForMultipleQuery({ + skip: !domainUrn, + variables: { + input: { + types: [EntityType.DataProduct], + query: query || '', + start, + count: DEFAULT_PAGE_SIZE, + orFilters: [{ and: [{ field: DOMAINS_FILTER_NAME, values: [domainUrn] }] 
}], + searchFlags: { skipCache: true }, + }, + }, + fetchPolicy: 'no-cache', + }); + const totalResults = data?.searchAcrossEntities?.total || 0; + const searchResults = data?.searchAcrossEntities?.searchResults.map((r) => r.entity) || []; + const dataProducts = [...createdDataProducts, ...searchResults]; + const displayedDataProducts = dataProducts + .map( + (dataProduct) => + editedDataProducts.find((editedDataProduct) => editedDataProduct.urn === dataProduct.urn) || + dataProduct, + ) + .filter((dataProduct) => !deletedDataProductUrns.includes(dataProduct.urn)); + + const onChangePage = (newPage: number) => { + scrollToTop(); + setPage(newPage); + }; + + function onCreateDataProduct(dataProduct: DataProduct) { + setCreatedDataProducts([dataProduct, ...createdDataProducts]); + setTimeout(() => refetch(), 3000); + } + + function onUpdateDataProduct(dataProduct: DataProduct) { + setEditedDataProducts([dataProduct, ...editedDataProducts]); + } + + return ( + <> + + + null} + onQueryChange={(q) => setQuery(q && q.length > 0 ? 
q : undefined)} + entityRegistry={entityRegistry} + hideRecommendations + /> + + + {!loading && !displayedDataProducts.length && ( + + )} + {loading && ( + + + + )} + {!loading && + displayedDataProducts.map((dataProduct) => ( + + ))} + + + + + {isCreateModalVisible && ( + setIsCreateModalVisible(false)} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/domain/DataProductsTab/EditDataProductModal.tsx b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/EditDataProductModal.tsx new file mode 100644 index 00000000000000..67b3b2904b512f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/EditDataProductModal.tsx @@ -0,0 +1,69 @@ +import { Button, Modal, message } from 'antd'; +import React, { useState } from 'react'; +import DataProductBuilderForm from './DataProductBuilderForm'; +import { DataProductBuilderState } from './types'; +import { useUpdateDataProductMutation } from '../../../../graphql/dataProduct.generated'; +import { DataProduct } from '../../../../types.generated'; +import { MODAL_BODY_STYLE, MODAL_WIDTH } from './CreateDataProductModal'; + +type Props = { + dataProduct: DataProduct; + onClose: () => void; + onUpdateDataProduct: (dataProduct: DataProduct) => void; +}; + +export default function EditDataProductModal({ dataProduct, onUpdateDataProduct, onClose }: Props) { + const [builderState, updateBuilderState] = useState({ + name: dataProduct.properties?.name || '', + description: dataProduct.properties?.description || '', + }); + const [updateDataProductMutation] = useUpdateDataProductMutation(); + + function updateDataProduct() { + updateDataProductMutation({ + variables: { + urn: dataProduct.urn, + input: { + name: builderState.name, + description: builderState.description || undefined, + }, + }, + }) + .then(({ data, errors }) => { + if (!errors) { + message.success('Updates Data Product!'); + if (data?.updateDataProduct) { + onUpdateDataProduct(data.updateDataProduct as DataProduct); + } + 
onClose(); + } + }) + .catch(() => { + onClose(); + message.destroy(); + message.error({ content: 'Failed to update Data Product. An unexpected error occurred' }); + }); + } + + return ( + + + + + } + > + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/domain/DataProductsTab/types.ts b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/types.ts new file mode 100644 index 00000000000000..1ed3ede39cfbe4 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DataProductsTab/types.ts @@ -0,0 +1,4 @@ +export type DataProductBuilderState = { + name: string; + description?: string; +}; diff --git a/datahub-web-react/src/app/entityV2/domain/DomainEntitiesTab.tsx b/datahub-web-react/src/app/entityV2/domain/DomainEntitiesTab.tsx new file mode 100644 index 00000000000000..27894eaddfba3f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DomainEntitiesTab.tsx @@ -0,0 +1,36 @@ +import { SearchCardContext } from '@app/entityV2/shared/SearchCardContext'; +import React from 'react'; +import { useEntityData } from '../../entity/shared/EntityContext'; +import { EntityType } from '../../../types.generated'; +import { EmbeddedListSearchSection } from '../shared/components/styled/search/EmbeddedListSearchSection'; +import { UnionType } from '../../search/utils/constants'; + +export const DomainEntitiesTab = () => { + const { urn, entityType } = useEntityData(); + + let fixedFilter; + // Set a fixed filter corresponding to the current entity urn. 
+ if (entityType === EntityType.Domain) { + fixedFilter = { + field: 'domains', + values: [urn], + }; + } + + const excludeFromFilter = { field: '_entityType', values: ['DATA_PRODUCT'], value: 'DATA_PRODUCT', negated: true }; + + return ( + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/domain/DomainEntity.tsx b/datahub-web-react/src/app/entityV2/domain/DomainEntity.tsx new file mode 100644 index 00000000000000..f5dad4847cb191 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/DomainEntity.tsx @@ -0,0 +1,236 @@ +import { AppstoreOutlined, FileDoneOutlined, FileOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { useGetDomainQuery } from '../../../graphql/domain.generated'; +import { Domain, EntityType, SearchResult } from '../../../types.generated'; +import DomainIcon from '../../domain/DomainIcon'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { EntityProfileTab } from '../shared/constants'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import SidebarEntitiesSection from '../shared/containers/profile/sidebar/Domain/SidebarEntitiesSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { EntityActionItem } from '../shared/entity/EntityActions'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { SUMMARY_TAB_ICON } from 
'../shared/summary/HeaderComponents'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import TabNameWithCount from '../shared/tabs/Entity/TabNameWithCount'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import DataProductsTab from './DataProductsTab/DataProductsTab'; +import { DomainEntitiesTab } from './DomainEntitiesTab'; +import { Preview } from './preview/Preview'; +import { DomainSummaryTab } from './summary/DomainSummaryTab'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([ + EntityMenuItems.MOVE, + EntityMenuItems.SHARE, + EntityMenuItems.DELETE, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub Domain entity. + */ +export class DomainEntity implements Entity { + type: EntityType = EntityType.Domain; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + if (styleType === IconStyleType.SVG) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'domain'; + + getPathName = () => this.getGraphName(); + + getEntityName = () => 'Domain'; + + getCollectionName = () => 'Domains'; + + useEntityQuery = useGetDomainQuery; + + renderProfile = (urn: string) => ( + { + const assetCount = entityData?.entities?.total; + return ; + }, + component: DomainEntitiesTab, + icon: AppstoreOutlined, + }, + { + id: EntityProfileTab.DOCUMENTATION_TAB, + name: 'Documentation', + component: DocumentationTab, + icon: FileOutlined, + }, + { + id: EntityProfileTab.DATA_PRODUCTS_TAB, + name: 'Data Products', + getDynamicName: (entityData, _, loading) => { + const dataProductsCount = entityData?.dataProducts?.total; + 
return ; + }, + component: DataProductsTab, + icon: FileDoneOutlined, + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + }, + ]} + sidebarSections={this.getSidebarSections()} + sidebarTabs={this.getSidebarTabs()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarEntitiesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (previewType: PreviewType, data: Domain) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as Domain; + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + displayName = (data: Domain) => { + return data?.properties?.name || data?.id || data.urn; + }; + + getOverridePropertiesFromEntity = (data: Domain) => { + return { + name: data.properties?.name, + }; + }; + + getGenericEntityProperties = (data: Domain) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + // TODO.. Determine whether SOFT_DELETE should go into here. 
+ return new Set([EntityCapabilityType.OWNERS]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/domain/preview/DomainEntitiesSnippet.tsx b/datahub-web-react/src/app/entityV2/domain/preview/DomainEntitiesSnippet.tsx new file mode 100644 index 00000000000000..6d36964004d64f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/preview/DomainEntitiesSnippet.tsx @@ -0,0 +1,45 @@ +import { DatabaseOutlined, FileDoneOutlined } from '@ant-design/icons'; +import { VerticalDivider } from '@remirror/react'; +import React from 'react'; +import styled from 'styled-components'; +import { SearchResultFields_Domain_Fragment } from '../../../../graphql/search.generated'; +import { ANTD_GRAY_V2 } from '../../shared/constants'; +import DomainIcon from '../../../domain/DomainIcon'; +import { pluralize } from '../../../shared/textUtil'; + +const Wrapper = styled.div` + color: ${ANTD_GRAY_V2[8]}; + font-size: 12px; + display: flex; + align-items: center; + + svg { + margin-right: 4px; + } +`; + +const StyledDivider = styled(VerticalDivider)` + &&& { + margin: 0 8px; + } +`; + +interface Props { + domain: SearchResultFields_Domain_Fragment; +} + +export default function DomainEntitiesSnippet({ domain }: Props) { + const entityCount = domain.entities?.total || 0; + const subDomainCount = domain.children?.total || 0; + const dataProductCount = domain.dataProducts?.total || 0; + + return ( + + {entityCount} {entityCount === 1 ? 
'entity' : 'entities'} + + {subDomainCount} {pluralize(subDomainCount, 'sub-domain')} + + {dataProductCount} {pluralize(dataProductCount, 'data product')} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/domain/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/domain/preview/Preview.tsx new file mode 100644 index 00000000000000..a2189ea1c51f15 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/preview/Preview.tsx @@ -0,0 +1,66 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { Domain, EntityType, Owner, SearchInsight } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import DomainEntitiesSnippet from './DomainEntitiesSnippet'; +import DomainIcon from '../../../domain/DomainIcon'; +import EntityCount from '../../shared/containers/profile/header/EntityCount'; +import { DomainColoredIcon } from '../../shared/links/DomainColoredIcon'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; +import { PreviewType } from '../../Entity'; + +export const Preview = ({ + domain, + urn, + data, + name, + description, + owners, + insights, + logoComponent, + entityCount, + headerDropdownItems, + previewType, +}: { + domain: Domain; + urn: string; + data: GenericEntityProperties | null; + name: string; + description?: string | null; + owners?: Array | null; + insights?: Array | null; + logoComponent?: JSX.Element; + entityCount?: number; + headerDropdownItems?: Set; + previewType?: PreviewType; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + } + owners={owners} + insights={insights} + logoComponent={logoComponent} + parentEntities={domain.parentDomains?.domains} + snippet={} + subHeader={} + entityIcon={} + headerDropdownItems={headerDropdownItems} + previewType={previewType} + /> + ); +}; diff --git 
a/datahub-web-react/src/app/entityV2/domain/summary/ContentSectionLoading.tsx b/datahub-web-react/src/app/entityV2/domain/summary/ContentSectionLoading.tsx new file mode 100644 index 00000000000000..63bfd2b0f689df --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/summary/ContentSectionLoading.tsx @@ -0,0 +1,24 @@ +import * as React from 'react'; +import { Skeleton, Space } from 'antd'; +import styled from 'styled-components'; +import { ANTD_GRAY } from '../../shared/constants'; + +const TypeSkeleton = styled(Skeleton.Input)` + && { + width: 60px; + height: 60px; + border-radius: 8px; + background-color: ${ANTD_GRAY[3]}; + } +`; + +export default function ContentSectionLoading() { + return ( + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/domain/summary/ContentsSection.tsx b/datahub-web-react/src/app/entityV2/domain/summary/ContentsSection.tsx new file mode 100644 index 00000000000000..921e5304fe71e6 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/summary/ContentsSection.tsx @@ -0,0 +1,102 @@ +import React, { useEffect } from 'react'; +import { useHistory } from 'react-router'; +import styled from 'styled-components'; +import { AppstoreOutlined } from '@ant-design/icons'; +import { useEntityContext, useEntityData } from '../../../entity/shared/EntityContext'; +import { useGetDomainEntitySummaryQuery } from '../../../../graphql/domain.generated'; +import { + getContentsSummary, + getDomainEntitiesFilterUrl, + navigateToDomainEntities, +} from '../../shared/containers/profile/sidebar/Domain/utils'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import ContentSectionLoading from './ContentSectionLoading'; +import { EntityCountCard } from '../../../sharedV2/cards/EntityCountCard'; +import { pluralize } from '../../../shared/textUtil'; +import { + SectionContainer, + SummaryTabHeaderTitle, + SummaryTabHeaderWrapper, +} from '../../shared/summary/HeaderComponents'; +import { getContentTypeIcon } from 
'../../shared/summary/IconComponents'; +import { ANTD_GRAY } from '../../shared/constants'; +import { Carousel } from '../../../sharedV2/carousel/Carousel'; + +const ViewAllButton = styled.div` + color: ${ANTD_GRAY[7]}; + padding: 2px; + :hover { + cursor: pointer; + color: ${ANTD_GRAY[8]}; + text-decoration: underline; + } +`; + +export const ContentsSection = () => { + const { entityState } = useEntityContext(); + const history = useHistory(); + const entityRegistry = useEntityRegistry(); + const { urn, entityType } = useEntityData(); + const { data, loading, refetch } = useGetDomainEntitySummaryQuery({ + variables: { + urn, + }, + }); + + const contentsSummary = data?.aggregateAcrossEntities && getContentsSummary(data.aggregateAcrossEntities as any); + const contentsCount = contentsSummary?.total || 0; + const hasContents = contentsCount > 0; + + const shouldRefetch = entityState?.shouldRefetchContents; + useEffect(() => { + if (shouldRefetch) { + refetch(); + entityState?.setShouldRefetchContents(false); + } + }, [shouldRefetch, entityState, refetch]); + + if (!hasContents) { + return null; + } + + return ( + + + } title={`Assets (${contentsCount})`} /> + navigateToDomainEntities(urn, entityType, history, entityRegistry)}> + View all + + + {loading && } + + {!loading && + contentsSummary?.types.map((summary) => { + const { type, count, entityType: summaryEntityType } = summary; + const typeName = ( + type || + entityRegistry.getEntityName(summaryEntityType) || + summaryEntityType + ).toLocaleLowerCase(); + const link = getDomainEntitiesFilterUrl( + urn, + entityType, + entityRegistry, + [summary.entityType], + summary.type ? 
[summary.type] : undefined, + ); + return ( + + ); + })} + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/domain/summary/DataProductsSection.tsx b/datahub-web-react/src/app/entityV2/domain/summary/DataProductsSection.tsx new file mode 100644 index 00000000000000..81db264f9f069f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/summary/DataProductsSection.tsx @@ -0,0 +1,93 @@ +import React from 'react'; +import styled from 'styled-components'; +import { useHistory } from 'react-router'; +import AddRoundedIcon from '@mui/icons-material/AddRounded'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import ContentSectionLoading from './ContentSectionLoading'; +import { useGetSearchResultsForMultipleQuery } from '../../../../graphql/search.generated'; +import { DataProduct, EntityType } from '../../../../types.generated'; +import { DOMAINS_FILTER_NAME } from '../../../searchV2/utils/constants'; +import { DataProductMiniPreview } from '../../shared/links/DataProductMiniPreview'; +import { + SectionContainer, + SummaryTabHeaderTitle, + SummaryTabHeaderWrapper, + SummaryHeaderButtonGroup, +} from '../../shared/summary/HeaderComponents'; +import { navigateToDomainDataProducts } from '../../shared/containers/profile/sidebar/Domain/utils'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType } from '../../../entity/Entity'; +import { ANTD_GRAY } from '../../shared/constants'; +import { Carousel } from '../../../sharedV2/carousel/Carousel'; +import SectionActionButton from '../../shared/containers/profile/sidebar/SectionActionButton'; + +const ViewAllButton = styled.div` + color: ${ANTD_GRAY[7]}; + padding: 2px; + + :hover { + cursor: pointer; + color: ${ANTD_GRAY[8]}; + text-decoration: underline; + } +`; + +const StyledCarousel = styled(Carousel)` + align-items: stretch; +`; + +export const DataProductsSection = () => { + const { urn, entityType, entityData } = useEntityData(); + const history 
= useHistory(); + const domainUrn = entityData?.urn || ''; + const entityRegistry = useEntityRegistry(); + + const { data, loading } = useGetSearchResultsForMultipleQuery({ + skip: !domainUrn, + variables: { + input: { + types: [EntityType.DataProduct], + query: '', + start: 0, + count: 5, + orFilters: [{ and: [{ field: DOMAINS_FILTER_NAME, values: [domainUrn] }] }], + searchFlags: { skipCache: true }, + }, + }, + }); + + const dataProducts = data?.searchAcrossEntities?.searchResults.map((r) => r.entity) || []; + const numDataProducts = data?.searchAcrossEntities?.total || 0; + + if (!numDataProducts) { + return null; + } + + return ( + + + + + } + onClick={() => navigateToDomainDataProducts(urn, entityType, history, entityRegistry, true)} + /> + + navigateToDomainDataProducts(urn, entityType, history, entityRegistry)}> + View all + + + {loading && } + + {!loading && + dataProducts.map((product) => ( + + ))} + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/domain/summary/DocumentationSection.tsx b/datahub-web-react/src/app/entityV2/domain/summary/DocumentationSection.tsx new file mode 100644 index 00000000000000..4990f0b87c7f3c --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/summary/DocumentationSection.tsx @@ -0,0 +1,98 @@ +import { EditOutlined, ExpandAltOutlined, FileOutlined } from '@ant-design/icons'; +import { Button, Divider, Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { useEntityData, useRefetch, useRouteToTab } from '../../../entity/shared/EntityContext'; +import { AddLinkModal } from '../../shared/components/styled/AddLinkModal'; +import { EmptyTab } from '../../shared/components/styled/EmptyTab'; +import { ANTD_GRAY } from '../../shared/constants'; +import { LinkList } from '../../shared/tabs/Documentation/components/LinkList'; +import { Editor } from '../../shared/tabs/Documentation/components/editor/Editor'; + +const Header = styled.div` + display: flex; + align-items: 
start; + justify-content: space-between; + padding: 16px 4px; +`; + +const Title = styled(Typography.Title)` + && { + color: ${ANTD_GRAY[9]}; + padding: 0px; + margin: 0px; + display: flex; + align-items: center; + } +`; + +const ThinDivider = styled(Divider)` + && { + padding-top: 0px; + padding-bottom: 0px; + margin-top: 0px; + margin-bottom: 20px; + } +`; + +const Documentation = styled.div` + .remirror-editor.ProseMirror { + padding: 0px 8px; + } +`; + +const StyledFileOutlined = styled(FileOutlined)` + && { + font-size: 16px; + margin-right: 8px; + } +`; + +export const DocumentationSection = () => { + // The summary tab consists of modules + const { entityData } = useEntityData(); + const refetch = useRefetch(); + const routeToTab = useRouteToTab(); + + const description = entityData?.editableProperties?.description || entityData?.properties?.description || ''; + const hasDescription = description || description !== ''; + + return ( + <> +
+ + <StyledFileOutlined /> + About + + {hasDescription && ( + + )} +
+ + + {(hasDescription && ) || ( + + + + + )} + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/domain/summary/DomainSummaryTab.tsx b/datahub-web-react/src/app/entityV2/domain/summary/DomainSummaryTab.tsx new file mode 100644 index 00000000000000..0a02d9fbad0f30 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/summary/DomainSummaryTab.tsx @@ -0,0 +1,17 @@ +import React from 'react'; +import { ContentsSection } from './ContentsSection'; +import { DataProductsSection } from './DataProductsSection'; +import SummaryAboutSection from '../../shared/summary/SummaryAboutSection'; +import { SummaryTabWrapper } from '../../shared/summary/HeaderComponents'; +import OwnersSection from './OwnersSection'; + +export const DomainSummaryTab = () => { + return ( + + + + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/domain/summary/OwnerDetail.tsx b/datahub-web-react/src/app/entityV2/domain/summary/OwnerDetail.tsx new file mode 100644 index 00000000000000..39eff2d6678638 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/summary/OwnerDetail.tsx @@ -0,0 +1,56 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Tooltip } from '@components'; +import { useEntityRegistryV2 } from '../../../useEntityRegistry'; +import { EntityType, Owner } from '../../../../types.generated'; +import CustomAvatar from '../../../shared/avatar/CustomAvatar'; +import { REDESIGN_COLORS } from '../../shared/constants'; + +const Details = styled.div` + display: flex; + align-items: center; + gap: 5px; + color: ${REDESIGN_COLORS.SUBTITLE}; + font-size: 14px; + font-weight: 500; +`; + +const OwnerName = styled.div` + width: 110px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +`; + +interface Props { + owner: Owner; +} + +const OwnerDetail = ({ owner }: Props) => { + const entityRegistry = useEntityRegistryV2(); + + const ownerName = entityRegistry.getDisplayName(EntityType.CorpUser, owner.owner); + + const 
ownerPictureLink = owner.owner.editableProperties?.pictureLink || undefined; + + const avatar: React.ReactNode = ( + + ); + + return ( + <> + {!!ownerName && ( + <> +
+
{avatar}
+ + {ownerName} + +
+ + )} + + ); +}; + +export default OwnerDetail; diff --git a/datahub-web-react/src/app/entityV2/domain/summary/OwnersSection.tsx b/datahub-web-react/src/app/entityV2/domain/summary/OwnersSection.tsx new file mode 100644 index 00000000000000..0c80bd565e935c --- /dev/null +++ b/datahub-web-react/src/app/entityV2/domain/summary/OwnersSection.tsx @@ -0,0 +1,89 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Typography } from 'antd'; +import { UserOutlined } from '@ant-design/icons'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import { Owner, OwnershipTypeEntity } from '../../../../types.generated'; +import { getOwnershipTypeName } from '../../shared/containers/profile/sidebar/Ownership/ownershipUtils'; +import { REDESIGN_COLORS } from '../../shared/constants'; +import OwnerDetail from './OwnerDetail'; +import { SummaryTabHeaderTitle } from '../../shared/summary/HeaderComponents'; + +const OwnershipTypeNameText = styled(Typography.Text)` + font-weight: 500; + font-size: 12px; + color: ${REDESIGN_COLORS.DARK_GREY}; +`; + +const OwnersContainer = styled.div` + display: flex; + flex-direction: row; + flex-wrap: wrap; + gap: 8px; +`; + +const OwnershipContainer = styled.div` + display: flex; + flex-direction: column; + flex-wrap: wrap; + gap: 8px; +`; + +const SectionContainer = styled.div` + display: flex; + flex-direction: column; + gap: 12px; +`; + +const Details = styled.div` + display: flex; + flex-direction: row; + flex-wrap: wrap; + gap: 24px; +`; + +const OwnersSection = () => { + const { entityData } = useEntityData(); + const ownersEmpty = !entityData?.ownership?.owners?.length; + const ownershipTypesMap: Map = new Map(); + const ownersByTypeMap: Map = new Map(); + entityData?.ownership?.owners?.forEach((owner) => { + const ownershipType = owner?.ownershipType; + const ownershipTypeName = getOwnershipTypeName(ownershipType); + // If ownership type is not in the map, add it + if (ownershipType && 
!ownershipTypesMap.has(ownershipTypeName)) { + ownershipTypesMap.set(ownershipTypeName, ownershipType); + } + if (!ownersByTypeMap.has(ownershipTypeName)) { + ownersByTypeMap.set(ownershipTypeName, []); + } + ownersByTypeMap.get(ownershipTypeName)?.push(owner); + }); + // Sort ownership types by name alphabetically + const ownershipTypeNames = Array.from(ownershipTypesMap.keys()).sort(); + + if (ownersEmpty) return null; + + return ( + + } /> +
+ {ownershipTypeNames.map((ownershipTypeName) => { + const owners = ownersByTypeMap.get(ownershipTypeName) as Owner[]; + return ( + + {ownershipTypeName} + + {owners.map((owner) => ( + + ))} + + + ); + })} +
+
+ ); +}; + +export default OwnersSection; diff --git a/datahub-web-react/src/app/entityV2/glossaryNode/ChildrenTab.tsx b/datahub-web-react/src/app/entityV2/glossaryNode/ChildrenTab.tsx new file mode 100644 index 00000000000000..2c254908321e61 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryNode/ChildrenTab.tsx @@ -0,0 +1,39 @@ +import React from 'react'; +import { EntityType, GlossaryNode, GlossaryTerm } from '../../../types.generated'; +import EmptyGlossarySection from '../../glossaryV2/EmptyGlossarySection'; +import GlossaryEntitiesList from '../../glossaryV2/GlossaryEntitiesList'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { sortGlossaryTerms } from '../glossaryTerm/utils'; +import { useEntityData } from '../../entity/shared/EntityContext'; +import { sortGlossaryNodes } from './utils'; + +function ChildrenTab() { + const { entityData } = useEntityData(); + const entityRegistry = useEntityRegistry(); + + if (!entityData) return <>; + + const childNodes = entityData?.children?.relationships + .filter((child) => child.entity?.type === EntityType.GlossaryNode) + .sort((nodeA, nodeB) => sortGlossaryNodes(entityRegistry, nodeA.entity, nodeB.entity)) + .map((child) => child.entity); + const childTerms = entityData?.children?.relationships + .filter((child) => child.entity?.type === EntityType.GlossaryTerm) + .sort((termA, termB) => sortGlossaryTerms(entityRegistry, termA.entity, termB.entity)) + .map((child) => child.entity); + + const hasTermsOrNodes = !!childNodes?.length || !!childTerms?.length; + + if (hasTermsOrNodes) { + return ( + + ); + } + + return ; +} + +export default ChildrenTab; diff --git a/datahub-web-react/src/app/entityV2/glossaryNode/GlossaryNodeEntity.tsx b/datahub-web-react/src/app/entityV2/glossaryNode/GlossaryNodeEntity.tsx new file mode 100644 index 00000000000000..e819f00f20640c --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryNode/GlossaryNodeEntity.tsx @@ -0,0 +1,202 @@ +import { 
AppstoreOutlined, FileOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import React from 'react'; +import { BookmarksSimple } from '@phosphor-icons/react'; +import { useGetGlossaryNodeQuery } from '../../../graphql/glossaryNode.generated'; +import { EntityType, GlossaryNode, SearchResult } from '../../../types.generated'; +import { FetchedEntity } from '../../lineage/types'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { EntityActionItem } from '../shared/entity/EntityActions'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import ChildrenTab from './ChildrenTab'; +import { Preview } from './preview/Preview'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([ + EntityMenuItems.MOVE, + EntityMenuItems.SHARE, + EntityMenuItems.DELETE, + EntityMenuItems.ANNOUNCE, +]); + +class GlossaryNodeEntity implements Entity { + getLineageVizConfig?: ((entity: GlossaryNode) => FetchedEntity) | undefined; + + type: EntityType = EntityType.GlossaryNode; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if 
(styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + if (styleType === IconStyleType.ACCENT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + isLineageEnabled = () => false; + + getPathName = () => 'glossaryNode'; + + getCollectionName = () => 'Term Groups'; + + getEntityName = () => 'Term Group'; + + useEntityQuery = useGetGlossaryNodeQuery; + + renderProfile = (urn: string) => { + return ( + + ); + }; + + getSidebarSections = () => [ + { + component: SidebarAboutSection, + properties: { + hideLinksButton: true, + }, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + displayName = (data: GlossaryNode) => { + return data?.properties?.name || data?.urn; + }; + + getOverridePropertiesFromEntity = (data: GlossaryNode) => { + return { + name: this.displayName(data), + }; + }; + + renderSearch = (result: SearchResult) => { + return this.renderPreview(PreviewType.SEARCH, result.entity as GlossaryNode); + }; + + renderPreview = (previewType: PreviewType, data: GlossaryNode) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + platformLogoUrl = (_: GlossaryNode) => { + return undefined; + }; + + getGenericEntityProperties = (glossaryNode: GlossaryNode) => { + return getDataForEntityType({ + data: glossaryNode, + entityType: this.type, + getOverrideProperties: (data) => data, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.DEPRECATION, + 
EntityCapabilityType.SOFT_DELETE, + ]); + }; + + getGraphName = () => this.getPathName(); +} + +export default GlossaryNodeEntity; diff --git a/datahub-web-react/src/app/entityV2/glossaryNode/_tests_/utils.test.ts b/datahub-web-react/src/app/entityV2/glossaryNode/_tests_/utils.test.ts new file mode 100644 index 00000000000000..1b10c95e4be240 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryNode/_tests_/utils.test.ts @@ -0,0 +1,65 @@ +import { globalEntityRegistryV2 } from '@app/EntityRegistryProvider'; +import { EntityType } from '../../../../types.generated'; +import { sortGlossaryNodes } from '../utils'; + +describe('sortGlossaryNodes', () => { + it('should correctly sort glossary nodes when both nodes are provided', () => { + const nodeA = { + type: EntityType.GlossaryNode, + urn: 'urn:123', + properties: { + __typename: 'GlossaryNodeProperties', + name: 'test child 2', + }, + }; + const nodeB = { + type: EntityType.GlossaryNode, + urn: 'urn:li:456', + properties: { + __typename: 'GlossaryNodeProperties', + name: 'test child 1', + }, + }; + const result = sortGlossaryNodes(globalEntityRegistryV2, nodeA, nodeB); + expect(result).toBeGreaterThan(0); + }); + + it('should not sort glossary nodes when both nodes are provided in sorted order', () => { + const nodeA = { + type: EntityType.GlossaryNode, + urn: 'urn:123', + properties: { + __typename: 'GlossaryNodeProperties', + name: 'test child 1', + }, + }; + const nodeB = { + type: EntityType.GlossaryNode, + urn: 'urn:li:456', + properties: { + __typename: 'GlossaryNodeProperties', + name: 'test child 2', + }, + }; + const result = sortGlossaryNodes(globalEntityRegistryV2, nodeA, nodeB); + expect(result).toBeLessThan(0); + }); + + it('should correctly sort glossary nodes when only one node is provided', () => { + const nodeA = { + type: EntityType.GlossaryNode, + urn: 'urn:123', + properties: { + __typename: 'GlossaryNodeProperties', + name: 'test child 1', + }, + }; + const result = 
sortGlossaryNodes(globalEntityRegistryV2, nodeA); + expect(result).toBeGreaterThan(0); + }); + + it('should handle null nodes by considering them equal in sorting', () => { + const result = sortGlossaryNodes(globalEntityRegistryV2); + expect(result).toBe(0); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/glossaryNode/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/glossaryNode/preview/Preview.tsx new file mode 100644 index 00000000000000..392d56f4a8acdd --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryNode/preview/Preview.tsx @@ -0,0 +1,45 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { FolderOutlined } from '@ant-design/icons'; +import { EntityType, Owner, ParentNodesResult } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; +import { PreviewType } from '../../Entity'; + +export const Preview = ({ + urn, + data, + name, + description, + owners, + parentNodes, + headerDropdownItems, + previewType, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + description?: string | null; + owners?: Array | null; + parentNodes?: ParentNodesResult | null; + headerDropdownItems?: Set; + previewType?: PreviewType; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + } + entityType={EntityType.GlossaryNode} + parentEntities={parentNodes?.nodes} + headerDropdownItems={headerDropdownItems} + previewType={previewType} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/glossaryNode/utils.ts b/datahub-web-react/src/app/entityV2/glossaryNode/utils.ts new file mode 100644 index 00000000000000..2263f5a9238d01 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryNode/utils.ts @@ -0,0 +1,8 @@ +import { Entity, 
EntityType } from '../../../types.generated'; +import { EntityRegistry } from '../../../entityRegistryContext'; + +export function sortGlossaryNodes(entityRegistry: EntityRegistry, nodeA?: Entity | null, nodeB?: Entity | null) { + const nodeAName = entityRegistry.getDisplayName(EntityType.GlossaryNode, nodeA) || ''; + const nodeBName = entityRegistry.getDisplayName(EntityType.GlossaryNode, nodeB) || ''; + return nodeAName.localeCompare(nodeBName); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryRelatedAssetsTabHeader.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryRelatedAssetsTabHeader.tsx new file mode 100644 index 00000000000000..4f52dd15c090bd --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryRelatedAssetsTabHeader.tsx @@ -0,0 +1,54 @@ +import { Pill } from '@src/alchemy-components'; +import { useEntityData } from '@src/app/entity/shared/EntityContext'; +import { formatNumber } from '@src/app/shared/formatNumber'; +import { useGetSearchResultsForMultipleQuery } from '@src/graphql/search.generated'; +import React from 'react'; +import styled from 'styled-components'; + +const Styled = styled.div` + display: flex; + align-items: center; +`; + +const TabName = styled.div` + padding-right: 4px; +`; + +function GlossaryRelatedAssetsTabHeader() { + const { entityData } = useEntityData(); + + // To get the number of related assets + const { data } = useGetSearchResultsForMultipleQuery({ + variables: { + input: { + types: [], + query: '*', + count: 0, + orFilters: [ + { + and: [ + { + field: 'glossaryTerms', + values: [entityData?.urn || ''], + }, + ], + }, + ], + searchFlags: { + skipCache: true, + }, + }, + }, + skip: !entityData?.urn, + fetchPolicy: 'cache-and-network', + }); + + return ( + + Related Assets + + + ); +} + +export default GlossaryRelatedAssetsTabHeader; diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryTermEntity.tsx 
b/datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryTermEntity.tsx new file mode 100644 index 00000000000000..bb8bad0eeff547 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/GlossaryTermEntity.tsx @@ -0,0 +1,244 @@ +import { AppstoreOutlined, FileOutlined, LayoutOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { BookmarkSimple } from '@phosphor-icons/react'; +import { GetGlossaryTermQuery, useGetGlossaryTermQuery } from '../../../graphql/glossaryTerm.generated'; +import { EntityType, GlossaryTerm, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { FetchedEntity } from '../../lineage/types'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { EntityActionItem } from '../shared/entity/EntityActions'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { SchemaTab } from '../shared/tabs/Dataset/Schema/SchemaTab'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import TabNameWithCount 
from '../shared/tabs/Entity/TabNameWithCount'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import GlossaryRelatedAssetsTabHeader from './GlossaryRelatedAssetsTabHeader'; +import { Preview } from './preview/Preview'; +import GlossaryRelatedEntity from './profile/GlossaryRelatedEntity'; +import GlossayRelatedTerms from './profile/GlossaryRelatedTerms'; +import { RelatedTermTypes } from './profile/GlossaryRelatedTermsResult'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([ + EntityMenuItems.MOVE, + EntityMenuItems.SHARE, + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.DELETE, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub Dataset entity. + */ +export class GlossaryTermEntity implements Entity { + getLineageVizConfig?: ((entity: GlossaryTerm) => FetchedEntity) | undefined; + + type: EntityType = EntityType.GlossaryTerm; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + if (styleType === IconStyleType.ACCENT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + isLineageEnabled = () => false; + + getPathName = () => 'glossaryTerm'; + + getCollectionName = () => 'Glossary Terms'; + + getEntityName = () => 'Glossary Term'; + + useEntityQuery = useGetGlossaryTermQuery; + + renderProfile = (urn) => { + return ( + + glossaryTerm?.glossaryTerm?.schemaMetadata !== null, + enabled: (_, glossaryTerm: GetGlossaryTermQuery) => + glossaryTerm?.glossaryTerm?.schemaMetadata !== null, + }, + }, + { + name: 'Related Terms', + getDynamicName: (entityData, _, loading) => { + const totalRelatedTerms = Object.keys(RelatedTermTypes).reduce((acc, curr) => { + return acc + 
(entityData?.[curr]?.total || 0); + }, 0); + return ( + + ); + }, + component: GlossayRelatedTerms, + icon: () => , + }, + { + name: 'Properties', + component: PropertiesTab, + icon: UnorderedListOutlined, + }, + ]} + sidebarSections={this.getSidebarSections()} + getOverrideProperties={this.getOverridePropertiesFromEntity} + sidebarTabs={this.getSidebarTabs()} + /> + ); + }; + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + properties: { + hideOwnerType: true, + }, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + getOverridePropertiesFromEntity = (glossaryTerm?: GlossaryTerm | null): GenericEntityProperties => { + // if dataset has subTypes filled out, pick the most specific subtype and return it + return { + customProperties: glossaryTerm?.properties?.customProperties, + }; + }; + + renderSearch = (result: SearchResult) => { + return this.renderPreview(PreviewType.SEARCH, result.entity as GlossaryTerm); + }; + + renderPreview = (previewType: PreviewType, data: GlossaryTerm) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + displayName = (data: GlossaryTerm) => { + return data?.properties?.name || data?.name || data?.urn; + }; + + platformLogoUrl = (_: GlossaryTerm) => { + return undefined; + }; + + getGenericEntityProperties = (glossaryTerm: GlossaryTerm) => { + return getDataForEntityType({ + data: glossaryTerm, + entityType: this.type, + getOverrideProperties: (data) => data, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + 
EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + ]); + }; + + getGraphName = () => this.getPathName(); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/_tests_/utils.test.ts b/datahub-web-react/src/app/entityV2/glossaryTerm/_tests_/utils.test.ts new file mode 100644 index 00000000000000..446841344fed71 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/_tests_/utils.test.ts @@ -0,0 +1,83 @@ +import { globalEntityRegistryV2 } from '@app/EntityRegistryProvider'; +import { EntityType } from '../../../../types.generated'; +import { getRelatedAssetsUrl, getRelatedEntitiesUrl, sortGlossaryTerms } from '../utils'; + +describe('sortGlossaryTerms', () => { + it('should correctly sort glossary terms when both nodes are provided', () => { + const nodeA = { + type: EntityType.GlossaryTerm, + urn: 'urn:123', + properties: { + __typename: 'GlossaryTermProperties', + name: 'test child 2', + }, + }; + const nodeB = { + type: EntityType.GlossaryTerm, + urn: 'urn:li:456', + properties: { + __typename: 'GlossaryTermProperties', + name: 'test child 1', + }, + }; + const result = sortGlossaryTerms(globalEntityRegistryV2, nodeA, nodeB); + expect(result).toBeGreaterThan(0); + }); + + it('should not sort glossary terms when both nodes are provided in sorted order', () => { + const nodeA = { + type: EntityType.GlossaryTerm, + urn: 'urn:123', + properties: { + __typename: 'GlossaryTermProperties', + name: 'test child 1', + }, + }; + const nodeB = { + type: EntityType.GlossaryTerm, + urn: 'urn:li:456', + properties: { + __typename: 'GlossaryTermProperties', + name: 'test child 2', + }, + }; + const result = sortGlossaryTerms(globalEntityRegistryV2, nodeA, nodeB); + expect(result).toBeLessThan(0); + }); + + it('should correctly sort glossary terms when only one node is provided', () => { + const nodeA = { + type: EntityType.GlossaryTerm, + urn: 'urn:123', + properties: { + __typename: 'GlossaryTermProperties', + name: 'test child 1', + }, 
+ }; + const result = sortGlossaryTerms(globalEntityRegistryV2, nodeA); + expect(result).toBeGreaterThan(0); + }); + + it('should handle null nodes by considering them equal in sorting', () => { + const result = sortGlossaryTerms(globalEntityRegistryV2); + expect(result).toBe(0); + }); +}); + +describe('getRelatedEntitiesUrl', () => { + it('should return Related Entities URL', () => { + const urn = 'urn123'; + const url = getRelatedEntitiesUrl(globalEntityRegistryV2, urn); + const expectedURL = `/glossaryTerm/${urn}/${encodeURIComponent('Related Entities')}`; + expect(url).toEqual(expectedURL); + }); +}); + +describe('getRelatedAssetsUrl', () => { + it('should return Related Assets URL', () => { + const urn = 'urn123'; + const url = getRelatedAssetsUrl(globalEntityRegistryV2, urn); + const expectedURL = `/glossaryTerm/${urn}/${encodeURIComponent('Related Assets')}`; + expect(url).toEqual(expectedURL); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/preview/Preview.tsx new file mode 100644 index 00000000000000..5db90779779adb --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/preview/Preview.tsx @@ -0,0 +1,57 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { BookmarkSimple } from '@phosphor-icons/react'; +import { Deprecation, Domain, EntityType, Owner, ParentNodesResult } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import UrlButton from '../../shared/UrlButton'; +import { getRelatedAssetsUrl } from '../utils'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const Preview = ({ + urn, + name, + data, + description, + owners, + deprecation, + parentNodes, + previewType, 
+ domain, + headerDropdownItems, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + description?: string | null; + owners?: Array | null; + deprecation?: Deprecation | null; + parentNodes?: ParentNodesResult | null; + previewType: PreviewType; + domain?: Domain | undefined; + headerDropdownItems?: Set; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + } + entityType={EntityType.GlossaryTerm} + typeIcon={entityRegistry.getIcon(EntityType.GlossaryTerm, 14, IconStyleType.ACCENT)} + deprecation={deprecation} + parentEntities={parentNodes?.nodes} + domain={domain} + entityTitleSuffix={ + View Related Assets + } + headerDropdownItems={headerDropdownItems} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/preview/__tests__/Preview.test.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/preview/__tests__/Preview.test.tsx new file mode 100644 index 00000000000000..f65cbb8db71f3f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/preview/__tests__/Preview.test.tsx @@ -0,0 +1,27 @@ +import { MockedProvider } from '@apollo/client/testing'; +import { render } from '@testing-library/react'; +import React from 'react'; +import { mocks } from '../../../../../Mocks'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import { Preview } from '../Preview'; +import { PreviewType } from '../../../Entity'; + +describe('Preview', () => { + it('renders', () => { + const { getByText } = render( + + + + + , + ); + expect(getByText('custom_name')).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/AddRelatedTermsModal.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/AddRelatedTermsModal.tsx new file mode 100644 index 00000000000000..7986eb19070ddf --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/AddRelatedTermsModal.tsx @@ -0,0 +1,218 @@ +import { message, Button, 
Modal, Select, Tag } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components/macro'; +import { useAddRelatedTermsMutation } from '../../../../graphql/glossaryTerm.generated'; +import { useGetSearchResultsLazyQuery } from '../../../../graphql/search.generated'; +import { EntityType, SearchResult, TermRelationshipType } from '../../../../types.generated'; +import GlossaryBrowser from '../../../glossary/GlossaryBrowser/GlossaryBrowser'; +import ClickOutside from '../../../shared/ClickOutside'; +import { BrowserWrapper } from '../../../shared/tags/AddTagsTermsModal'; +import TermLabel from '../../../shared/TermLabel'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { useEntityData, useRefetch } from '../../../entity/shared/EntityContext'; +import ParentEntities from '../../../searchV2/filters/ParentEntities'; +import { getParentEntities } from '../../../searchV2/filters/utils'; + +const StyledSelect = styled(Select)` + width: 480px; +`; + +const SearchResultContainer = styled.div` + display: flex; + flex-direction: column; + justify-content: center; +`; + +interface Props { + onClose: () => void; + relationshipType: TermRelationshipType; +} + +function AddRelatedTermsModal(props: Props) { + const { onClose, relationshipType } = props; + + const [inputValue, setInputValue] = useState(''); + const [selectedUrns, setSelectedUrns] = useState([]); + const [selectedTerms, setSelectedTerms] = useState([]); + const [isFocusedOnInput, setIsFocusedOnInput] = useState(false); + const entityRegistry = useEntityRegistry(); + const { urn: entityDataUrn } = useEntityData(); + const refetch = useRefetch(); + + const [AddRelatedTerms] = useAddRelatedTermsMutation(); + + function addTerms() { + AddRelatedTerms({ + variables: { + input: { + urn: entityDataUrn, + termUrns: selectedUrns, + relationshipType, + }, + }, + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to move: \n ${e.message || ''}`, 
duration: 3 }); + }) + .finally(() => { + message.loading({ content: 'Adding...', duration: 2 }); + setTimeout(() => { + message.success({ + content: 'Added Related Terms!', + duration: 2, + }); + refetch(); + }, 2000); + }); + onClose(); + } + + const [termSearch, { data: termSearchData }] = useGetSearchResultsLazyQuery(); + const termSearchResults = termSearchData?.search?.searchResults || []; + + const tagSearchOptions = termSearchResults + .filter((result) => result?.entity?.urn !== entityDataUrn) + .map((result: SearchResult) => { + const displayName = entityRegistry.getDisplayName(result.entity.type, result.entity); + + return ( + + + + + + + ); + }); + + const handleSearch = (text: string) => { + if (text.length > 0) { + termSearch({ + variables: { + input: { + type: EntityType.GlossaryTerm, + query: text, + start: 0, + count: 20, + }, + }, + }); + } + }; + + // When a Tag or term search result is selected, add the urn to the Urns + const onSelectValue = (urn: string) => { + const newUrns = [...selectedUrns, urn]; + setSelectedUrns(newUrns); + const selectedSearchOption = tagSearchOptions.find((option) => option.props.value === urn); + setSelectedTerms([...selectedTerms, { urn, component: }]); + }; + + // When a Tag or term search result is deselected, remove the urn from the Owners + const onDeselectValue = (urn: string) => { + const newUrns = selectedUrns.filter((u) => u !== urn); + setSelectedUrns(newUrns); + setInputValue(''); + setIsFocusedOnInput(true); + setSelectedTerms(selectedTerms.filter((term) => term.urn !== urn)); + }; + + function selectTermFromBrowser(urn: string, displayName: string) { + setIsFocusedOnInput(false); + const newUrns = [...selectedUrns, urn]; + setSelectedUrns(newUrns); + setSelectedTerms([...selectedTerms, { urn, component: }]); + } + + function clearInput() { + setInputValue(''); + setTimeout(() => setIsFocusedOnInput(true), 0); // call after click outside + } + + function handleBlur() { + setInputValue(''); + } + + const 
tagRender = (properties) => { + // eslint-disable-next-line react/prop-types + const { closable, onClose: close, value } = properties; + const onPreventMouseDown = (event) => { + event.preventDefault(); + event.stopPropagation(); + }; + const selectedItem = selectedTerms.find((term) => term.urn === value).component; + + return ( + + {selectedItem} + + ); + }; + + const isShowingGlossaryBrowser = !inputValue && isFocusedOnInput; + + return ( + + + + + } + > + setIsFocusedOnInput(false)}> + onSelectValue(asset)} + onDeselect={(asset: any) => onDeselectValue(asset)} + onSearch={(value: string) => { + // eslint-disable-next-line react/prop-types + handleSearch(value.trim()); + // eslint-disable-next-line react/prop-types + setInputValue(value.trim()); + }} + tagRender={tagRender} + value={selectedUrns} + onClear={clearInput} + onFocus={() => setIsFocusedOnInput(true)} + onBlur={handleBlur} + dropdownStyle={isShowingGlossaryBrowser || !inputValue ? { display: 'none' } : {}} + > + {tagSearchOptions} + + + + + + + ); +} + +export default AddRelatedTermsModal; diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedEntity.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedEntity.tsx new file mode 100644 index 00000000000000..bf41caefba3c22 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedEntity.tsx @@ -0,0 +1,56 @@ +import * as React from 'react'; +import { UnionType } from '../../../search/utils/constants'; +import { EmbeddedListSearchSection } from '../../shared/components/styled/search/EmbeddedListSearchSection'; + +import { useEntityData } from '../../../entity/shared/EntityContext'; +import { SearchCardContext } from '../../shared/SearchCardContext'; + +export default function GlossaryRelatedEntity() { + const { entityData } = useEntityData(); + + const entityUrn = entityData?.urn; + + const fixedOrFilters = + (entityUrn && [ + { + field: 'glossaryTerms', + values: 
[entityUrn], + }, + { + field: 'fieldGlossaryTerms', + values: [entityUrn], + }, + ]) || + []; + + entityData?.isAChildren?.relationships.forEach((term) => { + const childUrn = term.entity?.urn; + + if (childUrn) { + fixedOrFilters.push({ + field: 'glossaryTerms', + values: [childUrn], + }); + + fixedOrFilters.push({ + field: 'fieldGlossaryTerms', + values: [childUrn], + }); + } + }); + + return ( + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTerms.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTerms.tsx new file mode 100644 index 00000000000000..3538375ba18e2a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTerms.tsx @@ -0,0 +1,70 @@ +import { Menu } from 'antd'; +import React, { useEffect, useState } from 'react'; +import styled from 'styled-components/macro'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import GlossaryRelatedTermsResult, { RelatedTermTypes } from './GlossaryRelatedTermsResult'; + +const DetailWrapper = styled.div` + display: inline-flex; + flex: 1; + width: 100%; +`; + +const MenuWrapper = styled.div` + border-right: 2px solid #f5f5f5; + flex-basis: 30%; + flex-shrink: 1; +`; + +const Content = styled.div` + flex-grow: 1; + flex-basis: 70%; + flex-shrink: 0; + max-width: 100%; + overflow: hidden; +`; + +export default function GlossayRelatedTerms() { + const { entityData } = useEntityData(); + const [selectedKey, setSelectedKey] = useState(''); + const menuOptionsArray = Object.keys(RelatedTermTypes); + + useEffect(() => { + if (menuOptionsArray && menuOptionsArray.length > 0 && selectedKey.length === 0) { + setSelectedKey(menuOptionsArray[0]); + } + }, [menuOptionsArray, selectedKey]); + + const onMenuClick = ({ key }) => { + setSelectedKey(key); + }; + + return ( + + + { + onMenuClick(key); + }} + > + {menuOptionsArray.map((option) => ( + + {RelatedTermTypes[option]} + + ))} + + + + 
{selectedKey && entityData && ( + + )} + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTermsResult.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTermsResult.tsx new file mode 100644 index 00000000000000..1312c3cdaff6e6 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryRelatedTermsResult.tsx @@ -0,0 +1,114 @@ +import { Button, Typography } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components/macro'; +import { TermRelationshipType } from '../../../../types.generated'; +import { Message } from '../../../shared/Message'; +import { EmptyTab } from '../../shared/components/styled/EmptyTab'; +import AddRelatedTermsModal from './AddRelatedTermsModal'; +import RelatedTerm from './RelatedTerm'; +import { CustomIcon } from '../../../sharedV2/icons/customIcons/CustomIcon'; +import addTerm from '../../../sharedV2/icons/customIcons/add-term.svg'; +import { REDESIGN_COLORS } from '../../shared/constants'; + +export enum RelatedTermTypes { + hasRelatedTerms = 'Contains', + isRelatedTerms = 'Inherits', + containedBy = 'Contained by', + isAChildren = 'Inherited by', +} + +export type Props = { + glossaryRelatedTermType: string; + glossaryRelatedTermResult: Array; +}; + +const ListWrapper = styled.div` + display: flex; + padding: 0 16px; + flex-direction: column; + width: 100%; +`; + +const ListContainer = styled.div` + display: flex; + flex-direction: column; + gap: 16px; +`; + +const TitleContainer = styled.div` + align-items: center; + display: flex; + justify-content: space-between; + padding: 10px 20px; + margin-bottom: 10px; +`; + +const messageStyle = { marginTop: '10%' }; + +const ButtonStyle = styled(Button)` + border: 1px solid ${REDESIGN_COLORS.TITLE_PURPLE}; + color: ${REDESIGN_COLORS.TITLE_PURPLE}; + border-radius: 8px; + display: flex; + gap: 0.2rem; + + &:hover, + &:focus { + border: 1px solid 
${REDESIGN_COLORS.TITLE_PURPLE}; + color: ${REDESIGN_COLORS.TITLE_PURPLE}; + } +`; + +export default function GlossaryRelatedTermsResult({ glossaryRelatedTermType, glossaryRelatedTermResult }: Props) { + const [isShowingAddModal, setIsShowingAddModal] = useState(false); + const glossaryRelatedTermUrns: Array = []; + glossaryRelatedTermResult.forEach((item: any) => { + glossaryRelatedTermUrns.push(item?.entity?.urn); + }); + const contentLoading = false; + const relationshipType = + glossaryRelatedTermType === RelatedTermTypes.hasRelatedTerms || + glossaryRelatedTermType === RelatedTermTypes.containedBy + ? TermRelationshipType.HasA + : TermRelationshipType.IsA; + const canEditRelatedTerms = + glossaryRelatedTermType === RelatedTermTypes.isRelatedTerms || + glossaryRelatedTermType === RelatedTermTypes.hasRelatedTerms; + + return ( + <> + {contentLoading ? ( + + ) : ( + + + + {glossaryRelatedTermType} + + {canEditRelatedTerms && ( + setIsShowingAddModal(true)}> + Add Terms + + )} + + + {glossaryRelatedTermUrns.map((urn) => ( + + ))} + + {glossaryRelatedTermUrns.length === 0 && ( + + )} + + )} + {isShowingAddModal && ( + setIsShowingAddModal(false)} relationshipType={relationshipType} /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossarySidebarAboutSection.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossarySidebarAboutSection.tsx new file mode 100644 index 00000000000000..cae3b2c5c2ed40 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossarySidebarAboutSection.tsx @@ -0,0 +1,53 @@ +import { Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import StripMarkdownText from '../../shared/components/styled/StripMarkdownText'; +import { SidebarHeader } from '../../shared/containers/profile/sidebar/SidebarHeader'; +import { useEntityData, useRouteToTab } from '../../../entity/shared/EntityContext'; + +const DescriptionTypography = 
styled(Typography.Paragraph)` + max-width: 65ch; +`; + +export default function GlossarySidebarAboutSection() { + const { entityData }: any = useEntityData(); + const description = entityData?.glossaryTermInfo?.definition; + const source = entityData?.glossaryTermInfo?.sourceRef; + const sourceUrl = entityData?.glossaryTermInfo?.sourceUrl; + const routeToTab = useRouteToTab(); + + return ( +
+ + {description && ( + + routeToTab({ tabName: 'Documentation' })}> + Read More + + } + > + {description} + + + )} + + + {source && ( + + {sourceUrl ? ( + + {source} + + ) : ( + { + source, + } + )} + + )} +
+ ); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryTermHeader.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryTermHeader.tsx new file mode 100644 index 00000000000000..fdf007d0c6d3a7 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/GlossaryTermHeader.tsx @@ -0,0 +1,31 @@ +import { Divider, Space, Typography } from 'antd'; +import React from 'react'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { AvatarsGroup } from '../../../shared/avatar'; + +type Props = { + definition: string; + sourceRef: string; + sourceUrl: string; + ownership?: any; +}; +export default function GlossaryTermHeader({ definition, sourceRef, sourceUrl, ownership }: Props) { + const entityRegistry = useEntityRegistry(); + return ( + <> + + {definition} + }> + Source + {sourceRef} + {sourceUrl && ( + + view source + + )} + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/RelatedTerm.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/RelatedTerm.tsx new file mode 100644 index 00000000000000..442fced4528e6d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/RelatedTerm.tsx @@ -0,0 +1,83 @@ +import React from 'react'; +import { CloseOutlined } from '@ant-design/icons'; +import { Button } from 'antd'; +import styled from 'styled-components/macro'; +import { useGetGlossaryTermQuery } from '../../../../graphql/glossaryTerm.generated'; +import { EntityType, TermRelationshipType } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { PreviewType } from '../../Entity'; +import useRemoveRelatedTerms from './useRemoveRelatedTerms'; +import { REDESIGN_COLORS } from '../../shared/constants'; + +const TransparentButton = styled(Button)` + color: ${REDESIGN_COLORS.TITLE_PURPLE}; + font-size: 12px; + box-shadow: none; + border: none; + padding: 0px 10px; + position: absolute; + 
top: 19px; + right: 50px; + display: none; + + &:hover { + transition: 0.15s; + opacity: 0.9; + color: ${REDESIGN_COLORS.TITLE_PURPLE}; + } +`; + +const ListItem = styled.div` + position: relative; + border: 1px solid #ebebeb; + border-radius: 11px; + + &:hover ${TransparentButton} { + display: inline-block; + } + &:hover { + border: 1px solid ${REDESIGN_COLORS.TITLE_PURPLE}; + } +`; + +const Profile = styled.div` + display: flex; + position: relative; + overflow: hidden; + padding: 16px; +`; + +interface Props { + urn: string; + relationshipType: TermRelationshipType; + isEditable: boolean; +} + +function RelatedTerm(props: Props) { + const { urn, relationshipType, isEditable } = props; + + const entityRegistry = useEntityRegistry(); + const { data, loading } = useGetGlossaryTermQuery({ variables: { urn } }); + let displayName = ''; + if (data) { + displayName = entityRegistry.getDisplayName(EntityType.GlossaryTerm, data.glossaryTerm); + } + const { onRemove } = useRemoveRelatedTerms(urn, relationshipType, displayName); + + if (loading) return null; + + return ( + + + {entityRegistry.renderPreview(EntityType.GlossaryTerm, PreviewType.PREVIEW, data?.glossaryTerm)} + {isEditable && ( + + Remove Relationship + + )} + + + ); +} + +export default RelatedTerm; diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/SchemaView.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/SchemaView.tsx new file mode 100644 index 00000000000000..46484459ba4a33 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/SchemaView.tsx @@ -0,0 +1,30 @@ +import React from 'react'; +import { Empty, Typography } from 'antd'; +import styled from 'styled-components'; + +export type Props = { + rawSchema: string | null; +}; + +const Content = styled.div` + margin-left: 32px; + flex-grow: 1; +`; + +export default function SchemaView({ rawSchema }: Props) { + return ( + <> + {rawSchema && rawSchema.length > 0 ? ( + +
+                        {rawSchema}
+                    
+
+ ) : ( + + + + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryRelatedTerms.test.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryRelatedTerms.test.tsx new file mode 100644 index 00000000000000..1f791ddebe51a7 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryRelatedTerms.test.tsx @@ -0,0 +1,20 @@ +import { render } from '@testing-library/react'; +import React from 'react'; +import { MockedProvider } from '@apollo/client/testing'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import GlossaryRelatedTerms from '../GlossaryRelatedTerms'; +import { mocks } from '../../../../../Mocks'; + +describe('Glossary Related Terms', () => { + it('renders and print hasRelatedTerms detail by default', async () => { + const { getByText } = render( + + + + + , + ); + expect(getByText('Contains')).toBeInTheDocument(); + expect(getByText('Inherits')).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryTermHeader.test.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryTermHeader.test.tsx new file mode 100644 index 00000000000000..0dfc088143baf5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/__tests__/GlossaryTermHeader.test.tsx @@ -0,0 +1,31 @@ +import { MockedProvider } from '@apollo/client/testing'; +import { render } from '@testing-library/react'; +import React from 'react'; +import { mocks } from '../../../../../Mocks'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import GlossaryTermHeader from '../GlossaryTermHeader'; + +const glossaryTermHeaderData = { + definition: 'this is sample definition', + sourceUrl: 'sourceUrl', + sourceRef: 'Source ref', + fqdn: 'fqdn', +}; + +describe('Glossary Term Header', () => { + it('renders', () => { + const { getByText } 
= render( + + + + + , + ); + expect(getByText(glossaryTermHeaderData.definition)).toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/profile/useRemoveRelatedTerms.tsx b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/useRemoveRelatedTerms.tsx new file mode 100644 index 00000000000000..bfe5f6fbe074db --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/profile/useRemoveRelatedTerms.tsx @@ -0,0 +1,60 @@ +import { message, Modal } from 'antd'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { useEntityData, useRefetch } from '../../../entity/shared/EntityContext'; +import { useRemoveRelatedTermsMutation } from '../../../../graphql/glossaryTerm.generated'; +import { TermRelationshipType } from '../../../../types.generated'; + +function useRemoveRelatedTerms(termUrn: string, relationshipType: TermRelationshipType, displayName: string) { + const { urn, entityType } = useEntityData(); + const entityRegistry = useEntityRegistry(); + const refetch = useRefetch(); + + const [removeRelatedTerms] = useRemoveRelatedTermsMutation(); + + function handleRemoveRelatedTerms() { + removeRelatedTerms({ + variables: { + input: { + urn, + termUrns: [termUrn], + relationshipType, + }, + }, + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to remove: \n ${e.message || ''}`, duration: 3 }); + }) + .finally(() => { + message.loading({ + content: 'Removing...', + duration: 2, + }); + setTimeout(() => { + refetch(); + message.success({ + content: `Removed Glossary Term!`, + duration: 2, + }); + }, 2000); + }); + } + + function onRemove() { + Modal.confirm({ + title: `Remove ${displayName}`, + content: `Are you sure you want to remove this ${entityRegistry.getEntityName(entityType)}?`, + onOk() { + handleRemoveRelatedTerms(); + }, + onCancel() {}, + okText: 'Yes', + maskClosable: true, + closable: true, + }); + } + + return { onRemove }; +} + +export default 
useRemoveRelatedTerms; diff --git a/datahub-web-react/src/app/entityV2/glossaryTerm/utils.ts b/datahub-web-react/src/app/entityV2/glossaryTerm/utils.ts new file mode 100644 index 00000000000000..f4973abe3957ce --- /dev/null +++ b/datahub-web-react/src/app/entityV2/glossaryTerm/utils.ts @@ -0,0 +1,16 @@ +import { Entity, EntityType } from '../../../types.generated'; +import { EntityRegistry } from '../../../entityRegistryContext'; + +export function sortGlossaryTerms(entityRegistry: EntityRegistry, nodeA?: Entity | null, nodeB?: Entity | null) { + const nodeAName = entityRegistry.getDisplayName(EntityType.GlossaryTerm, nodeA) || ''; + const nodeBName = entityRegistry.getDisplayName(EntityType.GlossaryTerm, nodeB) || ''; + return nodeAName.localeCompare(nodeBName); +} + +export function getRelatedEntitiesUrl(entityRegistry: EntityRegistry, urn: string) { + return `${entityRegistry.getEntityUrl(EntityType.GlossaryTerm, urn)}/${encodeURIComponent('Related Entities')}`; +} + +export function getRelatedAssetsUrl(entityRegistry: EntityRegistry, urn: string) { + return `${entityRegistry.getEntityUrl(EntityType.GlossaryTerm, urn)}/${encodeURIComponent('Related Assets')}`; +} diff --git a/datahub-web-react/src/app/entityV2/group/AddGroupMembersModal.tsx b/datahub-web-react/src/app/entityV2/group/AddGroupMembersModal.tsx new file mode 100644 index 00000000000000..08d5065487e95f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/AddGroupMembersModal.tsx @@ -0,0 +1,213 @@ +import React, { useRef, useState } from 'react'; +import { message, Modal, Button, Select, Tag, Empty } from 'antd'; +import { getModalDomContainer } from '@src/utils/focus'; +import { LoadingOutlined } from '@ant-design/icons'; +import styled from 'styled-components'; +import { useAddGroupMembersMutation } from '../../../graphql/group.generated'; +import { CorpUser, Entity, EntityType } from '../../../types.generated'; +import { useGetSearchResultsLazyQuery } from 
'../../../graphql/search.generated'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { useGetRecommendations } from '../../shared/recommendation'; +import { OwnerLabel } from '../../shared/OwnerLabel'; +import { ANTD_GRAY } from '../shared/constants'; + +type Props = { + urn: string; + visible: boolean; + onCloseModal: () => void; + onSubmit: () => void; +}; + +const SelectInput = styled(Select)``; + +const StyleTag = styled(Tag)` + padding: 0px 7px 0px 0px; + margin-right: 3px; + display: flex; + justify-content: start; + align-items: center; +`; + +const LoadingWrapper = styled.div` + padding: 8px; + display: flex; + justify-content: center; + + svg { + height: 15px; + width: 15px; + color: ${ANTD_GRAY[8]}; + } +`; + +export const AddGroupMembersModal = ({ urn, visible, onCloseModal, onSubmit }: Props) => { + const entityRegistry = useEntityRegistry(); + const [selectedMembers, setSelectedMembers] = useState([]); + const [inputValue, setInputValue] = useState(''); + const [addGroupMembersMutation] = useAddGroupMembersMutation(); + const [userSearch, { data: userSearchData, loading: searchLoading }] = useGetSearchResultsLazyQuery(); + const searchResults = userSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; + const { recommendedData, loading: recommendationsLoading } = useGetRecommendations([EntityType.CorpUser]); + const loading = recommendationsLoading || searchLoading; + const inputEl = useRef(null); + + const handleUserSearch = (text: string) => { + userSearch({ + variables: { + input: { + type: EntityType.CorpUser, + query: text, + start: 0, + count: 5, + }, + }, + }); + }; + + // Renders a search result in the select dropdown. 
+ const renderSearchResult = (entity: Entity) => { + const avatarUrl = (entity as CorpUser).editableProperties?.pictureLink || undefined; + const displayName = entityRegistry.getDisplayName(entity.type, entity); + return ( + + + + ); + }; + + const groupResult = !inputValue || inputValue.length === 0 ? recommendedData : searchResults; + + const groupSearchOptions = groupResult?.map((result) => { + return renderSearchResult(result); + }); + + const onModalClose = () => { + setInputValue(''); + setSelectedMembers([]); + onCloseModal(); + }; + + const onSelectMember = (newMemberUrn: string) => { + if (inputEl && inputEl.current) { + (inputEl.current as any).blur(); + } + const newUsers = [...(selectedMembers || []), newMemberUrn]; + setSelectedMembers(newUsers); + }; + + const onDeselectMember = (member: { key: string; label: React.ReactNode; value: string }) => { + setInputValue(''); + const newUserActors = selectedMembers.filter((user) => user.value !== member.value); + setSelectedMembers(newUserActors); + }; + + const tagRender = (props) => { + // eslint-disable-next-line react/prop-types + const { label, closable, onClose } = props; + const onPreventMouseDown = (event) => { + event.preventDefault(); + event.stopPropagation(); + }; + return ( + + {label} + + ); + }; + + const onAdd = async () => { + const selectedMemberUrns = selectedMembers.map((selectedMember) => selectedMember.value); + if (selectedMembers.length === 0) { + return; + } + try { + await addGroupMembersMutation({ + variables: { + groupUrn: urn, + userUrns: selectedMemberUrns, + }, + }); + message.success({ content: 'Group members added!', duration: 3 }); + } catch (e: unknown) { + message.destroy(); + if (e instanceof Error) { + message.error({ content: `Failed to group members: \n ${e.message || ''}`, duration: 3 }); + } + } finally { + onSubmit(); + onModalClose(); + } + }; + + function handleBlur() { + setInputValue(''); + } + + return ( + + + + + } + getContainer={getModalDomContainer} + > + 
onSelectMember(actorUrn)} + onDeselect={(actorUrn: any) => onDeselectMember(actorUrn)} + onSearch={(value: string) => { + // eslint-disable-next-line react/prop-types + handleUserSearch(value.trim()); + // eslint-disable-next-line react/prop-types + setInputValue(value.trim()); + }} + tagRender={tagRender} + onBlur={handleBlur} + value={selectedMembers} + style={{ width: '100%' }} + notFoundContent={ + !loading ? ( + + ) : null + } + > + {loading ? ( + + + + + + ) : ( + groupSearchOptions + )} + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/group/Group.tsx b/datahub-web-react/src/app/entityV2/group/Group.tsx new file mode 100644 index 00000000000000..5d08fdb62ddd30 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/Group.tsx @@ -0,0 +1,79 @@ +import { TeamOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { CorpGroup, EntityType, SearchResult } from '../../../types.generated'; +import { Entity, IconStyleType, PreviewType } from '../Entity'; +import { Preview } from './preview/Preview'; +import GroupProfile from './GroupProfile'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; + +/** + * Definition of the DataHub CorpGroup entity. 
+ */ +export class GroupEntity implements Entity { + type: EntityType = EntityType.CorpGroup; + + // TODO: update icons for UserGroup + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName: () => string = () => 'corpGroup'; + + getPathName: () => string = () => 'group'; + + getEntityName = () => 'Group'; + + getCollectionName: () => string = () => 'Groups'; + + renderProfile = (urn: string) => ; + + renderPreview = (_: PreviewType, data: CorpGroup) => ( + + ); + + renderSearch = (result: SearchResult) => { + return this.renderPreview(PreviewType.SEARCH, result.entity as CorpGroup); + }; + + displayName = (data: CorpGroup) => { + return data.properties?.displayName || data.info?.displayName || data.name || data.urn; + }; + + getGenericEntityProperties = (group: CorpGroup) => { + return getDataForEntityType({ data: group, entityType: this.type, getOverrideProperties: (data) => data }); + }; + + supportedCapabilities = () => { + return new Set([]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/group/GroupAssets.tsx b/datahub-web-react/src/app/entityV2/group/GroupAssets.tsx new file mode 100644 index 00000000000000..64c29cc6a6b643 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupAssets.tsx @@ -0,0 +1,28 @@ +import React from 'react'; +import styled from 'styled-components'; +import { UnionType } from '../../search/utils/constants'; +import { EmbeddedListSearchSection } from '../shared/components/styled/search/EmbeddedListSearchSection'; + +const GroupAssetsWrapper = styled.div` + height: 100%; +`; + +type Props = { + urn: string; +}; + +export const GroupAssets = ({ urn }: Props) => { + return ( + + + + ); +}; 
diff --git a/datahub-web-react/src/app/entityV2/group/GroupBasicInfoSection.tsx b/datahub-web-react/src/app/entityV2/group/GroupBasicInfoSection.tsx new file mode 100644 index 00000000000000..ec9bdacea1b640 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupBasicInfoSection.tsx @@ -0,0 +1,36 @@ +import React from 'react'; +import { SlackOutlined } from '@ant-design/icons'; +import styled from 'styled-components'; +import { + EmptyValue, + SocialDetails, + BasicDetailsContainer, + SocialInfo, + DraftsOutlinedIconStyle, +} from '../shared/SidebarStyledComponents'; + +const StyledBasicDetailsContainer = styled(BasicDetailsContainer)` + padding: 10px; +`; + +type Props = { + email: string | undefined; + slack: string | undefined; +}; + +export const GroupBasicInfoSection = ({ email, slack }: Props) => { + return ( + + + + + {email || } + + + + {slack || } + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/group/GroupEditModal.tsx b/datahub-web-react/src/app/entityV2/group/GroupEditModal.tsx new file mode 100644 index 00000000000000..f89a103f57b921 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupEditModal.tsx @@ -0,0 +1,153 @@ +import React, { useEffect, useState } from 'react'; +import { message, Button, Input, Modal, Typography, Form } from 'antd'; +import { useUpdateCorpGroupPropertiesMutation } from '../../../graphql/group.generated'; + +type PropsData = { + name: string | undefined; + email: string | undefined; + slack: string | undefined; + urn: string | undefined; +}; + +type Props = { + canEditGroupName?: boolean; + visible: boolean; + onClose: () => void; + onSave: () => void; + handleTitleUpdate: (name: string) => void; + editModalData: PropsData; + updateName?: (name: string) => void; // TODO: Add name to the update mutation for groups to avoid 2 calls. 
+}; +/** Regex Validations */ +export const USER_NAME_REGEX = new RegExp('^[a-zA-Z ]*$'); + +export default function GroupEditModal({ + canEditGroupName, + visible, + onClose, + onSave, + editModalData, + handleTitleUpdate, + updateName, +}: Props) { + const [updateCorpGroupPropertiesMutation] = useUpdateCorpGroupPropertiesMutation(); + const [form] = Form.useForm(); + + console.log(updateName); // will used later now to fix lint added in console + + const [saveButtonEnabled, setSaveButtonEnabled] = useState(true); + const [data, setData] = useState({ + name: editModalData.name, + slack: editModalData.slack, + email: editModalData.email, + urn: editModalData.urn, + }); + + useEffect(() => { + setData({ ...editModalData }); + }, [editModalData]); + + // save changes function + const onSaveChanges = () => { + updateCorpGroupPropertiesMutation({ + variables: { + urn: editModalData?.urn || '', + input: { + email: data.email, + slack: data.slack, + }, + }, + }) + .then(() => { + message.success({ + content: `Changes saved.`, + duration: 3, + }); + onSave(); // call the refetch function once save + // clear the values from edit profile form + setData({ + name: '', + email: '', + slack: '', + urn: '', + }); + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to Save changes!: \n ${e.message || ''}`, duration: 3 }); + }); + handleTitleUpdate(data?.name || ''); + onClose(); + }; + + return ( + + + + + } + > +
+ setSaveButtonEnabled(form.getFieldsError().some((field) => field.errors.length > 0)) + } + onKeyPress={(event) => { + if (event.key === 'Enter') { + event.preventDefault(); + onSaveChanges(); + } + }} + > + {canEditGroupName && ( + Name}> + setData({ ...data, name: event.target.value })} /> + + )} + Email} + rules={[ + { + type: 'email', + message: 'Please enter valid email', + }, + { whitespace: true }, + { min: 2, max: 50 }, + ]} + hasFeedback + > + setData({ ...data, email: event.target.value })} + /> + + Slack Channel} + rules={[{ whitespace: true }, { min: 2, max: 50 }]} + hasFeedback + > + setData({ ...data, slack: event.target.value })} + /> + + +
+ ); +} diff --git a/datahub-web-react/src/app/entityV2/group/GroupInfoHeaderSection.tsx b/datahub-web-react/src/app/entityV2/group/GroupInfoHeaderSection.tsx new file mode 100644 index 00000000000000..0dd47fcca11325 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupInfoHeaderSection.tsx @@ -0,0 +1,62 @@ +import React from 'react'; +import { LockOutlined } from '@ant-design/icons'; +import { Typography } from 'antd'; +import { Tooltip } from '@components'; +import styled from 'styled-components'; +import { REDESIGN_COLORS } from '../shared/constants'; +import { MemberCount } from './GroupSidebar'; +import { EntityRelationshipsResult } from '../../../types.generated'; + +const GroupHeader = styled.div` + position: relative; + z-index: 2; +`; + +const GroupName = styled(Typography.Title)` + word-wrap: break-word; + text-align: left; + &&& { + margin-bottom: 0; + word-break: break-all; + font-size: 12px; + color: ${REDESIGN_COLORS.WHITE}; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + } + + .ant-typography-edit { + font-size: 12px; + } +`; + +type Props = { + groupMemberRelationships: EntityRelationshipsResult; + isExternalGroup: boolean; + externalGroupType: string | undefined; + groupName: string | undefined; +}; + +export const GroupInfoHeaderSection = ({ + groupMemberRelationships, + externalGroupType, + isExternalGroup, + groupName, +}: Props) => { + const groupMemberRelationshipsCount = groupMemberRelationships?.count || 0; + return ( + + + {groupName} + + {groupMemberRelationshipsCount > 0 && {groupMemberRelationships?.count} members} + {isExternalGroup && ( + + + + )} + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/group/GroupMemberLink.tsx b/datahub-web-react/src/app/entityV2/group/GroupMemberLink.tsx new file mode 100644 index 00000000000000..ed34070e0b0c17 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupMemberLink.tsx @@ -0,0 +1,37 @@ +import React from 'react'; +import { Tag } from 
'antd'; +import { Tooltip } from '@components'; +import { Link } from 'react-router-dom'; +import styled from 'styled-components'; +import { CustomAvatar } from '../../shared/avatar'; +import { CorpUser, EntityType } from '../../../types.generated'; +import EntityRegistry from '../../entity/EntityRegistry'; + +const MemberTag = styled(Tag)` + padding: 2px; + padding-right: 6px; + margin-bottom: 8px; + display: inline-flex; + width: auto; +`; + +type Props = { + user: CorpUser; + entityRegistry: EntityRegistry; +}; + +export const GroupMemberLink = ({ user, entityRegistry }: Props) => { + const name = entityRegistry.getDisplayName(EntityType.CorpUser, user); + return ( + + + + {name.length > 15 ? {`${name.substring(0, 15)}..`} : name} + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/group/GroupMembers.tsx b/datahub-web-react/src/app/entityV2/group/GroupMembers.tsx new file mode 100644 index 00000000000000..61ca97d0bd6e77 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupMembers.tsx @@ -0,0 +1,254 @@ +import React, { useState } from 'react'; +import { MoreOutlined, UserAddOutlined, UserDeleteOutlined } from '@ant-design/icons'; +import { Col, Dropdown, message, Modal, Pagination, Row, Empty, Button, Typography, MenuProps } from 'antd'; +import { Link } from 'react-router-dom'; +import styled from 'styled-components'; +import { useGetAllGroupMembersQuery, useRemoveGroupMembersMutation } from '../../../graphql/group.generated'; +import { CorpUser, EntityType } from '../../../types.generated'; +import { CustomAvatar } from '../../shared/avatar'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { AddGroupMembersModal } from './AddGroupMembersModal'; +import { scrollToTop } from '../../shared/searchUtils'; + +const ADD_MEMBER_STYLE = { + backGround: '#ffffff', + boxShadow: '0px 2px 6px rgba(0, 0, 0, 0.05)', +}; +const AVATAR_STYLE = { margin: '5px 5px 5px 0' }; + +/** + * Styled Components + */ +const AddMember = 
styled(Button)` + padding: 13px 13px 30px 30px; + cursor: pointer; + + &&& .anticon.anticon-user-add { + margin-right: 6px; + } +`; + +const AddMemberText = styled(Typography.Text)` + font-family: Mulish; + font-style: normal; + font-weight: 500; + font-size: 12px; + line-height: 20px; +`; + +const MemberNameSection = styled.div` + font-size: 20px; + line-height: 28px; + color: #262626; + display: flex; + align-items: center; + justify-content: start; + padding-left: 12px; +`; + +const GroupMemberWrapper = styled.div` + height: calc(100vh - 217px); + overflow-y: auto; + + & .groupMemberRow { + margin: 0 19px; + } +`; + +const MemberColumn = styled(Col)` + padding: 19px 0 19px 0; + border-bottom: 1px solid #f0f0f0; +`; + +const MemberEditIcon = styled.div` + font-size: 22px; + float: right; +`; + +const Name = styled.span` + font-weight: bold; + font-size: 14px; + line-height: 22px; + color: #262626; + margin-left: 8px; +`; + +const NoGroupMembers = styled(Empty)` + padding: 40px; +`; + +const StyledMoreOutlined = styled(MoreOutlined)` + :hover { + cursor: pointer; + } +`; + +type Props = { + urn: string; + pageSize: number; + isExternalGroup: boolean; + onChangeMembers?: () => void; +}; + +export default function GroupMembers({ urn, pageSize, isExternalGroup, onChangeMembers }: Props) { + const entityRegistry = useEntityRegistry(); + + const [page, setPage] = useState(1); + /* eslint-disable @typescript-eslint/no-unused-vars */ + const [isEditingMembers, setIsEditingMembers] = useState(false); + const start = (page - 1) * pageSize; + const { data: membersData, refetch } = useGetAllGroupMembersQuery({ + variables: { urn, start, count: pageSize }, + fetchPolicy: 'cache-first', + }); + const [removeGroupMembersMutation] = useRemoveGroupMembersMutation(); + + const onChangeMembersPage = (newPage: number) => { + scrollToTop(); + setPage(newPage); + }; + + const removeGroupMember = (userUrn: string) => { + removeGroupMembersMutation({ + variables: { + groupUrn: urn, + 
userUrns: [userUrn], + }, + }) + .then(({ errors }) => { + if (!errors) { + message.success({ content: 'Removed Group Member!', duration: 2 }); + // Hack to deal with eventual consistency + setTimeout(() => { + // Reload the page. + refetch(); + }, 3000); + onChangeMembers?.(); + } + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to remove group member: \n ${e.message || ''}`, duration: 3 }); + }); + }; + + const onClickEditMembers = () => { + setIsEditingMembers(true); + }; + + const onAddMembers = () => { + setTimeout(() => { + refetch(); + }, 3000); + onChangeMembers?.(); + }; + + const onRemoveMember = (memberUrn: string) => { + Modal.confirm({ + title: `Confirm Group Member Removal`, + content: `Are you sure you want to remove this user from the group?`, + onOk() { + removeGroupMember(memberUrn); + }, + onCancel() {}, + okText: 'Yes', + maskClosable: true, + closable: true, + }); + }; + + const relationships = membersData && membersData.corpGroup?.relationships; + const total = relationships?.total || 0; + const groupMembers = relationships?.relationships?.map((rel) => rel.entity as CorpUser) || []; + + const getItems = (urnID: string): MenuProps['items'] => { + return [ + { + key: 'make', + disabled: true, + label: ( + + Make owner + + ), + }, + { + key: 'remove', + disabled: isExternalGroup, + onClick: () => onRemoveMember(urnID), + label: ( + + Remove from Group + + ), + }, + ]; + }; + + return ( + <> + + + + Add Member + + + + {groupMembers.length === 0 && } + {groupMembers + ? 
groupMembers.map((item) => { + const entityUrn = entityRegistry.getEntityUrl(EntityType.CorpUser, item.urn); + return ( + + + + + + {entityRegistry.getDisplayName(EntityType.CorpUser, item)} + + + + + + + + + + + + ); + }) + : null} + + + + + {isEditingMembers && ( + setIsEditingMembers(false)} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/group/GroupMembersSidebarSectionContent.tsx b/datahub-web-react/src/app/entityV2/group/GroupMembersSidebarSectionContent.tsx new file mode 100644 index 00000000000000..4ac155146de4f4 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupMembersSidebarSectionContent.tsx @@ -0,0 +1,41 @@ +import React, { useState } from 'react'; +import { Typography } from 'antd'; +import { CorpUser, EntityRelationship } from '../../../types.generated'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { TagsSection } from '../shared/SidebarStyledComponents'; +import { ShowMoreSection } from '../shared/sidebarSection/ShowMoreSection'; +import { GroupMemberLink } from './GroupMemberLink'; + +type Props = { + relationships: Array; +}; +const DEFAULT_MAX_ENTITIES_TO_SHOW = 4; + +export default function GroupMembersSidebarSectionContent({ relationships }: Props) { + const [entityCount, setEntityCount] = useState(DEFAULT_MAX_ENTITIES_TO_SHOW); + + const entityRegistry = useEntityRegistry(); + const relationshipsCount = relationships?.length || 0; + return ( + <> + + {relationships.length === 0 && ( + No members yet. 
+ )} + {relationships.length > 0 && + relationships.map((item, index) => { + const user = item.entity as CorpUser; + return index < entityCount && ; + })} + + {relationshipsCount > entityCount && ( + + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/group/GroupOwnerSidebarSectionContent.tsx b/datahub-web-react/src/app/entityV2/group/GroupOwnerSidebarSectionContent.tsx new file mode 100644 index 00000000000000..148e19d815d327 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupOwnerSidebarSectionContent.tsx @@ -0,0 +1,63 @@ +import React, { useState } from 'react'; +import { Typography } from 'antd'; +import { EntityType, Ownership } from '../../../types.generated'; +import { ExpandedOwner } from '../shared/components/styled/ExpandedOwner/ExpandedOwner'; +import { EditOwnersModal } from '../shared/containers/profile/sidebar/Ownership/EditOwnersModal'; +import { ShowMoreSection } from '../shared/sidebarSection/ShowMoreSection'; +import { TagsSection } from '../shared/SidebarStyledComponents'; + +type Props = { + ownership: Ownership; + refetch: () => Promise; + urn: string; + showAddOwnerModal: boolean; + setShowAddOwnerModal: (showAddOwnerModal) => void; +}; + +const DEFAULT_MAX_ENTITIES_TO_SHOW = 4; + +export default function GroupOwnerSidebarSectionContent({ + urn, + ownership, + refetch, + showAddOwnerModal, + setShowAddOwnerModal, +}: Props) { + const [entityCount, setEntityCount] = useState(DEFAULT_MAX_ENTITIES_TO_SHOW); + const ownershipCount = ownership?.owners?.length || 0; + const ownersEmpty = !ownership?.owners?.length; + + return ( + <> + + {ownersEmpty && ( + No group owners added yet. 
+ )} + {ownership && + ownership?.owners?.map( + (owner, index) => + index < entityCount && , + )} + + {ownershipCount > entityCount && ( + + )} + {showAddOwnerModal && ( + { + setShowAddOwnerModal(false); + }} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/group/GroupProfile.tsx b/datahub-web-react/src/app/entityV2/group/GroupProfile.tsx new file mode 100644 index 00000000000000..8056d72535a6d3 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupProfile.tsx @@ -0,0 +1,224 @@ +import React, { useContext, useState } from 'react'; +import { Col } from 'antd'; +import { matchPath } from 'react-router'; +import { useLocation } from 'react-router-dom'; +import styled from 'styled-components/macro'; +import { ReadOutlined } from '@ant-design/icons'; +import colors from '@src/alchemy-components/theme/foundations/colors'; +import { PageRoutes } from '../../../conf/Global'; +import { useGetGroupQuery } from '../../../graphql/group.generated'; +import { OriginType, EntityRelationshipsResult, Ownership, EntityType } from '../../../types.generated'; +import { EntityContext } from '../../entity/shared/EntityContext'; +import { EntityHead } from '../../shared/EntityHead'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { Message } from '../../shared/Message'; +import GroupMembers from './GroupMembers'; +import { RoutedTabs } from '../../shared/RoutedTabs'; +import GroupSidebar from './GroupSidebar'; +import { GroupAssets } from './GroupAssets'; +import { ErrorSection } from '../../shared/error/ErrorSection'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import NonExistentEntityPage from '../shared/entity/NonExistentEntityPage'; +import CompactContext from '../../shared/CompactContext'; +import { StyledEntitySidebarContainer, StyledSidebar } from '../shared/containers/profile/sidebar/EntityProfileSidebar'; +import EntitySidebarSectionsTab from 
'../shared/containers/profile/sidebar/EntitySidebarSectionsTab'; +import EntitySidebarContext from '../../sharedV2/EntitySidebarContext'; +import SidebarCollapsibleHeader from '../shared/containers/profile/sidebar/SidebarCollapsibleHeader'; +import { EntitySidebarTabs } from '../shared/containers/profile/sidebar/EntitySidebarTabs'; +import { REDESIGN_COLORS } from '../shared/constants'; + +const messageStyle = { marginTop: '10%' }; + +export enum TabType { + Assets = 'Owner Of', + Members = 'Members', +} + +const ENABLED_TAB_TYPES = [TabType.Assets, TabType.Members]; + +const MEMBER_PAGE_SIZE = 15; + +/** + * Styled Components + */ +const GroupProfileWrapper = styled.div` + &&& .ant-tabs-nav { + margin: 0; + } + + background-color: ${REDESIGN_COLORS.WHITE}; + border-radius: 8px; + overflow: hidden; + height: 100%; + display: flex; + &&& .ant-tabs > .ant-tabs-nav .ant-tabs-nav-wrap { + padding-left: 15px; + } +`; + +const ContentContainer = styled.div<{ isVisible: boolean }>` + flex: 1; + ${(props) => props.isVisible && `border-right: 1px solid ${REDESIGN_COLORS.SIDE_BAR_BORDER_RIGHT};`} + overflow: inherit; +`; + +const TabsContainer = styled.div``; + +const Tabs = styled.div``; + +type Props = { + urn: string; +}; + +const defaultTabDisplayConfig = { + visible: (_, _1) => true, + enabled: (_, _1) => true, +}; + +/** + * Responsible for reading & writing groups. + * + * TODO: Add use of apollo cache to improve fetching performance. 
+ */ +export default function GroupProfile({ urn }: Props) { + const entityRegistry = useEntityRegistry(); + const location = useLocation(); + const isCompact = React.useContext(CompactContext); + const isInSearch = matchPath(location.pathname, PageRoutes.SEARCH_RESULTS) !== null; + const { loading, error, data, refetch } = useGetGroupQuery({ variables: { urn, membersCount: MEMBER_PAGE_SIZE } }); + + const groupMemberRelationships = data?.corpGroup?.relationships as EntityRelationshipsResult; + const isExternalGroup: boolean = data?.corpGroup?.origin?.type === OriginType.External; + const externalGroupType: string = data?.corpGroup?.origin?.externalType || 'outside DataHub'; + const groupName = data?.corpGroup ? entityRegistry.getDisplayName(EntityType.CorpGroup, data.corpGroup) : undefined; + + const finalTabs = [ + { + name: 'About', + icon: ReadOutlined, + component: EntitySidebarSectionsTab, + display: { + ...defaultTabDisplayConfig, + }, + }, + ]; + + const [selectedTabName, setSelectedTabName] = useState(finalTabs[0].name); + const selectedTab = finalTabs.find((tab) => tab.name === selectedTabName); + const { width, isClosed } = useContext(EntitySidebarContext); + + const getTabs = () => { + return [ + { + name: TabType.Assets, + path: TabType.Assets.toLocaleLowerCase(), + content: , + display: { + enabled: () => true, + }, + }, + { + name: TabType.Members, + path: TabType.Members.toLocaleLowerCase(), + content: ( + { + setTimeout(() => refetch(), 3000); + }} + /> + ), + display: { + enabled: () => true, + }, + }, + ].filter((tab) => ENABLED_TAB_TYPES.includes(tab.name)); + }; + + const defaultTabPath = getTabs() && getTabs()?.length > 0 ? 
getTabs()[0].path : ''; + const onTabChange = () => null; + + // Side bar data + const sidebarData = { + photoUrl: undefined, + avatarName: + data?.corpGroup?.properties?.displayName || + data?.corpGroup?.name || + data?.corpGroup?.info?.displayName || + undefined, + name: groupName, + email: data?.corpGroup?.editableProperties?.email || data?.corpGroup?.properties?.email || undefined, + slack: data?.corpGroup?.editableProperties?.slack || data?.corpGroup?.properties?.slack || undefined, + aboutText: + data?.corpGroup?.editableProperties?.description || data?.corpGroup?.properties?.description || undefined, + groupMemberRelationships: groupMemberRelationships as EntityRelationshipsResult, + groupOwnership: data?.corpGroup?.ownership as Ownership, + isExternalGroup, + externalGroupType, + urn, + }; + + if (data?.corpGroup?.exists === false) { + return ; + } + + if (isCompact) { + return ( + + + + + {!isClosed && } + + + + setSelectedTabName(name)} + /> + + + + + ); + } + + return ( + {}, + dataNotCombinedWithSiblings: null, + baseEntity: null, + }} + > + + {error && } + {loading && } + {data && data?.corpGroup && ( + +
+ + + + + + + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/group/GroupProfileInfoCard.tsx b/datahub-web-react/src/app/entityV2/group/GroupProfileInfoCard.tsx new file mode 100644 index 00000000000000..8255373db2ae77 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupProfileInfoCard.tsx @@ -0,0 +1,144 @@ +import React, { useState } from 'react'; +import { Col, message } from 'antd'; +import styled from 'styled-components'; +import SectionActionButton from '../shared/containers/profile/sidebar/SectionActionButton'; +import { REDESIGN_COLORS } from '../shared/constants'; +import CustomAvatar from '../../shared/avatar/CustomAvatar'; +import { + CustomAvatarContainer, + EditProfileButtonContainer, + WhiteEditOutlinedIconStyle, + GroupInfo, +} from '../shared/SidebarStyledComponents'; +import { EntityRelationshipsResult, Ownership } from '../../../types.generated'; +import GroupEditModal from './GroupEditModal'; +import { useUpdateNameMutation } from '../../../graphql/mutations.generated'; +import { GroupBasicInfoSection } from './GroupBasicInfoSection'; +import { GroupInfoHeaderSection } from './GroupInfoHeaderSection'; +import { useUserContext } from '../../context/useUserContext'; + +export type SidebarData = { + photoUrl: string | undefined; + avatarName: string | undefined; + name: string | undefined; + email: string | undefined; + slack: string | undefined; + aboutText: string | undefined; + groupMemberRelationships: EntityRelationshipsResult; + groupOwnership: Ownership; + isExternalGroup: boolean; + externalGroupType: string | undefined; + urn: string; +}; + +const AVATAR_STYLE = { + borderRadius: '9px', + zIndex: '2', + height: '36px', + width: '36px', + backgroundColor: REDESIGN_COLORS.AVATAR_STYLE_WHITE_BACKGROUND, +}; + +const AvatarWithTitleContainer = styled.div` + display: flex; + padding: 10px; + background: ${REDESIGN_COLORS.GROUP_AVATAR_STYLE_GRADIENT}}; + gap: 0.5rem; +`; + +type Props = { + sidebarData: SidebarData; + 
refetch: () => Promise; +}; + +export const GroupProfileInfoCard = ({ sidebarData, refetch }: Props) => { + const { + avatarName, + name, + groupMemberRelationships, + email, + photoUrl, + slack, + isExternalGroup, + externalGroupType, + urn, + } = sidebarData; + + const [updateName] = useUpdateNameMutation(); + const [editGroupModal, showEditGroupModal] = useState(false); + + const me = useUserContext(); + const canEditGroupName = me?.platformPrivileges?.manageIdentities; + + // Update Group Title + // eslint-disable-next-line @typescript-eslint/no-shadow + const handleTitleUpdate = async (name: string) => { + await updateName({ variables: { input: { name, urn } } }) + .then(() => { + message.success({ content: 'Name Updated', duration: 2 }); + refetch(); + }) + .catch((e: unknown) => { + message.destroy(); + if (e instanceof Error) { + message.error({ content: `Failed to update name: \n ${e.message || ''}`, duration: 3 }); + } + }); + }; + + const getEditModalData = { + name, + urn, + email, + slack, + }; + + return ( + <> + + + + + + + + + + + } + onClick={(event) => { + showEditGroupModal(true); + event.stopPropagation(); + }} + /> + + + + + + {/* Modal */} + showEditGroupModal(false)} + onSave={() => { + refetch(); + }} + editModalData={getEditModalData} + /> + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/group/GroupSidebar.tsx b/datahub-web-react/src/app/entityV2/group/GroupSidebar.tsx new file mode 100644 index 00000000000000..f1771b3544e940 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupSidebar.tsx @@ -0,0 +1,64 @@ +import { message } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { useUpdateCorpGroupPropertiesMutation } from '../../../graphql/group.generated'; +import { SideBar, Content } from '../shared/SidebarStyledComponents'; +import { AboutSidebarSection } from '../shared/sidebarSection/AboutSidebarSection'; +import { REDESIGN_COLORS } from '../shared/constants'; +import { 
GroupProfileInfoCard, SidebarData } from './GroupProfileInfoCard'; +import { GroupSidebarOwnersSection } from './GroupSidebarOwnersSection'; +import { GroupSidebarMembersSection } from './GroupSidebarMembersSection'; + +type Props = { + sidebarData: SidebarData; + refetch: () => Promise; +}; + +export const MemberCount = styled.div` + font-size: 10px; + color: ${REDESIGN_COLORS.WHITE}; + font-weight: 400; + text-align: left; +`; + +/** + * Responsible for reading & writing users. + */ +export default function GroupSidebar({ sidebarData, refetch }: Props) { + const { aboutText, groupMemberRelationships, urn, groupOwnership: ownership } = sidebarData; + const [updateCorpGroupPropertiesMutation] = useUpdateCorpGroupPropertiesMutation(); + + // About Text save + const onSaveAboutMe = (inputString) => { + updateCorpGroupPropertiesMutation({ + variables: { + urn: urn || '', + input: { + description: inputString, + }, + }, + }) + .then(() => { + message.success({ + content: `Changes saved.`, + duration: 3, + }); + refetch(); + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to Save changes!: \n ${e.message || ''}`, duration: 3 }); + }); + }; + + return ( + + + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/group/GroupSidebarMembersSection.tsx b/datahub-web-react/src/app/entityV2/group/GroupSidebarMembersSection.tsx new file mode 100644 index 00000000000000..1359c82cea64b5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupSidebarMembersSection.tsx @@ -0,0 +1,20 @@ +import React from 'react'; +import GroupMembersSideBarSectionContent from './GroupMembersSidebarSectionContent'; +import { SidebarSection } from '../shared/containers/profile/sidebar/SidebarSection'; +import { EntityRelationshipsResult } from '../../../types.generated'; + +type Props = { + groupMemberRelationships: EntityRelationshipsResult; +}; + +export const GroupSidebarMembersSection = ({ groupMemberRelationships }: Props) => { + return ( + + } 
+ /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/group/GroupSidebarOwnersSection.tsx b/datahub-web-react/src/app/entityV2/group/GroupSidebarOwnersSection.tsx new file mode 100644 index 00000000000000..6e659be91f2097 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/GroupSidebarOwnersSection.tsx @@ -0,0 +1,41 @@ +import React, { useState } from 'react'; +import { PlusOutlined } from '@ant-design/icons'; +import GroupOwnerSidebarSectionContent from './GroupOwnerSidebarSectionContent'; +import SectionActionButton from '../shared/containers/profile/sidebar/SectionActionButton'; +import { SidebarSection } from '../shared/containers/profile/sidebar/SidebarSection'; +import { Ownership } from '../../../types.generated'; + +type Props = { + ownership: Ownership; + refetch: () => Promise; + urn: string; +}; + +export const GroupSidebarOwnersSection = ({ ownership, refetch, urn }: Props) => { + const [showAddOwnerModal, setShowAddOwnerModal] = useState(false); + + return ( + + } + extra={ + } + onClick={(event) => { + setShowAddOwnerModal(true); + event.stopPropagation(); + }} + /> + } + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/group/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/group/preview/Preview.tsx new file mode 100644 index 00000000000000..b875bd4a55efc2 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/group/preview/Preview.tsx @@ -0,0 +1,111 @@ +import React from 'react'; +import { Tag, Typography } from 'antd'; +import { Link } from 'react-router-dom'; +import styled from 'styled-components'; + +import { EntityType } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { ANTD_GRAY } from '../../shared/constants'; +import { IconStyleType } from '../../Entity'; +import NoMarkdownViewer from '../../shared/components/styled/StripMarkdownText'; +import SearchTextHighlighter from '../../../searchV2/matches/SearchTextHighlighter'; + +const PreviewContainer = 
styled.div` + margin-bottom: 4px; + display: flex; + width: 100%; + justify-content: space-between; + align-items: center; +`; + +const PlatformInfo = styled.div` + margin-bottom: 8px; + display: flex; + align-items: center; + height: 24px; +`; + +const TitleContainer = styled.div` + margin-bottom: 8px; +`; + +const PreviewImage = styled.div` + max-height: 18px; + width: auto; + object-fit: contain; + margin-right: 10px; + background-color: transparent; +`; + +const EntityTitle = styled(Typography.Text)` + &&& { + margin-bottom: 0; + font-size: 16px; + font-weight: 600; + vertical-align: middle; + } +`; + +const PlatformText = styled(Typography.Text)` + font-size: 12px; + line-height: 20px; + font-weight: 700; + color: ${ANTD_GRAY[7]}; +`; + +const DescriptionContainer = styled.div` + margin-top: 5px; +`; + +const MemberCountContainer = styled.span` + margin-left: 12px; + margin-right: 12px; +`; + +export const Preview = ({ + urn, + name, + description, + membersCount, +}: { + urn: string; + name: string; + description?: string | null; + membersCount?: number; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + const url = entityRegistry.getEntityUrl(EntityType.CorpGroup, urn); + + return ( + +
+ + + + + {entityRegistry.getIcon(EntityType.CorpGroup, 20, IconStyleType.HIGHLIGHT)} + + {entityRegistry.getEntityName(EntityType.CorpGroup)} + + + {name ? : urn} + + {membersCount} members + + + + + {description && description.length > 0 && ( + + } + > + {description} + + + )} +
+
+ ); +}; diff --git a/datahub-web-react/src/app/entityV2/mlFeature/MLFeatureEntity.tsx b/datahub-web-react/src/app/entityV2/mlFeature/MLFeatureEntity.tsx new file mode 100644 index 00000000000000..3ff8b2b4c01bce --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeature/MLFeatureEntity.tsx @@ -0,0 +1,251 @@ +import { DotChartOutlined, PartitionOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { useGetMlFeatureQuery } from '../../../graphql/mlFeature.generated'; +import { EntityType, MlFeature, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { 
DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { FeatureTableTab } from '../shared/tabs/ML/MlFeatureFeatureTableTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { SidebarTitleActionType, getDataProduct, isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([ + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.RAISE_INCIDENT, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub MLFeature entity. + */ +export class MLFeatureEntity implements Entity { + type: EntityType = EntityType.Mlfeature; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'mlFeature'; + + getPathName = () => 'features'; + + getEntityName = () => 'Feature'; + + getCollectionName = () => 'Features'; + + getOverridePropertiesFromEntity = (feature?: MlFeature | null): GenericEntityProperties => { + return { + // eslint-disable-next-line + platform: feature?.['featureTables']?.relationships?.[0]?.entity?.platform, + }; + }; + + useEntityQuery = useGetMlFeatureQuery; + + renderProfile = (urn: string) => ( + + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + { + component: SidebarTagsSection, + }, + { + 
component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Lineage', + component: LineageTab, + description: "View this data asset's upstream and downstream dependencies", + icon: PartitionOutlined, + properties: { + actionType: SidebarTitleActionType.LineageExplore, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (previewType: PreviewType, data: MlFeature) => { + const genericProperties = this.getGenericEntityProperties(data); + // eslint-disable-next-line + const platform = data?.['featureTables']?.relationships?.[0]?.entity?.platform; + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as MlFeature; + const genericProperties = this.getGenericEntityProperties(data); + // eslint-disable-next-line + const platform = data?.['featureTables']?.relationships?.[0]?.entity?.platform; + return ( + + ); + }; + + displayName = (data: MlFeature) => { + return data.name || data.urn; + }; + + getGenericEntityProperties = (mlFeature: MlFeature) => { + return getDataForEntityType({ + data: mlFeature, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + getLineageVizConfig = (entity: MlFeature) => { + return { + urn: entity.urn, + name: entity.name, + type: EntityType.Mlfeature, + // eslint-disable-next-line + icon: entity?.['featureTables']?.relationships?.[0]?.entity?.platform?.properties?.logoUrl || undefined, + // eslint-disable-next-line + platform: entity?.['featureTables']?.relationships?.[0]?.entity?.platform, + }; + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + 
EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.LINEAGE, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/mlFeature/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/mlFeature/preview/Preview.tsx new file mode 100644 index 00000000000000..f0753bebb46600 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeature/preview/Preview.tsx @@ -0,0 +1,72 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { BrowsePathV2, DataPlatform, DataProduct, EntityPath, EntityType, Owner } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { capitalizeFirstLetterOnly } from '../../../shared/textUtil'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const Preview = ({ + urn, + data, + name, + platformInstanceId, + featureNamespace, + description, + dataProduct, + owners, + platform, + degree, + paths, + isOutputPort, + headerDropdownItems, + previewType, + browsePaths, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + featureNamespace: string; + platformInstanceId?: string; + description?: string | null; + dataProduct?: DataProduct | null; + owners?: Array | null; + platform?: DataPlatform | null | undefined; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + headerDropdownItems?: Set; + previewType?: PreviewType; + browsePaths?: BrowsePathV2 | undefined; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + const platformName = platform?.properties?.displayName || capitalizeFirstLetterOnly(platform?.name); + const platformTitle = + platformName && featureNamespace + ? 
`${platformName} > ${featureNamespace}` + : platformName || featureNamespace || ''; + + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/mlFeatureTable/MLFeatureTableEntity.tsx b/datahub-web-react/src/app/entityV2/mlFeatureTable/MLFeatureTableEntity.tsx new file mode 100644 index 00000000000000..e304b221b5d492 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeatureTable/MLFeatureTableEntity.tsx @@ -0,0 +1,229 @@ +import { DotChartOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { useGetMlFeatureTableQuery } from '../../../graphql/mlFeatureTable.generated'; +import { EntityType, MlFeatureTable, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import { getDataForEntityType } from 
'../shared/containers/profile/utils'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { getDataProduct, isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; +import MlFeatureTableFeatures from './profile/features/MlFeatureTableFeatures'; +import Sources from './profile/Sources'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([EntityMenuItems.UPDATE_DEPRECATION, EntityMenuItems.ANNOUNCE]); + +/** + * Definition of the DataHub MLFeatureTable entity. + */ +export class MLFeatureTableEntity implements Entity { + type: EntityType = EntityType.MlfeatureTable; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'mlFeatureTable'; + + getPathName = () => 'featureTables'; + + getEntityName = () => 'Feature Table'; + + getCollectionName = () => 'Feature Tables'; + + getOverridePropertiesFromEntity = (_?: MlFeatureTable | null): GenericEntityProperties => { + return {}; + }; + + useEntityQuery = useGetMlFeatureTableQuery; + + renderProfile = (urn: string) => ( + + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: 
DataProductSection, + }, + { + component: SidebarTagsSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (previewType: PreviewType, data: MlFeatureTable) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as MlFeatureTable; + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + getLineageVizConfig = (entity: MlFeatureTable) => { + return { + urn: entity.urn, + name: entity.name, + type: EntityType.MlfeatureTable, + icon: entity.platform.properties?.logoUrl || undefined, + platform: entity.platform, + deprecation: entity?.deprecation, + }; + }; + + displayName = (data: MlFeatureTable) => { + return data.name || data.urn; + }; + + getGenericEntityProperties = (mlFeatureTable: MlFeatureTable) => { + return getDataForEntityType({ + data: mlFeatureTable, + entityType: this.type, + getOverrideProperties: (data) => data, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.LINEAGE, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/mlFeatureTable/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/mlFeatureTable/preview/Preview.tsx new file mode 100644 index 00000000000000..e72f781a89b45d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeatureTable/preview/Preview.tsx @@ -0,0 +1,63 @@ +import { 
GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { DataProduct, EntityPath, EntityType, Owner } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const Preview = ({ + urn, + data, + name, + description, + owners, + logoUrl, + platformName, + dataProduct, + platformInstanceId, + degree, + paths, + isOutputPort, + headerDropdownItems, + previewType, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + description?: string | null; + owners?: Array | null; + logoUrl?: string | null; + platformName?: string | null; + dataProduct?: DataProduct | null; + platformInstanceId?: string; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + headerDropdownItems?: Set; + previewType?: PreviewType; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/Sources.tsx b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/Sources.tsx new file mode 100644 index 00000000000000..71145b6b8f10ba --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/Sources.tsx @@ -0,0 +1,81 @@ +import { List, Typography } from 'antd'; +import React, { useMemo } from 'react'; +import styled from 'styled-components'; +import { GetMlFeatureTableQuery } from '../../../../graphql/mlFeatureTable.generated'; +import { Dataset, EntityType } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { PreviewType } from '../../Entity'; +import { useBaseEntity } from '../../../entity/shared/EntityContext'; +import { notEmpty } from '../../shared/utils'; + +const 
ViewRawButtonContainer = styled.div` + display: flex; + justify-content: flex-end; +`; + +export default function SourcesView() { + const entityRegistry = useEntityRegistry(); + const baseEntity = useBaseEntity(); + const featureTable = baseEntity?.mlFeatureTable; + + const features = useMemo( + () => + featureTable?.properties && + (featureTable?.properties?.mlFeatures || featureTable?.properties?.mlPrimaryKeys) + ? [ + ...(featureTable?.properties?.mlPrimaryKeys || []), + ...(featureTable?.properties?.mlFeatures || []), + ].filter(notEmpty) + : [], + [featureTable?.properties], + ); + + const sources = useMemo( + () => + features?.reduce((accumulator: Array, feature) => { + if (feature.__typename === 'MLFeature' && feature.properties?.sources) { + // eslint-disable-next-line array-callback-return + feature.properties?.sources.map((source: Dataset | null) => { + if (source && accumulator.findIndex((dataset) => dataset.urn === source?.urn) === -1) { + accumulator.push(source); + } + }); + } else if (feature.__typename === 'MLPrimaryKey' && feature.properties?.sources) { + // eslint-disable-next-line array-callback-return + feature.properties?.sources.map((source: Dataset | null) => { + if (source && accumulator.findIndex((dataset) => dataset.urn === source?.urn) === -1) { + accumulator.push(source); + } + }); + } + return accumulator; + }, []), + [features], + ); + + return ( + <> +
+ + { + // ToDo: uncomment below these after refactored Lineage to support dynamic entities + /* */ + } + +
+ Sources} + renderItem={(item) => ( + + {entityRegistry.renderPreview(item?.type || EntityType.Dataset, PreviewType.PREVIEW, item)} + + )} + /> + + ); +} diff --git a/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureDataTypeIcon.tsx b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureDataTypeIcon.tsx new file mode 100644 index 00000000000000..0d3896949a948a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureDataTypeIcon.tsx @@ -0,0 +1,95 @@ +import { + UnorderedListOutlined, + FieldStringOutlined, + ClockCircleOutlined, + QuestionOutlined, + StopOutlined, + OrderedListOutlined, + NumberOutlined, + AudioOutlined, + VideoCameraOutlined, + FileImageOutlined, + FieldBinaryOutlined, + LineChartOutlined, +} from '@ant-design/icons'; +import { Typography } from 'antd'; +import { Tooltip } from '@components'; +import React, { FC } from 'react'; +import { VscFileBinary } from 'react-icons/vsc'; +import styled from 'styled-components'; +import { capitalizeFirstLetter } from '../../../../shared/textUtil'; +import { MlFeatureDataType } from '../../../../../types.generated'; + +const TypeIconContainer = styled.div` + display: flex; + flex-direction: column; + justify-content: center; + text-align: center; + margin-top: 2.5px; + width: 70px; +`; + +const TypeSubtitle = styled(Typography.Text)<{ hasicon?: string }>` + font-size: 8px; + text-align: center; + text-transform: uppercase; + ${(props) => (props.hasicon ? 
'' : 'margin-top: 4px;')} +`; + +const IconSpan = styled.span` + font-size: 18px; +`; + +const DATA_TYPE_ICON_MAP: Record | null; size: number; text: string }> = { + [MlFeatureDataType.Byte]: { + icon: () => ( + + + + ), + size: 18, + text: 'Byte', + }, + [MlFeatureDataType.Time]: { icon: ClockCircleOutlined, size: 18, text: 'Time' }, + [MlFeatureDataType.Set]: { icon: UnorderedListOutlined, size: 18, text: 'Set' }, + [MlFeatureDataType.Unknown]: { icon: QuestionOutlined, size: 16, text: 'Unknown' }, + [MlFeatureDataType.Map]: { icon: UnorderedListOutlined, size: 14, text: 'Map' }, + [MlFeatureDataType.Useless]: { icon: StopOutlined, size: 18, text: 'Useless' }, + [MlFeatureDataType.Nominal]: { icon: NumberOutlined, size: 14, text: 'Nominal' }, + [MlFeatureDataType.Ordinal]: { icon: OrderedListOutlined, size: 18, text: 'Ordinal' }, + [MlFeatureDataType.Binary]: { icon: FieldBinaryOutlined, size: 16, text: 'Binary' }, + [MlFeatureDataType.Count]: { icon: NumberOutlined, size: 14, text: 'Count' }, + [MlFeatureDataType.Interval]: { icon: ClockCircleOutlined, size: 16, text: 'Interval' }, + [MlFeatureDataType.Image]: { icon: FileImageOutlined, size: 16, text: 'Image' }, + [MlFeatureDataType.Video]: { icon: VideoCameraOutlined, size: 16, text: 'Video' }, + [MlFeatureDataType.Audio]: { icon: AudioOutlined, size: 16, text: 'Audio' }, + [MlFeatureDataType.Text]: { icon: FieldStringOutlined, size: 18, text: 'Text' }, + [MlFeatureDataType.Sequence]: { icon: OrderedListOutlined, size: 16, text: 'Sequence' }, + [MlFeatureDataType.Continuous]: { icon: LineChartOutlined, size: 16, text: 'Continuous' }, +}; + +type Props = { + dataType?: MlFeatureDataType; +}; + +export default function MlFeatureDataTypeIcon({ dataType }: Props) { + const { icon: Icon, size, text } = DATA_TYPE_ICON_MAP[dataType || MlFeatureDataType.Unknown]; + + // eslint-disable-next-line react/prop-types + const NativeDataTypeTooltip = ({ children }) => ( + + {children} + + ); + + return ( + + + {Icon && } + + 
{text} + + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx new file mode 100644 index 00000000000000..1eb92daa896f45 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx @@ -0,0 +1,23 @@ +import React from 'react'; + +import { MlPrimaryKey, MlFeature } from '../../../../../types.generated'; +import { GetMlFeatureTableQuery } from '../../../../../graphql/mlFeatureTable.generated'; +import { useBaseEntity } from '../../../../entity/shared/EntityContext'; +import { notEmpty } from '../../../shared/utils'; +import TableOfMlFeatures from './TableOfMlFeatures'; + +export default function MlFeatureTableFeatures() { + const baseEntity = useBaseEntity(); + const featureTable = baseEntity?.mlFeatureTable; + + const features = ( + featureTable?.properties && (featureTable?.properties?.mlFeatures || featureTable?.properties?.mlPrimaryKeys) + ? 
[ + ...(featureTable?.properties?.mlPrimaryKeys || []), + ...(featureTable?.properties?.mlFeatures || []), + ].filter(notEmpty) + : [] + ) as Array; + + return ; +} diff --git a/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/TableOfMlFeatures.tsx b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/TableOfMlFeatures.tsx new file mode 100644 index 00000000000000..24fcc622030843 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlFeatureTable/profile/features/TableOfMlFeatures.tsx @@ -0,0 +1,160 @@ +import React, { useState } from 'react'; +import { Table, Typography } from 'antd'; +import { CheckSquareOutlined } from '@ant-design/icons'; +import { AlignType } from 'rc-table/lib/interface'; +import styled from 'styled-components'; +import { Link } from 'react-router-dom'; + +import MlFeatureDataTypeIcon from './MlFeatureDataTypeIcon'; +import { MlFeatureDataType, MlPrimaryKey, MlFeature } from '../../../../../types.generated'; +import { useRefetch } from '../../../../entity/shared/EntityContext'; +import TagTermGroup from '../../../../sharedV2/tags/TagTermGroup'; +import SchemaDescriptionField from '../../../dataset/profile/schema/components/SchemaDescriptionField'; +import { useUpdateDescriptionMutation } from '../../../../../graphql/mutations.generated'; +import { useEntityRegistry } from '../../../../useEntityRegistry'; + +const FeaturesContainer = styled.div` + margin-bottom: 100px; +`; + +const defaultColumns = [ + { + title: 'Type', + dataIndex: 'dataType', + key: 'dataType', + width: 100, + align: 'left' as AlignType, + render: (dataType: MlFeatureDataType) => { + return ; + }, + }, +]; + +type Props = { + features: Array; +}; + +export default function TableOfMlFeatures({ features }: Props) { + const refetch = useRefetch(); + const [updateDescription] = useUpdateDescriptionMutation(); + const entityRegistry = useEntityRegistry(); + + const [tagHoveredIndex, setTagHoveredIndex] = useState(undefined); + const 
[expandedRows, setExpandedRows] = useState({}); + + const onTagTermCell = (record: any, rowIndex: number | undefined) => ({ + onMouseEnter: () => { + setTagHoveredIndex(`${record.urn}-${rowIndex}`); + }, + onMouseLeave: () => { + setTagHoveredIndex(undefined); + }, + }); + + const nameColumn = { + title: 'Name', + dataIndex: 'name', + key: 'name', + width: 100, + render: (name: string, feature: MlFeature | MlPrimaryKey) => ( + + {name} + + ), + }; + + const descriptionColumn = { + title: 'Description', + dataIndex: 'description', + key: 'description', + render: (_, feature: MlFeature | MlPrimaryKey, index: number) => ( + { + setExpandedRows((prev) => ({ ...prev, [index]: expanded })); + }} + expanded={!!expandedRows[index]} + description={feature?.editableProperties?.description || feature?.properties?.description || ''} + original={feature?.properties?.description} + isEdited={!!feature?.editableProperties?.description} + onUpdate={(updatedDescription) => + updateDescription({ + variables: { + input: { + description: updatedDescription, + resourceUrn: feature.urn, + }, + }, + }).then(refetch) + } + /> + ), + width: 300, + }; + + const tagColumn = { + width: 125, + title: 'Tags', + dataIndex: 'tags', + key: 'tags', + render: (_, feature: MlFeature | MlPrimaryKey, rowIndex: number) => ( + setTagHoveredIndex(undefined)} + entityUrn={feature.urn} + entityType={feature.type} + refetch={refetch} + /> + ), + onCell: onTagTermCell, + }; + + const termColumn = { + width: 125, + title: 'Terms', + dataIndex: 'glossaryTerms', + key: 'glossaryTerms', + render: (_, feature: MlFeature | MlPrimaryKey, rowIndex: number) => ( + setTagHoveredIndex(undefined)} + entityUrn={feature.urn} + entityType={feature.type} + refetch={refetch} + /> + ), + onCell: onTagTermCell, + }; + + const primaryKeyColumn = { + title: 'Primary Key', + dataIndex: 'primaryKey', + key: 'primaryKey', + render: (_: any, record: MlFeature | MlPrimaryKey) => + record.__typename === 'MLPrimaryKey' ? 
: null, + width: 50, + }; + + const allColumns = [...defaultColumns, nameColumn, descriptionColumn, tagColumn, termColumn, primaryKeyColumn]; + + return ( + + {features && features.length > 0 && ( +
`${record.dataType}-${record.name}`} + expandable={{ defaultExpandAllRows: true, expandRowByClick: true }} + pagination={false} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/mlModel/MLModelEntity.tsx b/datahub-web-react/src/app/entityV2/mlModel/MLModelEntity.tsx new file mode 100644 index 00000000000000..e0839929b9f556 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModel/MLModelEntity.tsx @@ -0,0 +1,223 @@ +import { CodeSandboxOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { useGetMlModelQuery } from '../../../graphql/mlModel.generated'; +import { EntityType, MlModel, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import 
SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; +import MLModelGroupsTab from './profile/MLModelGroupsTab'; +import MLModelSummary from './profile/MLModelSummary'; +import MlModelFeaturesTab from './profile/MlModelFeaturesTab'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([ + EntityMenuItems.SHARE, + EntityMenuItems.UPDATE_DEPRECATION, + EntityMenuItems.RAISE_INCIDENT, + EntityMenuItems.ANNOUNCE, +]); + +/** + * Definition of the DataHub MlModel entity. + */ +export class MLModelEntity implements Entity { + type: EntityType = EntityType.Mlmodel; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'mlModel'; + + getPathName = () => 'mlModels'; + + getEntityName = () => 'ML Model'; + + getCollectionName = () => 'ML Models'; + + getOverridePropertiesFromEntity = (mlModel?: MlModel | null): GenericEntityProperties => { + return { + externalUrl: mlModel?.properties?.externalUrl, + }; + }; + + useEntityQuery = useGetMlModelQuery; + + renderProfile = (urn: string) => ( + + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, 
+ { + component: SidebarTagsSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (previewType: PreviewType, data: MlModel) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as MlModel; + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + getLineageVizConfig = (entity: MlModel) => { + return { + urn: entity.urn, + name: entity.name, + type: EntityType.Mlmodel, + icon: entity.platform?.properties?.logoUrl || undefined, + platform: entity.platform, + deprecation: entity?.deprecation, + }; + }; + + displayName = (data: MlModel) => { + return data.name || data.urn; + }; + + getGenericEntityProperties = (mlModel: MlModel) => { + return getDataForEntityType({ data: mlModel, entityType: this.type, getOverrideProperties: (data) => data }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.LINEAGE, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/mlModel/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/mlModel/preview/Preview.tsx new file mode 100644 index 00000000000000..89529a9087cc86 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModel/preview/Preview.tsx @@ -0,0 +1,53 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { EntityPath, EntityType, MlModel 
} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { capitalizeFirstLetterOnly } from '../../../shared/textUtil'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; +import { getDataProduct } from '../../shared/utils'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; + +export const Preview = ({ + data, + model, + degree, + paths, + isOutputPort, + headerDropdownItems, + previewType, +}: { + data: GenericEntityProperties | null; + model: MlModel; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + headerDropdownItems?: Set; + previewType?: PreviewType; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + const genericProperties = entityRegistry.getGenericEntityProperties(EntityType.Mlmodel, model); + + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/mlModel/profile/MLModelGroupsTab.tsx b/datahub-web-react/src/app/entityV2/mlModel/profile/MLModelGroupsTab.tsx new file mode 100644 index 00000000000000..2ba9e0edd6783d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModel/profile/MLModelGroupsTab.tsx @@ -0,0 +1,48 @@ +import React from 'react'; +import { Space, Table, Typography } from 'antd'; +import Link from 'antd/lib/typography/Link'; +import { ColumnsType } from 'antd/es/table'; +import styled from 'styled-components'; + +import { EntityType, MlModelGroup } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { useBaseEntity } from '../../../entity/shared/EntityContext'; +import { GetMlModelQuery } from '../../../../graphql/mlModel.generated'; + +const TabContent = styled.div` + padding: 16px; +`; + +export default function MLModelGroupsTab() { + const baseEntity = useBaseEntity(); + const model = baseEntity?.mlModel; + + const entityRegistry = useEntityRegistry(); + + 
const propertyTableColumns: ColumnsType = [ + { + title: 'Group', + dataIndex: 'name', + render: (name, record) => { + return {name}; + }, + }, + { + title: 'Description', + dataIndex: 'description', + }, + ]; + + return ( + + + Groups +
+ + + ); +} diff --git a/datahub-web-react/src/app/entityV2/mlModel/profile/MLModelSummary.tsx b/datahub-web-react/src/app/entityV2/mlModel/profile/MLModelSummary.tsx new file mode 100644 index 00000000000000..c5dc468f5ac23d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModel/profile/MLModelSummary.tsx @@ -0,0 +1,47 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Space, Table, Typography } from 'antd'; + +import { MlHyperParam, MlMetric } from '../../../../types.generated'; +import { useBaseEntity } from '../../../entity/shared/EntityContext'; +import { GetMlModelQuery } from '../../../../graphql/mlModel.generated'; + +const TabContent = styled.div` + padding: 16px; +`; + +export default function MLModelSummary() { + const baseEntity = useBaseEntity(); + const model = baseEntity?.mlModel; + + const propertyTableColumns = [ + { + title: 'Name', + dataIndex: 'name', + width: 450, + }, + { + title: 'Value', + dataIndex: 'value', + }, + ]; + + return ( + + + Training Metrics +
+ Hyper Parameters +
+ + + ); +} diff --git a/datahub-web-react/src/app/entityV2/mlModel/profile/MlModelFeaturesTab.tsx b/datahub-web-react/src/app/entityV2/mlModel/profile/MlModelFeaturesTab.tsx new file mode 100644 index 00000000000000..81f0750a88a78b --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModel/profile/MlModelFeaturesTab.tsx @@ -0,0 +1,17 @@ +import React from 'react'; + +import { MlPrimaryKey, MlFeature } from '../../../../types.generated'; +import { useBaseEntity } from '../../../entity/shared/EntityContext'; +import { GetMlModelQuery } from '../../../../graphql/mlModel.generated'; +import TableOfMlFeatures from '../../mlFeatureTable/profile/features/TableOfMlFeatures'; + +export default function MlModelFeaturesTab() { + const entity = useBaseEntity() as GetMlModelQuery; + + const model = entity && entity.mlModel; + const features = model?.features?.relationships.map((relationship) => relationship.entity) as Array< + MlFeature | MlPrimaryKey + >; + + return ; +} diff --git a/datahub-web-react/src/app/entityV2/mlModelGroup/MLModelGroupEntity.tsx b/datahub-web-react/src/app/entityV2/mlModelGroup/MLModelGroupEntity.tsx new file mode 100644 index 00000000000000..0a2f99e02dd761 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModelGroup/MLModelGroupEntity.tsx @@ -0,0 +1,210 @@ +import { CodeSandboxOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { useGetMlModelGroupQuery } from '../../../graphql/mlModelGroup.generated'; +import { EntityType, MlModelGroup, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection 
} from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; +import ModelGroupModels from './profile/ModelGroupModels'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; + +const headerDropdownItems = new Set([EntityMenuItems.UPDATE_DEPRECATION, EntityMenuItems.ANNOUNCE]); + +/** + * Definition of the DataHub MlModelGroup entity. 
+ */ +export class MLModelGroupEntity implements Entity { + type: EntityType = EntityType.MlmodelGroup; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'mlModelGroup'; + + getPathName = () => 'mlModelGroup'; + + getEntityName = () => 'ML Group'; + + getCollectionName = () => 'ML Groups'; + + getOverridePropertiesFromEntity = (_?: MlModelGroup | null): GenericEntityProperties => { + return {}; + }; + + useEntityQuery = useGetMlModelGroupQuery; + + renderProfile = (urn: string) => ( + + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarNotesSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + { + component: SidebarTagsSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (previewType: PreviewType, data: MlModelGroup) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as MlModelGroup; + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + getLineageVizConfig = (entity: MlModelGroup) => { + return { + urn: entity.urn, + name: entity.name, + type: EntityType.MlmodelGroup, 
+ icon: entity.platform?.properties?.logoUrl || undefined, + platform: entity.platform, + deprecation: entity?.deprecation, + }; + }; + + displayName = (data: MlModelGroup) => { + return data.name || data.urn; + }; + + getGenericEntityProperties = (mlModelGroup: MlModelGroup) => { + return getDataForEntityType({ + data: mlModelGroup, + entityType: this.type, + getOverrideProperties: (data) => data, + }); + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.LINEAGE, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/mlModelGroup/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/mlModelGroup/preview/Preview.tsx new file mode 100644 index 00000000000000..31abd228e4466e --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModelGroup/preview/Preview.tsx @@ -0,0 +1,51 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { EntityPath, EntityType, MlModelGroup } from '../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { capitalizeFirstLetterOnly } from '../../../shared/textUtil'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { getDataProduct } from '../../shared/utils'; +import { EntityMenuItems } from '../../shared/EntityDropdown/EntityMenuActions'; +import { PreviewType } from '../../Entity'; + +export const Preview = ({ + data, + group, + degree, + paths, + isOutputPort, + headerDropdownItems, + previewType, +}: { + data: GenericEntityProperties | null; + group: MlModelGroup; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + headerDropdownItems?: Set; + previewType?: PreviewType; +}): JSX.Element => { + const 
entityRegistry = useEntityRegistry(); + const genericProperties = entityRegistry.getGenericEntityProperties(EntityType.MlmodelGroup, group); + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/mlModelGroup/profile/ModelGroupModels.tsx b/datahub-web-react/src/app/entityV2/mlModelGroup/profile/ModelGroupModels.tsx new file mode 100644 index 00000000000000..916256bb999f61 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlModelGroup/profile/ModelGroupModels.tsx @@ -0,0 +1,32 @@ +import { List, Space, Typography } from 'antd'; +import React from 'react'; +import { GetMlModelGroupQuery } from '../../../../graphql/mlModelGroup.generated'; +import { EntityType } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { PreviewType } from '../../Entity'; +import { useBaseEntity } from '../../../entity/shared/EntityContext'; + +export default function MLGroupModels() { + const baseEntity = useBaseEntity(); + const models = baseEntity?.mlModelGroup?.incoming?.relationships?.map((relationship) => relationship.entity) || []; + + const entityRegistry = useEntityRegistry(); + + return ( + <> + + Models} + renderItem={(item) => ( + + {entityRegistry.renderPreview(EntityType.Mlmodel, PreviewType.PREVIEW, item)} + + )} + /> + + + ); +} diff --git a/datahub-web-react/src/app/entityV2/mlPrimaryKey/MLPrimaryKeyEntity.tsx b/datahub-web-react/src/app/entityV2/mlPrimaryKey/MLPrimaryKeyEntity.tsx new file mode 100644 index 00000000000000..cb19739476ec70 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlPrimaryKey/MLPrimaryKeyEntity.tsx @@ -0,0 +1,235 @@ +import { DotChartOutlined, PartitionOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import * as React from 'react'; +import { useGetMlPrimaryKeyQuery } from '../../../graphql/mlPrimaryKey.generated'; +import { EntityType, MlPrimaryKey, SearchResult } from '../../../types.generated'; +import { GenericEntityProperties } from 
'../../entity/shared/types'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import SidebarEntityHeader from '../shared/containers/profile/sidebar/SidebarEntityHeader'; +import { SidebarGlossaryTermsSection } from '../shared/containers/profile/sidebar/SidebarGlossaryTermsSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import StatusSection from '../shared/containers/profile/sidebar/shared/StatusSection'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import SidebarStructuredProperties from '../shared/sidebarSection/SidebarStructuredProperties'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { FeatureTableTab } from '../shared/tabs/ML/MlPrimaryKeyFeatureTableTab'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { SidebarTitleActionType, getDataProduct, isOutputPort } from '../shared/utils'; +import { Preview } from './preview/Preview'; + +/** + * Definition of the DataHub MLPrimaryKey entity. 
+ */ +export class MLPrimaryKeyEntity implements Entity { + type: EntityType = EntityType.MlprimaryKey; + + icon = (fontSize?: number, styleType?: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ( + + ); + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getGraphName = () => 'mlPrimaryKey'; + + getPathName = () => 'mlPrimaryKeys'; + + getEntityName = () => 'ML Primary Key'; + + getCollectionName = () => 'ML Primary Keys'; + + getOverridePropertiesFromEntity = (key?: MlPrimaryKey | null): GenericEntityProperties => { + return { + // eslint-disable-next-line + platform: key?.['featureTables']?.relationships?.[0]?.entity?.platform, + }; + }; + + useEntityQuery = useGetMlPrimaryKeyQuery; + + renderProfile = (urn: string) => ( + + ); + + getSidebarSections = () => [ + { + component: SidebarEntityHeader, + }, + { + component: SidebarAboutSection, + }, + { + component: SidebarOwnerSection, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + { + component: SidebarTagsSection, + }, + { + component: SidebarGlossaryTermsSection, + }, + { + component: StatusSection, + }, + { + component: SidebarStructuredProperties, + }, + ]; + + getSidebarTabs = () => [ + { + name: 'Lineage', + component: LineageTab, + description: "View this data asset's upstream and downstream dependencies", + icon: PartitionOutlined, + properties: { + actionType: SidebarTitleActionType.LineageExplore, + }, + }, + { + name: 'Properties', + component: PropertiesTab, + description: 'View additional properties about this asset', + icon: UnorderedListOutlined, + }, + ]; + + renderPreview = (previewType: PreviewType, data: MlPrimaryKey) => { + const genericProperties = this.getGenericEntityProperties(data); + // eslint-disable-next-line + 
const platform = data?.['featureTables']?.relationships?.[0]?.entity?.platform; + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as MlPrimaryKey; + const genericProperties = this.getGenericEntityProperties(data); + // eslint-disable-next-line + const platform = data?.['featureTables']?.relationships?.[0]?.entity?.platform; + return ( + + ); + }; + + displayName = (data: MlPrimaryKey) => { + return data.name || data.urn; + }; + + getGenericEntityProperties = (mlPrimaryKey: MlPrimaryKey) => { + return getDataForEntityType({ + data: mlPrimaryKey, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + getLineageVizConfig = (entity: MlPrimaryKey) => { + return { + urn: entity.urn, + name: entity.name, + type: EntityType.MlprimaryKey, + // eslint-disable-next-line + icon: entity?.['featureTables']?.relationships?.[0]?.entity?.platform?.properties?.logoUrl || undefined, + // eslint-disable-next-line + platform: entity?.['featureTables']?.relationships?.[0]?.entity?.platform?.name, + }; + }; + + supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + EntityCapabilityType.LINEAGE, + ]); + }; +} diff --git a/datahub-web-react/src/app/entityV2/mlPrimaryKey/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/mlPrimaryKey/preview/Preview.tsx new file mode 100644 index 00000000000000..993d6b51773217 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/mlPrimaryKey/preview/Preview.tsx @@ -0,0 +1,61 @@ +import { GenericEntityProperties } from '@app/entity/shared/types'; +import React from 'react'; +import { DataPlatform, DataProduct, EntityPath, EntityType, Owner } from '../../../../types.generated'; +import DefaultPreviewCard from 
'../../../previewV2/DefaultPreviewCard'; +import { capitalizeFirstLetterOnly } from '../../../shared/textUtil'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; + +export const Preview = ({ + urn, + data, + name, + featureNamespace, + description, + owners, + platform, + dataProduct, + platformInstanceId, + degree, + paths, + isOutputPort, + previewType, +}: { + urn: string; + data: GenericEntityProperties | null; + name: string; + featureNamespace: string; + description?: string | null; + owners?: Array | null; + platform?: DataPlatform | null | undefined; + dataProduct?: DataProduct | null; + platformInstanceId?: string; + degree?: number; + paths?: EntityPath[]; + isOutputPort?: boolean; + previewType?: PreviewType; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/ManageOwnership.tsx b/datahub-web-react/src/app/entityV2/ownership/ManageOwnership.tsx new file mode 100644 index 00000000000000..a3304ab015faa4 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/ManageOwnership.tsx @@ -0,0 +1,49 @@ +import React from 'react'; +import styled from 'styled-components/macro'; +import { Typography } from 'antd'; +import { OwnershipList } from './OwnershipList'; + +const PageContainer = styled.div` + padding-top: 20px; + width: 100%; + display: flex; + flex-direction: column; + overflow: auto; +`; + +const PageHeaderContainer = styled.div` + && { + padding-left: 24px; + } +`; + +const PageTitle = styled(Typography.Title)` + && { + margin-bottom: 12px; + } +`; + +const ListContainer = styled.div` + display: flex; + flex-direction: column; + overflow: auto; +`; + +/** + * Component used for displaying the 'Manage Ownership' experience. + */ +export const ManageOwnership = () => { + return ( + + + Manage Ownership + + Create, edit, and remove custom Ownership Types. 
+ + + + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/OwnershipBuilderModal.tsx b/datahub-web-react/src/app/entityV2/ownership/OwnershipBuilderModal.tsx new file mode 100644 index 00000000000000..ca5a5ee0253201 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/OwnershipBuilderModal.tsx @@ -0,0 +1,225 @@ +import React, { useEffect, useState } from 'react'; +import { Button, Form, Input, Modal, Typography, message, notification } from 'antd'; +import styled from 'styled-components/macro'; +import { useCreateOwnershipTypeMutation, useUpdateOwnershipTypeMutation } from '../../../graphql/ownership.generated'; +import { OwnershipTypeEntity } from '../../../types.generated'; +import { OwnershipTypeBuilderState } from './table/types'; + +const NAME_INPUT_TEST_ID = 'ownership-type-name-input'; +const DESCRIPTION_INPUT_TEST_ID = 'ownership-type-description-input'; + +const TitleContainer = styled.div` + display: flex; + justify-content: space-between; +`; + +const TitleText = styled(Typography.Text)` + font-size: 16px; + font-weight: 700; +`; + +const FormItemContainer = styled.div` + display: flex; + flex-direction: column; +`; + +const FormItemTitle = styled(Typography.Text)` + margin-bottom: 8px; + font-weight: 700; +`; + +const StyledFormItem = styled(Form.Item)` + margin-bottom: 8px; +`; + +const SaveButtonContainer = styled.div` + width: 100%; + display: flex; + justify-content: right; +`; + +const CancelButton = styled(Button)` + margin-right: 12px; +`; + +type Props = { + isOpen: boolean; + onClose: () => void; + refetch: () => void; + ownershipType?: OwnershipTypeEntity; +}; + +export const OwnershipBuilderModal = ({ isOpen, onClose, refetch, ownershipType }: Props) => { + // State + const [ownershipTypeBuilderState, setOwnershipTypeBuilderState] = useState({ + name: ownershipType?.info?.name || ownershipType?.urn || '', + description: ownershipType?.info?.description || '', + }); + const setName = (name: string) => { + 
setOwnershipTypeBuilderState({ ...ownershipTypeBuilderState, name }); + }; + const setDescription = (description: string) => { + setOwnershipTypeBuilderState({ ...ownershipTypeBuilderState, description }); + }; + const [form] = Form.useForm(); + form.setFieldsValue(ownershipTypeBuilderState); + + // Side effects + useEffect(() => { + if (ownershipType) { + const ownershipTypeName = ownershipType?.info?.name || ownershipType?.urn; + const ownershipTypeDescription = ownershipType?.info?.description || ''; + setOwnershipTypeBuilderState({ + name: ownershipTypeName, + description: ownershipTypeDescription, + }); + } else { + setOwnershipTypeBuilderState({ + name: '', + description: '', + }); + } + }, [ownershipType]); + + // Queries + const [createOwnershipTypeMutation] = useCreateOwnershipTypeMutation(); + const [updateOwnershipTypeMutation] = useUpdateOwnershipTypeMutation(); + + const onCreateOwnershipType = () => { + if (ownershipTypeBuilderState.name) { + createOwnershipTypeMutation({ + variables: { + input: { + name: ownershipTypeBuilderState.name, + description: ownershipTypeBuilderState.description, + }, + }, + }) + .then(() => { + setName(''); + setDescription(''); + onClose(); + notification.success({ + message: `Success`, + description: 'Successfully created ownership type.', + placement: 'bottomLeft', + duration: 3, + }); + setTimeout(() => { + refetch(); + }, 3000); + }) + .catch((e: unknown) => { + message.destroy(); + if (e instanceof Error) { + message.error({ + content: `Failed to create ownership type`, + duration: 3, + }); + } + }); + } + }; + + const onUpdateOwnershipType = () => { + if (ownershipType) { + updateOwnershipTypeMutation({ + variables: { + urn: ownershipType?.urn || '', + input: { + name: ownershipTypeBuilderState.name, + description: ownershipTypeBuilderState.description, + }, + }, + }) + .then(() => { + setName(''); + setDescription(''); + onClose(); + notification.success({ + message: `Success`, + description: 'Successfully updated 
ownership type.', + placement: 'bottomLeft', + duration: 3, + }); + setTimeout(() => { + refetch(); + }, 3000); + }) + .catch((e: unknown) => { + message.destroy(); + if (e instanceof Error) { + message.error({ + content: `Failed to update ownership type`, + duration: 3, + }); + } + }); + } + }; + + const onUpsert = ownershipType ? onUpdateOwnershipType : onCreateOwnershipType; + const titleText = ownershipType ? 'Edit Ownership Type' : 'Create Ownership Type'; + return ( + + {titleText} + + } + footer={null} + > +
+ + Name + + { + setName(e.target.value); + }} + /> + + + + Description + + { + setDescription(e.target.value); + }} + /> + + + + + + Cancel + + + +
+ ); +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/OwnershipList.tsx b/datahub-web-react/src/app/entityV2/ownership/OwnershipList.tsx new file mode 100644 index 00000000000000..6e735c7cb4cf27 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/OwnershipList.tsx @@ -0,0 +1,132 @@ +import React, { useState } from 'react'; +import { Button, Pagination, message } from 'antd'; +import { PlusOutlined } from '@ant-design/icons'; +import styled from 'styled-components/macro'; +import { useListOwnershipTypesQuery } from '../../../graphql/ownership.generated'; +import { Message } from '../../shared/Message'; +import { OwnershipBuilderModal } from './OwnershipBuilderModal'; +import TabToolbar from '../shared/components/styled/TabToolbar'; +import { OwnershipTable } from './table/OwnershipTable'; +import { OwnershipTypeEntity } from '../../../types.generated'; +import { SearchBar } from '../../search/SearchBar'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { scrollToTop } from '../../shared/searchUtils'; + +const PaginationContainer = styled.div` + display: flex; + justify-content: center; +`; + +const StyledPagination = styled(Pagination)` + margin: 40px; +`; + +const searchBarStyle = { + maxWidth: 220, + padding: 0, +}; + +const searchBarInputStyle = { + height: 32, + fontSize: 12, +}; + +/** + * This component renders a paginated, searchable list of Ownership Types. 
+ */ +export const OwnershipList = () => { + /** + * Context + */ + const entityRegistry = useEntityRegistry(); + + /** + * State + */ + const [page, setPage] = useState(1); + const [showOwnershipBuilder, setShowOwnershipBuilder] = useState(false); + const [ownershipType, setOwnershipType] = useState(undefined); + const [query, setQuery] = useState(undefined); + + /** + * Queries + */ + const pageSize = 10; + const start: number = (page - 1) * pageSize; + const { data, loading, error, refetch } = useListOwnershipTypesQuery({ + variables: { + input: { + start, + count: pageSize, + query, + }, + }, + }); + const totalOwnershipTypes = data?.listOwnershipTypes?.total || 0; + const ownershipTypes = + data?.listOwnershipTypes?.ownershipTypes.filter((type) => type.urn !== 'urn:li:ownershipType:none') || []; + + const onClickCreateOwnershipType = () => { + setShowOwnershipBuilder(true); + }; + + const onCloseModal = () => { + setShowOwnershipBuilder(false); + setOwnershipType(undefined); + }; + + const onChangePage = (newPage: number) => { + scrollToTop(); + setPage(newPage); + }; + + return ( + <> + {!data && loading && } + {error && + message.error({ + content: `Failed to load Ownership Types! An unexpected error occurred.`, + duration: 3, + })} + + + null} + onQueryChange={(q) => setQuery(q.length > 0 ? 
q : undefined)} + entityRegistry={entityRegistry} + /> + + + {totalOwnershipTypes >= pageSize && ( + + + + )} + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/table/ActionsColumn.tsx b/datahub-web-react/src/app/entityV2/ownership/table/ActionsColumn.tsx new file mode 100644 index 00000000000000..41e07520a0ece5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/table/ActionsColumn.tsx @@ -0,0 +1,128 @@ +import React from 'react'; +import { Dropdown, MenuProps, Popconfirm, Typography, message, notification } from 'antd'; +import { DeleteOutlined, EditOutlined, MoreOutlined } from '@ant-design/icons'; +import styled from 'styled-components/macro'; +import { OwnershipTypeEntity } from '../../../../types.generated'; +import { useDeleteOwnershipTypeMutation } from '../../../../graphql/ownership.generated'; + +const DROPDOWN_TEST_ID = 'ownership-table-dropdown'; +const EDIT_OWNERSHIP_TYPE_TEST_ID = 'edit-ownership-type'; +const DELETE_OWNERSHIP_TYPE_TEST_ID = 'delete-ownership-type'; + +const StyledDropdown = styled(Dropdown)``; + +const MenuButtonContainer = styled.div` + display: flex; + justify-content: center; + align-items: center; +`; + +const MenuButtonText = styled(Typography.Text)` + font-size: 14px; + font-weight: 400; + margin-left: 8px; +`; + +const StyledMoreOutlined = styled(MoreOutlined)` + width: 20px; + &&& { + padding-left: 0px; + padding-right: 0px; + font-size: 18px; + } + :hover { + cursor: pointer; + } +`; + +type Props = { + ownershipType: OwnershipTypeEntity; + setIsOpen: (isOpen: boolean) => void; + setOwnershipType: (ownershipType: OwnershipTypeEntity) => void; + refetch: () => void; +}; + +export const ActionsColumn = ({ ownershipType, setIsOpen, setOwnershipType, refetch }: Props) => { + const editOnClick = () => { + setIsOpen(true); + setOwnershipType(ownershipType); + }; + + const [deleteOwnershipTypeMutation] = useDeleteOwnershipTypeMutation(); + + const onDelete = () => { + deleteOwnershipTypeMutation({ + 
variables: { + urn: ownershipType.urn, + }, + }) + .then(() => { + notification.success({ + message: `Success`, + description: 'You have deleted an ownership type.', + placement: 'bottomLeft', + duration: 3, + }); + setTimeout(() => { + refetch(); + }, 3000); + }) + .catch((e: unknown) => { + message.destroy(); + if (e instanceof Error) { + message.error({ + content: `Failed to delete an ownership type`, + duration: 3, + }); + } + }); + }; + + const items: MenuProps['items'] = [ + { + key: 'edit', + icon: ( + + + Edit + + ), + }, + { + key: 'delete', + icon: ( + Are you sure you want to delete this ownership type?} + placement="left" + onCancel={() => {}} + onConfirm={onDelete} + okText="Yes" + cancelText="No" + > + + + Delete + + + ), + }, + ]; + + const onClick: MenuProps['onClick'] = (e) => { + const key = e.key as string; + if (key === 'edit') { + editOnClick(); + } + }; + + const menuProps: MenuProps = { + items, + onClick, + }; + + return ( + + + + ); +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/table/DescriptionColumn.tsx b/datahub-web-react/src/app/entityV2/ownership/table/DescriptionColumn.tsx new file mode 100644 index 00000000000000..5db6a671960627 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/table/DescriptionColumn.tsx @@ -0,0 +1,19 @@ +import React from 'react'; +import { Typography } from 'antd'; +import styled from 'styled-components/macro'; +import { OwnershipTypeEntity } from '../../../../types.generated'; + +const DescriptionText = styled(Typography.Text)` + font-size: 12px; + font-weight: 400; +`; + +type Props = { + ownershipType: OwnershipTypeEntity; +}; + +export const DescriptionColumn = ({ ownershipType }: Props) => { + const description = ownershipType?.info?.description || ''; + + return {description}; +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/table/NameColumn.tsx b/datahub-web-react/src/app/entityV2/ownership/table/NameColumn.tsx new file mode 100644 index 
00000000000000..46100621eae94c --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/table/NameColumn.tsx @@ -0,0 +1,19 @@ +import React from 'react'; +import { Typography } from 'antd'; +import styled from 'styled-components/macro'; +import { OwnershipTypeEntity } from '../../../../types.generated'; + +const NameText = styled(Typography.Text)` + font-size: 12px; + font-weight: 700; +`; + +type Props = { + ownershipType: OwnershipTypeEntity; +}; + +export const NameColumn = ({ ownershipType }: Props) => { + const name = ownershipType?.info?.name || ownershipType?.urn; + + return {name}; +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/table/OwnershipTable.tsx b/datahub-web-react/src/app/entityV2/ownership/table/OwnershipTable.tsx new file mode 100644 index 00000000000000..cc05c5e3901389 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/table/OwnershipTable.tsx @@ -0,0 +1,60 @@ +import React from 'react'; +import { Empty } from 'antd'; +import { OwnershipTypeEntity } from '../../../../types.generated'; +import { StyledTable } from '../../shared/components/styled/StyledTable'; +import { NameColumn } from './NameColumn'; +import { DescriptionColumn } from './DescriptionColumn'; +import { ActionsColumn } from './ActionsColumn'; + +type Props = { + ownershipTypes: OwnershipTypeEntity[]; + setIsOpen: (isOpen: boolean) => void; + setOwnershipType: (ownershipType: OwnershipTypeEntity) => void; + refetch: () => void; +}; + +export const OwnershipTable = ({ ownershipTypes, setIsOpen, setOwnershipType, refetch }: Props) => { + const tableColumns = [ + { + title: 'Name', + dataIndex: 'name', + sorter: (a: any, b: any) => a?.info?.name?.localeCompare(b?.info?.name), + key: 'name', + render: (_, record: any) => , + }, + { + title: 'Description', + dataIndex: 'description', + key: 'description', + render: (_, record: any) => , + }, + { + dataIndex: 'actions', + key: 'actions', + render: (_, record: any) => ( + + ), + }, + ]; + + const 
getRowKey = (ownershipType: OwnershipTypeEntity) => { + return ownershipType?.info?.name || ownershipType.urn; + }; + + return ( + , + }} + pagination={false} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/ownership/table/types.ts b/datahub-web-react/src/app/entityV2/ownership/table/types.ts new file mode 100644 index 00000000000000..6d7f9bdcf760b7 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/ownership/table/types.ts @@ -0,0 +1,14 @@ +/** + * The object represents the state of the Ownership Type Builder form. + */ +export interface OwnershipTypeBuilderState { + /** + * The name of the Ownership Type. + */ + name: string; + + /** + * The description of the Ownership Type. + */ + description: string; +} diff --git a/datahub-web-react/src/app/entityV2/query/QueryEntity.tsx b/datahub-web-react/src/app/entityV2/query/QueryEntity.tsx new file mode 100644 index 00000000000000..3cef8976b9f377 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/query/QueryEntity.tsx @@ -0,0 +1,126 @@ +import * as React from 'react'; +import { ConsoleSqlOutlined, FileOutlined } from '@ant-design/icons'; +import { DataPlatform, EntityType, QueryEntity as Query } from '../../../types.generated'; +import { Entity, IconStyleType } from '../Entity'; +import { GenericEntityProperties } from '../../entity/shared/types'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { useGetQueryQuery } from '../../../graphql/query.generated'; +import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; +import SidebarQueryUpdatedAtSection from '../shared/containers/profile/sidebar/Query/SidebarQueryUpdatedAtSection'; +import SidebarQueryDescriptionSection from '../shared/containers/profile/sidebar/Query/SidebarQueryDescriptionSection'; +import { TYPE_ICON_CLASS_NAME } from '../shared/components/subtypes'; +import SidebarQueryOperationsSection from 
'../shared/containers/profile/sidebar/Query/SidebarQueryOperationsSection'; +import SidebarQueryDefinitionSection from '../shared/containers/profile/sidebar/Query/SidebarQueryDefinitionSection'; +import { SidebarQueryLogicSection } from '../shared/containers/profile/sidebar/SidebarLogicSection'; + +/** + * Definition of the DataHub DataPlatformInstance entity. + * Most of this still needs to be filled out. + */ +export class QueryEntity implements Entity { + type: EntityType = EntityType.Query; + + icon = (fontSize?: number, _styleType?: IconStyleType, color?: string) => { + return ( + + ); + }; + + isSearchEnabled = () => false; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'query'; + + getEntityName = () => 'Query'; + + getCollectionName = () => 'Queries'; + + useEntityQuery = useGetQueryQuery; + + renderProfile = (urn: string) => { + return ( + ({})} + /> + ); + }; + + getOverridePropertiesFromEntity = (query?: Query | null): GenericEntityProperties => { + return { + name: query && this.displayName(query), + platform: query?.platform, + }; + }; + + renderEmbeddedProfile = (_: string) => <>; + + renderPreview = () => { + return <>; + }; + + renderSearch = () => { + return <>; + }; + + getLineageVizConfig = (query: Query) => { + // TODO: Set up types better here + const platform: DataPlatform | undefined = (query as any)?.queryPlatform; + return { + urn: query.urn, + name: query.properties?.name || query.urn, + type: EntityType.Query, + icon: platform?.properties?.logoUrl || undefined, + platform: platform || undefined, + }; + }; + + displayName = (data: Query) => { + return data?.properties?.name || (data?.properties?.source === 'SYSTEM' && 'System Query') || data?.urn; + }; + + getGenericEntityProperties = (data: Query) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + 
supportedCapabilities = () => { + return new Set([]); + }; + + getGraphName = () => { + return 'query'; + }; +} diff --git a/datahub-web-react/src/app/entityV2/schemaField/SchemaFieldEntity.tsx b/datahub-web-react/src/app/entityV2/schemaField/SchemaFieldEntity.tsx new file mode 100644 index 00000000000000..8a0052b6b38a3d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/schemaField/SchemaFieldEntity.tsx @@ -0,0 +1,114 @@ +import * as React from 'react'; +import TabFullsizedContext from '@src/app/shared/TabFullsizedContext'; +import { GenericEntityProperties } from '@app/entity/shared/types'; +import { globalEntityRegistryV2 } from '@app/EntityRegistryProvider'; +import SidebarEntityHeader from '@app/entityV2/shared/containers/profile/sidebar/SidebarEntityHeader'; +import { LineageTab } from '@app/entityV2/shared/tabs/Lineage/LineageTab'; +import { FetchedEntity } from '@app/lineage/types'; +import { useGetSchemaFieldQuery } from '@graphql/schemaField.generated'; +import { EntityProfile } from '@app/entityV2/shared/containers/profile/EntityProfile'; +import { downgradeV2FieldPath } from '@app/lineageV2/lineageUtils'; +import { decodeSchemaField } from '@app/lineage/utils/columnLineageUtils'; +import { PartitionOutlined, PicCenterOutlined, UnorderedListOutlined } from '@ant-design/icons'; +import { EntityType, SchemaFieldEntity as SchemaField, SearchResult } from '@types'; + +import { Entity, IconStyleType, PreviewType } from '../Entity'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { Preview } from './preview/Preview'; +import SidebarNotesSection from '../shared/sidebarSection/SidebarNotesSection'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityMenuActions'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; + +const headerDropdownItems = new Set([EntityMenuItems.SHARE, EntityMenuItems.ANNOUNCE]); + +export class SchemaFieldEntity implements Entity { + type: EntityType = 
EntityType.SchemaField; + + icon = (fontSize?: number, styleType?: IconStyleType, color = 'inherit') => ( + + ); + + isSearchEnabled = () => true; + + isBrowseEnabled = () => false; + + isLineageEnabled = () => false; + + // Currently unused. + getAutoCompleteFieldName = () => 'schemaField'; + + getPathName = () => 'schemaField'; + + getEntityName = () => 'Column'; + + getCollectionName = () => 'Columns'; + + useEntityQuery = useGetSchemaFieldQuery; + + renderProfile = (urn: string) => ( + + + + ); + + getSidebarSections = () => [{ component: SidebarEntityHeader }, { component: SidebarNotesSection }]; + + getGraphName = () => 'schemaField'; + + renderPreview = (previewType: PreviewType, data: SchemaField) => { + const genericProperties = this.getGenericEntityProperties(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => this.renderPreview(PreviewType.SEARCH, result.entity as SchemaField); + + displayName = (data: SchemaField) => decodeSchemaField(downgradeV2FieldPath(data?.fieldPath) || '') || data.urn; + + getGenericEntityProperties = (data: SchemaField): GenericEntityProperties | null => + getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: (newData) => newData, + }); + + getLineageVizConfig = (entity: SchemaField): FetchedEntity => { + const parent = + entity.parent && globalEntityRegistryV2.getGenericEntityProperties(entity.parent.type, entity.parent); + return { + urn: entity.urn, + type: EntityType.SchemaField, + name: entity?.fieldPath, + expandedName: `${parent?.name}.${entity?.fieldPath}`, + icon: parent?.platform?.properties?.logoUrl ?? undefined, + parent: parent ?? 
undefined, + }; + }; + + supportedCapabilities = () => new Set([]); +} diff --git a/datahub-web-react/src/app/entityV2/schemaField/__tests__/utils.test.ts b/datahub-web-react/src/app/entityV2/schemaField/__tests__/utils.test.ts new file mode 100644 index 00000000000000..b61dbeb0f18af5 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/schemaField/__tests__/utils.test.ts @@ -0,0 +1,22 @@ +import { getFieldPathFromSchemaFieldUrn, getSchemaFieldParentLink, getSourceUrnFromSchemaFieldUrn } from '../utils'; + +describe('schema field utils', () => { + const schemaFieldUrn = + 'urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD),profile_id)'; + + it('should get a parent link for a schema field urn properly', () => { + expect(getSchemaFieldParentLink(schemaFieldUrn)).toBe( + '/dataset/urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)/Columns?highlightedPath=profile_id', + ); + }); + + it('should get the source urn from a schema field urn properly', () => { + expect(getSourceUrnFromSchemaFieldUrn(schemaFieldUrn)).toBe( + 'urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)', + ); + }); + + it('should get the field path from a schema field urn properly', () => { + expect(getFieldPathFromSchemaFieldUrn(schemaFieldUrn)).toBe('profile_id'); + }); +}); diff --git a/datahub-web-react/src/app/entityV2/schemaField/preview/Preview.tsx b/datahub-web-react/src/app/entityV2/schemaField/preview/Preview.tsx new file mode 100644 index 00000000000000..70d00882a9c7de --- /dev/null +++ b/datahub-web-react/src/app/entityV2/schemaField/preview/Preview.tsx @@ -0,0 +1,50 @@ +import React from 'react'; +import { PicCenterOutlined } from '@ant-design/icons'; +import { GenericEntityProperties } from '@src/app/entity/shared/types'; +import { capitalizeFirstLetterOnly } from '@src/app/shared/textUtil'; +import { EntityType, Owner } from 
'../../../../types.generated'; +import DefaultPreviewCard from '../../../previewV2/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType, PreviewType } from '../../Entity'; + +export const Preview = ({ + data, + datasetUrn, + name, + description, + owners, + previewType, + parent, +}: { + data: GenericEntityProperties | null; + datasetUrn: string; + name: string; + description?: string | null; + owners?: Array | null; + previewType: PreviewType; + parent?: GenericEntityProperties; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + + const url = `${entityRegistry.getEntityUrl(EntityType.Dataset, datasetUrn)}/${encodeURIComponent( + 'Columns', + )}?schemaFilter=${encodeURIComponent(name)}`; + + return ( + } + type="Column" + typeIcon={entityRegistry.getIcon(EntityType.SchemaField, 14, IconStyleType.ACCENT)} + /> + ); +}; diff --git a/datahub-web-react/src/app/entityV2/schemaField/utils.ts b/datahub-web-react/src/app/entityV2/schemaField/utils.ts new file mode 100644 index 00000000000000..df74f3758f041f --- /dev/null +++ b/datahub-web-react/src/app/entityV2/schemaField/utils.ts @@ -0,0 +1,22 @@ +export function getSourceUrnFromSchemaFieldUrn(schemaFieldUrn: string) { + return schemaFieldUrn.replace('urn:li:schemaField:(', '').split(')')[0].concat(')'); +} + +export function getFieldPathFromSchemaFieldUrn(schemaFieldUrn: string) { + const val = schemaFieldUrn.replace('urn:li:schemaField:(', '').split(')')[1]?.replace(',', '') ?? 
''; + try { + return decodeURI(val); + } catch (e) { + return val; + } +} + +/* + * Returns a link to the schemaField dataset with the field selected + */ +export function getSchemaFieldParentLink(schemaFieldUrn: string) { + const fieldPath = getFieldPathFromSchemaFieldUrn(schemaFieldUrn); + const parentUrn = getSourceUrnFromSchemaFieldUrn(schemaFieldUrn); + + return `/dataset/${parentUrn}/Columns?highlightedPath=${fieldPath}`; +} diff --git a/datahub-web-react/src/app/entityV2/shared/ActorAvatar.tsx b/datahub-web-react/src/app/entityV2/shared/ActorAvatar.tsx new file mode 100644 index 00000000000000..c6041a9383890d --- /dev/null +++ b/datahub-web-react/src/app/entityV2/shared/ActorAvatar.tsx @@ -0,0 +1,112 @@ +import { Avatar } from 'antd'; +import { Tooltip } from '@components'; +import { TooltipPlacement } from 'antd/lib/tooltip'; +import React from 'react'; +import { useHistory } from 'react-router-dom'; +import styled from 'styled-components'; +import { useIsEmbeddedProfile } from '@src/app/shared/useEmbeddedProfileLinkProps'; +import defaultAvatar from '../../../images/default_avatar.png'; +import getAvatarColor from '../../shared/avatar/getAvatarColor'; + +const AvatarStyled = styled(Avatar)<{ size?: number; $backgroundColor?: string }>` + color: #fff; + background-color: ${(props) => (props.$backgroundColor ? `${props.$backgroundColor}` : 'transparent')}; + font-size: ${(props) => (props.size ? `${Math.max(props.size / 2.0, 10)}px` : '10px')} !important; + height: ${(props) => (props.size ? props.size : 20)}px; + width: ${(props) => (props.size ? props.size : 20)}px; + + .ant-avatar-string { + text-align: center; + top: 0px; + line-height: ${(props) => (props.size ? 
props.size : 20)}px; + } + :hover { + cursor: pointer; + } +`; + +type Props = { + url?: string; + photoUrl?: string; + useDefaultAvatar?: boolean; + name?: string; + style?: React.CSSProperties; + placement?: TooltipPlacement; + size?: number; + isGroup?: boolean; + isPolicy?: boolean; + isRole?: boolean; + hideTooltip?: boolean; +}; + +export default function ActorAvatar({ + url, + photoUrl, + useDefaultAvatar, + name, + style, + placement, + size, + isGroup = false, + isPolicy = false, + isRole = false, + hideTooltip = false, +}: Props) { + const history = useHistory(); + const isEmbeddedProfile = useIsEmbeddedProfile(); + + const navigate = () => { + if (url) { + if (isEmbeddedProfile) window.open(url, '_blank'); + else history.push(url); + } + }; + + const avatarWithInitial = name ? ( + + {name.charAt(0).toUpperCase()} + + ) : ( + + ); + const avatarWithDefault = useDefaultAvatar ? ( + + ) : ( + avatarWithInitial + ); + const avatar = + photoUrl && photoUrl !== '' ? ( + + ) : ( + avatarWithDefault + ); + if (!name) { + return avatar; + } + + const renderTitle = (input) => { + let title = `${input}`; + if (isGroup) { + title = `${title} - Group`; + } else if (isPolicy) { + title = `${title}`; + } else if (isRole) { + title = `${title} - Role`; + } + return title; + }; + + return hideTooltip ? 
( + avatar + ) : ( + + {avatar} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/shared/EntityDropdown/CreateGlossaryEntityModal.tsx b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/CreateGlossaryEntityModal.tsx new file mode 100644 index 00000000000000..9cc9f16c80e213 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/CreateGlossaryEntityModal.tsx @@ -0,0 +1,243 @@ +import React, { useState } from 'react'; +import styled from 'styled-components/macro'; +import { EditOutlined } from '@ant-design/icons'; +import { message, Button, Input, Modal, Typography, Form, Collapse } from 'antd'; +import DOMPurify from 'dompurify'; +import { + useCreateGlossaryTermMutation, + useCreateGlossaryNodeMutation, +} from '../../../../graphql/glossaryTerm.generated'; +import { EntityType } from '../../../../types.generated'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import NodeParentSelect from './NodeParentSelect'; +import { useEntityData, useRefetch } from '../../../entity/shared/EntityContext'; +import analytics, { EventType } from '../../../analytics'; +import DescriptionModal from '../components/legacy/DescriptionModal'; +import { validateCustomUrnId } from '../../../shared/textUtil'; +import { useGlossaryEntityData } from '../GlossaryEntityContext'; +import { getGlossaryRootToUpdate, updateGlossarySidebar } from '../../../glossary/utils'; + +const StyledItem = styled(Form.Item)` + margin-bottom: 0; +`; + +const OptionalWrapper = styled.span` + font-weight: normal; +`; + +const StyledButton = styled(Button)` + padding: 0; +`; + +interface Props { + entityType: EntityType; + onClose: () => void; + refetchData?: () => void; + // acryl-main only prop + canCreateGlossaryEntity: boolean; + canSelectParentUrn?: boolean; +} + +function CreateGlossaryEntityModal(props: Props) { + const { entityType, onClose, refetchData, canCreateGlossaryEntity, canSelectParentUrn = true } = props; + const entityData = useEntityData(); 
+ const { isInGlossaryContext, urnsToUpdate, setUrnsToUpdate } = useGlossaryEntityData(); + const [form] = Form.useForm(); + const entityRegistry = useEntityRegistry(); + const [stagedId, setStagedId] = useState(undefined); + const [stagedName, setStagedName] = useState(''); + const [selectedParentUrn, setSelectedParentUrn] = useState(entityData.urn); + const [documentation, setDocumentation] = useState(''); + const [isDocumentationModalVisible, setIsDocumentationModalVisible] = useState(false); + const [createButtonDisabled, setCreateButtonDisabled] = useState(true); + const refetch = useRefetch(); + + const [createGlossaryTermMutation] = useCreateGlossaryTermMutation(); + const [createGlossaryNodeMutation] = useCreateGlossaryNodeMutation(); + + function createGlossaryEntity() { + const mutation = + entityType === EntityType.GlossaryTerm ? createGlossaryTermMutation : createGlossaryNodeMutation; + + const sanitizedDescription = DOMPurify.sanitize(documentation); + mutation({ + variables: { + input: { + id: stagedId?.length ? 
stagedId : undefined, + name: stagedName, + parentNode: selectedParentUrn || null, + description: sanitizedDescription || null, + }, + }, + }) + .then(() => { + message.loading({ content: 'Updating...', duration: 2 }); + setTimeout(() => { + analytics.event({ + type: EventType.CreateGlossaryEntityEvent, + entityType, + parentNodeUrn: selectedParentUrn || undefined, + }); + message.success({ + content: `Created ${entityRegistry.getEntityName(entityType)}!`, + duration: 2, + }); + refetch(); + if (isInGlossaryContext) { + // either refresh this current glossary node or the root nodes or root terms + const nodeToUpdate = entityData?.urn || getGlossaryRootToUpdate(entityType); + updateGlossarySidebar([nodeToUpdate], urnsToUpdate, setUrnsToUpdate); + } + if (refetchData) { + refetchData(); + } + }, 2000); + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to create: \n ${e.message || ''}`, duration: 3 }); + }); + onClose(); + } + + function addDocumentation(description: string) { + setDocumentation(description); + setIsDocumentationModalVisible(false); + } + + return ( + + + + + } + > +
+ setCreateButtonDisabled(form.getFieldsError().some((field) => field.errors.length > 0)) + } + > + Name}> + + setStagedName(event.target.value)} + /> + + + {canSelectParentUrn && ( + + Parent (optional) + + } + > + + + + + )} + + + Documentation (optional) + + } + > + setIsDocumentationModalVisible(true)}> + + {documentation ? 'Edit' : 'Add'} Documentation + + {isDocumentationModalVisible && ( + setIsDocumentationModalVisible(false)} + onSubmit={addDocumentation} + description={documentation} + /> + )} + + + Advanced} key="1"> + + {entityRegistry.getEntityName(props.entityType)} Id + + } + > + + By default, a random UUID will be generated to uniquely identify this entity. If + you'd like to provide a custom id, you may provide it here. Note that it should be + unique across the entire Glossary. Be careful, you cannot easily change the id after + creation. + + ({ + validator(_, value) { + if (value && validateCustomUrnId(value)) { + return Promise.resolve(); + } + return Promise.reject(new Error('Please enter a valid entity id')); + }, + }), + ]} + > + setStagedId(event.target.value)} + /> + + + + + +
+ ); +} + +export default CreateGlossaryEntityModal; diff --git a/datahub-web-react/src/app/entityV2/shared/EntityDropdown/DeleteEntityMenuAction.tsx b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/DeleteEntityMenuAction.tsx new file mode 100644 index 00000000000000..86bccdfd30c43a --- /dev/null +++ b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/DeleteEntityMenuAction.tsx @@ -0,0 +1,63 @@ +import React from 'react'; +import { DeleteOutlined } from '@ant-design/icons'; +import { Tooltip } from '@components'; +import { Redirect } from 'react-router'; +import { useUserContext } from '../../../context/useUserContext'; +import { isDeleteDisabled, shouldDisplayChildDeletionWarning } from './utils'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import useDeleteEntity from './useDeleteEntity'; +import { getEntityProfileDeleteRedirectPath } from '../../../shared/deleteUtils'; +import { EntityType } from '../../../../types.generated'; +import { useEntityData } from '../../../entity/shared/EntityContext'; +import { ActionMenuItem } from './styledComponents'; + +interface Props { + options?: any; + onDelete?: () => void; +} + +export default function DeleteEntityMenuItem({ options, onDelete }: Props) { + const { urn, entityData, entityType } = useEntityData(); + const me = useUserContext(); + const entityRegistry = useEntityRegistry(); + const isDomainEntity = entityType === EntityType.Domain; + + const { onDeleteEntity, hasBeenDeleted } = useDeleteEntity( + urn, + entityType, + entityData, + onDelete, + options?.hideDeleteMessage, + options?.skipDeleteWait, + ); + + if (!entityData) return null; + + /** + * A default path to redirect to if the entity is deleted. 
+ */ + const deleteRedirectPath = getEntityProfileDeleteRedirectPath(entityType, entityData); + + return ( + + + + + {hasBeenDeleted && !onDelete && deleteRedirectPath && } + + ); +} diff --git a/datahub-web-react/src/app/entityV2/shared/EntityDropdown/DomainParentSelect.tsx b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/DomainParentSelect.tsx new file mode 100644 index 00000000000000..43e4eaf5791b42 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/DomainParentSelect.tsx @@ -0,0 +1,107 @@ +import React, { MouseEvent } from 'react'; +import { Empty, Select } from 'antd'; +import { CloseCircleFilled } from '@ant-design/icons'; +import { useDomainsContext } from '@src/app/domainV2/DomainsContext'; +import { Domain, EntityType } from '../../../../types.generated'; +import domainAutocompleteOptions from '../../../domainV2/DomainAutocompleteOptions'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import ClickOutside from '../../../shared/ClickOutside'; +import { BrowserWrapper } from '../../../shared/tags/AddTagsTermsModal'; +import { ANTD_GRAY } from '../constants'; +import useParentSelector from './useParentSelector'; +import DomainNavigator from '../../../domain/nestedDomains/domainNavigator/DomainNavigator'; + +// filter out entity itself and its children +export function filterResultsForMove(entity: Domain, entityUrn: string) { + return ( + entity.urn !== entityUrn && + entity.__typename === 'Domain' && + !entity.parentDomains?.domains.some((node) => node.urn === entityUrn) + ); +} + +interface Props { + selectedParentUrn: string; + setSelectedParentUrn: (parent: string) => void; + isMoving?: boolean; +} + +export default function DomainParentSelect({ selectedParentUrn, setSelectedParentUrn, isMoving }: Props) { + const entityRegistry = useEntityRegistry(); + const { entityData } = useDomainsContext(); + const domainUrn = entityData?.urn; + + const { + searchResults, + searchQuery, + isFocusedOnInput, + 
selectedParentName, + selectParentFromBrowser, + onSelectParent, + handleSearch, + clearSelectedParent, + setIsFocusedOnInput, + autoCompleteResultsLoading, + } = useParentSelector({ + entityType: EntityType.Domain, + entityData, + selectedParentUrn, + setSelectedParentUrn, + }); + const domainSearchResultsFiltered = + isMoving && domainUrn + ? searchResults.filter((r) => filterResultsForMove(r as Domain, domainUrn)) + : searchResults; + + function selectDomain(domain: Domain) { + selectParentFromBrowser(domain.urn, entityRegistry.getDisplayName(EntityType.Domain, domain)); + } + + const isShowingDomainNavigator = !searchQuery && isFocusedOnInput; + + const handleFocus = () => setIsFocusedOnInput(true); + const handleClickOutside = () => setIsFocusedOnInput(false); + + const handleClear = (event: MouseEvent) => { + // Prevent, otherwise antd will close the select menu but leaves it focused + event.stopPropagation(); + clearSelectedParent(); + }; + + return ( + + setIsFocusedOnInput(true)} + dropdownStyle={isShowingGlossaryBrowser || !searchQuery ? 
{ display: 'none' } : {}} + > + {nodeSearchResults?.map((result) => ( + + {entityRegistry.getDisplayName(result.type, result)} + + ))} + + + + + + ); +} + +export default NodeParentSelect; diff --git a/datahub-web-react/src/app/entityV2/shared/EntityDropdown/RaiseIncidentMenuAction.tsx b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/RaiseIncidentMenuAction.tsx new file mode 100644 index 00000000000000..f78b57e178a2ee --- /dev/null +++ b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/RaiseIncidentMenuAction.tsx @@ -0,0 +1,50 @@ +import React, { useState } from 'react'; +import { WarningOutlined } from '@ant-design/icons'; +import { Tooltip } from '@components'; +import { useHistory } from 'react-router'; +import { useEntityData, useRefetch } from '../../../entity/shared/EntityContext'; +import { ActionMenuItem } from './styledComponents'; +import { AddIncidentModal } from '../tabs/Incident/components/AddIncidentModal'; +import { getEntityPath } from '../containers/profile/utils'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { useIsSeparateSiblingsMode } from '../useIsSeparateSiblingsMode'; + +export default function RaiseIncidentMenuAction() { + const { urn, entityType } = useEntityData(); + const refetchForEntity = useRefetch(); + const history = useHistory(); + const entityRegistry = useEntityRegistry(); + const isHideSiblingMode = useIsSeparateSiblingsMode(); + const [isRaiseIncidentModalVisible, setIsRaiseIncidentModalVisible] = useState(false); + + return ( + + setIsRaiseIncidentModalVisible(true)}> + + + {isRaiseIncidentModalVisible && ( + setIsRaiseIncidentModalVisible(false)} + refetch={ + (() => { + refetchForEntity?.(); + history.push( + `${getEntityPath( + entityType, + urn, + entityRegistry, + false, + isHideSiblingMode, + 'Incidents', + )}`, + ); + }) as any + } + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationMenuAction.tsx 
b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationMenuAction.tsx new file mode 100644 index 00000000000000..5e8eb11bb88fee --- /dev/null +++ b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationMenuAction.tsx @@ -0,0 +1,71 @@ +import React, { useState } from 'react'; +import { ExclamationCircleOutlined } from '@ant-design/icons'; +import { message } from 'antd'; +import { Tooltip } from '@components'; +import { useEntityData, useRefetch } from '../../../entity/shared/EntityContext'; +import { ActionMenuItem } from './styledComponents'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { UpdateDeprecationModal } from './UpdateDeprecationModal'; +import { useUpdateDeprecationMutation } from '../../../../graphql/mutations.generated'; + +export default function UpdateDeprecationMenuAction() { + const { urn, entityData, entityType } = useEntityData(); + const refetchForEntity = useRefetch(); + const [isDeprecationModalVisible, setIsDeprecationModalVisible] = useState(false); + const entityRegistry = useEntityRegistry(); + const [updateDeprecation] = useUpdateDeprecationMutation(); + + const handleUpdateDeprecation = async (deprecatedStatus: boolean) => { + message.loading({ content: 'Updating...' }); + try { + await updateDeprecation({ + variables: { + input: { + urn, + deprecated: deprecatedStatus, + note: '', + decommissionTime: null, + }, + }, + }); + message.destroy(); + message.success({ content: 'Deprecation Updated', duration: 2 }); + } catch (e: unknown) { + message.destroy(); + if (e instanceof Error) { + message.error({ content: `Failed to update Deprecation: \n ${e.message || ''}`, duration: 2 }); + } + } + refetchForEntity?.(); + }; + + return ( + + + !entityData?.deprecation?.deprecated + ? 
setIsDeprecationModalVisible(true) + : handleUpdateDeprecation(false) + } + data-testid="entity-menu-deprecate-button" + > + + + {isDeprecationModalVisible && ( + setIsDeprecationModalVisible(false)} + refetch={refetchForEntity} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationModal.tsx b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationModal.tsx new file mode 100644 index 00000000000000..fabb2be584a294 --- /dev/null +++ b/datahub-web-react/src/app/entityV2/shared/EntityDropdown/UpdateDeprecationModal.tsx @@ -0,0 +1,188 @@ +import { Button, DatePicker, Form, message, Modal, Select, Skeleton } from 'antd'; +import TextArea from 'antd/lib/input/TextArea'; +import dayjs from 'dayjs'; +import React from 'react'; +import { useGetEntitiesQuery } from '../../../../graphql/entity.generated'; +import { useBatchUpdateDeprecationMutation } from '../../../../graphql/mutations.generated'; +import { ResourceRefInput, SubResourceType } from '../../../../types.generated'; +import { EntityLink } from '../../../homeV2/reference/sections/EntityLink'; +import { getV1FieldPathFromSchemaFieldUrn } from '../../../lineageV2/lineageUtils'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { downgradeV2FieldPath } from '../../dataset/profile/schema/utils/utils'; +import { EntityCapabilityType } from '../../Entity'; +import { SearchSelectModal } from '../components/styled/search/SearchSelectModal'; +import { useGetEntityWithSchema } from '../tabs/Dataset/Schema/useGetEntitySchema'; +import { generateSchemaFieldUrn } from '../tabs/Lineage/utils'; +import { handleBatchError } from '../utils'; + +type Props = { + urns: string[]; + // if you need to provide context for subresources, resourceRefs should be provided and will take precedence over urns + resourceRefs?: ResourceRefInput[]; + onClose: () => void; + refetch?: () => void; + zIndexOverride?: number; +}; + +const SCHEMA_FIELD_PREFIX = 
'urn:li:schemaField:'; + +export const UpdateDeprecationModal = ({ urns, resourceRefs, onClose, refetch, zIndexOverride }: Props) => { + const { entityWithSchema } = useGetEntityWithSchema(); + const schemaMetadata: any = entityWithSchema?.schemaMetadata || undefined; + + const [batchUpdateDeprecation] = useBatchUpdateDeprecationMutation(); + const [isReplacementModalVisible, setIsReplacementModalVisible] = React.useState(false); + const [replacementUrn, setReplacementUrn] = React.useState(null); + const entityRegistry = useEntityRegistry(); + + const isDeprecatingFields = + !!resourceRefs && resourceRefs.length > 0 && resourceRefs[0].subResourceType === SubResourceType.DatasetField; + const resourceFromWhichReplacementIsSelected = resourceRefs?.[0]?.resourceUrn; + + const { data: replacementData, loading: replacementLoading } = useGetEntitiesQuery({ + variables: { + urns: [replacementUrn || ''], + }, + skip: !replacementUrn || replacementUrn?.startsWith(SCHEMA_FIELD_PREFIX), + }); + + const [form] = Form.useForm(); + + const handleClose = () => { + form.resetFields(); + onClose(); + }; + + const handleOk = async (formData: any) => { + message.loading({ content: 'Updating...' }); + try { + await batchUpdateDeprecation({ + variables: { + input: { + resources: resourceRefs || urns.map((resourceUrn) => ({ resourceUrn })), + deprecated: true, + note: formData.note, + decommissionTime: formData.decommissionTime && formData.decommissionTime.unix() * 1000, + replacement: replacementUrn, + }, + }, + }); + message.destroy(); + message.success({ content: 'Deprecation Updated', duration: 2 }); + } catch (e: unknown) { + message.destroy(); + if (e instanceof Error) { + message.error( + handleBatchError(urns, e, { + content: `Failed to update Deprecation: \n ${e.message || ''}`, + duration: 2, + }), + ); + } + } + refetch?.(); + handleClose(); + }; + + return ( + + + + + } + > +
+ +