Skip to content

Commit 5874c4f

Browse files
derekpham and Derek Pham authored
Persist emitTime from IngestionTrackingContext to the new entity tables and use emitTime during backfill (#329)
* feat(backfill): Persist emitTime for the new schema and use the emitTime field during backfill logic when available * update docs * Reset backfill to false when emitting MAE * Address comments * Comment * Add nullable annotation --------- Co-authored-by: Derek Pham <[email protected]>
1 parent 1472141 commit 5874c4f

File tree

9 files changed

+176
-113
lines changed

9 files changed

+176
-113
lines changed

dao-api/src/main/java/com/linkedin/metadata/dao/BaseLocalDAO.java

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -411,26 +411,29 @@ private <ASPECT extends RecordTemplate> AddResult<ASPECT> addCommon(@Nonnull URN
411411

412412
final ASPECT oldValue = latest.getAspect() == null ? null : latest.getAspect();
413413
final AuditStamp oldAuditStamp = latest.getExtraInfo() == null ? null : latest.getExtraInfo().getAudit();
414+
final Long oldEmitTime = latest.getExtraInfo() == null ? null : latest.getExtraInfo().getEmitTime();
414415

415416
boolean isBackfillEvent = trackingContext != null
416417
&& trackingContext.hasBackfill() && trackingContext.isBackfill();
417418
if (isBackfillEvent) {
418419
boolean shouldBackfill =
419420
// new value is being inserted. We should backfill
420421
oldValue == null
421-
// the time in old audit stamp represents last modified time of the aspect
422-
// if the record doesn't exist, it will be null, which means we should process the record as normal
423422
|| (
424-
oldAuditStamp != null && oldAuditStamp.hasTime()
425-
// ingestionTrackingContext if not null should always have emitTime. If emitTime doesn't exist within
426-
// a non-null IngestionTrackingContext, it should be investigated. We'll also skip backfilling in this case
427-
&& trackingContext.hasEmitTime()
428-
// we should only process this backfilling event if the emit time is greater than last modified time
429-
&& trackingContext.getEmitTime() > oldAuditStamp.getTime());
430-
431-
log.info("Encounter backfill event. Tracking context: {}. Urn: {}. Aspect class: {}. Old audit stamp: {}. "
423+
// tracking context should ideally always have emitTime. If it's not present, we will skip backfilling
424+
trackingContext.hasEmitTime()
425+
&& (
426+
// old emit time is available so we'll use it for comparison
427+
// if new event emit time > old event emit time, we'll backfill
428+
(oldEmitTime != null && trackingContext.getEmitTime() > oldEmitTime)
429+
// old emit time is not available, so we'll fall back to comparing new emit time against old audit time
430+
// old audit time represents the last modified time of the aspect
431+
|| (oldEmitTime == null && oldAuditStamp != null && oldAuditStamp.hasTime() && trackingContext.getEmitTime() > oldAuditStamp.getTime())));
432+
433+
log.info("Encounter backfill event. Old value = null: {}. Tracking context: {}. Urn: {}. Aspect class: {}. Old audit stamp: {}. "
434+
+ "Old emit time: {}. "
432435
+ "Based on this information, shouldBackfill = {}.",
433-
trackingContext, urn, aspectClass, oldAuditStamp, shouldBackfill);
436+
oldValue == null, trackingContext, urn, aspectClass, oldAuditStamp, oldEmitTime, shouldBackfill);
434437

435438
if (!shouldBackfill) {
436439
return new AddResult<>(oldValue, oldValue, aspectClass);
@@ -578,6 +581,10 @@ private <ASPECT extends RecordTemplate> ASPECT_UNION unwrapAddResultToUnion(URN
578581

579582
private <ASPECT extends RecordTemplate> ASPECT unwrapAddResult(URN urn, AddResult<ASPECT> result, @Nonnull AuditStamp auditStamp,
580583
@Nullable IngestionTrackingContext trackingContext) {
584+
if (trackingContext != null) {
585+
trackingContext.setBackfill(false); // reset backfill since MAE won't be a backfill event
586+
}
587+
581588
Class<ASPECT> aspectClass = result.getKlass();
582589
final ASPECT oldValue = result.getOldValue();
583590
final ASPECT newValue = result.getNewValue();

dao-api/src/main/pegasus/com/linkedin/metadata/aspect/AuditedAspect.pdl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ record AuditedAspect {
3737
* This value is different from lastmodifiedon / the timestamp in AuditStamp since auditStamp
3838
* is created when the restli resource receives the ingestion request.
3939
* This is set by the MCE producers (or MCE consumers if not set by producers)
40+
*
41+
* This will be null in the following scenarios:
42+
* - The record is from the old schema
43+
* - The record was inserted before we started persisting emitTime to the new schema
44+
* - The record was inserted via ingest instead of ingestWithTracking
4045
*/
4146
emitTime: optional long
4247
}

dao-api/src/main/pegasus/com/linkedin/metadata/query/ListResultMetadata.pdl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ record ListResultMetadata {
4040
* This value is different from lastmodifiedon / the timestamp in AuditStamp since auditStamp
4141
* is created when the restli resource receives the ingestion request.
4242
* This is set by the MCE producers (or MCE consumers if not set by producers)
43+
*
44+
* This will be null in the following scenarios:
45+
* - The record is from the old schema
46+
* - The record was inserted before we started persisting emitTime to the new schema
47+
* - The record was inserted via ingest instead of ingestWithTracking
4348
*/
4449
emitTime: optional long
4550
}]

dao-api/src/test/java/com/linkedin/metadata/dao/BaseLocalDAOTest.java

Lines changed: 64 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.linkedin.common.AuditStamp;
44
import com.linkedin.data.template.RecordTemplate;
5+
import com.linkedin.data.template.SetMode;
56
import com.linkedin.metadata.dao.builder.BaseLocalRelationshipBuilder.LocalRelationshipUpdates;
67
import com.linkedin.metadata.dao.producer.BaseMetadataEventProducer;
78
import com.linkedin.metadata.dao.producer.BaseTrackingMetadataEventProducer;
@@ -437,39 +438,55 @@ public Object[][] addBackfillForNoopCases() {
437438
AuditStamp oldAuditStamp = makeAuditStamp("susActor", 6L);
438439

439440
// case 1 - emitTime doesn't exist
440-
IngestionTrackingContext contextWithNoEmitTime = new IngestionTrackingContext();
441-
contextWithNoEmitTime.setBackfill(true);
442-
443-
// case 2 - emitTime < old stamp
444-
IngestionTrackingContext contextWithSmallEmitTime = new IngestionTrackingContext();
445-
contextWithSmallEmitTime.setBackfill(true);
446-
contextWithSmallEmitTime.setEmitTime(5L);
441+
IngestionTrackingContext context1 = new IngestionTrackingContext();
442+
context1.setBackfill(true);
443+
444+
// case 2 - new emit time < old emit time
445+
IngestionTrackingContext context2 = new IngestionTrackingContext();
446+
context2.setBackfill(true);
447+
context2.setEmitTime(4L);
448+
long oldEmitTime2 = 5L;
449+
450+
// case 3 - new emit time < old emit time (same as case 2, but old stamp < new emit time)
451+
IngestionTrackingContext context3 = new IngestionTrackingContext();
452+
context3.setBackfill(true);
453+
context3.setEmitTime(10L);
454+
long oldEmitTime3 = 11L;
455+
456+
// case 4 - old emit time = null, new emit time < old audit stamp
457+
IngestionTrackingContext context4 = new IngestionTrackingContext();
458+
context4.setBackfill(true);
459+
context4.setEmitTime(3L);
447460

448461
return new Object[][] {
449-
{ contextWithNoEmitTime, oldAuditStamp },
450-
{ contextWithSmallEmitTime, oldAuditStamp }
462+
{ context1, oldAuditStamp, null },
463+
{ context2, oldAuditStamp, oldEmitTime2 },
464+
{ context3, oldAuditStamp, oldEmitTime3 },
465+
{ context4, oldAuditStamp, null }
451466
};
452467
}
453468

454469
@Test(description = "Each test case represents a scenario where a backfill event should NOT be backfilled",
455470
dataProvider = "addBackfillForNoopCases")
456-
public void testAddBackfillEmitTimeLargerThanOldAuditTime(
457-
IngestionTrackingContext ingestionTrackingContext, AuditStamp oldAuditStamp
471+
public void testAddForBackfillEventsWhenWeShouldNotDoBackfill(
472+
IngestionTrackingContext ingestionTrackingContext, AuditStamp oldAuditStamp, Long oldEmitTime
458473
) throws URISyntaxException {
459474
FooUrn urn = new FooUrn(1);
460475
AspectFoo oldFoo = new AspectFoo().setValue("oldFoo");
461476
AspectFoo newFoo = new AspectFoo().setValue("newFoo");
462477

463478
ExtraInfo extraInfo = new ExtraInfo();
464479
extraInfo.setAudit(oldAuditStamp);
480+
extraInfo.setEmitTime(oldEmitTime, SetMode.IGNORE_NULL);
465481

466482
DummyLocalDAO dummyLocalDAO = new DummyLocalDAO(_mockGetLatestFunction, _mockTrackingEventProducer, _mockTrackingManager,
467483
_dummyLocalDAO._transactionRunner);
468484
dummyLocalDAO.setEmitAuditEvent(true);
469485
dummyLocalDAO.setAlwaysEmitAuditEvent(true);
470486
dummyLocalDAO.setEmitAspectSpecificAuditEvent(true);
471487
dummyLocalDAO.setAlwaysEmitAspectSpecificAuditEvent(true);
472-
expectGetLatest(urn, AspectFoo.class, Collections.singletonList(makeAspectEntry(oldFoo, oldAuditStamp)));
488+
BaseLocalDAO.AspectEntry<AspectFoo> aspectEntry = new BaseLocalDAO.AspectEntry<>(oldFoo, extraInfo);
489+
expectGetLatest(urn, AspectFoo.class, Collections.singletonList(aspectEntry));
473490

474491
dummyLocalDAO.add(urn, newFoo, _dummyAuditStamp, ingestionTrackingContext);
475492

@@ -479,27 +496,54 @@ public void testAddBackfillEmitTimeLargerThanOldAuditTime(
479496
verifyNoMoreInteractions(_mockTrackingEventProducer);
480497
}
481498

482-
@Test(description = "Event should be processed for backfill event")
483-
public void testAddForBackfill() throws URISyntaxException {
499+
@DataProvider(name = "addBackfillForCasesThatShouldBackfill")
500+
public Object[][] addBackfillForCasesThatShouldBackfill() {
501+
AuditStamp oldAuditStamp = makeAuditStamp("susActor", 6L);
502+
503+
// case 1 - emitTime exists and is larger than old emit time (but smaller than old audit stamp, which is ignored when old emit time exists)
504+
IngestionTrackingContext context1 = new IngestionTrackingContext();
505+
context1.setBackfill(true);
506+
context1.setEmitTime(5L);
507+
long oldEmitTime1 = 4L;
508+
509+
// case 2 - emitTime exists and is larger than both old emit time and old audit stamp
510+
IngestionTrackingContext context2 = new IngestionTrackingContext();
511+
context2.setBackfill(true);
512+
context2.setEmitTime(10L);
513+
long oldEmitTime2 = 4L;
514+
515+
// case 3 - emitTime exists, old emitTime doesn't exist, emitTime > old audit stamp
516+
IngestionTrackingContext context3 = new IngestionTrackingContext();
517+
context3.setBackfill(true);
518+
context3.setEmitTime(7L);
519+
520+
return new Object[][] {
521+
{ context1, oldAuditStamp, oldEmitTime1 },
522+
{ context2, oldAuditStamp, oldEmitTime2 },
523+
{ context3, oldAuditStamp, null }
524+
};
525+
}
526+
527+
@Test(description = "Event should be processed for backfill event", dataProvider = "addBackfillForCasesThatShouldBackfill")
528+
public void testAddForBackfill(
529+
IngestionTrackingContext ingestionTrackingContext, AuditStamp oldAuditStamp, Long oldEmitTime
530+
) throws URISyntaxException {
484531
FooUrn urn = new FooUrn(1);
485532
AspectFoo oldFoo = new AspectFoo().setValue("oldFoo");
486533
AspectFoo newFoo = new AspectFoo().setValue("newFoo");
487534

488535
ExtraInfo extraInfo = new ExtraInfo();
489-
AuditStamp oldAuditStamp = makeAuditStamp("nonSusActor", 5L);
490536
extraInfo.setAudit(oldAuditStamp);
537+
extraInfo.setEmitTime(oldEmitTime, SetMode.IGNORE_NULL);
491538

492539
DummyLocalDAO dummyLocalDAO = new DummyLocalDAO(_mockGetLatestFunction, _mockTrackingEventProducer, _mockTrackingManager,
493540
_dummyLocalDAO._transactionRunner);
494541
dummyLocalDAO.setEmitAuditEvent(true);
495542
dummyLocalDAO.setAlwaysEmitAuditEvent(true);
496543
dummyLocalDAO.setEmitAspectSpecificAuditEvent(true);
497544
dummyLocalDAO.setAlwaysEmitAspectSpecificAuditEvent(true);
498-
expectGetLatest(urn, AspectFoo.class, Collections.singletonList(makeAspectEntry(oldFoo, oldAuditStamp)));
499-
500-
IngestionTrackingContext ingestionTrackingContext = new IngestionTrackingContext();
501-
ingestionTrackingContext.setBackfill(true);
502-
ingestionTrackingContext.setEmitTime(6L);
545+
BaseLocalDAO.AspectEntry<AspectFoo> aspectEntry = new BaseLocalDAO.AspectEntry<>(oldFoo, extraInfo);
546+
expectGetLatest(urn, AspectFoo.class, Collections.singletonList(aspectEntry));
503547

504548
dummyLocalDAO.add(urn, newFoo, _dummyAuditStamp, ingestionTrackingContext);
505549

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/EbeanLocalAccess.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package com.linkedin.metadata.dao;
22

3-
import com.linkedin.avro2pegasus.events.UUID;
43
import com.linkedin.common.AuditStamp;
54
import com.linkedin.common.urn.Urn;
65
import com.linkedin.data.template.RecordTemplate;
@@ -15,6 +14,7 @@
1514
import com.linkedin.metadata.dao.utils.RecordUtils;
1615
import com.linkedin.metadata.dao.utils.SQLSchemaUtils;
1716
import com.linkedin.metadata.dao.utils.SQLStatementUtils;
17+
import com.linkedin.metadata.events.IngestionTrackingContext;
1818
import com.linkedin.metadata.query.ExtraInfo;
1919
import com.linkedin.metadata.query.ExtraInfoArray;
2020
import com.linkedin.metadata.query.IndexFilter;
@@ -92,14 +92,18 @@ public void ensureSchemaUpToDate() {
9292
@Override
9393
@Transactional
9494
public <ASPECT extends RecordTemplate> int add(@Nonnull URN urn, @Nullable ASPECT newValue, @Nonnull Class<ASPECT> aspectClass,
95-
@Nonnull AuditStamp auditStamp, @Nullable UUID messageId) {
96-
return addWithOptimisticLocking(urn, newValue, aspectClass, auditStamp, null, messageId);
95+
@Nonnull AuditStamp auditStamp, @Nullable IngestionTrackingContext ingestionTrackingContext) {
96+
return addWithOptimisticLocking(urn, newValue, aspectClass, auditStamp, null, ingestionTrackingContext);
9797
}
9898

9999
@Override
100-
public <ASPECT extends RecordTemplate> int addWithOptimisticLocking(@Nonnull URN urn, @Nullable ASPECT newValue,
101-
@Nonnull Class<ASPECT> aspectClass, @Nonnull AuditStamp auditStamp, @Nonnull Timestamp oldTimestamp,
102-
@Nullable UUID messageId) {
100+
public <ASPECT extends RecordTemplate> int addWithOptimisticLocking(
101+
@Nonnull URN urn,
102+
@Nullable ASPECT newValue,
103+
@Nonnull Class<ASPECT> aspectClass,
104+
@Nonnull AuditStamp auditStamp,
105+
@Nullable Timestamp oldTimestamp,
106+
@Nullable IngestionTrackingContext ingestionTrackingContext) {
103107

104108
final long timestamp = auditStamp.hasTime() ? auditStamp.getTime() : System.currentTimeMillis();
105109
final String actor = auditStamp.hasActor() ? auditStamp.getActor().toString() : DEFAULT_ACTOR;
@@ -151,6 +155,9 @@ public <ASPECT extends RecordTemplate> int addWithOptimisticLocking(@Nonnull URN
151155
.setLastmodifiedby(actor)
152156
.setLastmodifiedon(new Timestamp(timestamp).toString())
153157
.setCreatedfor(impersonator, SetMode.IGNORE_NULL);
158+
if (ingestionTrackingContext != null) {
159+
auditedAspect.setEmitTime(ingestionTrackingContext.getEmitTime(), SetMode.IGNORE_NULL);
160+
}
154161

155162
final String metadata = toJsonString(auditedAspect);
156163
return sqlUpdate.setParameter("metadata", metadata).execute();

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/EbeanLocalDAO.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package com.linkedin.metadata.dao;
22

33
import com.google.common.annotations.VisibleForTesting;
4-
import com.linkedin.avro2pegasus.events.UUID;
54
import com.linkedin.common.AuditStamp;
65
import com.linkedin.common.urn.Urn;
76
import com.linkedin.data.schema.DataSchema;
@@ -760,11 +759,11 @@ protected <ASPECT extends RecordTemplate> void updateWithOptimisticLocking(@Nonn
760759
// aspect table will apply regular update over (urn, aspect, version) primary key combination.
761760
oldSchemaSqlUpdate = assembleOldSchemaSqlUpdate(aspect, null);
762761
numOfUpdatedRows = runInTransactionWithRetry(() -> {
763-
UUID messageId = trackingContext != null ? trackingContext.getTrackingId() : null;
764762
// DUAL WRITE: 1) update aspect table, 2) update entity table.
765763
// Note: when cold-archive is enabled, this method: updateWithOptimisticLocking will not be called.
766764
_server.execute(oldSchemaSqlUpdate);
767-
return _localAccess.addWithOptimisticLocking(urn, (ASPECT) value, aspectClass, newAuditStamp, oldTimestamp, messageId);
765+
return _localAccess.addWithOptimisticLocking(urn, (ASPECT) value, aspectClass, newAuditStamp, oldTimestamp,
766+
trackingContext);
768767
}, 1);
769768
} else {
770769
// In OLD_SCHEMA mode since aspect table is the SOT and the getLatest (oldTimestamp) is from the aspect table
@@ -787,8 +786,7 @@ protected <ASPECT extends RecordTemplate> void insert(@Nonnull URN urn, @Nullabl
787786
if (_schemaConfig != SchemaConfig.OLD_SCHEMA_ONLY && version == LATEST_VERSION) {
788787
// insert() could be called when updating log table (moving current versions into new history version)
789788
// the metadata entity tables shouldn't be updated.
790-
UUID messageId = trackingContext != null ? trackingContext.getTrackingId() : null;
791-
_localAccess.add(urn, (ASPECT) value, aspectClass, auditStamp, messageId);
789+
_localAccess.add(urn, (ASPECT) value, aspectClass, auditStamp, trackingContext);
792790
}
793791

794792
if (_changeLogEnabled) {

dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/IEbeanLocalAccess.java

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
package com.linkedin.metadata.dao;
22

3-
import com.linkedin.avro2pegasus.events.UUID;
43
import com.linkedin.common.AuditStamp;
54
import com.linkedin.common.urn.Urn;
65
import com.linkedin.data.template.RecordTemplate;
76
import com.linkedin.metadata.dao.builder.BaseLocalRelationshipBuilder.LocalRelationshipUpdates;
87
import com.linkedin.metadata.dao.builder.LocalRelationshipBuilderRegistry;
98
import com.linkedin.metadata.dao.scsi.UrnPathExtractor;
9+
import com.linkedin.metadata.events.IngestionTrackingContext;
1010
import com.linkedin.metadata.query.IndexFilter;
1111
import com.linkedin.metadata.query.IndexGroupByCriterion;
1212
import com.linkedin.metadata.query.IndexSortCriterion;
@@ -25,28 +25,32 @@ public interface IEbeanLocalAccess<URN extends Urn> {
2525

2626
/**
2727
* Upsert aspect into entity table.
28-
* @param urn entity urn
29-
* @param newValue aspect value in {@link RecordTemplate}
30-
* @param aspectClass class of the aspect
31-
* @param auditStamp audit timestamp
32-
* @param <ASPECT> metadata aspect value
28+
*
29+
* @param <ASPECT> metadata aspect value
30+
* @param urn entity urn
31+
* @param newValue aspect value in {@link RecordTemplate}
32+
* @param aspectClass class of the aspect
33+
* @param auditStamp audit timestamp
34+
* @param ingestionTrackingContext the ingestionTrackingContext of the MCE responsible for this update
3335
* @return number of rows inserted or updated
3436
*/
3537
<ASPECT extends RecordTemplate> int add(@Nonnull URN urn, @Nullable ASPECT newValue, @Nonnull Class<ASPECT> aspectClass,
36-
@Nonnull AuditStamp auditStamp, @Nullable UUID messageId);
38+
@Nonnull AuditStamp auditStamp, @Nullable IngestionTrackingContext ingestionTrackingContext);
3739

3840
/**
3941
* Update aspect on entity table with optimistic locking. (compare-and-update on oldTimestamp).
40-
* @param urn entity urn
41-
* @param newValue aspect value in {@link RecordTemplate}
42-
* @param aspectClass class of the aspect
43-
* @param auditStamp audit timestamp
44-
* @param oldTimestamp old time stamp for optimistic lock checking
45-
* @param <ASPECT> metadata aspect value
42+
*
43+
* @param <ASPECT> metadata aspect value
44+
* @param urn entity urn
45+
* @param newValue aspect value in {@link RecordTemplate}
46+
* @param aspectClass class of the aspect
47+
* @param auditStamp audit timestamp
48+
* @param oldTimestamp old time stamp for optimistic lock checking
49+
* @param ingestionTrackingContext the ingestionTrackingContext of the MCE responsible for calling this update
4650
* @return number of rows inserted or updated
4751
*/
4852
<ASPECT extends RecordTemplate> int addWithOptimisticLocking(@Nonnull URN urn, @Nullable ASPECT newValue, @Nonnull Class<ASPECT> aspectClass,
49-
@Nonnull AuditStamp auditStamp, @Nullable Timestamp oldTimestamp, @Nullable UUID messageId);
53+
@Nonnull AuditStamp auditStamp, @Nullable Timestamp oldTimestamp, @Nullable IngestionTrackingContext ingestionTrackingContext);
5054

5155
/**
5256
* Upsert relationships to the local relationship table(s).

0 commit comments

Comments
 (0)