40
40
import static org .apache .ignite .internal .distributionzones .rebalance .ZoneRebalanceUtil .zoneAssignmentsGetLocally ;
41
41
import static org .apache .ignite .internal .distributionzones .rebalance .ZoneRebalanceUtil .zonePartitionAssignmentsGetLocally ;
42
42
import static org .apache .ignite .internal .hlc .HybridTimestamp .LOGICAL_TIME_BITS_SIZE ;
43
+ import static org .apache .ignite .internal .hlc .HybridTimestamp .nullableHybridTimestamp ;
43
44
import static org .apache .ignite .internal .lang .IgniteSystemProperties .getBoolean ;
44
45
import static org .apache .ignite .internal .metastorage .dsl .Conditions .notExists ;
45
46
import static org .apache .ignite .internal .metastorage .dsl .Operations .put ;
128
129
import org .apache .ignite .internal .replicator .ZonePartitionId ;
129
130
import org .apache .ignite .internal .replicator .listener .ReplicaListener ;
130
131
import org .apache .ignite .internal .schema .SchemaSyncService ;
132
+ import org .apache .ignite .internal .tx .storage .state .TxStatePartitionStorage ;
133
+ import org .apache .ignite .internal .tx .storage .state .rocksdb .TxStateRocksDbSharedStorage ;
131
134
import org .apache .ignite .internal .util .Cursor ;
132
135
import org .apache .ignite .internal .util .IgniteSpinBusyLock ;
136
+ import org .apache .ignite .internal .util .IgniteUtils ;
133
137
import org .apache .ignite .network .ClusterNode ;
134
138
import org .jetbrains .annotations .Nullable ;
135
139
@@ -213,6 +217,8 @@ public class PartitionReplicaLifecycleManager extends
213
217
214
218
private final ConcurrentMap <ZonePartitionId , ZonePartitionRaftListener > zonePartitionRaftListeners = new ConcurrentHashMap <>();
215
219
220
+ private final ZoneResourcesManager zoneResourcesManager ;
221
+
216
222
/**
217
223
* The constructor.
218
224
*
@@ -228,6 +234,7 @@ public class PartitionReplicaLifecycleManager extends
228
234
* @param placementDriver Placement driver.
229
235
* @param schemaSyncService Schema synchronization service.
230
236
* @param systemDistributedConfiguration System distributed configuration.
237
+ * @param sharedTxStateStorage Shared tx state storage.
231
238
*/
232
239
public PartitionReplicaLifecycleManager (
233
240
CatalogManager catalogMgr ,
@@ -242,7 +249,8 @@ public PartitionReplicaLifecycleManager(
242
249
ClockService clockService ,
243
250
PlacementDriver placementDriver ,
244
251
SchemaSyncService schemaSyncService ,
245
- SystemDistributedConfiguration systemDistributedConfiguration
252
+ SystemDistributedConfiguration systemDistributedConfiguration ,
253
+ TxStateRocksDbSharedStorage sharedTxStateStorage
246
254
) {
247
255
this .catalogMgr = catalogMgr ;
248
256
this .replicaMgr = replicaMgr ;
@@ -266,6 +274,8 @@ public PartitionReplicaLifecycleManager(
266
274
Integer ::parseInt
267
275
);
268
276
277
+ zoneResourcesManager = new ZoneResourcesManager (sharedTxStateStorage );
278
+
269
279
pendingAssignmentsRebalanceListener = createPendingAssignmentsRebalanceListener ();
270
280
stableAssignmentsRebalanceListener = createStableAssignmentsRebalanceListener ();
271
281
assignmentsSwitchRebalanceListener = createAssignmentsSwitchRebalanceListener ();
@@ -415,14 +425,17 @@ private CompletableFuture<Void> calculateZoneAssignmentsAndCreateReplicationNode
415
425
416
426
return getOrCreateAssignments (zoneDescriptor , causalityToken , catalogVersion )
417
427
.thenCompose (assignments -> writeZoneAssignmentsToMetastore (zoneId , assignments ))
418
- .thenCompose (assignments -> createZoneReplicationNodes (zoneId , assignments , causalityToken ));
428
+ .thenCompose (
429
+ assignments -> createZoneReplicationNodes (zoneId , assignments , causalityToken , zoneDescriptor .partitions ())
430
+ );
419
431
});
420
432
}
421
433
422
434
private CompletableFuture <Void > createZoneReplicationNodes (
423
435
int zoneId ,
424
436
List <Assignments > assignments ,
425
- long revision
437
+ long revision ,
438
+ int partitionCount
426
439
) {
427
440
return inBusyLockAsync (busyLock , () -> {
428
441
assert assignments != null : IgniteStringFormatter .format ("Zone has empty assignments [id={}]." , zoneId );
@@ -440,7 +453,8 @@ private CompletableFuture<Void> createZoneReplicationNodes(
440
453
zonePartitionId ,
441
454
localMemberAssignment ,
442
455
zoneAssignment ,
443
- revision
456
+ revision ,
457
+ partitionCount
444
458
);
445
459
}
446
460
@@ -456,13 +470,15 @@ private CompletableFuture<Void> createZoneReplicationNodes(
456
470
* @param localMemberAssignment Assignment of the local member, or null if local member is not part of the assignment.
457
471
* @param stableAssignments Stable assignments.
458
472
* @param revision Event's revision.
473
+ * @param partitionCount Number of partitions in the zone.
459
474
* @return Future that completes when a replica is started.
460
475
*/
461
476
private CompletableFuture <?> createZonePartitionReplicationNode (
462
477
ZonePartitionId zonePartitionId ,
463
478
@ Nullable Assignment localMemberAssignment ,
464
479
Assignments stableAssignments ,
465
- long revision
480
+ long revision ,
481
+ int partitionCount
466
482
) {
467
483
if (localMemberAssignment == null ) {
468
484
return nullCompletedFuture ();
@@ -490,6 +506,12 @@ private CompletableFuture<?> createZonePartitionReplicationNode(
490
506
);
491
507
492
508
Supplier <CompletableFuture <Boolean >> startReplicaSupplier = () -> {
509
+ TxStatePartitionStorage txStatePartitionStorage = zoneResourcesManager .getOrCreatePartitionTxStateStorage (
510
+ zonePartitionId .zoneId (),
511
+ partitionCount ,
512
+ zonePartitionId .partitionId ()
513
+ );
514
+
493
515
try {
494
516
return replicaMgr .startReplica (
495
517
zonePartitionId ,
@@ -499,6 +521,9 @@ private CompletableFuture<?> createZonePartitionReplicationNode(
499
521
stablePeersAndLearners ,
500
522
raftGroupListener ,
501
523
raftGroupEventsListener ,
524
+ // TODO: IGNITE-24371 - pass real isVolatile flag
525
+ false ,
526
+ txStatePartitionStorage ,
502
527
busyLock
503
528
).thenCompose (replica -> executeUnderZoneWriteLock (zonePartitionId .zoneId (), () -> {
504
529
replicationGroupIds .add (zonePartitionId );
@@ -998,11 +1023,19 @@ private CompletableFuture<Void> handleChangePendingAssignmentEvent(
998
1023
CompletableFuture <?> localServicesStartFuture ;
999
1024
1000
1025
if (shouldStartLocalGroupNode ) {
1026
+ // We can safely access the Catalog at the timestamp because:
1027
+ // 1. It is guaranteed that Catalog update bringing the Catalog version has been applied (as we are now handling
1028
+ // a Metastorage event that was caused by that same Catalog version we need, so the Catalog version update is already
1029
+ // handled), so no Schema sync is needed.
1030
+ // 2. It is guaranteed that Catalog compactor cannot remove Catalog version corresponding to pending assignments timestamp.
1031
+ CatalogZoneDescriptor zoneDescriptor = zoneDescriptorAt (replicaGrpId .zoneId (), pendingAssignments .timestamp ());
1032
+
1001
1033
localServicesStartFuture = createZonePartitionReplicationNode (
1002
1034
replicaGrpId ,
1003
1035
localMemberAssignment ,
1004
1036
computedStableAssignments ,
1005
- revision
1037
+ revision ,
1038
+ zoneDescriptor .partitions ()
1006
1039
);
1007
1040
} else if (pendingAssignmentsAreForced && localMemberAssignment != null ) {
1008
1041
localServicesStartFuture = runAsync (() -> {
@@ -1041,6 +1074,16 @@ private CompletableFuture<Void> handleChangePendingAssignmentEvent(
1041
1074
}), ioExecutor );
1042
1075
}
1043
1076
1077
+ private CatalogZoneDescriptor zoneDescriptorAt (int zoneId , long timestamp ) {
1078
+ Catalog catalog = catalogMgr .activeCatalog (timestamp );
1079
+ assert catalog != null : "Catalog is not available at " + nullableHybridTimestamp (timestamp );
1080
+
1081
+ CatalogZoneDescriptor zoneDescriptor = catalog .zone (zoneId );
1082
+ assert zoneDescriptor != null : "Zone descriptor is not available at " + nullableHybridTimestamp (timestamp ) + " for zone " + zoneId ;
1083
+
1084
+ return zoneDescriptor ;
1085
+ }
1086
+
1044
1087
private CompletableFuture <Void > changePeersOnRebalance (
1045
1088
ReplicaManager replicaMgr ,
1046
1089
ZonePartitionId replicaGrpId ,
@@ -1191,6 +1234,12 @@ public CompletableFuture<Void> stopAsync(ComponentContext componentContext) {
1191
1234
return nullCompletedFuture ();
1192
1235
}
1193
1236
1237
+ try {
1238
+ IgniteUtils .closeAllManually (zoneResourcesManager );
1239
+ } catch (Exception e ) {
1240
+ return failedFuture (e );
1241
+ }
1242
+
1194
1243
return nullCompletedFuture ();
1195
1244
}
1196
1245
0 commit comments