Skip to content

Commit a4fe0bb

Browse files
authored
glebashnik/add-deferred-reconfiguration-for-remote-session (#35941)
This fixies a bug when only local session in Deployment.java marks clusters to defer reconfiguration, while remote sessions retrieved by config servers from zookeeper in SessionRepository don't do it. This results in different applyOnRestart response from different config servers.
1 parent d7daf87 commit a4fe0bb

2 files changed

Lines changed: 111 additions & 2 deletions

File tree

configserver/src/main/java/com/yahoo/vespa/config/server/session/SessionRepository.java

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.yahoo.config.application.api.DeployLogger;
1111
import com.yahoo.config.model.api.ConfigDefinitionRepo;
1212
import com.yahoo.config.model.api.EndpointCertificateSecretStore;
13+
import com.yahoo.config.model.api.Model;
1314
import com.yahoo.config.model.api.OnnxModelCost;
1415
import com.yahoo.config.model.application.provider.DeployData;
1516
import com.yahoo.config.model.application.provider.FilesApplicationPackage;
@@ -24,10 +25,12 @@
2425
import com.yahoo.vespa.config.server.ConfigServerDB;
2526
import com.yahoo.vespa.config.server.TimeoutBudget;
2627
import com.yahoo.vespa.config.server.application.InheritableApplications;
28+
import com.yahoo.vespa.config.server.application.Application;
2729
import com.yahoo.vespa.config.server.application.ApplicationVersions;
2830
import com.yahoo.vespa.config.server.application.TenantApplications;
2931
import com.yahoo.vespa.config.server.configchange.ConfigChangeActions;
3032
import com.yahoo.vespa.config.server.deploy.TenantFileSystemDirs;
33+
import com.yahoo.text.Text;
3134
import com.yahoo.vespa.config.server.filedistribution.FileDistributionFactory;
3235
import com.yahoo.vespa.config.server.http.InvalidApplicationException;
3336
import com.yahoo.vespa.config.server.http.UnknownVespaVersionException;
@@ -76,6 +79,7 @@
7679
import java.util.Map;
7780
import java.util.Optional;
7881
import java.util.Set;
82+
import java.util.stream.Collectors;
7983
import java.util.concurrent.ExecutionException;
8084
import java.util.concurrent.Executor;
8185
import java.util.concurrent.ExecutorService;
@@ -86,6 +90,7 @@
8690
import java.util.logging.Level;
8791
import java.util.logging.Logger;
8892

93+
import static com.yahoo.vespa.config.server.session.ActivationTriggers.DeferredReconfiguration;
8994
import static com.yahoo.vespa.config.server.session.Session.Status.ACTIVATE;
9095
import static com.yahoo.vespa.config.server.session.Session.Status.DEACTIVATE;
9196
import static com.yahoo.vespa.config.server.session.Session.Status.NEW;
@@ -464,18 +469,51 @@ void activate(long sessionId) {
464469

465470
CompletionWaiter waiter = createSessionZooKeeperClient(sessionId).getActiveWaiter();
466471
log.log(Level.FINE, () -> session.logPre() + "Activating " + sessionId);
467-
tenantApplications.activateApplication(ensureApplicationLoaded(session), sessionId);
472+
473+
ApplicationVersions applicationVersions = ensureApplicationLoaded(session);
474+
applyDeferredReconfigurationOfClusters(session, applicationVersions);
475+
476+
tenantApplications.activateApplication(applicationVersions, sessionId);
468477
log.log(Level.FINE, () -> session.logPre() + "Notifying " + waiter);
469478
notifyCompletion(waiter);
470479
log.log(Level.INFO, session.logPre() + "Session activated: " + sessionId);
471480
}
472481

482+
/**
483+
* Marks clusters for deferred reconfiguration in the model, i.e. wait until restart to apply new config.
484+
* This is similar to {@link com.yahoo.vespa.config.server.deploy.Deployment::applyDeferredReconfigurationOfClusters}
485+
* but for {@link ActivationTriggers} from {@link RemoteSession} in ZooKeeper
486+
* rather than from {@link Session} created locally on the config server.
487+
*/
488+
private void applyDeferredReconfigurationOfClusters(RemoteSession session, ApplicationVersions applicationVersions) {
489+
Set<String> clustersWithDeferredReconfiguration = session.getActivationTriggers().deferredReconfigurations().stream()
490+
.map(DeferredReconfiguration::clusterId)
491+
.collect(Collectors.toSet());
492+
493+
if (clustersWithDeferredReconfiguration.isEmpty()) {
494+
return;
495+
}
496+
497+
// Get the model and mark clusters for deferred reconfiguration
498+
Model model = applicationVersions.get(session.getVespaVersion())
499+
.map(Application::getModel)
500+
.orElseThrow(() -> new IllegalStateException(
501+
"Cannot apply deferred reconfiguration: no model available for session " + session.getSessionId()));
502+
model.markClustersForDeferredReconfiguration(clustersWithDeferredReconfiguration);
503+
504+
clustersWithDeferredReconfiguration.forEach(clusterName ->
505+
log.log(Level.INFO, session.logPre() +
506+
Text.format("Deferring reconfiguration of cluster '%s' until restart is completed", clusterName)));
507+
}
508+
473509
private void loadSessionIfActive(RemoteSession session) {
474510
for (ApplicationId applicationId : tenantApplications.activeApplications()) {
475511
Optional<Long> activeSession = tenantApplications.activeSessionOf(applicationId);
476512
if (activeSession.isPresent() && activeSession.get() == session.getSessionId()) {
477513
log.log(Level.FINE, () -> "Found active application for session " + session.getSessionId() + " , loading it");
478-
tenantApplications.activateApplication(ensureApplicationLoaded(session), session.getSessionId());
514+
ApplicationVersions applicationVersions = ensureApplicationLoaded(session);
515+
applyDeferredReconfigurationOfClusters(session, applicationVersions);
516+
tenantApplications.activateApplication(applicationVersions, session.getSessionId());
479517
log.log(Level.INFO, session.logPre() + "Application activated successfully: " + applicationId + " (generation " + session.getSessionId() + ")");
480518
return;
481519
}

configserver/src/test/java/com/yahoo/vespa/config/server/session/SessionRepositoryTest.java

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,4 +384,75 @@ public ModelCreateResult createAndValidateModel(ModelContext modelContext, Valid
384384
}
385385
}
386386

387+
@Test
388+
public void activate_applies_deferred_reconfiguration_from_zookeeper() throws Exception {
389+
// Tests that SessionRepository.activate() reads ActivationTriggers from ZooKeeper
390+
// and applies deferred reconfigurations to the model (non-primary server path)
391+
setup();
392+
long sessionId = deploy();
393+
394+
// Write ActivationTriggers to ZooKeeper to simulate what primary server writes
395+
var zkClient = new SessionZooKeeperClient(curator, tenantName, sessionId,
396+
applicationRepository.configserverConfig());
397+
var triggers = new ActivationTriggers(
398+
List.of(), // nodeRestarts
399+
List.of(), // reindexings
400+
List.of(new ActivationTriggers.DeferredReconfiguration("container")) // deferredReconfigurations
401+
);
402+
zkClient.writeActivationTriggers(triggers);
403+
404+
// Deactivate the session to clear applicationVersions
405+
sessionRepository.deactivateSession(sessionId);
406+
407+
// Now activate it - this should read ActivationTriggers and apply deferred reconfigurations
408+
sessionRepository.activate(sessionId);
409+
410+
// Verify that the model has deferChangesUntilRestart set
411+
var session = sessionRepository.getRemoteSession(sessionId);
412+
var appVersions = session.applicationVersions();
413+
assertTrue("Session should be activated", appVersions.isPresent());
414+
415+
var model = (com.yahoo.vespa.model.VespaModel) appVersions.get().applications().get(0).getModel();
416+
var cluster = model.getContainerClusters().get("container");
417+
assertNotNull("Container cluster should exist", cluster);
418+
assertTrue("Cluster should have deferChangesUntilRestart set after activation",
419+
cluster.getDeferChangesUntilRestart());
420+
}
421+
422+
@Test
423+
public void loadSessionIfActive_applies_deferred_reconfiguration_from_zookeeper() throws Exception {
424+
// Tests that loadSessionIfActive() applies deferred reconfigurations
425+
// when discovering an already-active session (bootstrap/discovery path)
426+
setup();
427+
long sessionId = deploy();
428+
429+
// Write ActivationTriggers to ZooKeeper
430+
SessionZooKeeperClient zkClient = new SessionZooKeeperClient(curator, tenantName, sessionId,
431+
applicationRepository.configserverConfig());
432+
ActivationTriggers triggers = new ActivationTriggers(
433+
List.of(), // nodeRestarts
434+
List.of(), // reindexings
435+
List.of(new ActivationTriggers.DeferredReconfiguration("container")) // deferredReconfigurations
436+
);
437+
zkClient.writeActivationTriggers(triggers);
438+
439+
// Deactivate and then re-discover the session
440+
sessionRepository.deactivateSession(sessionId);
441+
442+
// createRemoteSessionAndActivate calls loadSessionIfActive internally
443+
RemoteSession session = sessionRepository.createRemoteSessionAndActivate(sessionId);
444+
445+
// Verify deferred reconfigurations were applied
446+
var appVersions = session.applicationVersions();
447+
if (appVersions.isPresent()) {
448+
var model = (com.yahoo.vespa.model.VespaModel) appVersions.get().applications().get(0).getModel();
449+
var cluster = model.getContainerClusters().get("container");
450+
assertNotNull("Container cluster should exist", cluster);
451+
assertTrue("Cluster should have deferChangesUntilRestart set when loading active session",
452+
cluster.getDeferChangesUntilRestart());
453+
}
454+
// If no appVersions, the session wasn't marked as active in tenantApplications,
455+
// so loadSessionIfActive didn't load it - this is expected behavior
456+
}
457+
387458
}

0 commit comments

Comments
 (0)