Layr-Labs
diff --git a/‎coordinator/api/provider.go‎
Lines changed: 14 additions & 4 deletions b/‎coordinator/api/provider.go‎
Lines changed: 14 additions & 4 deletions
diff --git a/‎coordinator/api/provider_test.go‎
Lines changed: 52 additions & 0 deletions b/‎coordinator/api/provider_test.go‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎coordinator/api/release_handlers.go‎
Lines changed: 21 additions & 0 deletions b/‎coordinator/api/release_handlers.go‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎coordinator/protocol/messages.go‎
Lines changed: 9 additions & 0 deletions b/‎coordinator/protocol/messages.go‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎coordinator/registry/pending_model_loads_test.go‎
Lines changed: 77 additions & 0 deletions b/‎coordinator/registry/pending_model_loads_test.go‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎coordinator/registry/registry.go‎
Lines changed: 63 additions & 6 deletions b/‎coordinator/registry/registry.go‎
Lines changed: 63 additions & 6 deletions
diff --git a/‎provider-swift/Sources/ProviderCore/Protocol/Types.swift‎
Lines changed: 8 additions & 0 deletions b/‎provider-swift/Sources/ProviderCore/Protocol/Types.swift‎
Lines changed: 8 additions & 0 deletions
@@ -420,10 +420,20 @@ func (s *Server) providerReadLoop(ctx context.Context, conn *websocket.Conn, pro
 				s.registry.ClearPendingModelLoad(providerID, statusMsg.ModelID)
 				s.registry.DrainQueuedRequestsForModel(statusMsg.ModelID)
 			case protocol.LoadModelStatusFailed:
-				// Keep the pending entry (TTL cooldown suppresses retry storms).
-				// If no other provider can serve this model, reject queued
-				// requests immediately rather than making them wait 120s.
-				s.registry.RejectUnservableQueuedRequests(statusMsg.ModelID)
+				if statusMsg.Error == protocol.ProviderDrainingForUpdate {
+					// Transient: the provider refused only because it is
+					// draining ahead of an auto-update restart. Shorten the
+					// cooldown so a failed restart (provider resumes serving)
+					// becomes loadable again quickly; queued requests are NOT
+					// rejected — the provider is back within the queue window
+					// and other providers remain plannable.
+					s.registry.BackoffPendingModelLoadForDrain(providerID, statusMsg.ModelID)
+				} else {
+					// Keep the pending entry (TTL cooldown suppresses retry storms).
+					// If no other provider can serve this model, reject queued
+					// requests immediately rather than making them wait 120s.
+					s.registry.RejectUnservableQueuedRequests(statusMsg.ModelID)
+				}
 			}
 			// "started" status: no action — load is in progress.
 
 
@@ -690,6 +690,58 @@ func TestSyncBinaryHashesPreservesAdditionalConfiguredHashes(t *testing.T) {
 	}
 }
 
+// TestAdminDeleteReleaseBlocksActiveBinaryHashWhenEnforced checks the admin
+// release-delete guard: with binary-hash enforcement switched on, deleting a
+// release whose binary hash is attested by a registered provider must answer
+// 409 Conflict and leave the release active, while the same request carrying
+// force=true must succeed and deactivate the release.
+func TestAdminDeleteReleaseBlocksActiveBinaryHashWhenEnforced(t *testing.T) {
+	binaryHash := strings.Repeat("c", 64)
+
+	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))
+	st := store.NewMemory(store.Config{AdminKey: "test-key"})
+	reg := registry.New(logger)
+	srv := NewServer(reg, st, ServerConfig{AdminKey: "admin-key"}, logger)
+	srv.SetBinaryHashEnforcement(true)
+
+	// Seed one release carrying the binary hash under test.
+	seeded := &store.Release{
+		Version:    "1.0.0",
+		Platform:   "macos-arm64",
+		BinaryHash: binaryHash,
+		BundleHash: strings.Repeat("d", 64),
+		URL:        "https://r2.example.com/releases/v1.0.0/darkbloom-bundle-macos-arm64.tar.gz",
+	}
+	if err := st.SetRelease(seeded); err != nil {
+		t.Fatalf("SetRelease: %v", err)
+	}
+
+	// Register a provider and record an attestation result with the same hash.
+	registration := &protocol.RegisterMessage{
+		Type:    protocol.TypeRegister,
+		Backend: "mlx-swift",
+		Hardware: protocol.Hardware{
+			MachineModel: "Mac15,8",
+			ChipName:     "Apple M3 Max",
+			MemoryGB:     64,
+		},
+		Models: []protocol.ModelInfo{{ID: "test-model", ModelType: "chat", Quantization: "4bit"}},
+	}
+	provider := reg.Register("provider-old", nil, registration)
+	provider.SetAttestationResult(&attestation.VerificationResult{Valid: true, BinaryHash: binaryHash})
+
+	// sendDelete invokes the handler directly with an admin-key DELETE request
+	// carrying the given JSON payload and returns the recorded response.
+	sendDelete := func(payload string) *httptest.ResponseRecorder {
+		req := httptest.NewRequest(http.MethodDelete, "/v1/admin/releases", strings.NewReader(payload))
+		req.Header.Set("Authorization", "Bearer admin-key")
+		rec := httptest.NewRecorder()
+		srv.handleAdminDeleteRelease(rec, req)
+		return rec
+	}
+
+	// Without force the delete must be refused and the release kept.
+	guarded := sendDelete(`{"version":"1.0.0","platform":"macos-arm64"}`)
+	if guarded.Code != http.StatusConflict {
+		t.Fatalf("delete without force status = %d, want %d; body=%s", guarded.Code, http.StatusConflict, guarded.Body.String())
+	}
+	if latest := st.GetLatestRelease("macos-arm64"); latest == nil || !latest.Active {
+		t.Fatal("release should remain active after protected delete")
+	}
+
+	// With force=true the delete goes through.
+	forced := sendDelete(`{"version":"1.0.0","platform":"macos-arm64","force":true}`)
+	if forced.Code != http.StatusOK {
+		t.Fatalf("force delete status = %d, want %d; body=%s", forced.Code, http.StatusOK, forced.Body.String())
+	}
+	if latest := st.GetLatestRelease("macos-arm64"); latest != nil {
+		t.Fatal("release should be inactive after forced delete")
+	}
+}
+
 func TestBinaryHashPolicySnapshotConcurrentSync(t *testing.T) {
 	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError}))
 	st := store.NewMemory(store.Config{AdminKey: "test-key"})
 
@@ -505,6 +505,7 @@ func (s *Server) handleAdminDeleteRelease(w http.ResponseWriter, r *http.Request
 	var req struct {
 		Version  string `json:"version"`
 		Platform string `json:"platform"`
+		Force    bool   `json:"force,omitempty"`
 	}
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 		writeJSON(w, http.StatusBadRequest, errorResponse("invalid_request_error", "invalid JSON: "+err.Error()))
@@ -517,6 +518,17 @@ func (s *Server) handleAdminDeleteRelease(w http.ResponseWriter, r *http.Request
 	if req.Platform == "" {
 		req.Platform = "macos-arm64"
 	}
+	if s.binaryHashEnforce && !req.Force {
+		if release, ok := findReleaseForDeactivation(s.store.ListReleases(), req.Version, req.Platform); ok {
+			if activeProviders := s.registry.CountProvidersByBinaryHash(release.BinaryHash); activeProviders > 0 {
+				writeJSON(w, http.StatusConflict, errorResponse(
+					"release_in_use",
+					fmt.Sprintf("release %s/%s binary hash is still used by %d connected provider(s); wait for rollout or set force=true", req.Version, req.Platform, activeProviders),
+				))
+				return
+			}
+		}
+	}
 
 	if err := s.store.DeleteRelease(req.Version, req.Platform); err != nil {
 		writeJSON(w, http.StatusNotFound, errorResponse("not_found", err.Error()))
@@ -535,6 +547,15 @@ func (s *Server) handleAdminDeleteRelease(w http.ResponseWriter, r *http.Request
 	})
 }
 
+// findReleaseForDeactivation scans releases in order and returns the first
+// entry that is marked Active and whose Version and Platform both equal the
+// given values. The boolean is false, and the returned Release is the zero
+// value, when no entry matches.
+func findReleaseForDeactivation(releases []store.Release, version, platform string) (store.Release, bool) {
+	for i := range releases {
+		candidate := releases[i]
+		if candidate.Version != version || candidate.Platform != platform || !candidate.Active {
+			continue
+		}
+		return candidate, true
+	}
+	return store.Release{}, false
+}
+
 // isAdminAuthorized checks if the request is from an admin.
 // Accepts either Privy admin (email in admin list) OR EIGENINFERENCE_ADMIN_KEY.
 func (s *Server) isAdminAuthorized(w http.ResponseWriter, r *http.Request) bool {
 
@@ -63,6 +63,15 @@ const (
 	LoadModelStatusFailed    = "failed"
 )
 
+// ProviderDrainingForUpdate is the well-known error reason a provider attaches
+// to inference / load_model / prefetch_model rejections while it is draining
+// ahead of an auto-update restart. The coordinator matches this exact string
+// to treat such a load_model failure as transient (short retry backoff,
+// provider is about to restart) rather than a genuine load failure that earns
+// the full cooldown. Mirrored in
+// provider-swift/Sources/ProviderCore/Protocol/Types.swift.
+//
+// The coordinator compares the load_model status message's Error field to
+// this constant with plain string equality, so the provider must send the
+// reason verbatim: any prefix, suffix, or change in case makes the failure
+// take the ordinary (non-drain) path.
+const ProviderDrainingForUpdate = "provider draining for update"
+
 // PrefetchModelStatus is the lifecycle state reported by a provider in
 // response to a PrefetchModelMessage. Unlike a load, a prefetch only
 // downloads + verifies the model on disk; it does NOT load weights into
 
@@ -0,0 +1,77 @@
+package registry
+
+import (
+	"testing"
+	"time"
+)
+
+// hasPendingLoad is a test helper that takes the registry read lock and
+// returns whatever providerHasPendingLoad reports for providerID.
+func hasPendingLoad(r *Registry, providerID string) bool {
+	r.mu.RLock()
+	pending := r.providerHasPendingLoad(providerID)
+	r.mu.RUnlock()
+	return pending
+}
+
+// TestPendingModelLoadReserveAndExpiry checks that a reserved load blocks
+// further reservations for the same provider, and that the entry is kept just
+// before pendingModelLoadTTL has elapsed and removed just after.
+func TestPendingModelLoadReserveAndExpiry(t *testing.T) {
+	reg := New(testLogger())
+	start := time.Now()
+
+	firstLoad := []modelLoadAction{{providerID: "p1", modelID: "m1"}}
+	if got := reg.reservePendingModelLoads(firstLoad, start); len(got) != 1 {
+		t.Fatalf("expected 1 reserved action, got %d", len(got))
+	}
+
+	// A live entry blocks the whole provider, even when the second request is
+	// for a different model (single-slot swap oscillation guard).
+	secondLoad := []modelLoadAction{{providerID: "p1", modelID: "m2"}}
+	if got := reg.reservePendingModelLoads(secondLoad, start.Add(time.Minute)); len(got) != 0 {
+		t.Fatal("provider with a pending load was reserved again")
+	}
+
+	// Sweep once on each side of the TTL boundary.
+	sweeps := []struct {
+		at          time.Time
+		wantPending bool
+		failure     string
+	}{
+		{start.Add(pendingModelLoadTTL - time.Second), true, "pending load expired before the TTL"},
+		{start.Add(pendingModelLoadTTL + time.Second), false, "pending load survived past the TTL"},
+	}
+	for _, sweep := range sweeps {
+		reg.expirePendingModelLoads(sweep.at)
+		if hasPendingLoad(reg, "p1") != sweep.wantPending {
+			t.Fatal(sweep.failure)
+		}
+	}
+}
+
+// TestDrainBackoffShortensPendingLoadCooldown checks that a drain rejection
+// replaces an existing reservation's expiry with the shorter
+// pendingModelLoadDrainBackoff window.
+func TestDrainBackoffShortensPendingLoadCooldown(t *testing.T) {
+	reg := New(testLogger())
+	reg.reservePendingModelLoads([]modelLoadAction{{providerID: "p1", modelID: "m1"}}, time.Now())
+
+	// The drain rejection re-stamps the entry with the short backoff: long
+	// enough to keep the planner off a provider that is about to restart,
+	// short enough that an aborted restart leaves it plannable again well
+	// inside the queue window.
+	reg.BackoffPendingModelLoadForDrain("p1", "m1")
+
+	// Sweep once shortly before and once shortly after the backoff boundary;
+	// the sweep instant is taken from the wall clock at each step.
+	sweeps := []struct {
+		offset      time.Duration
+		wantPending bool
+		failure     string
+	}{
+		{pendingModelLoadDrainBackoff - 5*time.Second, true, "drain backoff cleared too early"},
+		{pendingModelLoadDrainBackoff + time.Second, false, "drain backoff survived past pendingModelLoadDrainBackoff"},
+	}
+	for _, sweep := range sweeps {
+		reg.expirePendingModelLoads(time.Now().Add(sweep.offset))
+		if hasPendingLoad(reg, "p1") != sweep.wantPending {
+			t.Fatal(sweep.failure)
+		}
+	}
+}
+
+// TestDrainBackoffAppliesWithoutPriorReservation checks that a drain backoff
+// creates a pending entry even when none was reserved beforehand, and that
+// the entry is swept once pendingModelLoadDrainBackoff has passed.
+//
+// The coordinator may learn of a drain rejection for a load_model it sent
+// before a restart (entry already expired or cleared). The backoff must still
+// record the provider as temporarily unplannable.
+func TestDrainBackoffAppliesWithoutPriorReservation(t *testing.T) {
+	reg := New(testLogger())
+	reg.BackoffPendingModelLoadForDrain("p1", "m1")
+
+	pendingAfterBackoff := hasPendingLoad(reg, "p1")
+	if !pendingAfterBackoff {
+		t.Fatal("drain backoff did not create a pending entry")
+	}
+
+	sweepAt := time.Now().Add(pendingModelLoadDrainBackoff + time.Second)
+	reg.expirePendingModelLoads(sweepAt)
+	if pendingAfterSweep := hasPendingLoad(reg, "p1"); pendingAfterSweep {
+		t.Fatal("drain backoff survived past pendingModelLoadDrainBackoff")
+	}
+}
@@ -639,13 +639,26 @@ type Registry struct {
 	modelProvidersMu sync.Mutex
 
 	// pendingModelLoads tracks provider-model pairs that have been sent a
-	// load_model command and are awaiting completion. Prevents duplicate
-	// sends across heartbeat cycles.
-	pendingModelLoads map[string]time.Time // key: "providerID:modelID"
+	// load_model command and are awaiting completion, or are cooling down
+	// after a failed one. The value is the entry's expiry time. While an
+	// entry lives, the provider is skipped for new load_model sends
+	// (bestModelLoadProviderLocked / reservePendingModelLoads).
+	pendingModelLoads map[string]time.Time // key: "providerID:modelID", value: expiry
 }
 
+// pendingModelLoadTTL bounds how long an outstanding (or failed) load_model
+// suppresses re-sends to the same provider. reservePendingModelLoads stamps
+// each new entry with now+pendingModelLoadTTL as its expiry time.
 const pendingModelLoadTTL = 2 * time.Minute
 
+// pendingModelLoadDrainBackoff is the short cooldown used when a provider
+// rejects load_model because it is draining for an auto-update restart. The
+// entry keeps the planner away from a provider that is about to bounce, but
+// must not outlive a failed restart: if the provider aborts the restart and
+// resumes serving, it is fully loadable again, and the full 2-minute cooldown
+// would strand queued requests that this provider (or its post-restart
+// re-registration) could serve.
+//
+// BackoffPendingModelLoadForDrain applies it by setting the entry's expiry to
+// time.Now() plus this duration.
+const pendingModelLoadDrainBackoff = 30 * time.Second
+
 type modelLoadAction struct {
 	providerID string
 	modelID    string
@@ -2102,8 +2115,8 @@ func (r *Registry) TriggerModelSwaps() {
 func (r *Registry) expirePendingModelLoads(now time.Time) {
 	r.mu.Lock()
 	defer r.mu.Unlock()
-	for key, sentAt := range r.pendingModelLoads {
-		if now.Sub(sentAt) > pendingModelLoadTTL {
+	for key, expiresAt := range r.pendingModelLoads {
+		if now.After(expiresAt) {
 			delete(r.pendingModelLoads, key)
 		}
 	}
@@ -2290,7 +2303,7 @@ func (r *Registry) reservePendingModelLoads(actions []modelLoadAction, now time.
 		if r.providerHasPendingLoad(action.providerID) {
 			continue
 		}
-		r.pendingModelLoads[modelLoadKey(action.providerID, action.modelID)] = now
+		r.pendingModelLoads[modelLoadKey(action.providerID, action.modelID)] = now.Add(pendingModelLoadTTL)
 		reserved = append(reserved, action)
 	}
 	return reserved
@@ -2382,6 +2395,19 @@ func (r *Registry) ClearPendingModelLoad(providerID, modelID string) {
 	r.mu.Unlock()
 }
 
+// BackoffPendingModelLoadForDrain sets the pending-load entry for
+// (providerID, modelID) to expire pendingModelLoadDrainBackoff from now,
+// overwriting any existing expiry and creating the entry if there is none.
+//
+// It is called when a provider rejects load_model because it is draining
+// ahead of an auto-update restart. Clearing the entry outright would re-send
+// load_model to the same draining provider on the very next
+// TriggerModelSwaps pass, while the full failure cooldown would suppress the
+// provider long after a failed restart resumed serving. A successful restart
+// clears the entry anyway via Disconnect.
+func (r *Registry) BackoffPendingModelLoadForDrain(providerID, modelID string) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	expiresAt := time.Now().Add(pendingModelLoadDrainBackoff)
+	r.pendingModelLoads[modelLoadKey(providerID, modelID)] = expiresAt
+}
+
 // RejectUnservableQueuedRequests checks whether any eligible provider can
 // serve the given model. If not, all queued requests for the model are
 // rejected immediately rather than waiting for the 120s queue timeout.
@@ -2516,6 +2542,37 @@ func (r *Registry) GetProvider(id string) *Provider {
 	return r.providers[id]
 }
 
+// CountProvidersByBinaryHash returns the number of currently connected
+// providers whose registration attested the given provider binary hash. Used by
+// release administration to avoid removing a hash from the forced allowlist
+// while old-but-still-connected providers are draining/restarting into a newer
+// release.
+//
+// Both the query hash and each provider's attested hash are compared after
+// trimming surrounding whitespace and ignoring case, so the two sides are
+// normalized the same way. An empty (or whitespace-only) query hash matches
+// nothing and yields 0. Providers whose status is StatusOffline, and
+// providers without an attestation result, are never counted.
+func (r *Registry) CountProvidersByBinaryHash(hash string) int {
+	normalized := strings.ToLower(strings.TrimSpace(hash))
+	if normalized == "" {
+		return 0
+	}
+
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	count := 0
+	for _, p := range r.providers {
+		// Snapshot the fields under the provider lock, then release it before
+		// doing the comparison.
+		p.mu.Lock()
+		status := p.Status
+		attestedHash := ""
+		if p.AttestationResult != nil {
+			attestedHash = p.AttestationResult.BinaryHash
+		}
+		p.mu.Unlock()
+
+		if status == StatusOffline {
+			continue
+		}
+		// Trim the attested side as well: previously only the query hash was
+		// trimmed, so an attested hash with stray whitespace never matched.
+		if strings.EqualFold(strings.TrimSpace(attestedHash), normalized) {
+			count++
+		}
+	}
+	return count
+}
+
 // MarkUntrusted sets a provider's status to untrusted for a hard/security
 // reason (bad encrypted chunk, MDM/MDA failure, SIP disabled, binary or model
 // hash mismatch, serial impersonation, attestation failure). The deroute is
 
@@ -1,5 +1,13 @@
 import Foundation
 
+/// Well-known error reason attached to inference / load_model / prefetch_model
+/// rejections while the provider is draining ahead of an auto-update restart.
+/// The coordinator matches this exact string to treat a load_model failure as
+/// transient (short retry backoff, provider is about to restart) rather than a
+/// genuine load failure. Mirrored in coordinator/protocol/messages.go
+/// (ProviderDrainingForUpdate) — keep the two in sync.
+///
+/// Send this value verbatim as the rejection's error reason: the coordinator
+/// compares it with plain string equality, so any added prefix, suffix, or
+/// case change makes the rejection look like an ordinary load failure.
+public let providerDrainingForUpdateReason = "provider draining for update"
+
 public struct HardwareInfo: Codable, Sendable, Equatable {
     public var machineModel: String
     public var chipName: String