@@ -639,13 +639,26 @@ type Registry struct {
639639 modelProvidersMu sync.Mutex
640640
641641 // pendingModelLoads tracks provider-model pairs that have been sent a
642- // load_model command and are awaiting completion. Prevents duplicate
643- // sends across heartbeat cycles.
644- pendingModelLoads map [string ]time.Time // key: "providerID:modelID"
642+ // load_model command and are awaiting completion, or are cooling down
643+ // after a failed one. The value is the entry's expiry time. While an
644+ // entry lives, the provider is skipped for new load_model sends
645+ // (bestModelLoadProviderLocked / reservePendingModelLoads).
646+ pendingModelLoads map [string ]time.Time // key: "providerID:modelID", value: expiry
645647}
646648
// pendingModelLoadTTL is the lifetime given to a pendingModelLoads entry when
// a load_model send is reserved: reservePendingModelLoads stores
// now + pendingModelLoadTTL as the entry's expiry. It bounds how long an
// outstanding (or failed) load_model suppresses re-sends to the same
// provider; entries past their expiry are deleted by expirePendingModelLoads.
const pendingModelLoadTTL = 2 * time.Minute
648652
// pendingModelLoadDrainBackoff is the short cooldown used when a provider
// rejects load_model because it is draining for an auto-update restart; it is
// applied by BackoffPendingModelLoadForDrain. The entry keeps the planner away
// from a provider that is about to bounce, but must not outlive a failed
// restart: if the provider aborts the restart and resumes serving, it is fully
// loadable again, and the full pendingModelLoadTTL cooldown would strand
// queued requests that this provider (or its post-restart re-registration)
// could serve.
const pendingModelLoadDrainBackoff = 30 * time.Second
661+
649662type modelLoadAction struct {
650663 providerID string
651664 modelID string
@@ -2102,8 +2115,8 @@ func (r *Registry) TriggerModelSwaps() {
21022115func (r * Registry ) expirePendingModelLoads (now time.Time ) {
21032116 r .mu .Lock ()
21042117 defer r .mu .Unlock ()
2105- for key , sentAt := range r .pendingModelLoads {
2106- if now .Sub ( sentAt ) > pendingModelLoadTTL {
2118+ for key , expiresAt := range r .pendingModelLoads {
2119+ if now .After ( expiresAt ) {
21072120 delete (r .pendingModelLoads , key )
21082121 }
21092122 }
@@ -2290,7 +2303,7 @@ func (r *Registry) reservePendingModelLoads(actions []modelLoadAction, now time.
22902303 if r .providerHasPendingLoad (action .providerID ) {
22912304 continue
22922305 }
2293- r .pendingModelLoads [modelLoadKey (action .providerID , action .modelID )] = now
2306+ r .pendingModelLoads [modelLoadKey (action .providerID , action .modelID )] = now . Add ( pendingModelLoadTTL )
22942307 reserved = append (reserved , action )
22952308 }
22962309 return reserved
@@ -2382,6 +2395,19 @@ func (r *Registry) ClearPendingModelLoad(providerID, modelID string) {
23822395 r .mu .Unlock ()
23832396}
23842397
2398+ // BackoffPendingModelLoadForDrain re-stamps a pending load entry with the
2399+ // short drain backoff. Called when a provider rejects load_model because it
2400+ // is draining ahead of an auto-update restart: clearing the entry outright
2401+ // would re-send load_model to the same draining provider on the very next
2402+ // TriggerModelSwaps pass, while the full failure cooldown would suppress the
2403+ // provider long after a failed restart resumed serving. A successful restart
2404+ // clears the entry anyway via Disconnect.
2405+ func (r * Registry ) BackoffPendingModelLoadForDrain (providerID , modelID string ) {
2406+ r .mu .Lock ()
2407+ r .pendingModelLoads [modelLoadKey (providerID , modelID )] = time .Now ().Add (pendingModelLoadDrainBackoff )
2408+ r .mu .Unlock ()
2409+ }
2410+
23852411// RejectUnservableQueuedRequests checks whether any eligible provider can
23862412// serve the given model. If not, all queued requests for the model are
23872413// rejected immediately rather than waiting for the 120s queue timeout.
@@ -2516,6 +2542,37 @@ func (r *Registry) GetProvider(id string) *Provider {
25162542 return r .providers [id ]
25172543}
25182544
2545+ // CountProvidersByBinaryHash returns the number of currently connected
2546+ // providers whose registration attested the given provider binary hash. Used by
2547+ // release administration to avoid removing a hash from the forced allowlist
2548+ // while old-but-still-connected providers are draining/restarting into a newer
2549+ // release.
2550+ func (r * Registry ) CountProvidersByBinaryHash (hash string ) int {
2551+ normalized := strings .ToLower (strings .TrimSpace (hash ))
2552+ if normalized == "" {
2553+ return 0
2554+ }
2555+
2556+ r .mu .RLock ()
2557+ defer r .mu .RUnlock ()
2558+
2559+ count := 0
2560+ for _ , p := range r .providers {
2561+ p .mu .Lock ()
2562+ status := p .Status
2563+ attestedHash := ""
2564+ if p .AttestationResult != nil {
2565+ attestedHash = p .AttestationResult .BinaryHash
2566+ }
2567+ p .mu .Unlock ()
2568+
2569+ if status != StatusOffline && strings .EqualFold (attestedHash , normalized ) {
2570+ count ++
2571+ }
2572+ }
2573+ return count
2574+ }
2575+
25192576// MarkUntrusted sets a provider's status to untrusted for a hard/security
25202577// reason (bad encrypted chunk, MDM/MDA failure, SIP disabled, binary or model
25212578// hash mismatch, serial impersonation, attestation failure). The deroute is
0 commit comments