Skip to content

Commit 1d4b332

Browse files
committed
enhance: improve reliability of start healthz
Signed-off-by: Donnie Adams <[email protected]>
1 parent ed22001 commit 1d4b332

File tree

3 files changed

+30
-24
lines changed

3 files changed

+30
-24
lines changed

pkg/router/handler.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,22 @@ func NewHandlerSet(name string, scheme *runtime.Scheme, backend backend.Backend)
7575
}
7676

7777
func (m *HandlerSet) Start(ctx context.Context) error {
78-
m.ctx = ctx
78+
if m.ctx == nil {
79+
m.ctx = ctx
80+
}
7981
if err := m.WatchGVK(m.handlers.GVKs()...); err != nil {
8082
return err
8183
}
8284
return m.backend.Start(ctx)
8385
}
8486

8587
func (m *HandlerSet) Preload(ctx context.Context) error {
88+
if m.ctx == nil {
89+
m.ctx = ctx
90+
}
91+
if err := m.WatchGVK(m.handlers.GVKs()...); err != nil {
92+
return err
93+
}
8694
return m.backend.Preload(ctx)
8795
}
8896

@@ -253,7 +261,7 @@ func (m *HandlerSet) onChange(gvk schema.GroupVersionKind, key string, runtimeOb
253261
}
254262

255263
if !fromReplay && !fromTrigger {
256-
// Process delay have key has be reassigned from the TriggerPrefix
264+
// Process delay have key has been reassigned from the TriggerPrefix
257265
if !m.checkDelay(gvk, key) {
258266
return runtimeObject, nil
259267
}

pkg/router/healthz.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ func setHealthy(name string, healthy bool) {
3939
healthz.healths[name] = healthy
4040
}
4141

42-
func getHealthy() bool {
42+
func GetHealthy() bool {
4343
healthz.lock.RLock()
4444
defer healthz.lock.RUnlock()
4545
for _, healthy := range healthz.healths {
4646
if !healthy {
4747
return false
4848
}
4949
}
50-
return true
50+
return len(healthz.healths) > 0
5151
}
5252

5353
// startHealthz starts a healthz server on the healthzPort. If the server is already running, then this is a no-op.
@@ -65,7 +65,7 @@ func startHealthz(ctx context.Context) {
6565

6666
mux := http.NewServeMux()
6767
mux.HandleFunc("/healthz", func(w http.ResponseWriter, req *http.Request) {
68-
if getHealthy() {
68+
if GetHealthy() {
6969
w.WriteHeader(http.StatusOK)
7070
return
7171
}

pkg/router/router.go

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"os"
77
"path/filepath"
88
"runtime"
9+
"sync"
910

1011
"github.com/obot-platform/nah/pkg/backend"
1112
"github.com/obot-platform/nah/pkg/leader"
@@ -21,10 +22,9 @@ type Router struct {
2122
OnErrorHandler ErrorHandler
2223
handlers *HandlerSet
2324
electionConfig *leader.ElectionConfig
24-
hasHealthz bool
25+
startLock sync.Mutex
2526
postStarts []func(context.Context, kclient.Client)
2627
signalStopped chan struct{}
27-
cancel func()
2828
}
2929

3030
// New returns a new *Router with given HandlerSet and ElectionConfig. Passing a nil ElectionConfig is valid and results
@@ -41,7 +41,6 @@ func New(handlerSet *HandlerSet, electionConfig *leader.ElectionConfig, healthzP
4141

4242
if healthzPort > 0 {
4343
setPort(healthzPort)
44-
r.hasHealthz = true
4544
}
4645

4746
r.RouteBuilder.router = r
@@ -187,44 +186,43 @@ func (r RouteBuilder) Handler(h Handler) {
187186
}
188187

189188
func (r *Router) Start(ctx context.Context) error {
190-
if r.cancel != nil {
191-
return fmt.Errorf("router already started")
192-
}
193-
194189
id, err := os.Hostname()
195190
if err != nil {
196191
return err
197192
}
198193

199-
if r.hasHealthz {
200-
startHealthz(ctx)
201-
}
194+
startHealthz(ctx)
202195

203196
r.handlers.onError = r.OnErrorHandler
204197

205-
ctx, r.cancel = context.WithCancel(ctx)
206-
207198
// It's OK to start the electionConfig even if it's nil.
208199
return r.electionConfig.Run(ctx, id, r.startHandlers, func(leader string) {
200+
if id == leader {
201+
return
202+
}
203+
204+
r.startLock.Lock()
205+
defer r.startLock.Unlock()
206+
207+
setHealthy(r.name, false)
208+
defer setHealthy(r.name, true)
209209
// I am not the leader, so I am healthy when my cache is ready.
210210
if err := r.handlers.Preload(ctx); err != nil {
211211
// Failed to preload caches, panic
212212
log.Fatalf("failed to preload caches: %v", err)
213213
}
214-
if r.hasHealthz {
215-
setHealthy(r.name, id != leader)
216-
}
217214
}, r.signalStopped)
218215
}
219216

220217
// startHandlers gets called when we become the leader or if there is no leader election.
221218
func (r *Router) startHandlers(ctx context.Context) error {
219+
r.startLock.Lock()
220+
defer r.startLock.Unlock()
221+
222222
var err error
223223
// This is the leader now, so not ready until the controller is started and caches are ready.
224-
if r.hasHealthz {
225-
setHealthy(r.name, false)
226-
defer setHealthy(r.name, err == nil)
227-
}
224+
setHealthy(r.name, false)
225+
defer setHealthy(r.name, err == nil)
228226

229227
if err = r.handlers.Start(ctx); err != nil {
230228
return err

0 commit comments

Comments
 (0)