@@ -130,7 +130,7 @@ type Lifecycler struct {
130
130
// goes away and comes back empty. The state changes during lifecycle of instance.
131
131
stateMtx sync.RWMutex
132
132
state InstanceState
133
- tokens Tokens
133
+ tokenFile * TokenFile
134
134
registeredAt time.Time
135
135
136
136
// Controls the ready-reporting
@@ -205,6 +205,7 @@ func NewLifecycler(
205
205
actorChan : make (chan func ()),
206
206
autojoinChan : make (chan struct {}, 1 ),
207
207
state : PENDING ,
208
+ tokenFile : & TokenFile {PreviousState : ACTIVE },
208
209
lifecyclerMetrics : NewLifecyclerMetrics (ringName , reg ),
209
210
logger : logger ,
210
211
tg : tg ,
@@ -301,6 +302,7 @@ func (i *Lifecycler) GetState() InstanceState {
301
302
// setState replaces the lifecycler's current instance state with state.
// The write lock on stateMtx is held for the whole call, and the transition
// (old and new value) is logged at info level.
func (i *Lifecycler) setState(state InstanceState) {
	i.stateMtx.Lock()
	defer i.stateMtx.Unlock()

	previous := i.state
	level.Info(i.logger).Log("msg", "set state", "old_state", previous, "new_state", state)
	i.state = state
}
306
308
@@ -334,7 +336,7 @@ func (i *Lifecycler) ChangeState(ctx context.Context, state InstanceState) error
334
336
func (i * Lifecycler ) getTokens () Tokens {
335
337
i .stateMtx .RLock ()
336
338
defer i .stateMtx .RUnlock ()
337
- return i .tokens
339
+ return i .tokenFile . Tokens
338
340
}
339
341
340
342
func (i * Lifecycler ) setTokens (tokens Tokens ) {
@@ -343,14 +345,54 @@ func (i *Lifecycler) setTokens(tokens Tokens) {
343
345
i .stateMtx .Lock ()
344
346
defer i .stateMtx .Unlock ()
345
347
346
- i .tokens = tokens
348
+ i .tokenFile . Tokens = tokens
347
349
if i .cfg .TokensFilePath != "" {
348
- if err := i .tokens .StoreToFile (i .cfg .TokensFilePath ); err != nil {
350
+ if err := i .tokenFile .StoreToFile (i .cfg .TokensFilePath ); err != nil {
349
351
level .Error (i .logger ).Log ("msg" , "error storing tokens to disk" , "path" , i .cfg .TokensFilePath , "err" , err )
350
352
}
351
353
}
352
354
}
353
355
356
+ func (i * Lifecycler ) getPreviousState () InstanceState {
357
+ i .stateMtx .RLock ()
358
+ defer i .stateMtx .RUnlock ()
359
+ return i .tokenFile .PreviousState
360
+ }
361
+
362
+ func (i * Lifecycler ) setPreviousState (state InstanceState ) {
363
+ i .stateMtx .Lock ()
364
+ defer i .stateMtx .Unlock ()
365
+
366
+ if ! (state == ACTIVE || state == READONLY ) {
367
+ level .Error (i .logger ).Log ("msg" , "cannot store unsupported state to disk" , "new_state" , state , "old_state" , i .tokenFile .PreviousState )
368
+ return
369
+ }
370
+
371
+ i .tokenFile .PreviousState = state
372
+ if i .cfg .TokensFilePath != "" {
373
+ if err := i .tokenFile .StoreToFile (i .cfg .TokensFilePath ); err != nil {
374
+ level .Error (i .logger ).Log ("msg" , "error storing state to disk" , "path" , i .cfg .TokensFilePath , "err" , err )
375
+ } else {
376
+ level .Info (i .logger ).Log ("msg" , "saved state to disk" , "state" , state , "path" , i .cfg .TokensFilePath )
377
+ }
378
+ }
379
+ }
380
+
381
+ func (i * Lifecycler ) loadTokenFile () (* TokenFile , error ) {
382
+
383
+ t , err := LoadTokenFile (i .cfg .TokensFilePath )
384
+ if err != nil {
385
+ return nil , err
386
+ }
387
+
388
+ i .stateMtx .Lock ()
389
+ defer i .stateMtx .Unlock ()
390
+
391
+ i .tokenFile = t
392
+ level .Info (i .logger ).Log ("msg" , "loaded token file" , "state" , i .tokenFile .PreviousState , "num_tokens" , len (i .tokenFile .Tokens ), "path" , i .cfg .TokensFilePath )
393
+ return i .tokenFile , nil
394
+ }
395
+
354
396
func (i * Lifecycler ) getRegisteredAt () time.Time {
355
397
i .stateMtx .RLock ()
356
398
defer i .stateMtx .RUnlock ()
@@ -501,8 +543,8 @@ func (i *Lifecycler) loop(ctx context.Context) error {
501
543
level .Info (i .logger ).Log ("msg" , "observing tokens before going ACTIVE" , "ring" , i .RingName )
502
544
observeChan = time .After (i .cfg .ObservePeriod )
503
545
} else {
504
- if err := i .autoJoin (context .Background (), ACTIVE ); err != nil {
505
- return errors .Wrapf (err , "failed to pick tokens in the KV store, ring: %s" , i .RingName )
546
+ if err := i .autoJoin (context .Background (), i . getPreviousState () ); err != nil {
547
+ return errors .Wrapf (err , "failed to pick tokens in the KV store, ring: %s, state: %s " , i .RingName , i . getPreviousState () )
506
548
}
507
549
}
508
550
}
@@ -519,9 +561,9 @@ func (i *Lifecycler) loop(ctx context.Context) error {
519
561
if i .verifyTokens (context .Background ()) {
520
562
level .Info (i .logger ).Log ("msg" , "token verification successful" , "ring" , i .RingName )
521
563
522
- err := i .changeState (context .Background (), ACTIVE )
564
+ err := i .changeState (context .Background (), i . getPreviousState () )
523
565
if err != nil {
524
- level .Error (i .logger ).Log ("msg" , "failed to set state to ACTIVE " , "ring" , i .RingName , "err" , err )
566
+ level .Error (i .logger ).Log ("msg" , "failed to set state" , "ring" , i .RingName , "state" , i . getPreviousState () , "err" , err )
525
567
}
526
568
} else {
527
569
level .Info (i .logger ).Log ("msg" , "token verification failed, observing" , "ring" , i .RingName )
@@ -564,6 +606,12 @@ func (i *Lifecycler) stopping(runningError error) error {
564
606
heartbeatTickerStop , heartbeatTickerChan := newDisableableTicker (i .cfg .HeartbeatPeriod )
565
607
defer heartbeatTickerStop ()
566
608
609
+ // save current state into file
610
+ if i .cfg .TokensFilePath != "" {
611
+ currentState := i .GetState ()
612
+ i .setPreviousState (currentState )
613
+ }
614
+
567
615
// Mark ourselves as Leaving so no more samples are sent to us.
568
616
err := i .changeState (context .Background (), LEAVING )
569
617
if err != nil {
@@ -613,9 +661,13 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
613
661
)
614
662
615
663
if i .cfg .TokensFilePath != "" {
616
- tokensFromFile , err = LoadTokensFromFile ( i . cfg . TokensFilePath )
664
+ tokenFile , err := i . loadTokenFile ( )
617
665
if err != nil && ! os .IsNotExist (err ) {
618
- level .Error (i .logger ).Log ("msg" , "error loading tokens from file" , "err" , err )
666
+ level .Error (i .logger ).Log ("msg" , "error loading tokens and previous state from file" , "err" , err )
667
+ }
668
+
669
+ if tokenFile != nil {
670
+ tokensFromFile = tokenFile .Tokens
619
671
}
620
672
} else {
621
673
level .Info (i .logger ).Log ("msg" , "not loading tokens from file, tokens file path is empty" )
@@ -639,7 +691,7 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
639
691
if len (tokensFromFile ) > 0 {
640
692
level .Info (i .logger ).Log ("msg" , "adding tokens from file" , "num_tokens" , len (tokensFromFile ))
641
693
if len (tokensFromFile ) >= i .cfg .NumTokens && i .autoJoinOnStartup {
642
- i .setState (ACTIVE )
694
+ i .setState (i . getPreviousState () )
643
695
}
644
696
ringDesc .AddIngester (i .ID , i .Addr , i .Zone , tokensFromFile , i .GetState (), registeredAt )
645
697
i .setTokens (tokensFromFile )
@@ -669,11 +721,11 @@ func (i *Lifecycler) initRing(ctx context.Context) error {
669
721
670
722
// If the ingester failed to clean its ring entry up it can leave its state in LEAVING
671
723
// OR unregister_on_shutdown=false
672
- // if autoJoinOnStartup, move it into ACTIVE to ensure the ingester joins the ring.
673
- // else set to PENDING
724
+ // if autoJoinOnStartup, move it into previous state based on token file (default: ACTIVE)
725
+ // to ensure the ingester joins the ring. else set to PENDING
674
726
if instanceDesc .State == LEAVING && len (instanceDesc .Tokens ) != 0 {
675
727
if i .autoJoinOnStartup {
676
- instanceDesc .State = ACTIVE
728
+ instanceDesc .State = i . getPreviousState ()
677
729
} else {
678
730
instanceDesc .State = PENDING
679
731
}
@@ -908,10 +960,12 @@ func (i *Lifecycler) updateConsul(ctx context.Context) error {
908
960
func (i * Lifecycler ) changeState (ctx context.Context , state InstanceState ) error {
909
961
currState := i .GetState ()
910
962
// Only the following state transitions can be triggered externally
911
- if ! ((currState == PENDING && state == JOINING ) || // triggered by TransferChunks at the beginning
912
- (currState == JOINING && state == PENDING ) || // triggered by TransferChunks on failure
913
- (currState == JOINING && state == ACTIVE ) || // triggered by TransferChunks on success
963
+ if ! ((currState == PENDING && state == JOINING ) ||
964
+ (currState == JOINING && state == PENDING ) ||
965
+ (currState == JOINING && state == ACTIVE ) ||
966
+ (currState == JOINING && state == READONLY ) ||
914
967
(currState == PENDING && state == ACTIVE ) || // triggered by autoJoin
968
+ (currState == PENDING && state == READONLY ) || // triggered by autoJoin
915
969
(currState == ACTIVE && state == LEAVING ) || // triggered by shutdown
916
970
(currState == ACTIVE && state == READONLY ) || // triggered by ingester mode
917
971
(currState == READONLY && state == ACTIVE ) || // triggered by ingester mode
0 commit comments