Skip to content

Commit fd19e65

Browse files
authored
Add tokenFile and persist on ring features for AM ring (#6628)
* Add tokenFile and persist on ring features for AM ring Signed-off-by: Daniel Deluiggi <[email protected]> * changelog Signed-off-by: Daniel Deluiggi <[email protected]> * docs Signed-off-by: Daniel Deluiggi <[email protected]> * changelog Signed-off-by: Daniel Deluiggi <[email protected]> --------- Signed-off-by: Daniel Deluiggi <[email protected]>
1 parent f77525a commit fd19e65

File tree

4 files changed

+25
-9
lines changed

4 files changed

+25
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* [ENHANCEMENT] Alertmanager: Add receiver validations for msteamsv2 and rocketchat. #6606
1515
* [ENHANCEMENT] Query Frontend: Add a `-frontend.enabled-ruler-query-stats` flag to configure whether to report the query stats log for queries coming from the Ruler. #6504
1616
* [ENHANCEMENT] OTLP: Support otlp metadata ingestion. #6617
17+
* [ENHANCEMENT] AlertManager: Add `keep_instance_in_the_ring_on_shutdown` and `tokens_file_path` configs for alertmanager ring. #6628
1718
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517
1819
* [BUGFIX] Ingester: Fix labelset data race condition. #6573
1920
* [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576

docs/configuration/config-file-reference.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,11 @@ sharding_ring:
388388
# CLI flag: -alertmanager.sharding-ring.zone-awareness-enabled
389389
[zone_awareness_enabled: <boolean> | default = false]
390390
391+
# File path where tokens are stored. If empty, tokens are not stored at
392+
# shutdown and restored at startup.
393+
# CLI flag: -alertmanager.sharding-ring.tokens-file-path
394+
[tokens_file_path: <string> | default = ""]
395+
391396
# The sleep seconds when alertmanager is shutting down. Need to be close to or
392397
# larger than KV Store information propagation delay
393398
# CLI flag: -alertmanager.sharding-ring.final-sleep
@@ -397,6 +402,10 @@ sharding_ring:
397402
# CLI flag: -alertmanager.sharding-ring.wait-instance-state-timeout
398403
[wait_instance_state_timeout: <duration> | default = 10m]
399404
405+
# Keep instance in the ring on shut down.
406+
# CLI flag: -alertmanager.sharding-ring.keep-instance-in-the-ring-on-shutdown
407+
[keep_instance_in_the_ring_on_shutdown: <boolean> | default = false]
408+
400409
# Name of network interface to read address from.
401410
# CLI flag: -alertmanager.sharding-ring.instance-interface-names
402411
[instance_interface_names: <list of string> | default = [eth0 en0]]

pkg/alertmanager/alertmanager_ring.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ type RingConfig struct {
4848
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
4949
ReplicationFactor int `yaml:"replication_factor"`
5050
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
51+
TokensFilePath string `yaml:"tokens_file_path"`
5152

52-
FinalSleep time.Duration `yaml:"final_sleep"`
53-
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
53+
FinalSleep time.Duration `yaml:"final_sleep"`
54+
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
55+
KeepInstanceInTheRingOnShutdown bool `yaml:"keep_instance_in_the_ring_on_shutdown"`
5456

5557
// Instance details
5658
InstanceID string `yaml:"instance_id" doc:"hidden"`
@@ -85,6 +87,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
8587
f.DurationVar(&cfg.FinalSleep, rfprefix+"final-sleep", 0*time.Second, "The sleep seconds when alertmanager is shutting down. Need to be close to or larger than KV Store information propagation delay")
8688
f.IntVar(&cfg.ReplicationFactor, rfprefix+"replication-factor", 3, "The replication factor to use when sharding the alertmanager.")
8789
f.BoolVar(&cfg.ZoneAwarenessEnabled, rfprefix+"zone-awareness-enabled", false, "True to enable zone-awareness and replicate alerts across different availability zones.")
90+
f.StringVar(&cfg.TokensFilePath, rfprefix+"tokens-file-path", "", "File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")
8891

8992
// Instance flags
9093
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
@@ -93,6 +96,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
9396
f.IntVar(&cfg.InstancePort, rfprefix+"instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
9497
f.StringVar(&cfg.InstanceID, rfprefix+"instance-id", hostname, "Instance ID to register in the ring.")
9598
f.StringVar(&cfg.InstanceZone, rfprefix+"instance-availability-zone", "", "The availability zone where this instance is running. Required if zone-awareness is enabled.")
99+
f.BoolVar(&cfg.KeepInstanceInTheRingOnShutdown, rfprefix+"keep-instance-in-the-ring-on-shutdown", false, "Keep instance in the ring on shut down.")
96100

97101
cfg.RingCheckPeriod = 5 * time.Second
98102

@@ -111,13 +115,14 @@ func (cfg *RingConfig) ToLifecyclerConfig(logger log.Logger) (ring.BasicLifecycl
111115
instancePort := ring.GetInstancePort(cfg.InstancePort, cfg.ListenPort)
112116

113117
return ring.BasicLifecyclerConfig{
114-
ID: cfg.InstanceID,
115-
Addr: fmt.Sprintf("%s:%d", instanceAddr, instancePort),
116-
HeartbeatPeriod: cfg.HeartbeatPeriod,
117-
TokensObservePeriod: 0,
118-
Zone: cfg.InstanceZone,
119-
NumTokens: RingNumTokens,
120-
FinalSleep: cfg.FinalSleep,
118+
ID: cfg.InstanceID,
119+
Addr: fmt.Sprintf("%s:%d", instanceAddr, instancePort),
120+
HeartbeatPeriod: cfg.HeartbeatPeriod,
121+
TokensObservePeriod: 0,
122+
Zone: cfg.InstanceZone,
123+
NumTokens: RingNumTokens,
124+
FinalSleep: cfg.FinalSleep,
125+
KeepInstanceInTheRingOnShutdown: cfg.KeepInstanceInTheRingOnShutdown,
121126
}, nil
122127
}
123128

pkg/alertmanager/multitenant.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ func createMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, fallbackC
417417
delegate := ring.BasicLifecyclerDelegate(am)
418418
delegate = ring.NewLeaveOnStoppingDelegate(delegate, am.logger)
419419
delegate = ring.NewAutoForgetDelegate(am.cfg.ShardingRing.HeartbeatTimeout*ringAutoForgetUnhealthyPeriods, delegate, am.logger)
420+
delegate = ring.NewTokensPersistencyDelegate(am.cfg.ShardingRing.TokensFilePath, ring.JOINING, delegate, am.logger)
420421

421422
am.ringLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, RingNameForServer, RingKey, ringStore, delegate, am.logger, prometheus.WrapRegistererWithPrefix("cortex_", am.registry))
422423
if err != nil {

0 commit comments

Comments
 (0)