Skip to content

Commit 8571fe5

Browse files
Register telemetry gauges on startup
1 parent 909af1e commit 8571fe5

File tree

17 files changed

+270
-469
lines changed

17 files changed

+270
-469
lines changed

Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## 1.12.5
4+
5+
- Update `avail-light-core` to 1.2.0
6+
- Remove `ot_flush_block_interval` from configuration
7+
38
## [1.12.4](https://github.com/availproject/avail-light/releases/tag/avail-light-client-v1.12.4) - 2024-12-20
49

510
- Update `avail-light-core` to 1.1.0

client/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "avail-light-client"
3-
version = "1.12.4"
3+
version = "1.12.5"
44
build = "../build.rs"
55
edition = "2021"
66
description = "Avail network p2p Light Client"

client/src/config.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ impl From<&RuntimeConfig> for MaintenanceConfig {
8484
replication_factor: val.libp2p.kademlia.record_replication_factor.get() as u16,
8585
query_timeout: val.libp2p.kademlia.query_timeout,
8686
pruning_interval: val.libp2p.kademlia.store_pruning_interval,
87-
telemetry_flush_interval: val.otel.ot_flush_block_interval,
8887
automatic_server_mode: val.libp2p.kademlia.automatic_server_mode,
8988
total_memory_gb_threshold: val.total_memory_gb_threshold,
9089
num_cpus_threshold: val.num_cpus_threshold,

client/src/main.rs

Lines changed: 39 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -296,23 +296,27 @@ async fn run(
296296

297297
// construct Metric Attributes and initialize Metrics
298298
let metric_attributes = vec![
299-
("version".to_string(), version.to_string()),
300-
("role".to_string(), "lightnode".to_string()),
301-
("origin".to_string(), cfg.origin.to_string()),
302-
("peerID".to_string(), peer_id.to_string()),
303-
("avail_address".to_string(), identity_cfg.avail_public_key),
304-
("network".to_string(), Network::name(&cfg.genesis_hash)),
305-
("client_id".to_string(), client_id.to_string()),
306-
("execution_id".to_string(), execution_id.to_string()),
299+
("version", version.to_string()),
300+
("role", "lightnode".to_string()),
301+
("origin", cfg.origin.to_string()),
302+
("peerID", peer_id.to_string()),
303+
("avail_address", identity_cfg.avail_public_key),
304+
("network", Network::name(&cfg.genesis_hash)),
305+
("client_id", client_id.to_string()),
306+
("execution_id", execution_id.to_string()),
307307
(
308-
"client_alias".to_string(),
308+
"client_alias",
309309
cfg.client_alias.clone().unwrap_or("".to_string()),
310310
),
311311
];
312312

313-
let metrics =
314-
telemetry::otlp::initialize(cfg.project_name.clone(), &cfg.origin, cfg.otel.clone())
315-
.wrap_err("Unable to initialize OpenTelemetry service")?;
313+
let metrics = telemetry::otlp::initialize(
314+
cfg.project_name.clone(),
315+
&cfg.origin,
316+
cfg.otel.clone(),
317+
metric_attributes,
318+
)
319+
.wrap_err("Unable to initialize OpenTelemetry service")?;
316320

317321
let rpc_host = db
318322
.get(RpcNodeKey)
@@ -324,7 +328,6 @@ async fn run(
324328
cfg.libp2p.kademlia.operation_mode.into(),
325329
rpc_host,
326330
Multiaddr::empty(),
327-
metric_attributes,
328331
);
329332

330333
spawn_in_span(shutdown.with_cancel(async move {
@@ -446,52 +449,33 @@ impl BlockStat {
446449

447450
struct ClientState {
448451
metrics: Metrics,
449-
kad_mode: Mode,
450-
multiaddress: Multiaddr,
451-
rpc_host: String,
452-
metric_attributes: Vec<(String, String)>,
453452
active_blocks: HashMap<u32, BlockStat>,
454453
}
455454

456455
impl ClientState {
457-
fn new(
458-
metrics: Metrics,
459-
kad_mode: Mode,
460-
rpc_host: String,
461-
multiaddress: Multiaddr,
462-
metric_attributes: Vec<(String, String)>,
463-
) -> Self {
464-
ClientState {
456+
fn new(metrics: Metrics, kad_mode: Mode, rpc_host: String, multiaddress: Multiaddr) -> Self {
457+
let mut state = ClientState {
465458
metrics,
466-
kad_mode,
467-
multiaddress,
468-
rpc_host,
469-
metric_attributes,
470459
active_blocks: Default::default(),
471-
}
460+
};
461+
state.update_operating_mode(kad_mode);
462+
state.update_rpc_host(rpc_host);
463+
state.update_multiaddress(multiaddress);
464+
state
472465
}
473466

474467
fn update_multiaddress(&mut self, value: Multiaddr) {
475-
self.multiaddress = value;
468+
self.metrics
469+
.set_attribute("multiaddress", value.to_string());
476470
}
477471

478472
fn update_operating_mode(&mut self, value: Mode) {
479-
self.kad_mode = value;
473+
self.metrics
474+
.set_attribute("operating_mode", value.to_string());
480475
}
481476

482477
fn update_rpc_host(&mut self, value: String) {
483-
self.rpc_host = value;
484-
}
485-
486-
fn attributes(&self) -> Vec<(String, String)> {
487-
let mut attrs = vec![
488-
("operating_mode".to_string(), self.kad_mode.to_string()),
489-
("multiaddress".to_string(), self.multiaddress.to_string()),
490-
("rpc_host".to_string(), self.rpc_host.to_string()),
491-
];
492-
493-
attrs.extend(self.metric_attributes.clone());
494-
attrs
478+
self.metrics.set_attribute("rpc_host", value);
495479
}
496480

497481
fn get_block_stat(&mut self, block_num: u32) -> Result<&mut BlockStat> {
@@ -576,40 +560,41 @@ impl ClientState {
576560
mut lc_receiver: UnboundedReceiver<LcEvent>,
577561
mut rpc_receiver: broadcast::Receiver<RpcEvent>,
578562
) {
579-
self.metrics.count(MetricCounter::Starts, self.attributes());
563+
self.metrics.count(MetricCounter::Starts);
580564
loop {
581565
select! {
582566
Some(p2p_event) = p2p_receiver.recv() => {
583567
match p2p_event {
584568
P2pEvent::Count => {
585-
self.metrics.count(MetricCounter::EventLoopEvent, self.attributes());
569+
self.metrics.count(MetricCounter::EventLoopEvent);
586570
},
587571
P2pEvent::IncomingGetRecord => {
588-
self.metrics.count(MetricCounter::IncomingGetRecord, self.attributes());
572+
self.metrics.count(MetricCounter::IncomingGetRecord);
589573
},
590574
P2pEvent::IncomingPutRecord => {
591-
self.metrics.count(MetricCounter::IncomingPutRecord, self.attributes());
575+
self.metrics.count(MetricCounter::IncomingPutRecord);
592576
},
593577
P2pEvent::KadModeChange(mode) => {
578+
594579
self.update_operating_mode(mode);
595580
},
596581
P2pEvent::Ping(rtt) => {
597582
self.metrics.record(MetricValue::DHTPingLatency(rtt.as_millis() as f64));
598583
},
599584
P2pEvent::IncomingConnection => {
600-
self.metrics.count(MetricCounter::IncomingConnections, self.attributes());
585+
self.metrics.count(MetricCounter::IncomingConnections);
601586
},
602587
P2pEvent::IncomingConnectionError => {
603-
self.metrics.count(MetricCounter::IncomingConnectionErrors, self.attributes());
588+
self.metrics.count(MetricCounter::IncomingConnectionErrors);
604589
},
605590
P2pEvent::MultiaddressUpdate(address) => {
606591
self.update_multiaddress(address);
607592
},
608593
P2pEvent::EstablishedConnection => {
609-
self.metrics.count(MetricCounter::EstablishedConnections, self.attributes());
594+
self.metrics.count(MetricCounter::EstablishedConnections);
610595
},
611596
P2pEvent::OutgoingConnectionError => {
612-
self.metrics.count(MetricCounter::OutgoingConnectionErrors, self.attributes());
597+
self.metrics.count(MetricCounter::OutgoingConnectionErrors);
613598
},
614599
P2pEvent::PutRecord { block_num, records } => {
615600
self.handle_new_put_record(block_num, records);
@@ -634,16 +619,6 @@ impl ClientState {
634619
}
635620
Some(maintenance_event) = maintenance_receiver.recv() => {
636621
match maintenance_event {
637-
MaintenanceEvent::FlushMetrics(block_num) => {
638-
if let Err(error) = self.metrics.flush(self.attributes()) {
639-
error!(
640-
block_num,
641-
"Could not handle Flush Maintenance event properly: {error}"
642-
);
643-
} else {
644-
info!(block_num, "Flushing metrics finished");
645-
};
646-
},
647622
MaintenanceEvent::RecordStats {
648623
connected_peers,
649624
block_confidence_treshold,
@@ -656,7 +631,7 @@ impl ClientState {
656631
self.metrics.record(MetricValue::DHTQueryTimeout(query_timeout));
657632
},
658633
MaintenanceEvent::CountUps => {
659-
self.metrics.count(MetricCounter::Up, self.attributes());
634+
self.metrics.count(MetricCounter::Up);
660635
},
661636
}
662637
}
@@ -666,7 +641,7 @@ impl ClientState {
666641
self.metrics.record(MetricValue::BlockProcessingDelay(delay));
667642
},
668643
LcEvent::CountSessionBlocks => {
669-
self.metrics.count(MetricCounter::SessionBlocks,self.attributes());
644+
self.metrics.count(MetricCounter::SessionBlocks);
670645
},
671646
LcEvent::RecordBlockHeight(block_num) => {
672647
self.metrics.record(MetricValue::BlockHeight(block_num));

core/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 1.2.0
4+
5+
- Fix issue with multiple telemetry gauge callbacks
6+
37
## [1.1.0](https://github.com/availproject/avail-light/tree/avail-light-core-v1.1.0) - 2024-12-20
48

59
- Temporarily remove WebRTC support to reduce memory usage

core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "avail-light-core"
3-
version = "1.1.0"
3+
version = "1.2.0"
44
edition = "2021"
55
description = "Avail Light core driving library"
66

core/src/maintenance.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use crate::{
1010
};
1111

1212
pub enum OutputEvent {
13-
FlushMetrics(u32),
1413
RecordStats {
1514
connected_peers: usize,
1615
block_confidence_treshold: f64,
@@ -34,11 +33,6 @@ pub async fn process_block(
3433
}
3534
}
3635

37-
if block_number % maintenance_config.telemetry_flush_interval == 0 {
38-
info!(block_number, "Flushing metrics...");
39-
event_sender.send(OutputEvent::FlushMetrics(block_number))?;
40-
}
41-
4236
p2p_client
4337
.shrink_kademlia_map()
4438
.await

core/src/telemetry/mod.rs

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,19 @@ impl MetricName for MetricCounter {
4343
}
4444

4545
impl MetricCounter {
46-
fn is_buffered(&self) -> bool {
47-
!matches!(self, MetricCounter::Starts)
48-
}
49-
50-
fn as_last(&self) -> bool {
51-
matches!(self, MetricCounter::Up)
46+
pub fn default_values() -> Vec<MetricCounter> {
47+
vec![
48+
MetricCounter::Starts,
49+
MetricCounter::Up,
50+
MetricCounter::SessionBlocks,
51+
MetricCounter::OutgoingConnectionErrors,
52+
MetricCounter::IncomingConnectionErrors,
53+
MetricCounter::IncomingConnections,
54+
MetricCounter::EstablishedConnections,
55+
MetricCounter::IncomingPutRecord,
56+
MetricCounter::IncomingGetRecord,
57+
MetricCounter::EventLoopEvent,
58+
]
5259
}
5360

5461
fn is_allowed(&self, origin: &Origin) -> bool {
@@ -85,6 +92,29 @@ pub enum MetricValue {
8592
RPCCallDuration(f64),
8693
}
8794

95+
impl MetricValue {
96+
pub fn default_values() -> Vec<MetricValue> {
97+
vec![
98+
MetricValue::BlockHeight(0),
99+
MetricValue::BlockConfidence(0.0),
100+
MetricValue::BlockConfidenceThreshold(0.0),
101+
MetricValue::BlockProcessingDelay(0.0),
102+
MetricValue::DHTReplicationFactor(0),
103+
MetricValue::DHTFetched(0.0),
104+
MetricValue::DHTFetchedPercentage(0.0),
105+
MetricValue::DHTFetchDuration(0.0),
106+
MetricValue::DHTPutDuration(0.0),
107+
MetricValue::DHTPutSuccess(0.0),
108+
MetricValue::DHTConnectedPeers(0),
109+
MetricValue::DHTQueryTimeout(0),
110+
MetricValue::DHTPingLatency(0.0),
111+
MetricValue::RPCFetched(0.0),
112+
MetricValue::RPCFetchDuration(0.0),
113+
MetricValue::RPCCallDuration(0.0),
114+
]
115+
}
116+
}
117+
88118
impl MetricName for MetricValue {
89119
fn name(&self) -> &'static str {
90120
use MetricValue::*;

0 commit comments

Comments
 (0)