Skip to content

Commit 506cfd9

Browse files
authored
feat(metrics): add progress metrics via collector (#17359)
* add query scan rows metrics * fix build * register the metrics * track scan & write progress * track spill progress * refactor * attach session manager to it * update the finished query * finish the metrics * fix clippy * fix header * fix cargo fmt * fix taplo
1 parent 31d0fac commit 506cfd9

File tree

11 files changed

+241
-3
lines changed

11 files changed

+241
-3
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/base/src/base/progress.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ pub struct ProgressValues {
2424
pub bytes: usize,
2525
}
2626

27+
impl ProgressValues {
28+
pub fn add(&self, other: &ProgressValues) -> ProgressValues {
29+
ProgressValues {
30+
rows: self.rows + other.rows,
31+
bytes: self.bytes + other.bytes,
32+
}
33+
}
34+
}
35+
2736
#[derive(Debug)]
2837
pub struct Progress {
2938
rows: AtomicUsize,

src/common/base/src/runtime/metrics/registry.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ impl GlobalRegistry {
110110
metric
111111
}
112112

113+
/// Registers `collector` with the registry stored behind `self.inner`.
///
/// The inner lock is taken for the duration of the registration call.
pub fn register_collector(&self, collector: Box<dyn prometheus_client::collector::Collector>) {
    let mut state = self.inner.lock();
    state.registry.register_collector(collector);
}
116+
113117
pub(crate) fn new_scoped_metric(&self, index: usize) -> impl Iterator<Item = ScopedMetric> {
114118
let global_registry = self.inner.lock();
115119
let mut scoped_metrics = Vec::with_capacity(global_registry.metrics.len() - index);

src/common/metrics/src/metrics/interpreter.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ const METRIC_QUERY_TOTAL_PARTITIONS: &str = "query_total_partitions";
3939
const METRIC_QUERY_RESULT_ROWS: &str = "query_result_rows";
4040
const METRIC_QUERY_RESULT_BYTES: &str = "query_result_bytes";
4141

42+
pub const METRIC_QUERY_SCAN_PROGRESS_ROWS: &str = "query_scan_progress_rows";
43+
pub const METRIC_QUERY_SCAN_PROGRESS_BYTES: &str = "query_scan_progress_bytes";
44+
pub const METRIC_QUERY_WRITE_PROGRESS_ROWS: &str = "query_write_progress_rows";
45+
pub const METRIC_QUERY_WRITE_PROGRESS_BYTES: &str = "query_write_progress_bytes";
46+
pub const METRIC_QUERY_SPILL_PROGRESS_ROWS: &str = "query_spill_progress_rows";
47+
pub const METRIC_QUERY_SPILL_PROGRESS_BYTES: &str = "query_spill_progress_bytes";
48+
4249
pub static QUERY_START: LazyLock<FamilyCounter<VecLabels>> =
4350
LazyLock::new(|| register_counter_family(METRIC_QUERY_START));
4451
pub static QUERY_SUCCESS: LazyLock<FamilyCounter<VecLabels>> =

src/query/catalog/src/table_context.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ pub struct ProcessInfo {
9292
/// storage metrics for persisted data reading.
9393
pub data_metrics: Option<StorageMetrics>,
9494
pub scan_progress_value: Option<ProgressValues>,
95+
pub write_progress_value: Option<ProgressValues>,
96+
pub spill_progress_value: Option<ProgressValues>,
9597
pub mysql_connection_id: Option<u32>,
9698
pub created_time: SystemTime,
9799
pub status_info: Option<String>,

src/query/service/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ paste = { workspace = true }
150150
petgraph = { workspace = true }
151151
pin-project-lite = { workspace = true }
152152
poem = { workspace = true }
153+
prometheus-client = { workspace = true }
153154
prost = { workspace = true }
154155
rand = { workspace = true }
155156
recursive = { workspace = true }

src/query/service/src/interpreters/interpreter.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,16 @@ fn log_query_finished(ctx: &QueryContext, error: Option<ErrorCode>, has_profiles
187187
let typ = session.get_type();
188188
if typ.is_user_session() {
189189
SessionManager::instance().status.write().query_finish(now);
190+
SessionManager::instance()
191+
.metrics_collector
192+
.track_finished_query(
193+
ctx.get_scan_progress_value(),
194+
ctx.get_write_progress_value(),
195+
ctx.get_join_spill_progress_value(),
196+
ctx.get_aggregate_spill_progress_value(),
197+
ctx.get_group_by_spill_progress_value(),
198+
ctx.get_window_partition_spill_progress_value(),
199+
);
190200
}
191201

192202
if let Err(error) = InterpreterQueryLog::log_finish(ctx, now, error, has_profiles) {

src/query/service/src/sessions/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ mod session;
2020
mod session_ctx;
2121
mod session_info;
2222
mod session_mgr;
23+
mod session_mgr_metrics;
2324
mod session_mgr_status;
2425
mod session_privilege_mgr;
2526
mod session_status;

src/query/service/src/sessions/session_info.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ impl Session {
5454
memory_usage,
5555
data_metrics: Self::query_data_metrics(session_ctx),
5656
scan_progress_value: Self::query_scan_progress_value(session_ctx),
57+
write_progress_value: Self::query_write_progress_value(session_ctx),
58+
spill_progress_value: Self::query_spill_progress_value(session_ctx),
5759
mysql_connection_id: self.mysql_connection_id,
5860
created_time: Self::query_created_time(session_ctx),
5961
status_info: shared_query_context
@@ -105,6 +107,27 @@ impl Session {
105107
.map(|context_shared| context_shared.scan_progress.get_values())
106108
}
107109

110+
/// Returns the current values of the shared query context's `write_progress`,
/// or `None` when `status` has no shared query context.
fn query_write_progress_value(status: &SessionContext) -> Option<ProgressValues> {
    let shared = status.get_query_context_shared();
    let shared = shared.as_ref()?;
    Some(shared.write_progress.get_values())
}
116+
117+
/// Returns the combined spill progress of the shared query context, or `None`
/// when `status` has no shared query context.
///
/// The result is the sum of the aggregate, join, window-partition and
/// group-by spill progress values, combined with `ProgressValues::add`.
fn query_spill_progress_value(status: &SessionContext) -> Option<ProgressValues> {
    let shared = status.get_query_context_shared();
    let shared = shared.as_ref()?;

    // Snapshot each spill counter in the same order the sum is built.
    let aggregate = shared.agg_spill_progress.get_values();
    let join = shared.join_spill_progress.get_values();
    let window_partition = shared.window_partition_spill_progress.get_values();
    let group_by = shared.group_by_spill_progress.get_values();

    Some(aggregate.add(&join).add(&window_partition).add(&group_by))
}
130+
108131
fn query_created_time(status: &SessionContext) -> SystemTime {
109132
match status.get_query_context_shared() {
110133
None => SystemTime::now(),

src/query/service/src/sessions/session_mgr.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use std::time::Duration;
2424
use databend_common_base::base::tokio;
2525
use databend_common_base::base::GlobalInstance;
2626
use databend_common_base::base::SignalStream;
27+
use databend_common_base::runtime::metrics::GLOBAL_METRICS_REGISTRY;
2728
use databend_common_catalog::table_context::ProcessInfoState;
2829
use databend_common_config::GlobalConfig;
2930
use databend_common_config::InnerConfig;
@@ -38,6 +39,7 @@ use log::info;
3839
use parking_lot::RwLock;
3940

4041
use crate::sessions::session::Session;
42+
use crate::sessions::session_mgr_metrics::SessionManagerMetricsCollector;
4143
use crate::sessions::ProcessInfo;
4244
use crate::sessions::SessionContext;
4345
use crate::sessions::SessionManagerStatus;
@@ -47,6 +49,7 @@ pub struct SessionManager {
4749
pub(in crate::sessions) max_sessions: usize,
4850
pub(in crate::sessions) active_sessions: Arc<RwLock<HashMap<String, Weak<Session>>>>,
4951
pub status: Arc<RwLock<SessionManagerStatus>>,
52+
pub metrics_collector: SessionManagerMetricsCollector,
5053

5154
// When typ is MySQL, insert into this map, key is id, val is MySQL connection id.
5255
pub(crate) mysql_conn_map: Arc<RwLock<HashMap<Option<u32>, String>>>,
@@ -55,20 +58,26 @@ pub struct SessionManager {
5558

5659
impl SessionManager {
5760
pub fn init(conf: &InnerConfig) -> Result<()> {
58-
GlobalInstance::set(Self::create(conf));
61+
let global_instance = Self::create(conf);
62+
GlobalInstance::set(global_instance.clone());
63+
GLOBAL_METRICS_REGISTRY
64+
.register_collector(Box::new(global_instance.metrics_collector.clone()));
5965

6066
Ok(())
6167
}
6268

6369
pub fn create(conf: &InnerConfig) -> Arc<SessionManager> {
6470
let max_sessions = conf.query.max_active_sessions as usize;
65-
Arc::new(SessionManager {
71+
let mgr = Arc::new(SessionManager {
6672
max_sessions,
6773
mysql_basic_conn_id: AtomicU32::new(9_u32.to_le()),
6874
status: Arc::new(RwLock::new(SessionManagerStatus::default())),
6975
mysql_conn_map: Arc::new(RwLock::new(HashMap::with_capacity(max_sessions))),
7076
active_sessions: Arc::new(RwLock::new(HashMap::with_capacity(max_sessions))),
71-
})
77+
metrics_collector: SessionManagerMetricsCollector::new(),
78+
});
79+
mgr.metrics_collector.attach_session_manager(mgr.clone());
80+
mgr
7281
}
7382

7483
pub fn instance() -> Arc<SessionManager> {

0 commit comments

Comments
 (0)