Skip to content

Commit fdf4332

Browse files
dantengsky and zhyass authored
chore: backport tweak compact strategy to 636-rc8.1 (#17246)
* chore(storage): do compact before recluster during compact hook (#16949) * chore: do compact before recluster during compact hook * update --------- Co-authored-by: dantengsky <[email protected]> * chore(storage): hook compact avoid scan all segments (#16954) * tweak gh action setup_bendsql * fix yml lint * fix statless tests * sync stateless tests --------- Co-authored-by: zhya <[email protected]>
1 parent 0fe1d11 commit fdf4332

File tree

12 files changed

+113
-91
lines changed

12 files changed

+113
-91
lines changed

.github/actions/setup_bendsql/action.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ runs:
77
if: runner.os == 'Linux'
88
shell: bash
99
run: |
10-
if bendsql --version; then
10+
if command -v bendsql >/dev/null 2>&1; then
1111
exit 0
1212
fi
1313
case $RUNNER_PROVIDER in
@@ -29,7 +29,7 @@ runs:
2929
if: runner.os == 'macOS'
3030
shell: bash
3131
run: |
32-
if bendsql --version; then
32+
if command -v bendsql >/dev/null 2>&1; then
3333
exit 0
3434
fi
3535
brew install databendcloud/homebrew-tap/bendsql

src/query/service/src/interpreters/hook/compact_hook.rs

Lines changed: 77 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -78,46 +78,50 @@ async fn do_hook_compact(
7878
}
7979

8080
pipeline.set_on_finished(move |info: &ExecutionInfo| {
81-
let compaction_limits = match compact_target.mutation_kind {
82-
MutationKind::Insert => {
83-
let compaction_num_block_hint = ctx.get_compaction_num_block_hint(&compact_target.table);
84-
info!("table {} hint number of blocks need to be compacted {}", compact_target.table, compaction_num_block_hint);
85-
if compaction_num_block_hint == 0 {
86-
return Ok(());
87-
}
88-
CompactionLimits {
89-
segment_limit: None,
90-
block_limit: Some(compaction_num_block_hint as usize),
81+
if info.res.is_ok() {
82+
let op_name = &trace_ctx.operation_name;
83+
metrics_inc_compact_hook_main_operation_time_ms(op_name, trace_ctx.start.elapsed().as_millis() as u64);
84+
info!("execute {op_name} finished successfully. running table optimization job.");
85+
86+
let compact_start_at = Instant::now();
87+
let compaction_limits = match compact_target.mutation_kind {
88+
MutationKind::Insert => {
89+
let compaction_num_block_hint = ctx.get_compaction_num_block_hint(&compact_target.table);
90+
info!("table {} hint number of blocks need to be compacted {}", compact_target.table, compaction_num_block_hint);
91+
if compaction_num_block_hint == 0 {
92+
return Ok(());
93+
}
94+
CompactionLimits {
95+
segment_limit: None,
96+
block_limit: Some(compaction_num_block_hint as usize),
97+
}
9198
}
92-
}
93-
_ =>
94-
// for mutations other than Insertions, we use an empirical value of 3 segments as the
95-
// limit for compaction. to be refined later.
96-
{
99+
_ => {
97100
let auto_compaction_segments_limit = ctx.get_settings().get_auto_compaction_segments_limit()?;
98101
CompactionLimits {
99102
segment_limit: Some(auto_compaction_segments_limit as usize),
100103
block_limit: None,
101104
}
102105
}
103-
};
106+
};
104107

105-
let op_name = &trace_ctx.operation_name;
106-
metrics_inc_compact_hook_main_operation_time_ms(op_name, trace_ctx.start.elapsed().as_millis() as u64);
108+
// keep the original progress value
109+
let progress = ctx.get_write_progress();
110+
let progress_value = progress.as_ref().get_values();
107111

108-
let compact_start_at = Instant::now();
109-
if info.res.is_ok() {
110-
info!("execute {op_name} finished successfully. running table optimization job.");
111112
match GlobalIORuntime::instance().block_on({
112113
compact_table(ctx, compact_target, compaction_limits, lock_opt)
113114
}) {
114115
Ok(_) => {
115116
info!("execute {op_name} finished successfully. table optimization job finished.");
116117
}
117-
Err(e) => { info!("execute {op_name} finished successfully. table optimization job failed. {:?}", e) }
118+
Err(e) => { info!("execute {op_name} finished successfully. table optimization job failed. {:?}", e); }
118119
}
120+
121+
// reset the progress value
122+
progress.set(&progress_value);
123+
metrics_inc_compact_hook_compact_time_ms(&trace_ctx.operation_name, compact_start_at.elapsed().as_millis() as u64);
119124
}
120-
metrics_inc_compact_hook_compact_time_ms(&trace_ctx.operation_name, compact_start_at.elapsed().as_millis() as u64);
121125

122126
Ok(())
123127
});
@@ -141,7 +145,7 @@ async fn compact_table(
141145
&compact_target.table,
142146
)
143147
.await?;
144-
let do_recluster = !table.cluster_keys(ctx.clone()).is_empty();
148+
let settings = ctx.get_settings();
145149

146150
// evict the table from cache
147151
ctx.evict_table_from_cache(
@@ -150,56 +154,60 @@ async fn compact_table(
150154
&compact_target.table,
151155
)?;
152156

153-
let mut build_res = if do_recluster {
154-
let recluster = RelOperator::Recluster(Recluster {
155-
catalog: compact_target.catalog,
156-
database: compact_target.database,
157-
table: compact_target.table,
158-
filters: None,
159-
limit: compaction_limits.segment_limit,
160-
});
161-
let s_expr = SExpr::create_leaf(Arc::new(recluster));
162-
let recluster_interpreter =
163-
ReclusterTableInterpreter::try_create(ctx.clone(), s_expr, lock_opt, false)?;
164-
recluster_interpreter.execute2().await?
165-
} else {
157+
{
158+
// do compact.
166159
let compact_block = RelOperator::CompactBlock(OptimizeCompactBlock {
167-
catalog: compact_target.catalog,
168-
database: compact_target.database,
169-
table: compact_target.table,
170-
limit: compaction_limits,
160+
catalog: compact_target.catalog.clone(),
161+
database: compact_target.database.clone(),
162+
table: compact_target.table.clone(),
163+
limit: compaction_limits.clone(),
171164
});
172165
let s_expr = SExpr::create_leaf(Arc::new(compact_block));
173-
let compact_interpreter =
174-
OptimizeCompactBlockInterpreter::try_create(ctx.clone(), s_expr, lock_opt, false)?;
175-
compact_interpreter.execute2().await?
176-
};
177-
178-
if build_res.main_pipeline.is_empty() {
179-
return Ok(());
166+
let compact_interpreter = OptimizeCompactBlockInterpreter::try_create(
167+
ctx.clone(),
168+
s_expr,
169+
lock_opt.clone(),
170+
false,
171+
)?;
172+
let mut build_res = compact_interpreter.execute2().await?;
173+
// execute the compact pipeline
174+
if build_res.main_pipeline.is_complete_pipeline()? {
175+
build_res.set_max_threads(settings.get_max_threads()? as usize);
176+
let executor_settings = ExecutorSettings::try_create(ctx.clone())?;
177+
178+
let mut pipelines = build_res.sources_pipelines;
179+
pipelines.push(build_res.main_pipeline);
180+
181+
let complete_executor =
182+
PipelineCompleteExecutor::from_pipelines(pipelines, executor_settings)?;
183+
184+
// Clears previously generated segment locations to avoid duplicate data in the refresh phase
185+
ctx.clear_segment_locations()?;
186+
ctx.set_executor(complete_executor.get_inner())?;
187+
complete_executor.execute()?;
188+
drop(complete_executor);
189+
}
180190
}
181191

182-
// execute the compact pipeline (for table with cluster keys, re-cluster will also be executed)
183-
let settings = ctx.get_settings();
184-
build_res.set_max_threads(settings.get_max_threads()? as usize);
185-
let settings = ExecutorSettings::try_create(ctx.clone())?;
186-
187-
if build_res.main_pipeline.is_complete_pipeline()? {
188-
let mut pipelines = build_res.sources_pipelines;
189-
pipelines.push(build_res.main_pipeline);
190-
191-
let complete_executor = PipelineCompleteExecutor::from_pipelines(pipelines, settings)?;
192-
193-
// keep the original progress value
194-
let progress_value = ctx.get_write_progress_value();
195-
// Clears previously generated segment locations to avoid duplicate data in the refresh phase
196-
ctx.clear_segment_locations()?;
197-
ctx.set_executor(complete_executor.get_inner())?;
198-
complete_executor.execute()?;
199-
drop(complete_executor);
200-
201-
// reset the progress value
202-
ctx.get_write_progress().set(&progress_value);
192+
{
193+
// do recluster.
194+
if !table.cluster_keys(ctx.clone()).is_empty() {
195+
let recluster = RelOperator::Recluster(Recluster {
196+
catalog: compact_target.catalog,
197+
database: compact_target.database,
198+
table: compact_target.table,
199+
filters: None,
200+
limit: Some(settings.get_auto_compaction_segments_limit()? as usize),
201+
});
202+
let s_expr = SExpr::create_leaf(Arc::new(recluster));
203+
let recluster_interpreter =
204+
ReclusterTableInterpreter::try_create(ctx.clone(), s_expr, lock_opt, false)?;
205+
// Recluster will be done in `ReclusterTableInterpreter::execute2` directly,
206+
// we do not need to use `PipelineCompleteExecutor` to execute it.
207+
let build_res = recluster_interpreter.execute2().await?;
208+
assert!(build_res.main_pipeline.is_empty());
209+
}
203210
}
211+
204212
Ok(())
205213
}

src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ impl BlockCompactMutator {
168168
self.ctx.set_status_info(&status);
169169
}
170170

171-
if is_end {
171+
if is_end || segment_idx >= num_segment_limit {
172172
break;
173173
}
174174
}

tests/shell_env.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export QUERY_CLICKHOUSE_HTTP_HANDLER_PORT=${QUERY_CLICKHOUSE_HTTP_HANDLER_PORT:=
1414

1515

1616
export BENDSQL_CLIENT_CONNECT="bendsql -uroot --host ${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT} --quote-style=never"
17+
export BENDSQL_CLIENT_OUTPUT_NULL="bendsql -uroot --host ${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT} --quote-style=never --output null"
1718

1819
# share client
1920
export QUERY_MYSQL_HANDLER_SHARE_PROVIDER_PORT="18000"

tests/suites/0_stateless/03_dml/03_0016_update_with_lock.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ echo "set global enable_table_lock = 1" | $BENDSQL_CLIENT_CONNECT
1212
for i in $(seq 1 10);do
1313
(
1414
j=$(($i+1))
15-
echo "insert into test_update.t values($i, $j)" | $BENDSQL_CLIENT_CONNECT
15+
echo "insert into test_update.t values($i, $j)" | $BENDSQL_CLIENT_OUTPUT_NULL
1616
)&
1717
done
1818
wait
@@ -23,7 +23,7 @@ echo "select count() from test_update.t where a + 1 = b" | $BENDSQL_CLIENT_CONNE
2323
echo "Test table lock for update"
2424
for i in $(seq 1 10);do
2525
(
26-
echo "update test_update.t set b = $i where a = $i" | $BENDSQL_CLIENT_CONNECT
26+
echo "update test_update.t set b = $i where a = $i" | $BENDSQL_CLIENT_OUTPUT_NULL
2727
)&
2828
done
2929
wait

tests/suites/0_stateless/05_hints/05_0001_set_var.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ echo "select /*+ SET_VAR(storage_read_buffer_size=200) SET_VAR(timezone=x) */ na
1818
echo "drop database if exists set_var;" | $BENDSQL_CLIENT_CONNECT
1919
echo "create database set_var;" | $BENDSQL_CLIENT_CONNECT
2020
echo "create table set_var.test(id int);" | $BENDSQL_CLIENT_CONNECT
21-
echo "insert /*+SET_VAR(timezone='Asia/Shanghai') SET_VAR(storage_read_buffer_size=200)*/ into set_var.test values(1)" | $BENDSQL_CLIENT_CONNECT
22-
echo "insert /*+SET_VAR(timezone='Asia/Shanghai') (storage_read_buffer_size=200)*/ into set_var.test values(3)" | $BENDSQL_CLIENT_CONNECT
21+
echo "insert /*+SET_VAR(timezone='Asia/Shanghai') SET_VAR(storage_read_buffer_size=200)*/ into set_var.test values(1)" | $BENDSQL_CLIENT_OUTPUT_NULL
22+
echo "insert /*+SET_VAR(timezone='Asia/Shanghai') (storage_read_buffer_size=200)*/ into set_var.test values(3)" | $BENDSQL_CLIENT_OUTPUT_NULL
2323
echo "select /*+SET_VAR(timezone='Asia/Shanghai') SET_VAR(storage_read_buffer_size=200)*/ * from set_var.test order by id" | $BENDSQL_CLIENT_CONNECT
2424
echo "select /*+SET_VAR(timezone='Asia/Shanghai') (storage_read_buffer_size=200)*/ id from set_var.test order by id" | $BENDSQL_CLIENT_CONNECT
25-
echo "update /*+SET_VAR(timezone='Asia/Shanghai') SET_VAR(storage_read_buffer_size=200)*/ set_var.test set id=2 where id=1" | $BENDSQL_CLIENT_CONNECT
26-
echo "update /*+SET_VAR(timezone='Asia/Shanghai') (storage_read_buffer_size=200)*/ set_var.test set id=4 where id=3" | $BENDSQL_CLIENT_CONNECT
25+
echo "update /*+SET_VAR(timezone='Asia/Shanghai') SET_VAR(storage_read_buffer_size=200)*/ set_var.test set id=2 where id=1" | $BENDSQL_CLIENT_OUTPUT_NULL
26+
echo "update /*+SET_VAR(timezone='Asia/Shanghai') (storage_read_buffer_size=200)*/ set_var.test set id=4 where id=3" | $BENDSQL_CLIENT_OUTPUT_NULL
2727
echo "select * from set_var.test order by id" | $BENDSQL_CLIENT_CONNECT
28-
echo "delete /*+SET_VAR(timezone='Asia/Shanghai') SET_VAR(storage_read_buffer_size=200)*/ from set_var.test where id=2" | $BENDSQL_CLIENT_CONNECT
29-
echo "delete /*+SET_VAR(timezone='Asia/Shanghai') (storage_read_buffer_size=200)*/ from set_var.test where id=4" | $BENDSQL_CLIENT_CONNECT
28+
echo "delete /*+SET_VAR(timezone='Asia/Shanghai') SET_VAR(storage_read_buffer_size=200)*/ from set_var.test where id=2" | $BENDSQL_CLIENT_OUTPUT_NULL
29+
echo "delete /*+SET_VAR(timezone='Asia/Shanghai') (storage_read_buffer_size=200)*/ from set_var.test where id=4" | $BENDSQL_CLIENT_OUTPUT_NULL
3030
echo "select * from set_var.test" | $BENDSQL_CLIENT_CONNECT
3131

3232
echo "set timezone='America/Toronto'; select /*+SET_VAR(timezone='Asia/Shanghai') */ timezone(); select timezone();" | $BENDSQL_CLIENT_CONNECT

tests/suites/0_stateless/05_hints/05_0002_deduplicate_label.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ echo "drop stage if exists s5_1;" | $BENDSQL_CLIENT_CONNECT
1212

1313
echo "CREATE TABLE t5(a Int, b bool) Engine = Fuse;" | $BENDSQL_CLIENT_CONNECT
1414

15-
echo "INSERT /*+ SET_VAR(deduplicate_label='insert-test') */ INTO t5 (a, b) VALUES(1, false)" | $BENDSQL_CLIENT_CONNECT
16-
echo "INSERT /*+ SET_VAR(deduplicate_label='insert-test') */ INTO t5 (a, b) VALUES(1, false)" | $BENDSQL_CLIENT_CONNECT
15+
echo "INSERT /*+ SET_VAR(deduplicate_label='insert-test') */ INTO t5 (a, b) VALUES(1, false)" | $BENDSQL_CLIENT_OUTPUT_NULL
16+
echo "INSERT /*+ SET_VAR(deduplicate_label='insert-test') */ INTO t5 (a, b) VALUES(1, false)" | $BENDSQL_CLIENT_OUTPUT_NULL
1717
echo "select * from t5" | $BENDSQL_CLIENT_CONNECT
1818

1919
echo "CREATE STAGE s5_1;" | $BENDSQL_CLIENT_CONNECT
@@ -23,12 +23,12 @@ echo "CREATE STAGE s5;" | $MYSQL_CLINEENRT_CONNECT
2323
echo "copy /*+SET_VAR(deduplicate_label='copy-test')*/ into @s5 from (select * from t5);" | $MYSQL_CLINEENRT_CONNECT
2424
echo "select * from @s5;" | $MYSQL_CLINEENRT_CONNECT
2525

26-
echo "UPDATE /*+ SET_VAR(deduplicate_label='update-test') */ t5 SET a = 20 WHERE b = false;" | $BENDSQL_CLIENT_CONNECT
27-
echo "UPDATE /*+ SET_VAR(deduplicate_label='update-test') */ t5 SET a = 30 WHERE b = false;" | $BENDSQL_CLIENT_CONNECT
26+
echo "UPDATE /*+ SET_VAR(deduplicate_label='update-test') */ t5 SET a = 20 WHERE b = false;" | $BENDSQL_CLIENT_OUTPUT_NULL
27+
echo "UPDATE /*+ SET_VAR(deduplicate_label='update-test') */ t5 SET a = 30 WHERE b = false;" | $BENDSQL_CLIENT_OUTPUT_NULL
2828
echo "select * from t5" | $BENDSQL_CLIENT_CONNECT
2929

30-
echo "replace /*+ SET_VAR(deduplicate_label='replace-test') */ into t5 on(a,b) values(40,false);" | $BENDSQL_CLIENT_CONNECT
31-
echo "replace /*+ SET_VAR(deduplicate_label='replace-test') */ into t5 on(a,b) values(50,false);" | $BENDSQL_CLIENT_CONNECT
30+
echo "replace /*+ SET_VAR(deduplicate_label='replace-test') */ into t5 on(a,b) values(40,false);" | $BENDSQL_CLIENT_OUTPUT_NULL
31+
echo "replace /*+ SET_VAR(deduplicate_label='replace-test') */ into t5 on(a,b) values(50,false);" | $BENDSQL_CLIENT_OUTPUT_NULL
3232
echo "select * from t5 order by a" | $BENDSQL_CLIENT_CONNECT
3333

3434
echo "drop table if exists t5;" | $BENDSQL_CLIENT_CONNECT

tests/suites/0_stateless/12_time_travel/12_0005_changes_select.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
66

77
## Create table t12_0005
88
echo "create table t12_0005(a int, b int) change_tracking=true" | $BENDSQL_CLIENT_CONNECT
9-
echo "insert into t12_0005 values(1, 1),(2, 1)" | $BENDSQL_CLIENT_CONNECT
9+
echo "insert into t12_0005 values(1, 1),(2, 1)" | $BENDSQL_CLIENT_OUTPUT_NULL
1010

11-
echo "update t12_0005 set b = 2 where a = 2" | $BENDSQL_CLIENT_CONNECT
12-
echo "delete from t12_0005 where a = 1" | $BENDSQL_CLIENT_CONNECT
13-
echo "insert into t12_0005 values(3, 3)" | $BENDSQL_CLIENT_CONNECT
11+
echo "update t12_0005 set b = 2 where a = 2" | $BENDSQL_CLIENT_OUTPUT_NULL
12+
echo "delete from t12_0005 where a = 1" | $BENDSQL_CLIENT_OUTPUT_NULL
13+
echo "insert into t12_0005 values(3, 3)" | $BENDSQL_CLIENT_OUTPUT_NULL
1414

1515
echo "latest snapshot should contain 2 rows"
1616
echo "select count(*) from t12_0005" | $BENDSQL_CLIENT_CONNECT
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
alter table add a column
22
update table column
3+
1
34
alter table drop a column
45
update table column
6+
0
57
alter table add a column
68
update table column
9+
1
710
alter table drop a column
811
update table column
12+
0

0 commit comments

Comments (0)