Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/query/service/src/physical_plans/physical_hash_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ impl HashJoin {
stage_sync_barrier.clone(),
self.projections.clone(),
rf_desc.clone(),
);
)?;

join_pipe_items.push(PipeItem::create(
hash_join,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ use parking_lot::RwLock;

use super::concat_buffer::ConcatBuffer;
use super::desc::RuntimeFilterDesc;
use super::runtime_filter::JoinRuntimeFilterPacket;
use crate::pipelines::memory_settings::MemorySettingsExt;
use crate::pipelines::processors::transforms::hash_join::common::wrap_true_validity;
use crate::pipelines::processors::transforms::hash_join::desc::MARKER_KIND_FALSE;
Expand Down Expand Up @@ -104,6 +105,7 @@ pub struct HashJoinBuildState {
pub(crate) concat_buffer: Mutex<ConcatBuffer>,
pub(crate) broadcast_id: Option<u32>,
pub(crate) is_runtime_filter_added: AtomicBool,
runtime_filter_packets: Mutex<Vec<JoinRuntimeFilterPacket>>,
}

impl HashJoinBuildState {
Expand Down Expand Up @@ -154,6 +156,7 @@ impl HashJoinBuildState {
concat_buffer: Mutex::new(ConcatBuffer::new(concat_threshold)),
broadcast_id,
is_runtime_filter_added: AtomicBool::new(false),
runtime_filter_packets: Mutex::new(Vec::new()),
}))
}

Expand Down Expand Up @@ -875,6 +878,15 @@ impl HashJoinBuildState {
&self.hash_join_state.hash_join_desc.runtime_filter.filters
}

/// Appends `packet` to the runtime filter packets stored on this build state.
///
/// The internal packet list is locked only for the duration of the push.
pub fn add_runtime_filter_packet(&self, packet: JoinRuntimeFilterPacket) {
    let mut packets = self.runtime_filter_packets.lock();
    packets.push(packet);
}

/// Removes and returns every runtime filter packet stored so far.
///
/// After this call the internal list is empty; packets added later are
/// returned by a subsequent call. The packets come back in the order in
/// which they were pushed.
pub fn take_runtime_filter_packets(&self) -> Vec<JoinRuntimeFilterPacket> {
    let mut packets = self.runtime_filter_packets.lock();
    // Swap the stored vector with an empty one instead of draining into a
    // freshly allocated vector: same result, no extra allocation or element moves.
    std::mem::take(&mut *packets)
}

/// only used for test
pub fn get_enable_bloom_runtime_filter(&self) -> bool {
self.hash_join_state
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use std::collections::HashMap;
use std::sync::Arc;

use databend_common_base::runtime::execute_futures_in_parallel;
use databend_common_catalog::runtime_filter_info::RuntimeFilterBloom;
use databend_common_catalog::runtime_filter_info::RuntimeFilterEntry;
use databend_common_catalog::runtime_filter_info::RuntimeFilterInfo;
Expand Down Expand Up @@ -74,7 +75,10 @@ pub async fn build_runtime_filter_infos(
probe_expr: probe_key.clone(),
bloom: if enabled {
if let Some(ref bloom) = packet.bloom {
Some(build_bloom_filter(bloom.clone(), probe_key, max_threads).await?)
Some(
build_bloom_filter(bloom.clone(), probe_key, max_threads, desc.id)
.await?,
)
} else {
None
}
Expand Down Expand Up @@ -256,6 +260,7 @@ async fn build_bloom_filter(
bloom: Vec<u64>,
probe_key: &Expr<String>,
max_threads: usize,
filter_id: usize,
) -> Result<RuntimeFilterBloom> {
let probe_key = match probe_key {
Expr::ColumnRef(col) => col,
Expand Down Expand Up @@ -286,27 +291,32 @@ async fn build_bloom_filter(
.map(|chunk| chunk.to_vec())
.collect();

let tasks: Vec<_> = chunks
.into_iter()
.map(|chunk| {
databend_common_base::runtime::spawn(async move {
let mut filter = Sbbf::new_with_ndv_fpp(total_items as u64, 0.01)
.map_err(|e| ErrorCode::Internal(e.to_string()))?;

filter.insert_hash_batch(&chunk);
Ok::<Sbbf, ErrorCode>(filter)
})
})
.collect();

let task_results = futures::future::join_all(tasks).await;

let filters: Vec<Sbbf> = task_results
.into_iter()
.map(|r| r.expect("Task panicked"))
.collect::<Result<Vec<_>>>()?;
let tasks = chunks.into_iter().map(|chunk| async move {
let mut filter = Sbbf::new_with_ndv_fpp(total_items as u64, 0.01)
.map_err(|e| ErrorCode::Internal(e.to_string()))?;

filter.insert_hash_batch(&chunk);
Ok::<Sbbf, ErrorCode>(filter)
});

let filters: Vec<Sbbf> = execute_futures_in_parallel(
tasks,
max_threads,
max_threads,
"runtime-filter-bloom-worker".to_owned(),
)
.await?
.into_iter()
.collect::<Result<Vec<_>>>()?;

let start = std::time::Instant::now();
let merged_filter = merge_bloom_filters_tree(filters);
let end = std::time::Instant::now();
log::info!(
"filter_id: {}, merge_bloom_filters_tree time: {:?}",
filter_id,
end - start
);

Ok(RuntimeFilterBloom {
column_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,64 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::atomic::Ordering;
use std::time::Instant;

use databend_common_exception::Result;
use databend_common_expression::DataBlock;
use databend_common_storages_fuse::TableContext;

use super::convert::build_runtime_filter_infos;
use super::global::get_global_runtime_filter_packet;
use crate::pipelines::processors::transforms::build_runtime_filter_packet;
use crate::pipelines::processors::transforms::JoinRuntimeFilterPacket;
use crate::pipelines::processors::HashJoinBuildState;

pub async fn build_and_push_down_runtime_filter(
build_chunks: &[DataBlock],
build_num_rows: usize,
mut packet: JoinRuntimeFilterPacket,
join: &HashJoinBuildState,
) -> Result<()> {
let overall_start = Instant::now();

let is_spill_happened = join.hash_join_state.need_next_round.load(Ordering::Acquire)
|| join
.hash_join_state
.is_spill_happened
.load(Ordering::Acquire);

let inlist_threshold = join
.ctx
.get_settings()
.get_inlist_runtime_filter_threshold()? as usize;
let bloom_threshold = join
.ctx
.get_settings()
.get_bloom_runtime_filter_threshold()? as usize;
let min_max_threshold = join
.ctx
.get_settings()
.get_min_max_runtime_filter_threshold()? as usize;
let selectivity_threshold = join
.ctx
.get_settings()
.get_join_runtime_filter_selectivity_threshold()?;

let build_start = Instant::now();
let mut packet = build_runtime_filter_packet(
build_chunks,
build_num_rows,
join.runtime_filter_desc(),
&join.func_ctx,
inlist_threshold,
bloom_threshold,
min_max_threshold,
selectivity_threshold,
is_spill_happened,
)?;
let build_time = build_start.elapsed();

log::info!("RUNTIME-FILTER: build runtime filter packet: {:?}, build_num_rows: {}, runtime_filter_desc: {:?}", packet, build_num_rows, join.runtime_filter_desc());

if let Some(broadcast_id) = join.broadcast_id {
let merge_start = Instant::now();
packet = get_global_runtime_filter_packet(broadcast_id, packet, &join.ctx).await?;
Expand All @@ -85,7 +43,12 @@ pub async fn build_and_push_down_runtime_filter(
.iter()
.map(|r| (r.id, r))
.collect();
let selectivity_threshold = join
.ctx
.get_settings()
.get_join_runtime_filter_selectivity_threshold()?;
let max_threads = join.ctx.get_settings().get_max_threads()? as usize;
let build_rows = packet.build_rows;
let runtime_filter_infos = build_runtime_filter_infos(
packet,
runtime_filter_descs,
Expand All @@ -98,11 +61,10 @@ pub async fn build_and_push_down_runtime_filter(
let filter_count = runtime_filter_infos.len();

log::info!(
"RUNTIME-FILTER: Built and deployed {} filters in {:?} (build: {:?}) for {} rows",
"RUNTIME-FILTER: Built and deployed {} filters in {:?} for {} rows",
filter_count,
total_time,
build_time,
build_num_rows
build_rows
);
log::info!(
"RUNTIME-FILTER: runtime_filter_infos: {:?}",
Expand Down
Loading