-
Notifications
You must be signed in to change notification settings - Fork 221
/
Copy pathnested_loop_join.rs
162 lines (146 loc) · 6.2 KB
/
nested_loop_join.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Copyright 2024 RisingLight Project Authors. Licensed under Apache-2.0.
use std::vec::Vec;
use futures::TryStreamExt;
use super::*;
use crate::array::{
Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, BoolArrayBuilder, DataChunk,
DataChunkBuilder, RowRef,
};
use crate::types::{DataType, DataValue};
/// The executor for nested loop join.
pub struct NestedLoopJoinExecutor {
pub op: Expr,
pub condition: RecExpr,
pub left_types: Vec<DataType>,
pub right_types: Vec<DataType>,
}
impl NestedLoopJoinExecutor {
#[try_stream(boxed, ok = DataChunk, error = ExecutorError)]
pub async fn execute(self, left_child: BoxedExecutor, right_child: BoxedExecutor) {
if !matches!(self.op, Expr::Inner | Expr::LeftOuter) {
todo!("unsupported join type: {:?}", self.op);
}
let left_chunks = left_child.try_collect::<Vec<DataChunk>>().await?;
let left_rows = || left_chunks.iter().flat_map(|chunk| chunk.rows());
let data_types = self.left_types.iter().chain(self.right_types.iter());
let mut builder = DataChunkBuilder::new(data_types, PROCESSING_WINDOW_SIZE);
let mut filter_builder = BoolArrayBuilder::with_capacity(PROCESSING_WINDOW_SIZE);
let mut right_row_num = 0;
// inner join: left x right
#[for_await]
for right_chunk in right_child {
let right_chunk = right_chunk?;
for right_row in right_chunk.rows() {
for left_row in left_rows() {
let values = left_row.values().chain(right_row.values());
if let Some(chunk) = builder.push_row(values) {
// evaluate filter bitmap
let ArrayImpl::Bool(a) = Evaluator::new(&self.condition).eval(&chunk)?
else {
panic!("join condition should return bool");
};
yield chunk.filter(a.true_array());
filter_builder.append(&a);
}
tokio::task::consume_budget().await;
}
}
right_row_num += right_chunk.cardinality();
}
// take rest of data
if let Some(chunk) = builder.take() {
// evaluate filter bitmap
let ArrayImpl::Bool(a) = Evaluator::new(&self.condition).eval(&chunk)? else {
panic!("join condition should return bool");
};
yield chunk.filter(a.true_array());
filter_builder.append(&a);
}
let filter = filter_builder.take();
// append rows for left outer join
if matches!(self.op, Expr::LeftOuter) {
// we need to pick row of left_row which unmatched rows
let left_row_num = left_rows().count();
for (mut i, left_row) in left_rows().enumerate() {
let mut matched = false;
for _ in 0..right_row_num {
// the `filter` has all matching results of the cross join result `right x left`
// to compute the unmatched rows, we will need to first pick a row of left_row
// (namely the i-th row). then we check if all `filter[i + left_row_num * j]`
matched |= matches!(filter.get(i), Some(true));
i += left_row_num;
}
if matched {
continue;
}
// if all false, we append row: (left, NULL)
let values =
(left_row.values()).chain(self.right_types.iter().map(|_| DataValue::Null));
if let Some(chunk) = builder.push_row(values) {
yield chunk;
}
tokio::task::consume_budget().await;
}
}
if let Some(chunk) = builder.take() {
yield chunk;
}
}
}
/// The executor for nested loop semi/anti join.
pub struct NestedLoopSemiJoinExecutor {
pub anti: bool,
pub condition: RecExpr,
pub left_types: Vec<DataType>,
}
impl NestedLoopSemiJoinExecutor {
#[try_stream(boxed, ok = DataChunk, error = ExecutorError)]
pub async fn execute(self, left_child: BoxedExecutor, right_child: BoxedExecutor) {
let right_chunks = right_child.try_collect::<Vec<DataChunk>>().await?;
let mut builder = DataChunkBuilder::new(&self.left_types, PROCESSING_WINDOW_SIZE);
#[for_await]
for left_chunk in left_child {
let left_chunk = left_chunk?;
'left_row: for left_row in left_chunk.rows() {
let mut exists = false;
for right_chunk in &right_chunks {
let left_chunk = self.left_row_to_chunk(&left_row, right_chunk.cardinality());
let join_chunk = left_chunk.row_concat(right_chunk.clone());
// evaluate filter bitmap
let ArrayImpl::Bool(a) = Evaluator::new(&self.condition).eval(&join_chunk)?
else {
panic!("join condition should return bool");
};
exists |= a.true_array().iter().any(|v| *v);
if exists && !self.anti {
if let Some(chunk) = builder.push_row(left_row.values()) {
yield chunk;
}
continue 'left_row;
}
tokio::task::consume_budget().await;
}
if exists ^ self.anti {
if let Some(chunk) = builder.push_row(left_row.values()) {
yield chunk;
}
}
}
}
if let Some(chunk) = builder.take() {
yield chunk;
}
}
/// Expand the left row to a chunk with given length.
fn left_row_to_chunk(&self, row: &RowRef<'_>, len: usize) -> DataChunk {
self.left_types
.iter()
.zip(row.values())
.map(|(ty, value)| {
let mut builder = ArrayBuilderImpl::with_capacity(len, ty);
builder.push_n(len, &value);
builder.finish()
})
.collect()
}
}