Skip to content

Commit 547bceb

Browse files
authored
Merge pull request duckdb#115 from Mytherin/noexperimentalpushdown
Enable filter pushdown by default - and add more extensive testing for filtering on all types
2 parents fc671e4 + 20d16db commit 547bceb

File tree

5 files changed

+76
-5
lines changed

5 files changed

+76
-5
lines changed

src/mysql_extension.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ static void LoadInternal(DatabaseInstance &db) {
110110

111111
config.AddExtensionOption("mysql_experimental_filter_pushdown",
112112
"Whether or not to use filter pushdown (currently experimental)", LogicalType::BOOLEAN,
113-
Value::BOOLEAN(false));
113+
Value::BOOLEAN(true));
114114
config.AddExtensionOption("mysql_debug_show_queries", "DEBUG SETTING: print all queries sent to MySQL to stdout",
115115
LogicalType::BOOLEAN, Value::BOOLEAN(false), SetMySQLDebugQueryPrint);
116116
config.AddExtensionOption("mysql_tinyint1_as_boolean", "Whether or not to convert TINYINT(1) columns to BOOLEAN",

src/mysql_filter_pushdown.cpp

+32-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
#include "mysql_filter_pushdown.hpp"
22
#include "mysql_utils.hpp"
3+
#include "duckdb/planner/filter/optional_filter.hpp"
4+
#include "duckdb/planner/filter/in_filter.hpp"
35

46
namespace duckdb {
57

@@ -30,12 +32,26 @@ string MySQLFilterPushdown::TransformComparison(ExpressionType type) {
3032
}
3133
}
3234

35+
36+
static string TransformBlobToMySQL(const string &val) {
37+
char const HEX_DIGITS[] = "0123456789ABCDEF";
38+
39+
string result = "x'";
40+
for(idx_t i = 0; i < val.size(); i++) {
41+
uint8_t byte_val = static_cast<uint8_t>(val[i]);
42+
result += HEX_DIGITS[(byte_val >> 4) & 0xf];
43+
result += HEX_DIGITS[byte_val & 0xf];
44+
}
45+
result += "'";
46+
return result;
47+
}
48+
3349
string MySQLFilterPushdown::TransformConstant(const Value &val) {
3450
if (val.type().IsNumeric()) {
3551
return val.ToSQLString();
3652
}
3753
if (val.type().id() == LogicalTypeId::BLOB) {
38-
throw NotImplementedException("Unsupported type for filter pushdown: BLOB");
54+
return TransformBlobToMySQL(StringValue::Get(val));
3955
}
4056
if (val.type().id() == LogicalTypeId::TIMESTAMP_TZ) {
4157
return val.DefaultCastAs(LogicalType::TIMESTAMP).DefaultCastAs(LogicalType::VARCHAR).ToSQLString();
@@ -63,6 +79,21 @@ string MySQLFilterPushdown::TransformFilter(string &column_name, TableFilter &fi
6379
auto operator_string = TransformComparison(constant_filter.comparison_type);
6480
return StringUtil::Format("%s %s %s", column_name, operator_string, constant_string);
6581
}
82+
case TableFilterType::OPTIONAL_FILTER: {
83+
auto &optional_filter = filter.Cast<OptionalFilter>();
84+
return TransformFilter(column_name, *optional_filter.child_filter);
85+
}
86+
case TableFilterType::IN_FILTER: {
87+
auto &in_filter = filter.Cast<InFilter>();
88+
string in_list;
89+
for(auto &val : in_filter.values) {
90+
if (!in_list.empty()) {
91+
in_list += ", ";
92+
}
93+
in_list += TransformConstant(val);
94+
}
95+
return column_name + " IN (" + in_list + ")";
96+
}
6697
default:
6798
throw InternalException("Unsupported table filter type");
6899
}

src/storage/mysql_execute_query.cpp

+23-1
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,29 @@ string ExtractFilters(PhysicalOperator &child, const string &statement) {
111111
if (!table_scan.table_filters) {
112112
return string();
113113
}
114-
throw NotImplementedException("Pushed down table filters not supported currently");
114+
string result;
115+
for(auto &entry : table_scan.table_filters->filters) {
116+
auto column_index = entry.first;
117+
auto &filter = entry.second;
118+
string column_name;
119+
if (column_index < table_scan.names.size()) {
120+
const auto col_id = table_scan.column_ids[column_index].GetPrimaryIndex();
121+
if (col_id == COLUMN_IDENTIFIER_ROW_ID) {
122+
column_name = "rowid";
123+
} else {
124+
column_name = table_scan.names[col_id];
125+
}
126+
}
127+
BoundReferenceExpression bound_ref(std::move(column_name), LogicalTypeId::INVALID, 0);
128+
auto filter_expr = filter->ToExpression(bound_ref);
129+
auto filter_str = filter_expr->ToString();
130+
if (result.empty()) {
131+
result = std::move(filter_str);
132+
} else {
133+
result += " AND " + filter_str;
134+
}
135+
}
136+
return result;
115137
} else {
116138
throw NotImplementedException("Unsupported operator type %s in %s statement - only simple deletes "
117139
"(e.g. %s "

test/sql/attach_filter_pushdown.test

+2-2
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ SELECT COUNT(*) FROM s1.text_tbl WHERE v= '🦆'
6363
1
6464

6565
# blob pushdown
66-
statement error
66+
query I
6767
SELECT COUNT(*) FROM s1.blob_tbl WHERE bl= BLOB '\x80'
6868
----
69-
Unsupported
69+
1

test/sql/attach_types.test

+18
Original file line number | Diff line number | Diff line change
@@ -66,3 +66,21 @@ SELECT COLUMNS(*)::VARCHAR FROM s.all_types
6666
false -128 -32768 -2147483648 -9223372036854775808 0 0 0 2000-01-01 00:00:00 2000-01-01 01:02:03 2000-01-01 01:02:03 2000-01-01 01:02:03 2000-01-01 01:02:03 00:00:00+15:00 2000-01-01 01:02:03 -999.9 -99999.9999 -999999999999.999999 -9999999999999999999999999999.9999999999 00000000-0000-0000-0000-000000000000 00:00:00 🦆🦆🦆🦆🦆🦆 thisisalongblob\x00withnullbytes 0010001001011100010101011010111 DUCK_DUCK_ENUM enum_0 enum_0
6767
true 127 32767 2147483647 9223372036854775807 255 65535 4294967295 2000-01-01 24:00:00 2000-01-01 01:02:03 2000-01-01 01:02:03 2000-01-01 01:02:03 2000-01-01 01:02:03 00:00:00+15:00 2000-01-01 01:02:03 999.9 99999.9999 999999999999.999999 9999999999999999999999999999.9999999999 ffffffff-ffff-ffff-ffff-ffffffffffff 83 years 3 months 999 days 00:16:39.999999 goo\0se \x00\x00\x00a 10101 GOOSE enum_299 enum_69999
6868
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
69+
70+
# filter pushdown
71+
foreach column_name bool tinyint smallint int bigint utinyint usmallint uint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz dec_4_1 dec_9_4 dec_18_6 dec38_10 uuid interval varchar blob bit small_enum medium_enum large_enum
72+
73+
statement ok
74+
SET VARIABLE minimum_value=(SELECT MIN(${column_name}) min_val FROM s.all_types);
75+
76+
query I
77+
SELECT ANY_VALUE(${column_name})=getvariable('minimum_value') FROM s.all_types WHERE ${column_name}=getvariable('minimum_value')
78+
----
79+
true
80+
81+
query I
82+
SELECT ANY_VALUE(${column_name})=getvariable('minimum_value') FROM s.all_types WHERE ${column_name} IN (getvariable('minimum_value'), getvariable('minimum_value'))
83+
----
84+
true
85+
86+
endloop

0 commit comments

Comments
 (0)