Skip to content

Commit 06d5de1

Browse files
authored
Merge pull request duckdb#112 from Mytherin/uri
Add support for MySQL URI connection strings
2 parents 360c933 + e183160 commit 06d5de1

File tree

4 files changed

+335
-6
lines changed

4 files changed

+335
-6
lines changed

src/mysql_utils.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,19 @@ MySQLConnectionParameters MySQLUtils::ParseConnectionParameters(const string &ds
125125
} else {
126126
result.client_flag &= ~CLIENT_COMPRESS;
127127
}
128+
} else if (key == "compression") {
129+
set_options.insert("compress");
130+
auto val = StringUtil::Lower(value);
131+
if (val == "required") {
132+
result.client_flag |= CLIENT_COMPRESS;
133+
} else if (val == "disabled") {
134+
result.client_flag &= ~CLIENT_COMPRESS;
135+
} else if (val == "preferred") {
136+
// nop
137+
} else {
138+
throw InvalidInputException("Invalid dsn - compression mode must be either disabled/required/preferred - got %s",
139+
value);
140+
}
128141
} else if (key == "ssl_mode") {
129142
set_options.insert("ssl_mode");
130143
auto val = StringUtil::Lower(value);

src/storage/mysql_catalog.cpp

+254
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,256 @@ unique_ptr<SecretEntry> GetSecret(ClientContext &context, const string &secret_n
6464
return nullptr;
6565
}
6666

67+
struct URIToken {
68+
string value;
69+
char delimiter;
70+
};
71+
72+
//! Decodes the percent-escapes found in input[start, end) and returns the decoded text.
//! Only the escapes listed in the table below are recognized; the two hex digits are matched
//! case-insensitively. A '%' that does not start a recognized escape is copied to the output unchanged.
string UnescapePercentage(const string &input, idx_t start, idx_t end) {
	// url escapes encoded as consecutive [HEX][HEX][RESULT] triples
	// note that %3A decodes to ':' while %3B decodes to ';'
	auto url_escapes = "20 3C<3E>23#25%2B+7B{7D}7C|5C\\5E^7E~5B[5D]60`3B;2F/3F?3A:40@3D=26&24$";

	string result;
	for (idx_t i = start; i < end; i++) {
		// an escape needs two more characters after the '%' that still lie inside [start, end)
		if (input[i] == '%' && i + 2 < end) {
			// find the escape code
			char first_char = StringUtil::CharacterToUpper(input[i + 1]);
			char second_char = StringUtil::CharacterToUpper(input[i + 2]);
			// '\0' doubles as the "not found" marker - no entry in the table decodes to it
			char escape_result = '\0';
			for (idx_t esc_pos = 0; url_escapes[esc_pos]; esc_pos += 3) {
				if (first_char == url_escapes[esc_pos] && second_char == url_escapes[esc_pos + 1]) {
					// found the correct escape
					escape_result = url_escapes[esc_pos + 2];
					break;
				}
			}
			if (escape_result != '\0') {
				// found the escape - emit the decoded character and skip over the two hex digits
				result += escape_result;
				i += 2;
				continue;
			}
			// escape not found - just put the % in as normal
		}
		result += input[i];
	}
	return result;
}
102+
103+
//! Splits dsn (beginning at offset "start") into tokens at every ':', '@', '/', '?', '=' and '&'.
//! Each token records its percent-decoded text and the delimiter that ended it.
//! The remainder after the last delimiter is always appended as a final token with delimiter '\0',
//! so the returned vector is never empty.
vector<URIToken> ParseURITokens(const string &dsn, idx_t start) {
	vector<URIToken> tokens;
	idx_t token_begin = start;
	for (idx_t cursor = start; cursor < dsn.size(); cursor++) {
		const char c = dsn[cursor];
		const bool is_delimiter = c == ':' || c == '@' || c == '/' || c == '?' || c == '=' || c == '&';
		if (!is_delimiter) {
			// regular character - it is part of the token that is currently being scanned
			continue;
		}
		// the delimiter closes the current token
		URIToken current;
		current.value = UnescapePercentage(dsn, token_begin, cursor);
		current.delimiter = c;
		tokens.push_back(std::move(current));
		// the next token begins right after the delimiter
		token_begin = cursor + 1;
	}
	// whatever is left over (possibly nothing) forms the last token
	URIToken last;
	last.value = UnescapePercentage(dsn, token_begin, dsn.size());
	last.delimiter = '\0';
	tokens.push_back(std::move(last));
	return tokens;
}
132+
133+
struct URIValue {
134+
URIValue(string name_p, string value_p) : name(std::move(name_p)), value(std::move(value_p)) {}
135+
136+
string name;
137+
string value;
138+
};
139+
140+
//! Determines the option name ("user", "passwd", "host", "port", "db") for each of the first
//! token_count tokens, based on the delimiters that separate them.
//! \param tokens      the tokens of the URI
//! \param token_count the number of leading tokens that precede the "?attribute=value" section
//! \param error       receives a ParserException when the tokens do not match the expected layout
//! \return one name per token on success. When "error" is set the function returns immediately and
//!         the result only holds the names that were assigned up to that point.
vector<string> GetAttributeNames(const vector<URIToken> &tokens, idx_t token_count, ErrorData &error) {
	// [scheme://][user[:[password]]@]host[:port][/schema][?attribute1=value1&attribute2=value2...
	vector<string> result;
	if (token_count == 1) {
		// only one token - always the host
		result.emplace_back("host");
		return result;
	}
	// index of the token that holds the host
	idx_t current_pos = 0;
	if (tokens[0].delimiter == '@') {
		// user@...
		result.emplace_back("user");
		result.emplace_back("host");
		current_pos = 1;
	} else if (tokens[1].delimiter == '@') {
		// user:password@
		if (tokens[0].delimiter != ':') {
			error = ParserException("Invalid URI string - expected user:password");
			return result;
		}
		D_ASSERT(token_count > 2);
		result.emplace_back("user");
		result.emplace_back("passwd");
		result.emplace_back("host");
		current_pos = 2;
	} else {
		// neither user nor password - this MUST be the host
		result.emplace_back("host");
		current_pos = 0;
	}
	if (current_pos + 1 == token_count) {
		// we have parsed the entire string (until the attributes)
		return result;
	}
	// current_pos points at the host token - its delimiter tells us what follows
	if (tokens[current_pos].delimiter == ':') {
		// host:port
		result.emplace_back("port");
		current_pos++;
		if (current_pos + 1 == token_count) {
			return result;
		}
		// we still have a "/schema"
		if (tokens[current_pos].delimiter != '/') {
			// stop here so this error is not replaced by the more generic one below
			error = ParserException("Invalid URI string - expected host:port/schema");
			return result;
		}
		result.emplace_back("db");
		current_pos++;
	} else if (tokens[current_pos].delimiter == '/') {
		// host/schema
		result.emplace_back("db");
		current_pos++;
	} else {
		// stop here so this error is not replaced by the more generic one below
		error = ParserException("Invalid URI string - expected host:port or host/schema");
		return result;
	}
	if (current_pos + 1 != token_count) {
		// there are left-over tokens between the schema and the attribute section
		error = ParserException("Invalid URI string - expected ? after [user[:[password]]@]host[:port][/schema]");
	}
	return result;
}
200+
201+
void ParseMainAttributes(const vector<URIToken> &tokens, idx_t token_count, vector<URIValue> &result, ErrorData &error) {
202+
auto attribute_names = GetAttributeNames(tokens, token_count, error);
203+
if (error.HasError()) {
204+
return;
205+
}
206+
D_ASSERT(attribute_names.size() == token_count);
207+
for(idx_t i = 0; i < token_count; i++) {
208+
result.emplace_back(attribute_names[i], tokens[i].value);
209+
}
210+
}
211+
212+
void ParseAttributes(const vector<URIToken> &tokens, idx_t attribute_start, vector<URIValue> &result) {
213+
unordered_map<string, string> uri_attribute_map;
214+
uri_attribute_map["socket"] = "socket";
215+
uri_attribute_map["compression"] = "compression";
216+
uri_attribute_map["ssl-mode"] = "ssl_mode";
217+
uri_attribute_map["ssl-ca"] = "ssl_ca";
218+
uri_attribute_map["ssl-capath"] = "ssl_capath";
219+
uri_attribute_map["ssl-cert"] = "ssl_cert";
220+
uri_attribute_map["ssl-cipher"] = "ssl_cipher";
221+
uri_attribute_map["ssl-crl"] = "ssl_crl";
222+
uri_attribute_map["ssl-crlpath"] = "ssl_crlpath";
223+
uri_attribute_map["ssl-key"] = "ssl_key";
224+
225+
// parse key=value attributes
226+
for(idx_t i = attribute_start; i < tokens.size(); i += 2) {
227+
// check if the format is correct
228+
if (i + 1 >= tokens.size() || tokens[i].delimiter != '=') {
229+
throw ParserException("Invalid URI string - expected attribute=value pairs after ?");
230+
}
231+
if (tokens[i + 1].delimiter != '\0' && tokens[i + 1].delimiter != '&') {
232+
throw ParserException("Invalid URI string - attribute=value pairs must be separated by &");
233+
}
234+
auto entry = uri_attribute_map.find(tokens[i].value);
235+
if (entry == uri_attribute_map.end()) {
236+
string supported_options;
237+
for(auto &entry : uri_attribute_map) {
238+
if (!supported_options.empty()) {
239+
supported_options += ", ";
240+
}
241+
supported_options += entry.first;
242+
}
243+
throw ParserException("Invalid URI string - unsupported attribute \"%s\"\nSupported options: %s", tokens[i].value, supported_options);
244+
}
245+
result.emplace_back(entry->second, tokens[i + 1].value);
246+
}
247+
}
248+
249+
//! Converts the tokens of a URI into a list of (option name, value) pairs.
//! \param tokens the tokens of the URI
//! \param error  receives the error when the part before the '?' does not have the expected layout
//! \return the extracted pairs: first the main attributes, then the attribute=value pairs after '?'.
//!         The result holds no pairs when "error" is set.
//! Malformed or unsupported attribute=value pairs make ParseAttributes throw a ParserException.
vector<URIValue> ExtractURIValues(const vector<URIToken> &tokens, ErrorData &error) {
	// [scheme://][user[:[password]]@]host[:port][/schema][?attribute1=value1&attribute2=value2...
	vector<URIValue> result;
	if (tokens.empty()) {
		return result;
	}
	// figure out how many "non-attribute" tokens we have
	idx_t attribute_start = tokens.size();
	for (idx_t i = 0; i < tokens.size(); i++) {
		if (tokens[i].delimiter == '?') {
			// found a question-mark - the attribute=value pairs start at the next token
			attribute_start = i + 1;
			break;
		}
	}

	// parse the main attributes in the string
	ParseMainAttributes(tokens, attribute_start, result, error);
	if (error.HasError()) {
		// the main part is not a valid URI - do not look at the attributes.
		// ParseAttributes throws instead of filling in "error", which would turn a string that is
		// merely "not a URI" into a hard failure for the caller.
		return result;
	}
	// parse key-value attributes
	ParseAttributes(tokens, attribute_start, result);

	return result;
}
272+
273+
bool TryConvertURIInternal(const string &dsn, idx_t start_pos, string &connection_string, ErrorData &error) {
274+
// parse tokens from the string
275+
auto tokens = ParseURITokens(dsn, start_pos);
276+
277+
auto values = ExtractURIValues(tokens, error);
278+
if (error.HasError()) {
279+
return false;
280+
}
281+
for(auto &val : values) {
282+
if (!connection_string.empty()) {
283+
connection_string += " ";
284+
}
285+
connection_string += val.name;
286+
connection_string += "=";
287+
connection_string += EscapeConnectionString(val.value);
288+
}
289+
return true;
290+
}
291+
292+
//! Rewrites dsn in-place into a "name=value ..." connection string if it can be parsed as a URI.
//! A dsn that cannot be parsed as a URI is left unchanged, unless it starts with the mysql:// or
//! mysqlx:// scheme - in that case the parse error is thrown.
void TryConvertURI(string &dsn) {
	// [scheme://][user[:[password]]@]host[:port][/schema][?attribute1=value1&attribute2=value2...
	// skip past the scheme, if there is one (either mysql:// or mysqlx://)
	idx_t scheme_length = 0;
	if (StringUtil::StartsWith(dsn, "mysql://")) {
		scheme_length = 8;
	} else if (StringUtil::StartsWith(dsn, "mysqlx://")) {
		scheme_length = 9;
	}
	const bool has_scheme = scheme_length > 0;

	// try to convert this as a URI
	string converted;
	ErrorData conversion_error;
	if (!TryConvertURIInternal(dsn, scheme_length, converted, conversion_error)) {
		// not a URI
		if (has_scheme) {
			// but it started with mysql:// or mysqlx:// - throw an error
			conversion_error.Throw();
		}
		return;
	}
	// success! this is a URI
	dsn = std::move(converted);
}
316+
67317
string MySQLCatalog::GetConnectionString(ClientContext &context, const string &attach_path, string secret_name) {
68318
// if no secret is specified we default to the unnamed mysql secret, if it
69319
// exists
@@ -75,6 +325,10 @@ string MySQLCatalog::GetConnectionString(ClientContext &context, const string &a
75325
}
76326
auto secret_entry = GetSecret(context, secret_name);
77327
auto connection_string = attach_path;
328+
329+
// if the connection string is a URI, try and convert it
330+
TryConvertURI(connection_string);
331+
78332
if (secret_entry) {
79333
// secret found - read data
80334
const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_entry->secret);

test/sql/attach_dsn.test

-6
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,6 @@ require mysql_scanner
66

77
require-env MYSQL_TEST_DATABASE_AVAILABLE
88

9-
# dsn parsing failures
10-
statement error
11-
ATTACH 'host' AS s (TYPE MYSQL_SCANNER)
12-
----
13-
expected key=value pairs separated by spaces
14-
159
statement error
1610
ATTACH 'host=' AS s (TYPE MYSQL_SCANNER)
1711
----

test/sql/attach_uri.test

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# name: test/sql/attach_uri.test
2+
# description: Test attaching using a URI
3+
# group: [storage]
4+
5+
require mysql_scanner
6+
7+
require-env MYSQL_TEST_DATABASE_AVAILABLE
8+
9+
statement ok
10+
PRAGMA enable_verification
11+
12+
# create a default secret that fills in the correct missing values in the URI
13+
statement ok
14+
CREATE SECRET (
15+
TYPE MYSQL,
16+
HOST localhost,
17+
USER root,
18+
PORT 0
19+
);
20+
21+
# uri with mysql: prefix
22+
statement ok
23+
ATTACH 'mysql:root@localhost' AS uri_attach
24+
25+
statement ok
26+
DETACH uri_attach
27+
28+
# try various URIs
29+
foreach uri localhost root@localhost mysql://localhost mysql://localhost:0 mysql://root@localhost mysql://root@localhost:0 mysql://root:@localhost:0 mysql://root:@localhost:0?compression=preferred
30+
31+
statement ok
32+
ATTACH '${uri}' AS uri_attach (TYPE MYSQL)
33+
34+
statement ok
35+
DETACH uri_attach
36+
37+
endloop
38+
39+
# now with an unknown database
40+
foreach uri mysql://localhost/unknown_db mysql://localhost:0/unknown_db mysql://root@localhost/unknown_db mysql://root@localhost:0/unknown_db mysql://root:@localhost:0/unknown_db
41+
42+
statement error
43+
ATTACH '${uri}' AS secret_attach (TYPE MYSQL)
44+
----
45+
unknown_db
46+
47+
endloop
48+
49+
# invalid URIs
50+
foreach uri mysql://abc@abc@localhost mysql://abc:abc:abc mysql://abc:abc/abc/abc mysql://localhost?abc mysql://localhost?abc=1?abc=2
51+
52+
statement error
53+
ATTACH '${uri}' AS secret_attach (TYPE MYSQL)
54+
----
55+
Invalid URI string
56+
57+
endloop
58+
59+
# unrecognized attribute
60+
statement error
61+
ATTACH 'mysql://root@localhost?unrecognized_attribute=42' AS secret_attach (TYPE MYSQL)
62+
----
63+
unrecognized_attribute
64+
65+
statement error
66+
ATTACH 'mysql://root@localhost?attribute_with_escape_codes_%20%3C%3E%23%25%2B%7B%7D%7C%5C%5E%7E%5B%5D%60%3B%2F%3F%3A%40%3D%26%24%XX%=42' AS secret_attach (TYPE MYSQL)
67+
----
68+
attribute_with_escape_codes_ <>#%+{}|\^~[]`;/?;@=&$%XX%

0 commit comments

Comments
 (0)