Skip to content

Commit 1d1afc5

Browse files
committed
Make allow_malformed option apply to url decoding
1 parent df045ca commit 1d1afc5

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

src/main/java/org/elasticsearch/index/analysis/url/URLTokenizer.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,13 @@ private List<Token> tokenize(URL url, URLPart part) throws IOException {
196196
int start = 0;
197197
int end = 0;
198198
if (urlDecode) {
199-
partString = URLDecoder.decode(partString, "UTF-8");
199+
try {
200+
partString = URLDecoder.decode(partString, "UTF-8");
201+
} catch (IllegalArgumentException e) {
202+
if (!allowMalformed) {
203+
throw new IOException("Error performing URL decoding on string: " + partString, e);
204+
}
205+
}
200206
}
201207
switch (part) {
202208
case HOST:

src/test/java/org/elasticsearch/index/analysis/url/URLTokenizerTest.java

+28
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,34 @@ public void testAllowMalformed() throws IOException {
147147
}
148148

149149

150+
/**
 * Checks URL decoding of the query part: with {@code setUrlDecode(true)}, the
 * percent-escape {@code %20} in {@code baz=foo%20bat} is expected to come out
 * as a space in the emitted token text.
 */
@Test
151+
public void testUrlDecode() throws Exception {
152+
String url = "http://foo.com?baz=foo%20bat";
153+
URLTokenizer tokenizer = createTokenizer(url, URLPart.QUERY);
154+
tokenizer.setUrlDecode(true);
155+
// Expected output is the decoded query string ("%20" -> " ").
assertTokenStreamContents(tokenizer, stringArray("baz=foo bat"));
156+
}
157+
158+
159+
/**
 * Checks the failure path of URL decoding: the query contains {@code %2v},
 * which is not a valid percent-escape, and this test does not call
 * {@code setAllowMalformed}. The test passes only if an {@link IOException}
 * is thrown.
 */
@Test(expected = IOException.class)
160+
public void testUrlDecodeIllegalCharacters() throws Exception {
161+
String url = "http://foo.com?baz=foo%2vbat";
162+
URLTokenizer tokenizer = createTokenizer(url, URLPart.QUERY);
163+
tokenizer.setUrlDecode(true);
164+
// NOTE(review): the "" argument is presumably a placeholder, since the
// exception is expected before any token comparison matters; confirm.
assertTokenStreamContents(tokenizer, "");
165+
}
166+
167+
168+
/**
 * Checks that enabling both {@code setUrlDecode(true)} and
 * {@code setAllowMalformed(true)} tolerates the invalid escape {@code %2v}:
 * no exception is expected, and the expected token text is the original,
 * undecoded query string.
 */
@Test
169+
public void testUrlDecodeAllowMalformed() throws Exception {
170+
String url = "http://foo.com?baz=foo%2vbat";
171+
URLTokenizer tokenizer = createTokenizer(url, URLPart.QUERY);
172+
tokenizer.setUrlDecode(true);
173+
tokenizer.setAllowMalformed(true);
174+
// The query is expected to pass through unchanged because decoding failed.
assertTokenStreamContents(tokenizer, "baz=foo%2vbat");
175+
}
176+
177+
150178
private URLTokenizer createTokenizer(String input, URLPart part) throws IOException {
151179
URLTokenizer tokenizer = new URLTokenizer(part);
152180
tokenizer.setReader(new StringReader(input));

0 commit comments

Comments
 (0)