Skip to content

Commit 3f7a7b4

Browse files
committed
MODE-2720: Lucene index provider does not correctly handle LIKE constraints containing a backslash
1 parent 00f4d20 commit 3f7a7b4

File tree

2 files changed

+107
-38
lines changed
  • index-providers/modeshape-lucene-index-provider/src/main/java/org/modeshape/jcr/index/lucene/query
  • modeshape-jcr/src/main/java/org/modeshape/jcr/query/engine

2 files changed

+107
-38
lines changed

index-providers/modeshape-lucene-index-provider/src/main/java/org/modeshape/jcr/index/lucene/query/CompareStringQuery.java

Lines changed: 83 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.util.Objects;
2121
import java.util.function.BiPredicate;
2222
import java.util.function.Function;
23+
import java.util.function.IntPredicate;
2324
import java.util.regex.Pattern;
2425
import javax.jcr.query.qom.Comparison;
2526
import org.apache.lucene.index.LeafReaderContext;
@@ -29,7 +30,6 @@
2930
import org.apache.lucene.search.TermRangeQuery;
3031
import org.apache.lucene.search.Weight;
3132
import org.apache.lucene.search.WildcardQuery;
32-
import org.apache.lucene.util.BytesRef;
3333
import org.modeshape.common.annotation.Immutable;
3434
import org.modeshape.jcr.query.engine.QueryUtil;
3535

@@ -40,7 +40,44 @@
4040
*/
4141
@Immutable
4242
public class CompareStringQuery extends CompareQuery<String> {
43-
43+
private enum FieldComparison {
44+
EQ(cmp -> cmp == 0), GT(cmp -> cmp > 0), GE(cmp -> cmp >= 0), LT(cmp -> cmp < 0), LE(cmp -> cmp <= 0);
45+
46+
final IntPredicate testCmp;
47+
48+
private FieldComparison(IntPredicate testCmp) {
49+
this.testCmp = testCmp;
50+
}
51+
52+
boolean test(int cmp) {
53+
return testCmp.test(cmp);
54+
}
55+
56+
Query createQueryForNodesWithField(String constraintValue, String fieldName, Function<String, String> caseOperation) {
57+
constraintValue = QueryUtil.unescape(constraintValue);
58+
if (caseOperation == null) {
59+
// no need to process the stored index values, so we can use a default Lucene query
60+
if (this == EQ) {
61+
return new TermQuery(new Term(fieldName, constraintValue));
62+
}
63+
return TermRangeQuery.newStringRange(fieldName,
64+
test(-1) ? null : constraintValue,
65+
test(1) ? null : constraintValue,
66+
test(0),
67+
test(0));
68+
}
69+
final BiPredicate<String, String> evaluator;
70+
if (this == EQ) {
71+
evaluator = Objects::equals;
72+
} else {
73+
evaluator = ( s1, s2 ) -> test(STRING_COMPARATOR.compare(s1, s2));
74+
}
75+
return new CompareStringQuery(fieldName, constraintValue, evaluator, caseOperation);
76+
}
77+
}
78+
79+
private static final String LUCENE_SPECIAL_CHARACTERS = "+-&|!(){}[]^\"~?*:\\";
80+
4481
/**
4582
* Construct a {@link Query} implementation that scores nodes according to the supplied comparator.
4683
*
@@ -80,11 +117,7 @@ public Query clone() {
80117
public static Query createQueryForNodesWithFieldEqualTo(String constraintValue,
81118
String fieldName,
82119
Function<String, String> caseOperation) {
83-
if (caseOperation == null) {
84-
// no need to process the stored index values, so we can use a default Lucene query
85-
return new TermQuery(new Term(fieldName, constraintValue));
86-
}
87-
return new CompareStringQuery(fieldName, constraintValue, Objects::equals, caseOperation);
120+
return FieldComparison.EQ.createQueryForNodesWithField(constraintValue, fieldName, caseOperation);
88121
}
89122

90123
/**
@@ -100,14 +133,9 @@ public static Query createQueryForNodesWithFieldEqualTo(String constraintValue,
100133
public static Query createQueryForNodesWithFieldGreaterThan(String constraintValue,
101134
String fieldName,
102135
Function<String, String> caseOperation) {
103-
if (caseOperation == null) {
104-
// no need to process the stored index values, so we can use a default Lucene query
105-
return new TermRangeQuery(fieldName, new BytesRef(constraintValue), null, false, false);
106-
}
107-
return new CompareStringQuery(fieldName, constraintValue,
108-
(s1, s2) -> STRING_COMPARATOR.compare(s1, s2) > 0, caseOperation);
136+
return FieldComparison.GT.createQueryForNodesWithField(constraintValue, fieldName, caseOperation);
109137
}
110-
138+
111139
/**
112140
* Construct a {@link Query} implementation that scores documents with a string field value that is greater than or equal to
113141
* the supplied constraint value.
@@ -121,12 +149,7 @@ public static Query createQueryForNodesWithFieldGreaterThan(String constraintVal
121149
public static Query createQueryForNodesWithFieldGreaterThanOrEqualTo(String constraintValue,
122150
String fieldName,
123151
Function<String, String> caseOperation) {
124-
if (caseOperation == null) {
125-
// no need to process the stored index values, so we can use a default Lucene query
126-
return new TermRangeQuery(fieldName, new BytesRef(constraintValue), null, true, false);
127-
}
128-
return new CompareStringQuery(fieldName, constraintValue,
129-
(s1, s2) -> STRING_COMPARATOR.compare(s1, s2) >= 0, caseOperation);
152+
return FieldComparison.GE.createQueryForNodesWithField(constraintValue, fieldName, caseOperation);
130153
}
131154

132155
/**
@@ -142,12 +165,7 @@ public static Query createQueryForNodesWithFieldGreaterThanOrEqualTo(String cons
142165
public static Query createQueryForNodesWithFieldLessThan(String constraintValue,
143166
String fieldName,
144167
Function<String, String> caseOperation) {
145-
if (caseOperation == null) {
146-
// no need to process the stored index values, so we can use a default Lucene query
147-
return new TermRangeQuery(fieldName, null, new BytesRef(constraintValue), false, false);
148-
}
149-
return new CompareStringQuery(fieldName, constraintValue,
150-
(s1, s2) -> STRING_COMPARATOR.compare(s1, s2) < 0, caseOperation);
168+
return FieldComparison.LT.createQueryForNodesWithField(constraintValue, fieldName, caseOperation);
151169
}
152170

153171
/**
@@ -163,12 +181,7 @@ public static Query createQueryForNodesWithFieldLessThan(String constraintValue,
163181
public static Query createQueryForNodesWithFieldLessThanOrEqualTo(String constraintValue,
164182
String fieldName,
165183
Function<String, String> caseOperation) {
166-
if (caseOperation == null) {
167-
// no need to process the stored index values, so we can use a default Lucene query
168-
return new TermRangeQuery(fieldName, null, new BytesRef(constraintValue), true, true);
169-
}
170-
return new CompareStringQuery(fieldName, constraintValue,
171-
(s1, s2) -> STRING_COMPARATOR.compare(s1, s2) <= 0, caseOperation);
184+
return FieldComparison.LE.createQueryForNodesWithField(constraintValue, fieldName, caseOperation);
172185
}
173186

174187
/**
@@ -205,8 +218,7 @@ protected static Query createQueryForNodesWithFieldLike(String likeExpression,
205218
char firstChar = likeExpression.charAt(0);
206219
if (firstChar != '%' && firstChar != '_' && firstChar != '*' && firstChar != '?') {
207220
// Create a wildcard query ...
208-
String expression = toWildcardExpression(likeExpression);
209-
return new WildcardQuery(new Term(fieldName, expression));
221+
return new WildcardQuery(new Term(fieldName, toWildcardExpression(likeExpression)));
210222
}
211223
}
212224
// Create a regex query...
@@ -218,12 +230,47 @@ protected static Query createQueryForNodesWithFieldLike(String likeExpression,
218230
/**
219231
* Convert the JCR like expression to a Lucene wildcard expression. The JCR like expression uses '%' to match 0 or more
220232
* characters, '_' to match any single character, '\x' to match the 'x' character, and all other characters to match
221-
* themselves.
233+
* themselves. Since ModeShape v5.5, this method additionally escapes Lucene special characters, with the exception,
234+
* for backwards compatibility, of the '*' and '?' wildcard characters themselves, which are supported alternatives
235+
* despite not being officially part of the JCR specification.
222236
*
223237
* @param likeExpression the like expression; may not be null
224238
* @return the expression that can be used with a WildcardQuery; never null
225239
*/
226240
protected static String toWildcardExpression( String likeExpression ) {
227-
return likeExpression.replace('%', '*').replace('_', '?').replaceAll("\\\\(.)", "$1");
241+
if (likeExpression.isEmpty()) {
242+
return likeExpression;
243+
}
244+
final int sz = likeExpression.length();
245+
final StringBuilder buf = new StringBuilder(sz);
246+
int pos = -1;
247+
while (++pos < sz) {
248+
final char c = likeExpression.charAt(pos);
249+
char out;
250+
switch (c) {
251+
case '%':
252+
case '*':
253+
buf.append('*');
254+
continue;
255+
case '_':
256+
case '?':
257+
buf.append('?');
258+
continue;
259+
case '\\':
260+
if (++pos < sz) {
261+
out = likeExpression.charAt(pos);
262+
break;
263+
}
264+
// weird case with a trailing backslash, treat as "escaped nothing" i.e. skip it
265+
continue;
266+
default:
267+
out = c;
268+
}
269+
if (LUCENE_SPECIAL_CHARACTERS.indexOf(out) >= 0) {
270+
buf.append('\\');
271+
}
272+
buf.append(c);
273+
}
274+
return buf.toString();
228275
}
229276
}

modeshape-jcr/src/main/java/org/modeshape/jcr/query/engine/QueryUtil.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,30 @@ public static boolean hasWildcardCharacters( String expression ) {
4949
}
5050
return false;
5151
}
52-
53-
52+
53+
/**
54+
* Removes {@code \}-triggered escape sequences from {@code jcrExpression}.
55+
* @param jcrExpression {@link String}, never {@code null}
56+
* @return {@link String}
57+
* @since 5.5
58+
*/
59+
public static String unescape( String jcrExpression ) {
60+
Objects.requireNonNull(jcrExpression);
61+
if (jcrExpression.indexOf('\\') < 0) {
62+
return jcrExpression;
63+
}
64+
final StringBuilder buf = new StringBuilder(jcrExpression.length());
65+
66+
for (CharacterIterator iter = new StringCharacterIterator(jcrExpression); iter.current() != CharacterIterator.DONE; iter.next()) {
67+
if (iter.current() == '\\' && iter.next() == '\\') {
68+
buf.append('\\');
69+
continue;
70+
}
71+
buf.append(iter.current());
72+
}
73+
return buf.toString();
74+
}
75+
5476
/**
5577
* Convert the JCR like expression to a regular expression. The JCR like expression uses '%' to match 0 or more characters,
5678
* '_' to match any single character, '\x' to match the 'x' character, and all other characters to match themselves. Note that

0 commit comments

Comments
 (0)