Skip to content

Commit df71159

Browse files
author
Horia Chiorean
authored
Merge pull request #1649 from hchiorean/MODE-2683
MODE-2683 Fixes the behavior of Regex queries for the Lucene Index Provider
2 parents 7432510 + cb5fbb9 commit df71159

File tree

4 files changed

+46
-17
lines changed

4 files changed

+46
-17
lines changed

index-providers/modeshape-lucene-index-provider/src/main/java/org/modeshape/jcr/index/lucene/query/CompareStringQuery.java

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.apache.lucene.index.LeafReaderContext;
2626
import org.apache.lucene.index.Term;
2727
import org.apache.lucene.search.Query;
28-
import org.apache.lucene.search.RegexpQuery;
2928
import org.apache.lucene.search.TermQuery;
3029
import org.apache.lucene.search.TermRangeQuery;
3130
import org.apache.lucene.search.Weight;
@@ -194,7 +193,7 @@ protected static Query createQueryForNodesWithFieldLike(String likeExpression,
194193
return createQueryForNodesWithFieldEqualTo(likeExpression, fieldName, caseOperation);
195194
}
196195
if (caseOperation == null) {
197-
// We can just do a normal Wildcard or RegEx query ...
196+
// We can just do a normal Wildcard query ...
198197

199198
// '%' matches 0 or more characters
200199
// '_' matches any single character
@@ -210,16 +209,10 @@ protected static Query createQueryForNodesWithFieldLike(String likeExpression,
210209
return new WildcardQuery(new Term(fieldName, expression));
211210
}
212211
}
213-
// Create a regex query (which will be done using the correct case) ...
212+
// Create a regex query...
214213
String regex = QueryUtil.toRegularExpression(likeExpression);
215-
216-
int flags = Pattern.UNICODE_CASE;
217-
if (caseOperation != null) {
218-
// if we're searching either for the UPPERCASE or LOWERCASE of something, use Case Insensitive matching
219-
// even though it could produce false positive
220-
flags = flags | Pattern.CASE_INSENSITIVE;
221-
}
222-
return new RegexpQuery(new Term(fieldName, regex), flags);
214+
Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
215+
return new RegexQuery(fieldName, pattern, caseOperation);
223216
}
224217

225218
/**

index-providers/modeshape-lucene-index-provider/src/main/java/org/modeshape/jcr/index/lucene/query/LuceneQueryFactory.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
import org.apache.lucene.search.MatchNoDocsQuery;
5050
import org.apache.lucene.search.PhraseQuery;
5151
import org.apache.lucene.search.Query;
52-
import org.apache.lucene.search.RegexpQuery;
5352
import org.modeshape.common.annotation.Immutable;
5453
import org.modeshape.common.annotation.ThreadSafe;
5554
import org.modeshape.jcr.JcrI18n;
@@ -889,16 +888,15 @@ protected Query pathFieldQuery( String field, Operator operator, Object value, F
889888
if (likeExpression.contains("[%]")) {
890889
// We can't use '[%]' because we only want to match digits,
891890
// so handle this using a regex ...
892-
// !!! LUCENE Regexp is not the same as Java's. See the javadoc RegExp
893891
String regex = likeExpression;
894892
regex = regex.replace("[%]", "(\\[[0-9]+\\])?");
895-
regex = regex.replaceAll("\\[\\d+\\]", "\\[[0-9]+\\]");
893+
regex = regex.replaceAll("\\[(\\d+)\\]", "\\\\[$1\\\\]");
896894
//regex = regex.replace("]", "\\]");
897895
regex = regex.replace("*", ".*");
898896
regex = regex.replace("%", ".*").replace("_", ".");
899897
// Now create a regex query ...
900-
int flags = caseOperation == null ? 0 : Pattern.CASE_INSENSITIVE;
901-
return new RegexpQuery(new Term(field, regex), flags);
898+
Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
899+
return new RegexQuery(field, pattern, caseOperation);
902900
} else {
903901
return CompareStringQuery.createQueryForNodesWithFieldLike(likeExpression, field, caseOperation);
904902
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* ModeShape (http://www.modeshape.org)
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.modeshape.jcr.index.lucene.query;
18+
19+
import java.util.function.Function;
20+
import java.util.regex.Pattern;
21+
22+
/**
23+
* Lucene query which uses Java Regex syntax and flags to match the content stored in the indexes against a {@link Pattern}
24+
*
25+
* @author Horia Chiorean ([email protected])
26+
*/
27+
public final class RegexQuery extends CompareQuery<String> {
28+
29+
public RegexQuery(String fieldName,
30+
Pattern pattern,
31+
Function<String, String> caseOperation) {
32+
super(fieldName, pattern.pattern(), (storedValue, regexp) -> pattern.matcher(storedValue).matches(), caseOperation);
33+
}
34+
35+
@Override
36+
protected String convertValue(String casedValue) {
37+
return casedValue;
38+
}
39+
}

index-providers/modeshape-lucene-index-provider/src/test/java/org/modeshape/jcr/index/lucene/LuceneIndexProviderTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ public void shouldNotReindexOnStartup() throws Exception {
164164

165165
@Test
166166
@FixFor("MODE-2683")
167-
@Ignore
168167
public void shouldUseIndexWithUpperAndLowerOperands() throws Exception {
169168
registerValueIndex("descriptionIndex", "mix:title", "Index for the 'jcr:title' property on mix:title", "*", "jcr:title",
170169
PropertyType.STRING);

0 commit comments

Comments
 (0)