Skip to content

Commit

Permalink
Merge branch 'main' into release-notes-2.19-on-main
Browse files Browse the repository at this point in the history
  • Loading branch information
jainankitk committed Feb 11, 2025
2 parents 4fe7fdd + 302a3fd commit 0910451
Show file tree
Hide file tree
Showing 11 changed files with 23 additions and 412 deletions.
1 change: 1 addition & 0 deletions CHANGELOG-3.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Refactor `:libs` module `bootstrap` package to eliminate top level split packages for JPMS support ([#17117](https://github.com/opensearch-project/OpenSearch/pull/17117))
- Refactor the codebase to eliminate top level split packages for JPMS support ([#17153](https://github.com/opensearch-project/OpenSearch/pull/17153)
- Refactor `:server` module `org.apacge.lucene` package to eliminate top level split packages for JPMS support ([#17241](https://github.com/opensearch-project/OpenSearch/pull/17241))
- Stop minimizing automata used for case-insensitive matches ([#17268](https://github.com/opensearch-project/OpenSearch/pull/17268))
- Refactor the `:server` module `org.opensearch.client` to `org.opensearch.transport.client` to eliminate top level split packages for JPMS support ([#17272](https://github.com/opensearch-project/OpenSearch/pull/17272))

### Deprecated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
import org.opensearch.common.settings.Settings;
import org.opensearch.core.common.Strings;
import org.opensearch.index.IndexNotFoundException;
import org.opensearch.lucene.util.automaton.MinimizationOperations;
import org.opensearch.search.builder.SearchSourceBuilder;

import java.util.List;
Expand Down Expand Up @@ -114,7 +113,7 @@ static CharacterRunAutomaton buildRemoteAllowlist(List<String> allowlist) {
return new CharacterRunAutomaton(Automata.makeEmpty());
}
Automaton automaton = Regex.simpleMatchToAutomaton(allowlist.toArray(Strings.EMPTY_ARRAY));
automaton = MinimizationOperations.minimize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
if (Operations.isTotal(automaton)) {
throw new IllegalArgumentException(
"Refusing to start because allowlist "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.opensearch.lucene.util.automaton.MinimizationOperations;

import java.util.ArrayList;
import java.util.Iterator;
Expand All @@ -57,12 +56,12 @@ public static Automaton caseInsensitivePrefix(String s) {
List<Automaton> list = new ArrayList<>();
Iterator<Integer> iter = s.codePoints().iterator();
while (iter.hasNext()) {
list.add(toCaseInsensitiveChar(iter.next(), Integer.MAX_VALUE));
list.add(toCaseInsensitiveChar(iter.next()));
}
list.add(Automata.makeAnyString());

Automaton a = Operations.concatenate(list);
a = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
assert a.isDeterministic();
return a;
}

Expand All @@ -85,14 +84,14 @@ public static AutomatonQuery caseInsensitivePrefixQuery(Term prefix, MultiTermQu
*/
public static AutomatonQuery caseInsensitiveTermQuery(Term term) {
BytesRef prefix = term.bytes();
return new AutomatonQuery(term, toCaseInsensitiveString(prefix, Integer.MAX_VALUE));
return new AutomatonQuery(term, toCaseInsensitiveString(prefix));
}

/**
* Build an automaton matching a wildcard pattern, ASCII case insensitive, if the method is null, then will use {@link MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE}.
*/
public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery, MultiTermQuery.RewriteMethod method) {
return createAutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery, Integer.MAX_VALUE), method);
return createAutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery), method);
}

/**
Expand Down Expand Up @@ -124,7 +123,7 @@ public static AutomatonQuery createAutomatonQuery(Term term, Automaton automaton
* Convert Lucene wildcard syntax into an automaton.
*/
@SuppressWarnings("fallthrough")
public static Automaton toCaseInsensitiveWildcardAutomaton(Term wildcardquery, int maxDeterminizedStates) {
public static Automaton toCaseInsensitiveWildcardAutomaton(Term wildcardquery) {
List<Automaton> automata = new ArrayList<>();

String wildcardText = wildcardquery.text();
Expand All @@ -148,32 +147,28 @@ public static Automaton toCaseInsensitiveWildcardAutomaton(Term wildcardquery, i
break;
} // else fallthru, lenient parsing with a trailing \
default:
automata.add(toCaseInsensitiveChar(c, maxDeterminizedStates));
automata.add(toCaseInsensitiveChar(c));
}
i += length;
}

return Operations.concatenate(automata);
}

protected static Automaton toCaseInsensitiveString(BytesRef br, int maxDeterminizedStates) {
return toCaseInsensitiveString(br.utf8ToString(), maxDeterminizedStates);
protected static Automaton toCaseInsensitiveString(BytesRef br) {
return toCaseInsensitiveString(br.utf8ToString());
}

public static Automaton toCaseInsensitiveString(String s, int maxDeterminizedStates) {
public static Automaton toCaseInsensitiveString(String s) {
List<Automaton> list = new ArrayList<>();
Iterator<Integer> iter = s.codePoints().iterator();
while (iter.hasNext()) {
list.add(toCaseInsensitiveChar(iter.next(), maxDeterminizedStates));
list.add(toCaseInsensitiveChar(iter.next()));
}

Automaton a = Operations.concatenate(list);
a = MinimizationOperations.minimize(a, maxDeterminizedStates);
return a;

return Operations.concatenate(list);
}

public static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminizedStates) {
public static Automaton toCaseInsensitiveChar(int codepoint) {
Automaton case1 = Automata.makeChar(codepoint);
// For now we only work with ASCII characters
if (codepoint > 128) {
Expand All @@ -183,7 +178,6 @@ public static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminized
Automaton result;
if (altCase != codepoint) {
result = Operations.union(case1, Automata.makeChar(altCase));
result = MinimizationOperations.minimize(result, maxDeterminizedStates);
} else {
result = case1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
Term term = new Term(name(), bytesRef);
Query query = AutomatonQueries.createAutomatonQuery(
term,
AutomatonQueries.toCaseInsensitiveString(bytesRef.utf8ToString(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT),
AutomatonQueries.toCaseInsensitiveString(bytesRef.utf8ToString()),
MultiTermQuery.DOC_VALUES_REWRITE
);
if (boost() != 1f) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool
}
List<Automaton> automata = new ArrayList<>();
if (caseInsensitive) {
automata.add(AutomatonQueries.toCaseInsensitiveString(value, Integer.MAX_VALUE));
automata.add(AutomatonQueries.toCaseInsensitiveString(value));
} else {
automata.add(Automata.makeString(value));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
import org.opensearch.common.Nullable;
import org.opensearch.common.regex.Regex;
import org.opensearch.core.index.Index;
import org.opensearch.lucene.util.automaton.MinimizationOperations;

import java.util.Collection;
import java.util.List;
Expand Down Expand Up @@ -147,6 +146,8 @@ private static CharacterRunAutomaton buildCharacterRunAutomaton(Collection<Syste
Optional<Automaton> automaton = descriptors.stream()
.map(descriptor -> Regex.simpleMatchToAutomaton(descriptor.getIndexPattern()))
.reduce(Operations::union);
return new CharacterRunAutomaton(MinimizationOperations.minimize(automaton.orElse(Automata.makeEmpty()), Integer.MAX_VALUE));
return new CharacterRunAutomaton(
Operations.determinize(automaton.orElse(Automata.makeEmpty()), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT)
);
}
}
Loading

0 comments on commit 0910451

Please sign in to comment.