From ee4d7b959e7cc7e2998451e1d68f1506d1495501 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:52:08 +0000 Subject: [PATCH 1/3] Disable evaluation for grouping function queries under certain circumstances Feature flag disabling evaluation for queries with groupby functions --- .../DisableEvaluationForGroupingVisitor.java | 220 ++++++++++++++++++ .../query/planner/DefaultQueryPlanner.java | 26 ++- ...sableEvaluationForGroupingVisitorTest.java | 144 ++++++++++++ .../datawave/query/QueryLogicFactory.xml | 1 + 4 files changed, 388 insertions(+), 3 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java new file mode 100644 index 00000000000..02dd40a373f --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java @@ -0,0 +1,220 @@ +package datawave.query.jexl.visitors; + +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTAndNode; +import org.apache.commons.jexl3.parser.ASTEQNode; +import org.apache.commons.jexl3.parser.ASTERNode; +import org.apache.commons.jexl3.parser.ASTFunctionNode; +import org.apache.commons.jexl3.parser.ASTGENode; +import org.apache.commons.jexl3.parser.ASTGTNode; +import org.apache.commons.jexl3.parser.ASTLENode; +import org.apache.commons.jexl3.parser.ASTLTNode; +import org.apache.commons.jexl3.parser.ASTNENode; +import org.apache.commons.jexl3.parser.ASTNRNode; +import org.apache.commons.jexl3.parser.ASTOrNode; +import org.apache.commons.jexl3.parser.JexlNode; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.functions.FunctionJexlNodeVisitor; +import datawave.query.jexl.nodes.QueryPropertyMarker; + +/** + * Determines if a query can disable evaluation when a grouping function is present + *

+ * Evaluation cannot be disabled if any content, query, or filter functions exist, or if delayed or evaluation only markers are present. + */ +public class DisableEvaluationForGroupingVisitor extends ShortCircuitBaseVisitor { + + private boolean canDisableEvaluation = true; + + private final Set indexedFields; + private final Set indexOnlyFields; + + /** + * Public entrypoint, this visitor requires the full set of indexed and index only fields + * + * @param node + * the JexlNode + * @param indexedFields + * the set of indexed fields + * @param indexOnlyFields + * the set of index only fields + * @return true if evaluation can be disabled + */ + public static boolean canDisableEvaluation(JexlNode node, Set indexedFields, Set indexOnlyFields) { + DisableEvaluationForGroupingVisitor visitor = new DisableEvaluationForGroupingVisitor(indexedFields, indexOnlyFields); + node.jjtAccept(visitor, null); + return visitor.canDisableEvaluation; + } + + /** + * Private constructor to force static access + */ + private DisableEvaluationForGroupingVisitor(Set indexedFields, Set indexOnlyFields) { + this.indexedFields = indexedFields; + this.indexOnlyFields = indexOnlyFields; + } + + @Override + public Object visit(ASTAndNode node, Object data) { + if (!canDisableEvaluation) { + return data; + } + + QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(node); + if (instance.isAnyType()) { + switch (instance.getType()) { + case EVALUATION_ONLY: + case DELAYED: + canDisableEvaluation = false; + return data; + case BOUNDED_RANGE: + case EXCEEDED_OR: + case EXCEEDED_TERM: + case EXCEEDED_VALUE: + // continue recursing + break; + default: + // query planner should handled value, range terms + return data; + } + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTOrNode node, Object data) { + if (!canDisableEvaluation) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTNENode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTEQNode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTLTNode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTGTNode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTLENode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTGENode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTERNode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTNRNode node, Object data) { + if (!canDisableEvaluation || !isFieldIndexed(node)) { + return data; + } + + node.childrenAccept(this, data); + return data; + } + + @Override + public Object visit(ASTFunctionNode node, Object data) { + if (!canDisableEvaluation) { + return data; + } + + FunctionJexlNodeVisitor visitor = new FunctionJexlNodeVisitor(); + visitor.visit(node, data); + + switch (visitor.namespace()) { + case "filter": + case "content": + case "f": + // must evaluate for these functions + canDisableEvaluation = false; + return data; + } + + node.childrenAccept(this, data); + return data; + } + + private boolean isFieldIndexed(JexlNode node) { + String field = JexlASTHelper.getIdentifier(node); + if (field == null) { + return false; + } + + Object literal = JexlASTHelper.getLiteralValue(node); + if (literal == null) { + // term like (FIELD == null) requires an evaluation + canDisableEvaluation = false; + return false; + } + + boolean indexed = indexedFields.contains(field) || indexOnlyFields.contains(field); + + if (!indexed) { + canDisableEvaluation = false; + } + + return indexed; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 2549e5de3d4..eb41f9e573e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -103,9 +103,9 @@ import datawave.query.jexl.visitors.AddShardsAndDaysVisitor; import datawave.query.jexl.visitors.BoundedRangeDetectionVisitor; import datawave.query.jexl.visitors.BoundedRangeIndexExpansionVisitor; -import datawave.query.jexl.visitors.CaseSensitivityVisitor; import datawave.query.jexl.visitors.ConjunctionEliminationVisitor; import datawave.query.jexl.visitors.DepthVisitor; +import datawave.query.jexl.visitors.DisableEvaluationForGroupingVisitor; import datawave.query.jexl.visitors.DisjunctionEliminationVisitor; import datawave.query.jexl.visitors.ExecutableDeterminationVisitor; import datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE; @@ -135,7 +135,6 @@ import datawave.query.jexl.visitors.PushdownMissingIndexRangeNodesVisitor; import datawave.query.jexl.visitors.PushdownUnexecutableNodesVisitor; import datawave.query.jexl.visitors.QueryFieldsVisitor; -import datawave.query.jexl.visitors.QueryModelVisitor; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; import datawave.query.jexl.visitors.QueryPropertyMarkerSourceConsolidator; import datawave.query.jexl.visitors.QueryPruningVisitor; @@ -156,7 +155,6 @@ import datawave.query.jexl.visitors.ValidateFilterFunctionVisitor; import datawave.query.jexl.visitors.order.OrderByCostVisitor; import datawave.query.jexl.visitors.whindex.WhindexVisitor; -import datawave.query.language.functions.jexl.Unique; import datawave.query.model.QueryModel; import datawave.query.planner.async.AbstractQueryPlannerCallable; import datawave.query.planner.async.FetchCompositeMetadata; @@ -350,6 +348,10 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable { * performance impact. */ protected boolean showReducedQueryPrune = true; + /** + * Feature flag to attempt disabling evaluation under certain circumstances when a query contains a grouping function + */ + protected boolean disableGroupByEvaluation = false; // handles boilerplate operations that surround a visitor's execution (e.g., timers, logging, validating) private TimedVisitorManager visitorManager = new TimedVisitorManager(); @@ -559,6 +561,16 @@ protected CloseableIterable process(ScannerFactory scannerFactory, Me cfg = getQueryIterator(metadataHelper, config, "", false, false); } configureIterator(config, cfg, newQueryString, isFullTable); + + // check for the case where evaluation can be disabled due to the presence of Grouping functions + // but only if query functions and content functions are absent as well + if (!config.getFullTableScanEnabled() && config.getGroupFields().hasGroupByFields()) { + boolean canDisable = DisableEvaluationForGroupingVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields()); + if (canDisable) { + config.setDisableEvaluation(true); + addOption(cfg, QueryOptions.DISABLE_EVALUATION, "true", false); + } + } } final QueryData queryData = new QueryData().withQuery(newQueryString).withSettings(Lists.newArrayList(cfg)); @@ -3455,4 +3467,12 @@ public int getConcurrentTimeoutMillis() { public void setConcurrentTimeoutMillis(int concurrentTimeoutMillis) { this.concurrentTimeoutMillis = concurrentTimeoutMillis; } + + public boolean getDisableGroupByEvaluation() { + return disableGroupByEvaluation; + } + + public void setDisableGroupByEvaluation(boolean disableGroupByEvaluation) { + this.disableGroupByEvaluation = disableGroupByEvaluation; + } } diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java new file mode 100644 index 00000000000..100ba29d2a9 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java @@ -0,0 +1,144 @@ +package datawave.query.jexl.visitors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ParseException; +import org.junit.jupiter.api.Test; + +import datawave.query.jexl.JexlASTHelper; + +public class DisableEvaluationForGroupingVisitorTest { + + private final Set indexedFields = Set.of("INDEXED_FIELD", "INDEXED_FIELD2"); + private final Set indexOnlyFields = Set.of("INDEX_ONLY", "INDEX_ONLY2"); + + @Test + public void testEQ() { + // equals + test("INDEXED_FIELD == 'value'", true); + test("INDEX_ONLY == 'value'", true); + test("NON_INDEXED_FIELD == 'value'", false); + + // equals with null literal + test("INDEXED_FIELD == null", false); + test("INDEX_ONLY == null", false); + test("NON_INDEXED_FIELD == null", false); + } + + @Test + public void testNE() { + // not equals + test("INDEXED_FIELD != 'value'", true); + test("INDEX_ONLY != 'value'", true); + test("NON_INDEXED_FIELD != 'value'", false); + + // not equals with null literal + test("INDEXED_FIELD != null", false); + test("INDEX_ONLY != null", false); + test("NON_INDEXED_FIELD != null", false); + } + + @Test + public void testNotEQ() { + // not equals + test("!(INDEXED_FIELD == 'value')", true); + test("!(INDEX_ONLY == 'value')", true); + test("!(NON_INDEXED_FIELD == 'value')", false); + } + + @Test + public void testLt() { + // less than, not part of a bounded range + test("INDEXED_FIELD < 'value'", true); + test("INDEX_ONLY < 'value'", true); + test("NON_INDEXED_FIELD < 'value'", false); + } + + @Test + public void testGt() { + // greater than, not part of a bounded range + test("INDEXED_FIELD > 'value'", true); + test("INDEX_ONLY > 'value'", true); + test("NON_INDEXED_FIELD > 'value'", false); + } + + @Test + public void testLe() { + // less than equals, not part of a bounded range + test("INDEXED_FIELD <= 'value'", true); + test("INDEX_ONLY <= 'value'", true); + test("NON_INDEXED_FIELD <= 'value'", false); + } + + @Test + public void testGe() { + // less than, not part of a bounded range + test("INDEXED_FIELD >= 'value'", true); + test("INDEX_ONLY >= 'value'", true); + test("NON_INDEXED_FIELD >= 'value'", false); + } + + @Test + public void testBoundedRangeMarker() { + // bounded range + test("((_Bounded_ = true) && (INDEXED_FIELD > 'aa' && INDEXED_FIELD < 'bb'))", true); + test("((_Bounded_ = true) && (INDEX_ONLY > 'aa' && INDEX_ONLY < 'bb'))", true); + test("((_Bounded_ = true) && (NON_INDEXED_FIELD > 'aa' && NON_INDEXED_FIELD < 'bb'))", false); + } + + @Test + public void testValueExceededMarker() { + // exceeded value marker + test("((_Value_ = true) && (INDEXED_FIELD =~ 'ba.*'))", true); + test("((_Value_ = true) && (INDEX_ONLY =~ 'ba.*'))", true); + test("((_Value_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false); + } + + @Test + public void testTermExceededMarker() { + // exceeded term marker + test("((_Term_ = true) && (INDEXED_FIELD =~ 'ba.*'))", true); + test("((_Term_ = true) && (INDEX_ONLY =~ 'ba.*'))", true); + test("((_Term_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false); + } + + @Test + public void testDelayedMarker() { + // delayed leaf + test("((_Delayed_ = true) && (INDEXED_FIELD =~ 'ba.*'))", false); + test("((_Delayed_ = true) && (INDEX_ONLY =~ 'ba.*'))", false); + test("((_Delayed_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false); + // delayed bounded range + test("((_Delayed_ = true) && ((_Bounded_ = true) && (INDEXED_FIELD > 'aa' && INDEXED_FIELD < 'bb')))", false); + test("((_Delayed_ = true) && ((_Bounded_ = true) && (INDEX_ONLY > 'aa' && INDEX_ONLY < 'bb')))", false); + test("((_Delayed_ = true) && ((_Bounded_ = true) && (NON_INDEXED_FIELD > 'aa' && NON_INDEXED_FIELD < 'bb')))", false); + } + + @Test + public void testEvalOnlyMarker() { + // evaluation only marker + test("((_Eval_ = true) && (INDEXED_FIELD =~ 'ba.*'))", false); + test("((_Eval_ = true) && (INDEX_ONLY =~ 'ba.*'))", false); + test("((_Eval_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false); + } + + private void test(String query, boolean expected) { + ASTJexlScript script = parse(query); + boolean result = DisableEvaluationForGroupingVisitor.canDisableEvaluation(script, indexedFields, indexOnlyFields); + assertEquals(expected, result); + } + + private ASTJexlScript parse(String query) { + try { + return JexlASTHelper.parseAndFlattenJexlQuery(query); + } catch (ParseException e) { + fail("Failed to parse query: " + query); + throw new RuntimeException(e); + } + } + +} diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index e8b9e2bbb84..dfa461f223b 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -489,6 +489,7 @@ + From 4194ceec25ae9430fd6b719cd228558f8904551e Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Mon, 27 Jan 2025 10:07:21 +0000 Subject: [PATCH 2/3] Feature flag actually integrated into the DefaultQueryPlanner --- .../main/java/datawave/query/planner/DefaultQueryPlanner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index eb41f9e573e..c2e23333fa7 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -564,7 +564,7 @@ protected CloseableIterable process(ScannerFactory scannerFactory, Me // check for the case where evaluation can be disabled due to the presence of Grouping functions // but only if query functions and content functions are absent as well - if (!config.getFullTableScanEnabled() && config.getGroupFields().hasGroupByFields()) { + if (!config.getFullTableScanEnabled() && getDisableGroupByEvaluation() && config.getGroupFields().hasGroupByFields()) { boolean canDisable = DisableEvaluationForGroupingVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields()); if (canDisable) { config.setDisableEvaluation(true); From 1f71cd433d456e5f1a34e3912edeb097ea20231a Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Tue, 28 Jan 2025 13:26:54 +0000 Subject: [PATCH 3/3] Update via merge request feedback --- ...tor.java => DisableEvaluationVisitor.java} | 87 +++++++++++++------ .../query/planner/DefaultQueryPlanner.java | 19 ++-- ...java => DisableEvaluationVisitorTest.java} | 69 +++++++++++---- .../datawave/query/QueryLogicFactory.xml | 2 +- 4 files changed, 125 insertions(+), 52 deletions(-) rename warehouse/query-core/src/main/java/datawave/query/jexl/visitors/{DisableEvaluationForGroupingVisitor.java => DisableEvaluationVisitor.java} (69%) rename warehouse/query-core/src/test/java/datawave/query/jexl/visitors/{DisableEvaluationForGroupingVisitorTest.java => DisableEvaluationVisitorTest.java} (61%) diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationVisitor.java similarity index 69% rename from warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java rename to warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationVisitor.java index 02dd40a373f..a6e9e7fbd9c 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationVisitor.java @@ -12,6 +12,7 @@ import org.apache.commons.jexl3.parser.ASTLTNode; import org.apache.commons.jexl3.parser.ASTNENode; import org.apache.commons.jexl3.parser.ASTNRNode; +import org.apache.commons.jexl3.parser.ASTNotNode; import org.apache.commons.jexl3.parser.ASTOrNode; import org.apache.commons.jexl3.parser.JexlNode; @@ -20,11 +21,16 @@ import datawave.query.jexl.nodes.QueryPropertyMarker; /** - * Determines if a query can disable evaluation when a grouping function is present + * Determines if evaluation can be disabled for a given query. Evaluation can be disabled given the following conditions *

- * Evaluation cannot be disabled if any content, query, or filter functions exist, or if delayed or evaluation only markers are present. + *

*/ -public class DisableEvaluationForGroupingVisitor extends ShortCircuitBaseVisitor { +public class DisableEvaluationVisitor extends ShortCircuitBaseVisitor { private boolean canDisableEvaluation = true; @@ -43,7 +49,7 @@ public class DisableEvaluationForGroupingVisitor extends ShortCircuitBaseVisitor * @return true if evaluation can be disabled */ public static boolean canDisableEvaluation(JexlNode node, Set indexedFields, Set indexOnlyFields) { - DisableEvaluationForGroupingVisitor visitor = new DisableEvaluationForGroupingVisitor(indexedFields, indexOnlyFields); + DisableEvaluationVisitor visitor = new DisableEvaluationVisitor(indexedFields, indexOnlyFields); node.jjtAccept(visitor, null); return visitor.canDisableEvaluation; } @@ -51,7 +57,7 @@ public static boolean canDisableEvaluation(JexlNode node, Set indexedFie /** * Private constructor to force static access */ - private DisableEvaluationForGroupingVisitor(Set indexedFields, Set indexOnlyFields) { + private DisableEvaluationVisitor(Set indexedFields, Set indexOnlyFields) { this.indexedFields = indexedFields; this.indexOnlyFields = indexOnlyFields; } @@ -65,18 +71,19 @@ public Object visit(ASTAndNode node, Object data) { QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(node); if (instance.isAnyType()) { switch (instance.getType()) { - case EVALUATION_ONLY: case DELAYED: + case EVALUATION_ONLY: + case EXCEEDED_TERM: canDisableEvaluation = false; return data; case BOUNDED_RANGE: case EXCEEDED_OR: - case EXCEEDED_TERM: case EXCEEDED_VALUE: - // continue recursing - break; + // pass in a flag that says the parent was a marker + node.childrenAccept(this, true); + return data; default: - // query planner should handled value, range terms + // unknown marker type return data; } } @@ -97,6 +104,11 @@ public Object visit(ASTOrNode node, Object data) { @Override public Object visit(ASTNENode node, Object data) { + if (isRoot(node)) { + // negations cannot be the root of a query, so we also cannot disable evaluation + canDisableEvaluation = false; + } + if (!canDisableEvaluation || !isFieldIndexed(node)) { return data; } @@ -105,9 +117,10 @@ public Object visit(ASTNENode node, Object data) { } @Override - public Object visit(ASTEQNode node, Object data) { - if (!canDisableEvaluation || !isFieldIndexed(node)) { - return data; + public Object visit(ASTNotNode node, Object data) { + if (isRoot(node)) { + // negations cannot be the root of a query, so we also cannot disable evaluation + canDisableEvaluation = false; } node.childrenAccept(this, data); @@ -115,7 +128,7 @@ public Object visit(ASTEQNode node, Object data) { } @Override - public Object visit(ASTLTNode node, Object data) { + public Object visit(ASTEQNode node, Object data) { if (!canDisableEvaluation || !isFieldIndexed(node)) { return data; } @@ -125,27 +138,32 @@ public Object visit(ASTLTNode node, Object data) { } @Override - public Object visit(ASTGTNode node, Object data) { - if (!canDisableEvaluation || !isFieldIndexed(node)) { - return data; - } + public Object visit(ASTLTNode node, Object data) { + return visitRangeOperator(node, data); + } - node.childrenAccept(this, data); - return data; + @Override + public Object visit(ASTGTNode node, Object data) { + return visitRangeOperator(node, data); } @Override public Object visit(ASTLENode node, Object data) { - if (!canDisableEvaluation || !isFieldIndexed(node)) { - return data; - } - - node.childrenAccept(this, data); - return data; + return visitRangeOperator(node, data); } @Override public Object visit(ASTGENode node, Object data) { + return visitRangeOperator(node, data); + } + + private Object visitRangeOperator(JexlNode node, Object data) { + if (data == null) { + // not part of a bounded range. the best case is that the field is part of the event which + // will require evaluation + canDisableEvaluation = false; + } + if (!canDisableEvaluation || !isFieldIndexed(node)) { return data; } @@ -217,4 +235,21 @@ private boolean isFieldIndexed(JexlNode node) { return indexed; } + + /** + * Helper routine to determine if an arbitrary JexlNode is the root of a query + * + * @param node + * the JexlNode + * @return true if the node is the root + */ + private boolean isRoot(JexlNode node) { + while (node.jjtGetParent() != null) { + node = node.jjtGetParent(); + if (node instanceof ASTAndNode || node instanceof ASTOrNode) { + return false; + } + } + return true; + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index c2e23333fa7..e982e4d105a 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -105,7 +105,7 @@ import datawave.query.jexl.visitors.BoundedRangeIndexExpansionVisitor; import datawave.query.jexl.visitors.ConjunctionEliminationVisitor; import datawave.query.jexl.visitors.DepthVisitor; -import datawave.query.jexl.visitors.DisableEvaluationForGroupingVisitor; +import datawave.query.jexl.visitors.DisableEvaluationVisitor; import datawave.query.jexl.visitors.DisjunctionEliminationVisitor; import datawave.query.jexl.visitors.ExecutableDeterminationVisitor; import datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE; @@ -349,9 +349,10 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable { */ protected boolean showReducedQueryPrune = true; /** - * Feature flag to attempt disabling evaluation under certain circumstances when a query contains a grouping function + * Feature flag to attempt disabling evaluation under certain circumstances, i.e. when a query contains a groupby function or if the query does not require + * hit terms */ - protected boolean disableGroupByEvaluation = false; + protected boolean allowedToDisableEvaluation = false; // handles boilerplate operations that surround a visitor's execution (e.g., timers, logging, validating) private TimedVisitorManager visitorManager = new TimedVisitorManager(); @@ -564,8 +565,8 @@ protected CloseableIterable process(ScannerFactory scannerFactory, Me // check for the case where evaluation can be disabled due to the presence of Grouping functions // but only if query functions and content functions are absent as well - if (!config.getFullTableScanEnabled() && getDisableGroupByEvaluation() && config.getGroupFields().hasGroupByFields()) { - boolean canDisable = DisableEvaluationForGroupingVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields()); + if (!config.getFullTableScanEnabled() && getAllowedToDisableEvaluation() && config.getGroupFields().hasGroupByFields()) { + boolean canDisable = DisableEvaluationVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields()); if (canDisable) { config.setDisableEvaluation(true); addOption(cfg, QueryOptions.DISABLE_EVALUATION, "true", false); @@ -3468,11 +3469,11 @@ public void setConcurrentTimeoutMillis(int concurrentTimeoutMillis) { this.concurrentTimeoutMillis = concurrentTimeoutMillis; } - public boolean getDisableGroupByEvaluation() { - return disableGroupByEvaluation; + public boolean getAllowedToDisableEvaluation() { + return allowedToDisableEvaluation; } - public void setDisableGroupByEvaluation(boolean disableGroupByEvaluation) { - this.disableGroupByEvaluation = disableGroupByEvaluation; + public void setAllowedToDisableEvaluation(boolean allowedToDisableEvaluation) { + this.allowedToDisableEvaluation = allowedToDisableEvaluation; } } diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationVisitorTest.java similarity index 61% rename from warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java rename to warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationVisitorTest.java index 100ba29d2a9..a7a4a50dede 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationVisitorTest.java @@ -3,6 +3,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.fail; +import java.util.HashSet; import java.util.Set; import org.apache.commons.jexl3.parser.ASTJexlScript; @@ -11,7 +12,7 @@ import datawave.query.jexl.JexlASTHelper; -public class DisableEvaluationForGroupingVisitorTest { +public class DisableEvaluationVisitorTest { private final Set indexedFields = Set.of("INDEXED_FIELD", "INDEXED_FIELD2"); private final Set indexOnlyFields = Set.of("INDEX_ONLY", "INDEX_ONLY2"); @@ -32,8 +33,8 @@ public void testEQ() { @Test public void testNE() { // not equals - test("INDEXED_FIELD != 'value'", true); - test("INDEX_ONLY != 'value'", true); + test("INDEXED_FIELD != 'value'", false); + test("INDEX_ONLY != 'value'", false); test("NON_INDEXED_FIELD != 'value'", false); // not equals with null literal @@ -45,40 +46,40 @@ public void testNE() { @Test public void testNotEQ() { // not equals - test("!(INDEXED_FIELD == 'value')", true); - test("!(INDEX_ONLY == 'value')", true); + test("!(INDEXED_FIELD == 'value')", false); + test("!(INDEX_ONLY == 'value')", false); test("!(NON_INDEXED_FIELD == 'value')", false); } @Test public void testLt() { // less than, not part of a bounded range - test("INDEXED_FIELD < 'value'", true); - test("INDEX_ONLY < 'value'", true); + test("INDEXED_FIELD < 'value'", false); + test("INDEX_ONLY < 'value'", false); test("NON_INDEXED_FIELD < 'value'", false); } @Test public void testGt() { // greater than, not part of a bounded range - test("INDEXED_FIELD > 'value'", true); - test("INDEX_ONLY > 'value'", true); + test("INDEXED_FIELD > 'value'", false); + test("INDEX_ONLY > 'value'", false); test("NON_INDEXED_FIELD > 'value'", false); } @Test public void testLe() { // less than equals, not part of a bounded range - test("INDEXED_FIELD <= 'value'", true); - test("INDEX_ONLY <= 'value'", true); + test("INDEXED_FIELD <= 'value'", false); + test("INDEX_ONLY <= 'value'", false); test("NON_INDEXED_FIELD <= 'value'", false); } @Test public void testGe() { // less than, not part of a bounded range - test("INDEXED_FIELD >= 'value'", true); - test("INDEX_ONLY >= 'value'", true); + test("INDEXED_FIELD >= 'value'", false); + test("INDEX_ONLY >= 'value'", false); test("NON_INDEXED_FIELD >= 'value'", false); } @@ -101,8 +102,8 @@ public void testValueExceededMarker() { @Test public void testTermExceededMarker() { // exceeded term marker - test("((_Term_ = true) && (INDEXED_FIELD =~ 'ba.*'))", true); - test("((_Term_ = true) && (INDEX_ONLY =~ 'ba.*'))", true); + test("((_Term_ = true) && (INDEXED_FIELD =~ 'ba.*'))", false); + test("((_Term_ = true) && (INDEX_ONLY =~ 'ba.*'))", false); test("((_Term_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false); } @@ -126,9 +127,45 @@ public void testEvalOnlyMarker() { test("((_Eval_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false); } + @Test + public void testEqAndNotEq() { + // EQ and !EQ + test("INDEXED_FIELD == 'a' && !(INDEXED_FIELD == 'b')", true); + test("INDEXED_FIELD == 'a' && !(INDEX_ONLY == 'b')", true); + test("INDEXED_FIELD == 'a' && !(NON_INDEXED_FIELD == 'b')", false); + + test("INDEX_ONLY == 'a' && !(INDEXED_FIELD == 'b')", true); + test("INDEX_ONLY == 'a' && !(INDEX_ONLY == 'b')", true); + test("INDEX_ONLY == 'a' && !(NON_INDEXED_FIELD == 'b')", false); + + test("NON_INDEXED_FIELD == 'a' && !(INDEXED_FIELD == 'b')", false); + test("NON_INDEXED_FIELD == 'a' && !(INDEX_ONLY == 'b')", false); + test("NON_INDEXED_FIELD == 'a' && !(NON_INDEXED_FIELD == 'b')", false); + } + + @Test + public void testEqOrNotEq() { + // EQ or !EQ, verifies that negations as part of a top level union are correctly identified as 'root negations' + test("INDEXED_FIELD == 'a' || !(INDEXED_FIELD == 'b')", true); + test("INDEXED_FIELD == 'a' || !(INDEX_ONLY == 'b')", true); + test("INDEXED_FIELD == 'a' || !(NON_INDEXED_FIELD == 'b')", false); + + test("INDEX_ONLY == 'a' || !(INDEXED_FIELD == 'b')", true); + test("INDEX_ONLY == 'a' || !(INDEX_ONLY == 'b')", true); + test("INDEX_ONLY == 'a' || !(NON_INDEXED_FIELD == 'b')", false); + + test("NON_INDEXED_FIELD == 'a' || !(INDEXED_FIELD == 'b')", false); + test("NON_INDEXED_FIELD == 'a' || !(INDEX_ONLY == 'b')", false); + test("NON_INDEXED_FIELD == 'a' || !(NON_INDEXED_FIELD == 'b')", false); + } + private void test(String query, boolean expected) { + Set allIndexedFields = new HashSet<>(); + allIndexedFields.addAll(indexedFields); + allIndexedFields.addAll(indexOnlyFields); + ASTJexlScript script = parse(query); - boolean result = DisableEvaluationForGroupingVisitor.canDisableEvaluation(script, indexedFields, indexOnlyFields); + boolean result = DisableEvaluationVisitor.canDisableEvaluation(script, allIndexedFields, indexOnlyFields); assertEquals(expected, result); } diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index dfa461f223b..53ab3d7d5bc 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -489,7 +489,7 @@ - +