From ee4d7b959e7cc7e2998451e1d68f1506d1495501 Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Tue, 21 Jan 2025 13:52:08 +0000
Subject: [PATCH 1/3] Disable evaluation for grouping function queries under
certain circumstances
Feature flag disabling evaluation for queries with groupby functions
---
.../DisableEvaluationForGroupingVisitor.java | 220 ++++++++++++++++++
.../query/planner/DefaultQueryPlanner.java | 26 ++-
...sableEvaluationForGroupingVisitorTest.java | 144 ++++++++++++
.../datawave/query/QueryLogicFactory.xml | 1 +
4 files changed, 388 insertions(+), 3 deletions(-)
create mode 100644 warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java
create mode 100644 warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java
new file mode 100644
index 00000000000..02dd40a373f
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java
@@ -0,0 +1,220 @@
+package datawave.query.jexl.visitors;
+
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTAndNode;
+import org.apache.commons.jexl3.parser.ASTEQNode;
+import org.apache.commons.jexl3.parser.ASTERNode;
+import org.apache.commons.jexl3.parser.ASTFunctionNode;
+import org.apache.commons.jexl3.parser.ASTGENode;
+import org.apache.commons.jexl3.parser.ASTGTNode;
+import org.apache.commons.jexl3.parser.ASTLENode;
+import org.apache.commons.jexl3.parser.ASTLTNode;
+import org.apache.commons.jexl3.parser.ASTNENode;
+import org.apache.commons.jexl3.parser.ASTNRNode;
+import org.apache.commons.jexl3.parser.ASTOrNode;
+import org.apache.commons.jexl3.parser.JexlNode;
+
+import datawave.query.jexl.JexlASTHelper;
+import datawave.query.jexl.functions.FunctionJexlNodeVisitor;
+import datawave.query.jexl.nodes.QueryPropertyMarker;
+
+/**
+ * Determines if a query can disable evaluation when a grouping function is present
+ *
+ * Evaluation cannot be disabled if any content, query, or filter functions exist, or if delayed or evaluation only markers are present.
+ */
+public class DisableEvaluationForGroupingVisitor extends ShortCircuitBaseVisitor {
+
+ private boolean canDisableEvaluation = true;
+
+ private final Set indexedFields;
+ private final Set indexOnlyFields;
+
+ /**
+ * Public entrypoint, this visitor requires the full set of indexed and index only fields
+ *
+ * @param node
+ * the JexlNode
+ * @param indexedFields
+ * the set of indexed fields
+ * @param indexOnlyFields
+ * the set of index only fields
+ * @return true if evaluation can be disabled
+ */
+ public static boolean canDisableEvaluation(JexlNode node, Set indexedFields, Set indexOnlyFields) {
+ DisableEvaluationForGroupingVisitor visitor = new DisableEvaluationForGroupingVisitor(indexedFields, indexOnlyFields);
+ node.jjtAccept(visitor, null);
+ return visitor.canDisableEvaluation;
+ }
+
+ /**
+ * Private constructor to force static access
+ */
+ private DisableEvaluationForGroupingVisitor(Set indexedFields, Set indexOnlyFields) {
+ this.indexedFields = indexedFields;
+ this.indexOnlyFields = indexOnlyFields;
+ }
+
+ @Override
+ public Object visit(ASTAndNode node, Object data) {
+ if (!canDisableEvaluation) {
+ return data;
+ }
+
+ QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(node);
+ if (instance.isAnyType()) {
+ switch (instance.getType()) {
+ case EVALUATION_ONLY:
+ case DELAYED:
+ canDisableEvaluation = false;
+ return data;
+ case BOUNDED_RANGE:
+ case EXCEEDED_OR:
+ case EXCEEDED_TERM:
+ case EXCEEDED_VALUE:
+ // continue recursing
+ break;
+ default:
+ // query planner should handled value, range terms
+ return data;
+ }
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTOrNode node, Object data) {
+ if (!canDisableEvaluation) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTNENode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTEQNode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTLTNode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTGTNode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTLENode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTGENode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTERNode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTNRNode node, Object data) {
+ if (!canDisableEvaluation || !isFieldIndexed(node)) {
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTFunctionNode node, Object data) {
+ if (!canDisableEvaluation) {
+ return data;
+ }
+
+ FunctionJexlNodeVisitor visitor = new FunctionJexlNodeVisitor();
+ visitor.visit(node, data);
+
+ switch (visitor.namespace()) {
+ case "filter":
+ case "content":
+ case "f":
+ // must evaluate for these functions
+ canDisableEvaluation = false;
+ return data;
+ }
+
+ node.childrenAccept(this, data);
+ return data;
+ }
+
+ private boolean isFieldIndexed(JexlNode node) {
+ String field = JexlASTHelper.getIdentifier(node);
+ if (field == null) {
+ return false;
+ }
+
+ Object literal = JexlASTHelper.getLiteralValue(node);
+ if (literal == null) {
+ // term like (FIELD == null) requires an evaluation
+ canDisableEvaluation = false;
+ return false;
+ }
+
+ boolean indexed = indexedFields.contains(field) || indexOnlyFields.contains(field);
+
+ if (!indexed) {
+ canDisableEvaluation = false;
+ }
+
+ return indexed;
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
index 2549e5de3d4..eb41f9e573e 100644
--- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
@@ -103,9 +103,9 @@
import datawave.query.jexl.visitors.AddShardsAndDaysVisitor;
import datawave.query.jexl.visitors.BoundedRangeDetectionVisitor;
import datawave.query.jexl.visitors.BoundedRangeIndexExpansionVisitor;
-import datawave.query.jexl.visitors.CaseSensitivityVisitor;
import datawave.query.jexl.visitors.ConjunctionEliminationVisitor;
import datawave.query.jexl.visitors.DepthVisitor;
+import datawave.query.jexl.visitors.DisableEvaluationForGroupingVisitor;
import datawave.query.jexl.visitors.DisjunctionEliminationVisitor;
import datawave.query.jexl.visitors.ExecutableDeterminationVisitor;
import datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE;
@@ -135,7 +135,6 @@
import datawave.query.jexl.visitors.PushdownMissingIndexRangeNodesVisitor;
import datawave.query.jexl.visitors.PushdownUnexecutableNodesVisitor;
import datawave.query.jexl.visitors.QueryFieldsVisitor;
-import datawave.query.jexl.visitors.QueryModelVisitor;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.jexl.visitors.QueryPropertyMarkerSourceConsolidator;
import datawave.query.jexl.visitors.QueryPruningVisitor;
@@ -156,7 +155,6 @@
import datawave.query.jexl.visitors.ValidateFilterFunctionVisitor;
import datawave.query.jexl.visitors.order.OrderByCostVisitor;
import datawave.query.jexl.visitors.whindex.WhindexVisitor;
-import datawave.query.language.functions.jexl.Unique;
import datawave.query.model.QueryModel;
import datawave.query.planner.async.AbstractQueryPlannerCallable;
import datawave.query.planner.async.FetchCompositeMetadata;
@@ -350,6 +348,10 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable {
* performance impact.
*/
protected boolean showReducedQueryPrune = true;
+ /**
+ * Feature flag to attempt disabling evaluation under certain circumstances when a query contains a grouping function
+ */
+ protected boolean disableGroupByEvaluation = false;
// handles boilerplate operations that surround a visitor's execution (e.g., timers, logging, validating)
private TimedVisitorManager visitorManager = new TimedVisitorManager();
@@ -559,6 +561,16 @@ protected CloseableIterable process(ScannerFactory scannerFactory, Me
cfg = getQueryIterator(metadataHelper, config, "", false, false);
}
configureIterator(config, cfg, newQueryString, isFullTable);
+
+ // check for the case where evaluation can be disabled due to the presence of Grouping functions
+ // but only if query functions and content functions are absent as well
+ if (!config.getFullTableScanEnabled() && config.getGroupFields().hasGroupByFields()) {
+ boolean canDisable = DisableEvaluationForGroupingVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields());
+ if (canDisable) {
+ config.setDisableEvaluation(true);
+ addOption(cfg, QueryOptions.DISABLE_EVALUATION, "true", false);
+ }
+ }
}
final QueryData queryData = new QueryData().withQuery(newQueryString).withSettings(Lists.newArrayList(cfg));
@@ -3455,4 +3467,12 @@ public int getConcurrentTimeoutMillis() {
public void setConcurrentTimeoutMillis(int concurrentTimeoutMillis) {
this.concurrentTimeoutMillis = concurrentTimeoutMillis;
}
+
+ public boolean getDisableGroupByEvaluation() {
+ return disableGroupByEvaluation;
+ }
+
+ public void setDisableGroupByEvaluation(boolean disableGroupByEvaluation) {
+ this.disableGroupByEvaluation = disableGroupByEvaluation;
+ }
}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java
new file mode 100644
index 00000000000..100ba29d2a9
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java
@@ -0,0 +1,144 @@
+package datawave.query.jexl.visitors;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.ParseException;
+import org.junit.jupiter.api.Test;
+
+import datawave.query.jexl.JexlASTHelper;
+
+public class DisableEvaluationForGroupingVisitorTest {
+
+ private final Set indexedFields = Set.of("INDEXED_FIELD", "INDEXED_FIELD2");
+ private final Set indexOnlyFields = Set.of("INDEX_ONLY", "INDEX_ONLY2");
+
+ @Test
+ public void testEQ() {
+ // equals
+ test("INDEXED_FIELD == 'value'", true);
+ test("INDEX_ONLY == 'value'", true);
+ test("NON_INDEXED_FIELD == 'value'", false);
+
+ // equals with null literal
+ test("INDEXED_FIELD == null", false);
+ test("INDEX_ONLY == null", false);
+ test("NON_INDEXED_FIELD == null", false);
+ }
+
+ @Test
+ public void testNE() {
+ // not equals
+ test("INDEXED_FIELD != 'value'", true);
+ test("INDEX_ONLY != 'value'", true);
+ test("NON_INDEXED_FIELD != 'value'", false);
+
+ // not equals with null literal
+ test("INDEXED_FIELD != null", false);
+ test("INDEX_ONLY != null", false);
+ test("NON_INDEXED_FIELD != null", false);
+ }
+
+ @Test
+ public void testNotEQ() {
+ // not equals
+ test("!(INDEXED_FIELD == 'value')", true);
+ test("!(INDEX_ONLY == 'value')", true);
+ test("!(NON_INDEXED_FIELD == 'value')", false);
+ }
+
+ @Test
+ public void testLt() {
+ // less than, not part of a bounded range
+ test("INDEXED_FIELD < 'value'", true);
+ test("INDEX_ONLY < 'value'", true);
+ test("NON_INDEXED_FIELD < 'value'", false);
+ }
+
+ @Test
+ public void testGt() {
+ // greater than, not part of a bounded range
+ test("INDEXED_FIELD > 'value'", true);
+ test("INDEX_ONLY > 'value'", true);
+ test("NON_INDEXED_FIELD > 'value'", false);
+ }
+
+ @Test
+ public void testLe() {
+ // less than equals, not part of a bounded range
+ test("INDEXED_FIELD <= 'value'", true);
+ test("INDEX_ONLY <= 'value'", true);
+ test("NON_INDEXED_FIELD <= 'value'", false);
+ }
+
+ @Test
+ public void testGe() {
+ // less than, not part of a bounded range
+ test("INDEXED_FIELD >= 'value'", true);
+ test("INDEX_ONLY >= 'value'", true);
+ test("NON_INDEXED_FIELD >= 'value'", false);
+ }
+
+ @Test
+ public void testBoundedRangeMarker() {
+ // bounded range
+ test("((_Bounded_ = true) && (INDEXED_FIELD > 'aa' && INDEXED_FIELD < 'bb'))", true);
+ test("((_Bounded_ = true) && (INDEX_ONLY > 'aa' && INDEX_ONLY < 'bb'))", true);
+ test("((_Bounded_ = true) && (NON_INDEXED_FIELD > 'aa' && NON_INDEXED_FIELD < 'bb'))", false);
+ }
+
+ @Test
+ public void testValueExceededMarker() {
+ // exceeded value marker
+ test("((_Value_ = true) && (INDEXED_FIELD =~ 'ba.*'))", true);
+ test("((_Value_ = true) && (INDEX_ONLY =~ 'ba.*'))", true);
+ test("((_Value_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false);
+ }
+
+ @Test
+ public void testTermExceededMarker() {
+ // exceeded term marker
+ test("((_Term_ = true) && (INDEXED_FIELD =~ 'ba.*'))", true);
+ test("((_Term_ = true) && (INDEX_ONLY =~ 'ba.*'))", true);
+ test("((_Term_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false);
+ }
+
+ @Test
+ public void testDelayedMarker() {
+ // delayed leaf
+ test("((_Delayed_ = true) && (INDEXED_FIELD =~ 'ba.*'))", false);
+ test("((_Delayed_ = true) && (INDEX_ONLY =~ 'ba.*'))", false);
+ test("((_Delayed_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false);
+ // delayed bounded range
+ test("((_Delayed_ = true) && ((_Bounded_ = true) && (INDEXED_FIELD > 'aa' && INDEXED_FIELD < 'bb')))", false);
+ test("((_Delayed_ = true) && ((_Bounded_ = true) && (INDEX_ONLY > 'aa' && INDEX_ONLY < 'bb')))", false);
+ test("((_Delayed_ = true) && ((_Bounded_ = true) && (NON_INDEXED_FIELD > 'aa' && NON_INDEXED_FIELD < 'bb')))", false);
+ }
+
+ @Test
+ public void testEvalOnlyMarker() {
+ // evaluation only marker
+ test("((_Eval_ = true) && (INDEXED_FIELD =~ 'ba.*'))", false);
+ test("((_Eval_ = true) && (INDEX_ONLY =~ 'ba.*'))", false);
+ test("((_Eval_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false);
+ }
+
+ private void test(String query, boolean expected) {
+ ASTJexlScript script = parse(query);
+ boolean result = DisableEvaluationForGroupingVisitor.canDisableEvaluation(script, indexedFields, indexOnlyFields);
+ assertEquals(expected, result);
+ }
+
+ private ASTJexlScript parse(String query) {
+ try {
+ return JexlASTHelper.parseAndFlattenJexlQuery(query);
+ } catch (ParseException e) {
+ fail("Failed to parse query: " + query);
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
index e8b9e2bbb84..dfa461f223b 100644
--- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
+++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
@@ -489,6 +489,7 @@
+
From 4194ceec25ae9430fd6b719cd228558f8904551e Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Mon, 27 Jan 2025 10:07:21 +0000
Subject: [PATCH 2/3] Feature flag actually integrated into the
DefaultQueryPlanner
---
.../main/java/datawave/query/planner/DefaultQueryPlanner.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
index eb41f9e573e..c2e23333fa7 100644
--- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
@@ -564,7 +564,7 @@ protected CloseableIterable process(ScannerFactory scannerFactory, Me
// check for the case where evaluation can be disabled due to the presence of Grouping functions
// but only if query functions and content functions are absent as well
- if (!config.getFullTableScanEnabled() && config.getGroupFields().hasGroupByFields()) {
+ if (!config.getFullTableScanEnabled() && getDisableGroupByEvaluation() && config.getGroupFields().hasGroupByFields()) {
boolean canDisable = DisableEvaluationForGroupingVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields());
if (canDisable) {
config.setDisableEvaluation(true);
From 1f71cd433d456e5f1a34e3912edeb097ea20231a Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Tue, 28 Jan 2025 13:26:54 +0000
Subject: [PATCH 3/3] Update via merge request feedback
---
...tor.java => DisableEvaluationVisitor.java} | 87 +++++++++++++------
.../query/planner/DefaultQueryPlanner.java | 19 ++--
...java => DisableEvaluationVisitorTest.java} | 69 +++++++++++----
.../datawave/query/QueryLogicFactory.xml | 2 +-
4 files changed, 125 insertions(+), 52 deletions(-)
rename warehouse/query-core/src/main/java/datawave/query/jexl/visitors/{DisableEvaluationForGroupingVisitor.java => DisableEvaluationVisitor.java} (69%)
rename warehouse/query-core/src/test/java/datawave/query/jexl/visitors/{DisableEvaluationForGroupingVisitorTest.java => DisableEvaluationVisitorTest.java} (61%)
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationVisitor.java
similarity index 69%
rename from warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java
rename to warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationVisitor.java
index 02dd40a373f..a6e9e7fbd9c 100644
--- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitor.java
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/DisableEvaluationVisitor.java
@@ -12,6 +12,7 @@
import org.apache.commons.jexl3.parser.ASTLTNode;
import org.apache.commons.jexl3.parser.ASTNENode;
import org.apache.commons.jexl3.parser.ASTNRNode;
+import org.apache.commons.jexl3.parser.ASTNotNode;
import org.apache.commons.jexl3.parser.ASTOrNode;
import org.apache.commons.jexl3.parser.JexlNode;
@@ -20,11 +21,16 @@
import datawave.query.jexl.nodes.QueryPropertyMarker;
/**
- * Determines if a query can disable evaluation when a grouping function is present
+ * Determines if evaluation can be disabled for a given query. Evaluation can be disabled given the following conditions
*
- * Evaluation cannot be disabled if any content, query, or filter functions exist, or if delayed or evaluation only markers are present.
+ *
+ * - the query has a GROUPBY function (note: groupby effectively disables hit terms)
+ * - the query is NOT requesting HIT_TERMS
+ * - the query does NOT contain any content, query, or filter functions
+ * - the query does NOT contain any delayed or evaluation only markers
+ *
*/
-public class DisableEvaluationForGroupingVisitor extends ShortCircuitBaseVisitor {
+public class DisableEvaluationVisitor extends ShortCircuitBaseVisitor {
private boolean canDisableEvaluation = true;
@@ -43,7 +49,7 @@ public class DisableEvaluationForGroupingVisitor extends ShortCircuitBaseVisitor
* @return true if evaluation can be disabled
*/
public static boolean canDisableEvaluation(JexlNode node, Set indexedFields, Set indexOnlyFields) {
- DisableEvaluationForGroupingVisitor visitor = new DisableEvaluationForGroupingVisitor(indexedFields, indexOnlyFields);
+ DisableEvaluationVisitor visitor = new DisableEvaluationVisitor(indexedFields, indexOnlyFields);
node.jjtAccept(visitor, null);
return visitor.canDisableEvaluation;
}
@@ -51,7 +57,7 @@ public static boolean canDisableEvaluation(JexlNode node, Set indexedFie
/**
* Private constructor to force static access
*/
- private DisableEvaluationForGroupingVisitor(Set indexedFields, Set indexOnlyFields) {
+ private DisableEvaluationVisitor(Set indexedFields, Set indexOnlyFields) {
this.indexedFields = indexedFields;
this.indexOnlyFields = indexOnlyFields;
}
@@ -65,18 +71,19 @@ public Object visit(ASTAndNode node, Object data) {
QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(node);
if (instance.isAnyType()) {
switch (instance.getType()) {
- case EVALUATION_ONLY:
case DELAYED:
+ case EVALUATION_ONLY:
+ case EXCEEDED_TERM:
canDisableEvaluation = false;
return data;
case BOUNDED_RANGE:
case EXCEEDED_OR:
- case EXCEEDED_TERM:
case EXCEEDED_VALUE:
- // continue recursing
- break;
+ // pass in a flag that says the parent was a marker
+ node.childrenAccept(this, true);
+ return data;
default:
- // query planner should handled value, range terms
+ // unknown marker type
return data;
}
}
@@ -97,6 +104,11 @@ public Object visit(ASTOrNode node, Object data) {
@Override
public Object visit(ASTNENode node, Object data) {
+ if (isRoot(node)) {
+ // negations cannot be the root of a query, so we also cannot disable evaluation
+ canDisableEvaluation = false;
+ }
+
if (!canDisableEvaluation || !isFieldIndexed(node)) {
return data;
}
@@ -105,9 +117,10 @@ public Object visit(ASTNENode node, Object data) {
}
@Override
- public Object visit(ASTEQNode node, Object data) {
- if (!canDisableEvaluation || !isFieldIndexed(node)) {
- return data;
+ public Object visit(ASTNotNode node, Object data) {
+ if (isRoot(node)) {
+ // negations cannot be the root of a query, so we also cannot disable evaluation
+ canDisableEvaluation = false;
}
node.childrenAccept(this, data);
@@ -115,7 +128,7 @@ public Object visit(ASTEQNode node, Object data) {
}
@Override
- public Object visit(ASTLTNode node, Object data) {
+ public Object visit(ASTEQNode node, Object data) {
if (!canDisableEvaluation || !isFieldIndexed(node)) {
return data;
}
@@ -125,27 +138,32 @@ public Object visit(ASTLTNode node, Object data) {
}
@Override
- public Object visit(ASTGTNode node, Object data) {
- if (!canDisableEvaluation || !isFieldIndexed(node)) {
- return data;
- }
+ public Object visit(ASTLTNode node, Object data) {
+ return visitRangeOperator(node, data);
+ }
- node.childrenAccept(this, data);
- return data;
+ @Override
+ public Object visit(ASTGTNode node, Object data) {
+ return visitRangeOperator(node, data);
}
@Override
public Object visit(ASTLENode node, Object data) {
- if (!canDisableEvaluation || !isFieldIndexed(node)) {
- return data;
- }
-
- node.childrenAccept(this, data);
- return data;
+ return visitRangeOperator(node, data);
}
@Override
public Object visit(ASTGENode node, Object data) {
+ return visitRangeOperator(node, data);
+ }
+
+ private Object visitRangeOperator(JexlNode node, Object data) {
+ if (data == null) {
+ // not part of a bounded range. the best case is that the field is part of the event which
+ // will require evaluation
+ canDisableEvaluation = false;
+ }
+
if (!canDisableEvaluation || !isFieldIndexed(node)) {
return data;
}
@@ -217,4 +235,21 @@ private boolean isFieldIndexed(JexlNode node) {
return indexed;
}
+
+ /**
+ * Helper routine to determine if an arbitrary JexlNode is the root of a query
+ *
+ * @param node
+ * the JexlNode
+ * @return true if the node is the root
+ */
+ private boolean isRoot(JexlNode node) {
+ while (node.jjtGetParent() != null) {
+ node = node.jjtGetParent();
+ if (node instanceof ASTAndNode || node instanceof ASTOrNode) {
+ return false;
+ }
+ }
+ return true;
+ }
}
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
index c2e23333fa7..e982e4d105a 100644
--- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
@@ -105,7 +105,7 @@
import datawave.query.jexl.visitors.BoundedRangeIndexExpansionVisitor;
import datawave.query.jexl.visitors.ConjunctionEliminationVisitor;
import datawave.query.jexl.visitors.DepthVisitor;
-import datawave.query.jexl.visitors.DisableEvaluationForGroupingVisitor;
+import datawave.query.jexl.visitors.DisableEvaluationVisitor;
import datawave.query.jexl.visitors.DisjunctionEliminationVisitor;
import datawave.query.jexl.visitors.ExecutableDeterminationVisitor;
import datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE;
@@ -349,9 +349,10 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable {
*/
protected boolean showReducedQueryPrune = true;
/**
- * Feature flag to attempt disabling evaluation under certain circumstances when a query contains a grouping function
+ * Feature flag to attempt disabling evaluation under certain circumstances, i.e. when a query contains a groupby function or if the query does not require
+ * hit terms
*/
- protected boolean disableGroupByEvaluation = false;
+ protected boolean allowedToDisableEvaluation = false;
// handles boilerplate operations that surround a visitor's execution (e.g., timers, logging, validating)
private TimedVisitorManager visitorManager = new TimedVisitorManager();
@@ -564,8 +565,8 @@ protected CloseableIterable process(ScannerFactory scannerFactory, Me
// check for the case where evaluation can be disabled due to the presence of Grouping functions
// but only if query functions and content functions are absent as well
- if (!config.getFullTableScanEnabled() && getDisableGroupByEvaluation() && config.getGroupFields().hasGroupByFields()) {
- boolean canDisable = DisableEvaluationForGroupingVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields());
+ if (!config.getFullTableScanEnabled() && getAllowedToDisableEvaluation() && config.getGroupFields().hasGroupByFields()) {
+ boolean canDisable = DisableEvaluationVisitor.canDisableEvaluation(config.getQueryTree(), getIndexedFields(), getIndexOnlyFields());
if (canDisable) {
config.setDisableEvaluation(true);
addOption(cfg, QueryOptions.DISABLE_EVALUATION, "true", false);
@@ -3468,11 +3469,11 @@ public void setConcurrentTimeoutMillis(int concurrentTimeoutMillis) {
this.concurrentTimeoutMillis = concurrentTimeoutMillis;
}
- public boolean getDisableGroupByEvaluation() {
- return disableGroupByEvaluation;
+ public boolean getAllowedToDisableEvaluation() {
+ return allowedToDisableEvaluation;
}
- public void setDisableGroupByEvaluation(boolean disableGroupByEvaluation) {
- this.disableGroupByEvaluation = disableGroupByEvaluation;
+ public void setAllowedToDisableEvaluation(boolean allowedToDisableEvaluation) {
+ this.allowedToDisableEvaluation = allowedToDisableEvaluation;
}
}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationVisitorTest.java
similarity index 61%
rename from warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java
rename to warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationVisitorTest.java
index 100ba29d2a9..a7a4a50dede 100644
--- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationForGroupingVisitorTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/DisableEvaluationVisitorTest.java
@@ -3,6 +3,7 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
+import java.util.HashSet;
import java.util.Set;
import org.apache.commons.jexl3.parser.ASTJexlScript;
@@ -11,7 +12,7 @@
import datawave.query.jexl.JexlASTHelper;
-public class DisableEvaluationForGroupingVisitorTest {
+public class DisableEvaluationVisitorTest {
private final Set indexedFields = Set.of("INDEXED_FIELD", "INDEXED_FIELD2");
private final Set indexOnlyFields = Set.of("INDEX_ONLY", "INDEX_ONLY2");
@@ -32,8 +33,8 @@ public void testEQ() {
@Test
public void testNE() {
// not equals
- test("INDEXED_FIELD != 'value'", true);
- test("INDEX_ONLY != 'value'", true);
+ test("INDEXED_FIELD != 'value'", false);
+ test("INDEX_ONLY != 'value'", false);
test("NON_INDEXED_FIELD != 'value'", false);
// not equals with null literal
@@ -45,40 +46,40 @@ public void testNE() {
@Test
public void testNotEQ() {
// not equals
- test("!(INDEXED_FIELD == 'value')", true);
- test("!(INDEX_ONLY == 'value')", true);
+ test("!(INDEXED_FIELD == 'value')", false);
+ test("!(INDEX_ONLY == 'value')", false);
test("!(NON_INDEXED_FIELD == 'value')", false);
}
@Test
public void testLt() {
// less than, not part of a bounded range
- test("INDEXED_FIELD < 'value'", true);
- test("INDEX_ONLY < 'value'", true);
+ test("INDEXED_FIELD < 'value'", false);
+ test("INDEX_ONLY < 'value'", false);
test("NON_INDEXED_FIELD < 'value'", false);
}
@Test
public void testGt() {
// greater than, not part of a bounded range
- test("INDEXED_FIELD > 'value'", true);
- test("INDEX_ONLY > 'value'", true);
+ test("INDEXED_FIELD > 'value'", false);
+ test("INDEX_ONLY > 'value'", false);
test("NON_INDEXED_FIELD > 'value'", false);
}
@Test
public void testLe() {
// less than equals, not part of a bounded range
- test("INDEXED_FIELD <= 'value'", true);
- test("INDEX_ONLY <= 'value'", true);
+ test("INDEXED_FIELD <= 'value'", false);
+ test("INDEX_ONLY <= 'value'", false);
test("NON_INDEXED_FIELD <= 'value'", false);
}
@Test
public void testGe() {
// less than, not part of a bounded range
- test("INDEXED_FIELD >= 'value'", true);
- test("INDEX_ONLY >= 'value'", true);
+ test("INDEXED_FIELD >= 'value'", false);
+ test("INDEX_ONLY >= 'value'", false);
test("NON_INDEXED_FIELD >= 'value'", false);
}
@@ -101,8 +102,8 @@ public void testValueExceededMarker() {
@Test
public void testTermExceededMarker() {
// exceeded term marker
- test("((_Term_ = true) && (INDEXED_FIELD =~ 'ba.*'))", true);
- test("((_Term_ = true) && (INDEX_ONLY =~ 'ba.*'))", true);
+ test("((_Term_ = true) && (INDEXED_FIELD =~ 'ba.*'))", false);
+ test("((_Term_ = true) && (INDEX_ONLY =~ 'ba.*'))", false);
test("((_Term_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false);
}
@@ -126,9 +127,45 @@ public void testEvalOnlyMarker() {
test("((_Eval_ = true) && (NON_INDEXED_FIELD =~ 'ba.*'))", false);
}
+ @Test
+ public void testEqAndNotEq() {
+ // EQ and !EQ
+ test("INDEXED_FIELD == 'a' && !(INDEXED_FIELD == 'b')", true);
+ test("INDEXED_FIELD == 'a' && !(INDEX_ONLY == 'b')", true);
+ test("INDEXED_FIELD == 'a' && !(NON_INDEXED_FIELD == 'b')", false);
+
+ test("INDEX_ONLY == 'a' && !(INDEXED_FIELD == 'b')", true);
+ test("INDEX_ONLY == 'a' && !(INDEX_ONLY == 'b')", true);
+ test("INDEX_ONLY == 'a' && !(NON_INDEXED_FIELD == 'b')", false);
+
+ test("NON_INDEXED_FIELD == 'a' && !(INDEXED_FIELD == 'b')", false);
+ test("NON_INDEXED_FIELD == 'a' && !(INDEX_ONLY == 'b')", false);
+ test("NON_INDEXED_FIELD == 'a' && !(NON_INDEXED_FIELD == 'b')", false);
+ }
+
+ @Test
+ public void testEqOrNotEq() {
+ // EQ or !EQ, verifies that negations as part of a top level union are correctly identified as 'root negations'
+ test("INDEXED_FIELD == 'a' || !(INDEXED_FIELD == 'b')", true);
+ test("INDEXED_FIELD == 'a' || !(INDEX_ONLY == 'b')", true);
+ test("INDEXED_FIELD == 'a' || !(NON_INDEXED_FIELD == 'b')", false);
+
+ test("INDEX_ONLY == 'a' || !(INDEXED_FIELD == 'b')", true);
+ test("INDEX_ONLY == 'a' || !(INDEX_ONLY == 'b')", true);
+ test("INDEX_ONLY == 'a' || !(NON_INDEXED_FIELD == 'b')", false);
+
+ test("NON_INDEXED_FIELD == 'a' || !(INDEXED_FIELD == 'b')", false);
+ test("NON_INDEXED_FIELD == 'a' || !(INDEX_ONLY == 'b')", false);
+ test("NON_INDEXED_FIELD == 'a' || !(NON_INDEXED_FIELD == 'b')", false);
+ }
+
private void test(String query, boolean expected) {
+ Set allIndexedFields = new HashSet<>();
+ allIndexedFields.addAll(indexedFields);
+ allIndexedFields.addAll(indexOnlyFields);
+
ASTJexlScript script = parse(query);
- boolean result = DisableEvaluationForGroupingVisitor.canDisableEvaluation(script, indexedFields, indexOnlyFields);
+ boolean result = DisableEvaluationVisitor.canDisableEvaluation(script, allIndexedFields, indexOnlyFields);
assertEquals(expected, result);
}
diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
index dfa461f223b..53ab3d7d5bc 100644
--- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
+++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
@@ -489,7 +489,7 @@
-
+