You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu> on 2023/01/25 22:50:40 UTC
Change in asterixdb[master]: [NO-ISSUE][COMP] Generalize subplan recognition for array indexes
From Glenn Galvizo <gg...@uci.edu>:
Glenn Galvizo has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17322 )
Change subject: [NO-ISSUE][COMP] Generalize subplan recognition for array indexes
......................................................................
[NO-ISSUE][COMP] Generalize subplan recognition for array indexes
- user model changes: no
- storage format changes: no
- interface changes: no
details:
Patch to help CBO recognize array indexes. Instead of only searching the
immediate SUBPLAN of a SELECT, we search as many SUBPLANs as we can.
Change-Id: I41cdbaa384ef160256351a149499091955bbcad9
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
3 files changed, 140 insertions(+), 24 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/22/17322/1
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
index 587132f..902f2c1 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
@@ -100,20 +100,38 @@
this.context = context;
}
- protected LogicalVariable getConditioningVariable(ILogicalExpression condition) {
+ protected void gatherBooleanVariables(ILogicalExpression condition, List<LogicalVariable> outputList) {
List<Mutable<ILogicalExpression>> selectConjuncts = new ArrayList<>();
if (splitIntoConjuncts(condition, selectConjuncts)) {
for (Mutable<ILogicalExpression> conjunct : selectConjuncts) {
if (conjunct.getValue().getExpressionTag().equals(LogicalExpressionTag.VARIABLE)) {
- return ((VariableReferenceExpression) conjunct.getValue()).getVariableReference();
+ outputList.add(((VariableReferenceExpression) conjunct.getValue()).getVariableReference());
}
}
-
} else if (condition.getExpressionTag().equals(LogicalExpressionTag.VARIABLE)) {
- return ((VariableReferenceExpression) condition).getVariableReference();
-
+ outputList.add(((VariableReferenceExpression) condition).getVariableReference());
}
- return null;
+ }
+
+ protected void gatherSubplanOperators(ILogicalOperator rootOperator, List<SubplanOperator> outputList) {
+ for (Mutable<ILogicalOperator> inputOpRef : rootOperator.getInputs()) {
+ LogicalOperatorTag operatorTag = inputOpRef.getValue().getOperatorTag();
+ switch (operatorTag) {
+ case SUBPLAN:
+ outputList.add((SubplanOperator) inputOpRef.getValue());
+ gatherSubplanOperators(inputOpRef.getValue(), outputList);
+ break;
+
+ case ASSIGN:
+ case UNNEST:
+ gatherSubplanOperators(inputOpRef.getValue(), outputList);
+ break;
+
+ default:
+ // We will break early if we encounter any other operator.
+ return;
+ }
+ }
}
protected Pair<SelectOperator, UnnestOperator> traverseSubplanBranch(SubplanOperator subplanOperator,
@@ -374,7 +392,7 @@
return ConstantExpression.TRUE;
}
- private AggregateOperator getAggregateFromSubplan(SubplanOperator subplanOperator) {
+ protected AggregateOperator getAggregateFromSubplan(SubplanOperator subplanOperator) {
// We only expect one plan, and one root.
if (subplanOperator.getNestedPlans().size() > 1
|| subplanOperator.getNestedPlans().get(0).getRoots().size() > 1) {
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
index aa80462..c5b7265 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
@@ -123,6 +123,7 @@
return;
}
+ // TODO (GLENN): These assumptions should be relaxed in the future, but for now we'll roll with it.
// We expect a) the operator immediately above to be a SUBPLAN, and b) the next operator above to be a SELECT.
Mutable<ILogicalOperator> afterJoinOpRef1 = afterJoinRefs.get(afterJoinRefs.size() - 1);
Mutable<ILogicalOperator> afterJoinOpRef2 = afterJoinRefs.get(afterJoinRefs.size() - 2);
@@ -136,8 +137,10 @@
}
// Additionally, verify that our SELECT is conditioning on a variable.
+ List<LogicalVariable> booleanVariables = new ArrayList<>();
joinContext.selectAfterSubplan = (SelectOperator) afterJoinOp2;
- if (getConditioningVariable(joinContext.selectAfterSubplan.getCondition().getValue()) == null) {
+ gatherBooleanVariables(joinContext.selectAfterSubplan.getCondition().getValue(), booleanVariables);
+ if (booleanVariables.isEmpty()) {
return;
}
@@ -224,7 +227,7 @@
*/
@Override
public AbstractBinaryJoinOperator restoreBeforeRewrite(List<Mutable<ILogicalOperator>> afterOperatorRefs,
- IOptimizationContext context) throws AlgebricksException {
+ IOptimizationContext context) {
JoinFromSubplanContext joinContext = contextStack.pop();
if (joinContext.removedAfterJoinOperators != null) {
afterOperatorRefs.addAll(joinContext.removedAfterJoinOperators);
@@ -265,14 +268,14 @@
VariableUtilities.getUsedVariables(newAssign, usedVarsFromFunc);
VariableUtilities.getProducedVariablesInDescendantsAndSelf(leftBranchRoot, varsFromLeftBranch);
VariableUtilities.getProducedVariablesInDescendantsAndSelf(rightBranchRoot, varsFromRightBranch);
- if (varsFromLeftBranch.containsAll(usedVarsFromFunc)) {
+ if (new HashSet<>(varsFromLeftBranch).containsAll(usedVarsFromFunc)) {
newAssign.getInputs().add(new MutableObject<>(leftBranchRoot));
context.computeAndSetTypeEnvironmentForOperator(newAssign);
joinOp.getInputs().get(0).setValue(newAssign);
context.computeAndSetTypeEnvironmentForOperator(joinOp);
arg.setValue(newVarRef);
- } else if (varsFromRightBranch.containsAll(usedVarsFromFunc)) {
+ } else if (new HashSet<>(varsFromRightBranch).containsAll(usedVarsFromFunc)) {
newAssign.getInputs().add(new MutableObject<>(rightBranchRoot));
context.computeAndSetTypeEnvironmentForOperator(newAssign);
joinOp.getInputs().get(1).setValue(newAssign);
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
index 594ba41..24874ce 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
@@ -19,22 +19,29 @@
package org.apache.asterix.optimizer.rules.am.array;
import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Set;
+import org.apache.asterix.om.functions.BuiltinFunctions;
import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
/**
* For use in writing a "throwaway" branch which removes NTS and subplan operators. The result of this invocation is to
@@ -57,7 +64,7 @@
* UNNEST(on variable)
* (parent branch input)
* </pre>
- *
+ * <p>
* If we are given the pattern (a universal quantification query):
* <pre>
* SELECT_1(some variable AND array is not empty)
@@ -75,7 +82,7 @@
* UNNEST(on variable)
* (parent branch input)
* </pre>
- *
+ * <p>
* In the case of nested-subplans, we return a copy of the innermost SELECT followed by all relevant UNNEST/ASSIGNs.
*/
public class SelectFromSubplanRewrite extends AbstractOperatorFromSubplanRewrite<SelectOperator> {
@@ -98,7 +105,7 @@
* UNNEST
* ...
* </pre>
- *
+ * <p>
* Operators are *created* here, rather than just reconnected from the original branch.
*/
@Override
@@ -108,17 +115,88 @@
selectRootStack.push(originalOperator);
reset(originalOperator.getSourceLocation(), context, optimizableFunctions);
- // We expect a) a SUBPLAN as input to this SELECT, and b) our SELECT to be conditioning on a variable.
- LogicalVariable originalSelectVar = getConditioningVariable(originalOperator.getCondition().getValue());
- if (!originalOperator.getInputs().get(0).getValue().getOperatorTag().equals(LogicalOperatorTag.SUBPLAN)
- || originalSelectVar == null) {
+ // Gather all boolean variables and SUBPLANs.
+ List<LogicalVariable> booleanVariables = new ArrayList<>();
+ List<SubplanOperator> subplanOperators = new ArrayList<>();
+ gatherBooleanVariables(originalOperator.getCondition().getValue(), booleanVariables);
+ gatherSubplanOperators(originalOperator, subplanOperators);
+ Iterator<SubplanOperator> subplanIterator = subplanOperators.listIterator();
+ if (booleanVariables.isEmpty() || subplanOperators.isEmpty()) {
return null;
}
- // Traverse our subplan and generate a SELECT branch if applicable.
- SubplanOperator subplanOperator = (SubplanOperator) originalOperator.getInputs().get(0).getValue();
- Pair<SelectOperator, UnnestOperator> traversalOutput = traverseSubplanBranch(subplanOperator, null, true);
- return (traversalOutput == null) ? null : traversalOutput.first;
+ // TODO (GLENN): We currently assume that SUBPLAN-SELECTs are back-to-back.
+ SubplanOperator bottommostSubplanOperator = subplanOperators.get(subplanOperators.size() - 1);
+
+ // We now need to match these variables to SUBPLANs downstream.
+ while (subplanIterator.hasNext()) {
+ SubplanOperator workingSubplanOperator = subplanIterator.next();
+ AggregateOperator aggregateFromSubplan = getAggregateFromSubplan(workingSubplanOperator);
+ if (aggregateFromSubplan == null) {
+ continue;
+ }
+ for (LogicalVariable aggregateVariable : aggregateFromSubplan.getVariables()) {
+ if (!booleanVariables.contains(aggregateVariable)) {
+ subplanIterator.remove();
+ }
+
+ // Note: we (currently) don't expect variables to shared in multiple subplan outputs.
+ booleanVariables.remove(aggregateVariable);
+ }
+ }
+ if (subplanOperators.isEmpty()) {
+ // No <boolean variable, SUBPLAN> pairs could be found.
+ return null;
+ }
+
+ // For each subplan, traverse and generate a SELECT branch if applicable.
+ List<Pair<SelectOperator, UnnestOperator>> traversalOutputs = new ArrayList<>();
+ for (SubplanOperator subplanBranch : subplanOperators) {
+ Pair<SelectOperator, UnnestOperator> traversalOutput = traverseSubplanBranch(subplanBranch, null, false);
+ if (traversalOutput != null) {
+ traversalOutputs.add(traversalOutput);
+ }
+ }
+ if (traversalOutputs.size() == 0) {
+ return null;
+
+ } else if (traversalOutputs.size() == 1) {
+ Pair<SelectOperator, UnnestOperator> traversalOutput = traversalOutputs.get(0);
+ ILogicalOperator bottommostOperator = traversalOutput.second;
+ SelectOperator selectRewriteOperator = traversalOutput.first;
+ bottommostOperator.getInputs().addAll(bottommostSubplanOperator.getInputs());
+ OperatorManipulationUtil.computeTypeEnvironmentBottomUp(selectRewriteOperator, context);
+ return selectRewriteOperator;
+
+ } else {
+ ScalarFunctionCallExpression workingSelectCondition =
+ new ScalarFunctionCallExpression(BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.AND));
+ SelectOperator mergedSelectOperator = new SelectOperator(new MutableObject<>(workingSelectCondition));
+ ILogicalOperator workingLeafOperator = mergedSelectOperator;
+ for (Pair<SelectOperator, UnnestOperator> traversalOutput : traversalOutputs) {
+ SelectOperator selectRewriteOperator = traversalOutput.first;
+ ILogicalExpression selectRewriteExpr = selectRewriteOperator.getCondition().getValue();
+
+ // First, we coalesce our SELECT conditions.
+ List<Mutable<ILogicalExpression>> selectRewriteExprConjuncts = new ArrayList<>();
+ if (selectRewriteExpr.splitIntoConjuncts(selectRewriteExprConjuncts)) {
+ for (Mutable<ILogicalExpression> conjunct : selectRewriteExprConjuncts) {
+ workingSelectCondition.getArguments().add(new MutableObject<>(conjunct.getValue()));
+ }
+ } else {
+ workingSelectCondition.getArguments().add(new MutableObject<>(selectRewriteExpr));
+ }
+
+ // Next, we connect the bottommost operator back to the current leaf.
+ workingLeafOperator.getInputs().add(new MutableObject<>(traversalOutput.second));
+ workingLeafOperator = traversalOutput.second;
+ }
+
+ // Finally, we connect the leaf to the bottommost subplan input.
+ workingLeafOperator.getInputs().addAll(bottommostSubplanOperator.getInputs());
+ OperatorManipulationUtil.computeTypeEnvironmentBottomUp(mergedSelectOperator, context);
+ return mergedSelectOperator;
+ }
}
/**
@@ -127,7 +205,7 @@
*/
@Override
public SelectOperator restoreBeforeRewrite(List<Mutable<ILogicalOperator>> afterOperatorRefs,
- IOptimizationContext context) throws AlgebricksException {
+ IOptimizationContext context) {
return selectRootStack.pop();
}
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17322
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I41cdbaa384ef160256351a149499091955bbcad9
Gerrit-Change-Number: 17322
Gerrit-PatchSet: 1
Gerrit-Owner: Glenn Galvizo <gg...@uci.edu>
Gerrit-MessageType: newchange