You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu> on 2023/01/25 22:50:40 UTC

Change in asterixdb[master]: [NO-ISSUE][COMP] Generalize subplan recognition for array indexes

From Glenn Galvizo <gg...@uci.edu>:

Glenn Galvizo has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17322 )


Change subject: [NO-ISSUE][COMP] Generalize subplan recognition for array indexes
......................................................................

[NO-ISSUE][COMP] Generalize subplan recognition for array indexes

- user model changes: no
- storage format changes: no
- interface changes: no

details:
Patch to help CBO recognize array indexes. Instead of only searching the
immediate SUBPLAN of a SELECT, we search as many SUBPLANs as we can.

Change-Id: I41cdbaa384ef160256351a149499091955bbcad9
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
3 files changed, 140 insertions(+), 24 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/22/17322/1

diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
index 587132f..902f2c1 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/AbstractOperatorFromSubplanRewrite.java
@@ -100,20 +100,38 @@
         this.context = context;
     }
 
-    protected LogicalVariable getConditioningVariable(ILogicalExpression condition) {
+    protected void gatherBooleanVariables(ILogicalExpression condition, List<LogicalVariable> outputList) {
         List<Mutable<ILogicalExpression>> selectConjuncts = new ArrayList<>();
         if (splitIntoConjuncts(condition, selectConjuncts)) {
             for (Mutable<ILogicalExpression> conjunct : selectConjuncts) {
                 if (conjunct.getValue().getExpressionTag().equals(LogicalExpressionTag.VARIABLE)) {
-                    return ((VariableReferenceExpression) conjunct.getValue()).getVariableReference();
+                    outputList.add(((VariableReferenceExpression) conjunct.getValue()).getVariableReference());
                 }
             }
-
         } else if (condition.getExpressionTag().equals(LogicalExpressionTag.VARIABLE)) {
-            return ((VariableReferenceExpression) condition).getVariableReference();
-
+            outputList.add(((VariableReferenceExpression) condition).getVariableReference());
         }
-        return null;
+    }
+
+    protected void gatherSubplanOperators(ILogicalOperator rootOperator, List<SubplanOperator> outputList) {
+        for (Mutable<ILogicalOperator> inputOpRef : rootOperator.getInputs()) {
+            LogicalOperatorTag operatorTag = inputOpRef.getValue().getOperatorTag();
+            switch (operatorTag) {
+                case SUBPLAN:
+                    outputList.add((SubplanOperator) inputOpRef.getValue());
+                    gatherSubplanOperators(inputOpRef.getValue(), outputList);
+                    break;
+
+                case ASSIGN:
+                case UNNEST:
+                    gatherSubplanOperators(inputOpRef.getValue(), outputList);
+                    break;
+
+                default:
+                    // We will break early if we encounter any other operator.
+                    return;
+            }
+        }
     }
 
     protected Pair<SelectOperator, UnnestOperator> traverseSubplanBranch(SubplanOperator subplanOperator,
@@ -374,7 +392,7 @@
         return ConstantExpression.TRUE;
     }
 
-    private AggregateOperator getAggregateFromSubplan(SubplanOperator subplanOperator) {
+    protected AggregateOperator getAggregateFromSubplan(SubplanOperator subplanOperator) {
         // We only expect one plan, and one root.
         if (subplanOperator.getNestedPlans().size() > 1
                 || subplanOperator.getNestedPlans().get(0).getRoots().size() > 1) {
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
index aa80462..c5b7265 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/JoinFromSubplanRewrite.java
@@ -123,6 +123,7 @@
             return;
         }
 
+        // TODO (GLENN): These assumptions should be relaxed in the future, but for now we'll roll with it.
         // We expect a) the operator immediately above to be a SUBPLAN, and b) the next operator above to be a SELECT.
         Mutable<ILogicalOperator> afterJoinOpRef1 = afterJoinRefs.get(afterJoinRefs.size() - 1);
         Mutable<ILogicalOperator> afterJoinOpRef2 = afterJoinRefs.get(afterJoinRefs.size() - 2);
@@ -136,8 +137,10 @@
         }
 
         // Additionally, verify that our SELECT is conditioning on a variable.
+        List<LogicalVariable> booleanVariables = new ArrayList<>();
         joinContext.selectAfterSubplan = (SelectOperator) afterJoinOp2;
-        if (getConditioningVariable(joinContext.selectAfterSubplan.getCondition().getValue()) == null) {
+        gatherBooleanVariables(joinContext.selectAfterSubplan.getCondition().getValue(), booleanVariables);
+        if (booleanVariables.isEmpty()) {
             return;
         }
 
@@ -224,7 +227,7 @@
      */
     @Override
     public AbstractBinaryJoinOperator restoreBeforeRewrite(List<Mutable<ILogicalOperator>> afterOperatorRefs,
-            IOptimizationContext context) throws AlgebricksException {
+            IOptimizationContext context) {
         JoinFromSubplanContext joinContext = contextStack.pop();
         if (joinContext.removedAfterJoinOperators != null) {
             afterOperatorRefs.addAll(joinContext.removedAfterJoinOperators);
@@ -265,14 +268,14 @@
             VariableUtilities.getUsedVariables(newAssign, usedVarsFromFunc);
             VariableUtilities.getProducedVariablesInDescendantsAndSelf(leftBranchRoot, varsFromLeftBranch);
             VariableUtilities.getProducedVariablesInDescendantsAndSelf(rightBranchRoot, varsFromRightBranch);
-            if (varsFromLeftBranch.containsAll(usedVarsFromFunc)) {
+            if (new HashSet<>(varsFromLeftBranch).containsAll(usedVarsFromFunc)) {
                 newAssign.getInputs().add(new MutableObject<>(leftBranchRoot));
                 context.computeAndSetTypeEnvironmentForOperator(newAssign);
                 joinOp.getInputs().get(0).setValue(newAssign);
                 context.computeAndSetTypeEnvironmentForOperator(joinOp);
                 arg.setValue(newVarRef);
 
-            } else if (varsFromRightBranch.containsAll(usedVarsFromFunc)) {
+            } else if (new HashSet<>(varsFromRightBranch).containsAll(usedVarsFromFunc)) {
                 newAssign.getInputs().add(new MutableObject<>(rightBranchRoot));
                 context.computeAndSetTypeEnvironmentForOperator(newAssign);
                 joinOp.getInputs().get(1).setValue(newAssign);
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
index 594ba41..24874ce 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/array/SelectFromSubplanRewrite.java
@@ -19,22 +19,29 @@
 package org.apache.asterix.optimizer.rules.am.array;
 
 import java.util.ArrayDeque;
+import java.util.ArrayList;
 import java.util.Deque;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.asterix.om.functions.BuiltinFunctions;
 import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.common.utils.Pair;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
 
 /**
  * For use in writing a "throwaway" branch which removes NTS and subplan operators. The result of this invocation is to
@@ -57,7 +64,7 @@
  * UNNEST(on variable)
  * (parent branch input)
  * </pre>
- *
+ * <p>
  * If we are given the pattern (a universal quantification query):
  * <pre>
  * SELECT_1(some variable AND array is not empty)
@@ -75,7 +82,7 @@
  * UNNEST(on variable)
  * (parent branch input)
  * </pre>
- *
+ * <p>
  * In the case of nested-subplans, we return a copy of the innermost SELECT followed by all relevant UNNEST/ASSIGNs.
  */
 public class SelectFromSubplanRewrite extends AbstractOperatorFromSubplanRewrite<SelectOperator> {
@@ -98,7 +105,7 @@
      * UNNEST
      * ...
      * </pre>
-     *
+     * <p>
      * Operators are *created* here, rather than just reconnected from the original branch.
      */
     @Override
@@ -108,17 +115,88 @@
         selectRootStack.push(originalOperator);
         reset(originalOperator.getSourceLocation(), context, optimizableFunctions);
 
-        // We expect a) a SUBPLAN as input to this SELECT, and b) our SELECT to be conditioning on a variable.
-        LogicalVariable originalSelectVar = getConditioningVariable(originalOperator.getCondition().getValue());
-        if (!originalOperator.getInputs().get(0).getValue().getOperatorTag().equals(LogicalOperatorTag.SUBPLAN)
-                || originalSelectVar == null) {
+        // Gather all boolean variables and SUBPLANs.
+        List<LogicalVariable> booleanVariables = new ArrayList<>();
+        List<SubplanOperator> subplanOperators = new ArrayList<>();
+        gatherBooleanVariables(originalOperator.getCondition().getValue(), booleanVariables);
+        gatherSubplanOperators(originalOperator, subplanOperators);
+        Iterator<SubplanOperator> subplanIterator = subplanOperators.listIterator();
+        if (booleanVariables.isEmpty() || subplanOperators.isEmpty()) {
             return null;
         }
 
-        // Traverse our subplan and generate a SELECT branch if applicable.
-        SubplanOperator subplanOperator = (SubplanOperator) originalOperator.getInputs().get(0).getValue();
-        Pair<SelectOperator, UnnestOperator> traversalOutput = traverseSubplanBranch(subplanOperator, null, true);
-        return (traversalOutput == null) ? null : traversalOutput.first;
+        // TODO (GLENN): We currently assume that SUBPLAN-SELECTs are back-to-back.
+        SubplanOperator bottommostSubplanOperator = subplanOperators.get(subplanOperators.size() - 1);
+
+        // We now need to match these variables to SUBPLANs downstream.
+        while (subplanIterator.hasNext()) {
+            SubplanOperator workingSubplanOperator = subplanIterator.next();
+            AggregateOperator aggregateFromSubplan = getAggregateFromSubplan(workingSubplanOperator);
+            if (aggregateFromSubplan == null) {
+                continue;
+            }
+            for (LogicalVariable aggregateVariable : aggregateFromSubplan.getVariables()) {
+                if (!booleanVariables.contains(aggregateVariable)) {
+                    subplanIterator.remove();
+                }
+
+                // Note: we (currently) don't expect variables to shared in multiple subplan outputs.
+                booleanVariables.remove(aggregateVariable);
+            }
+        }
+        if (subplanOperators.isEmpty()) {
+            // No <boolean variable, SUBPLAN> pairs could be found.
+            return null;
+        }
+
+        // For each subplan, traverse and generate a SELECT branch if applicable.
+        List<Pair<SelectOperator, UnnestOperator>> traversalOutputs = new ArrayList<>();
+        for (SubplanOperator subplanBranch : subplanOperators) {
+            Pair<SelectOperator, UnnestOperator> traversalOutput = traverseSubplanBranch(subplanBranch, null, false);
+            if (traversalOutput != null) {
+                traversalOutputs.add(traversalOutput);
+            }
+        }
+        if (traversalOutputs.size() == 0) {
+            return null;
+
+        } else if (traversalOutputs.size() == 1) {
+            Pair<SelectOperator, UnnestOperator> traversalOutput = traversalOutputs.get(0);
+            ILogicalOperator bottommostOperator = traversalOutput.second;
+            SelectOperator selectRewriteOperator = traversalOutput.first;
+            bottommostOperator.getInputs().addAll(bottommostSubplanOperator.getInputs());
+            OperatorManipulationUtil.computeTypeEnvironmentBottomUp(selectRewriteOperator, context);
+            return selectRewriteOperator;
+
+        } else {
+            ScalarFunctionCallExpression workingSelectCondition =
+                    new ScalarFunctionCallExpression(BuiltinFunctions.getBuiltinFunctionInfo(BuiltinFunctions.AND));
+            SelectOperator mergedSelectOperator = new SelectOperator(new MutableObject<>(workingSelectCondition));
+            ILogicalOperator workingLeafOperator = mergedSelectOperator;
+            for (Pair<SelectOperator, UnnestOperator> traversalOutput : traversalOutputs) {
+                SelectOperator selectRewriteOperator = traversalOutput.first;
+                ILogicalExpression selectRewriteExpr = selectRewriteOperator.getCondition().getValue();
+
+                // First, we coalesce our SELECT conditions.
+                List<Mutable<ILogicalExpression>> selectRewriteExprConjuncts = new ArrayList<>();
+                if (selectRewriteExpr.splitIntoConjuncts(selectRewriteExprConjuncts)) {
+                    for (Mutable<ILogicalExpression> conjunct : selectRewriteExprConjuncts) {
+                        workingSelectCondition.getArguments().add(new MutableObject<>(conjunct.getValue()));
+                    }
+                } else {
+                    workingSelectCondition.getArguments().add(new MutableObject<>(selectRewriteExpr));
+                }
+
+                // Next, we connect the bottommost operator back to the current leaf.
+                workingLeafOperator.getInputs().add(new MutableObject<>(traversalOutput.second));
+                workingLeafOperator = traversalOutput.second;
+            }
+
+            // Finally, we connect the leaf to the bottommost subplan input.
+            workingLeafOperator.getInputs().addAll(bottommostSubplanOperator.getInputs());
+            OperatorManipulationUtil.computeTypeEnvironmentBottomUp(mergedSelectOperator, context);
+            return mergedSelectOperator;
+        }
     }
 
     /**
@@ -127,7 +205,7 @@
      */
     @Override
     public SelectOperator restoreBeforeRewrite(List<Mutable<ILogicalOperator>> afterOperatorRefs,
-            IOptimizationContext context) throws AlgebricksException {
+            IOptimizationContext context) {
         return selectRootStack.pop();
     }
 }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17322
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I41cdbaa384ef160256351a149499091955bbcad9
Gerrit-Change-Number: 17322
Gerrit-PatchSet: 1
Gerrit-Owner: Glenn Galvizo <gg...@uci.edu>
Gerrit-MessageType: newchange