You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu> on 2022/02/24 01:16:24 UTC

Change in asterixdb[neo]: Latest changes 021422

From Vijay Sarathy <vi...@couchbase.com>:

Vijay Sarathy has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424 )


Change subject: Latest changes 021422
......................................................................

Latest changes 021422

Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
4 files changed, 140 insertions(+), 65 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/24/15424/1

diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
index b5d83b9..f03896e 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
@@ -30,12 +30,14 @@
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.commons.lang3.mutable.MutableBoolean;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.*;
 import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
 import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
 import org.apache.hyracks.algebricks.core.config.AlgebricksConfig;
 import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
@@ -48,7 +50,8 @@
 public final class EnumerateJoinsRule implements IAlgebraicRewriteRule {
 
     private int totalNumberOfJoins;
-    DataSourceScanOperator leafNodeVar;
+    DataSourceScanOperator dataSourceOp;
+    EmptyTupleSourceOperator emptyTupleSourceOp;
     List<ILogicalExpression> joinConditions = new ArrayList<>();
     List<Boolean> joinConditionUsed = new ArrayList<>();
 
@@ -68,14 +71,15 @@
             return;
         }
 
+        if (op.getOperatorTag() == LogicalOperatorTag.EMPTYTUPLESOURCE) {
+            EmptyTupleSourceOperator emptyTupleSourceOperator = (EmptyTupleSourceOperator) op;
+            emptyTupleSourceOp = emptyTupleSourceOperator;
+            return;
+
+        }
         if (op.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
             DataSourceScanOperator dataSourceScanOperator = (DataSourceScanOperator) op;
-            List<LogicalVariable> variables = dataSourceScanOperator.getVariables();
-            //LogicalVariable lv = variables.get(1);
-            //String vars = lv.toString();
-            //leafNodeVar = vars.substring(2); // skip the $$
-            leafNodeVar = dataSourceScanOperator;
-            return;
+            dataSourceOp = dataSourceScanOperator;
         }
 
         for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
@@ -94,8 +98,10 @@
         return true;
     }
 
-    void getJoinOpsAndLeafInputs(ILogicalOperator op, List<DataSourceScanOperator> dataSourceScanVars,
-            HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+    void getJoinOpsAndLeafInputs(ILogicalOperator op,
+            List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+            HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
             List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, MutableBoolean canTransform) {
 
         if (canTransform.isFalse()) {
@@ -107,17 +113,17 @@
             return;
         }
         for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
-            getJoinOpsAndLeafInputs(nextOp.getValue(), dataSourceScanVars, joinLeafInputsHashMap, internalEdges,
-                    joinOps, canTransform);
+            getJoinOpsAndLeafInputs(nextOp.getValue(), emptyTupleAndDataSourceOps, joinLeafInputsHashMap,
+                    dataSourceEmptyTupleHashMap, internalEdges, joinOps, canTransform);
         }
         if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) {
             joinOps.add(op);
             // follow the inputs and see if they reach a datascan operator
             for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
                 //Mutable<String> leafNodeVar = new MutableObject<String>();
-                leafNodeVar = null;
+                emptyTupleSourceOp = null;
                 containsLeafNodeOnly(nextOp.getValue());
-                if (leafNodeVar == null) {
+                if (emptyTupleSourceOp == null) { // This means that we did not find an emptyTupleSourceOp operator. Could be an internal edge
                     if (nextOp.getValue().getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
                         if (OnlyOneAssign(nextOp)) {
                             // Currently will handle only assign statement and nothing else in an internal Edge.
@@ -128,8 +134,9 @@
                         }
                     }
                 } else {
-                    dataSourceScanVars.add(leafNodeVar);
-                    joinLeafInputsHashMap.put(leafNodeVar, nextOp.getValue()); // should not need both leafS! get rid of one later
+                    emptyTupleAndDataSourceOps.add(new Pair<>(emptyTupleSourceOp, dataSourceOp));
+                    joinLeafInputsHashMap.put(emptyTupleSourceOp, nextOp.getValue());
+                    dataSourceEmptyTupleHashMap.put(dataSourceOp, emptyTupleSourceOp);
                 }
             }
         }
@@ -137,7 +144,7 @@
 
     // we have to move the inputs in op around so that they match the tree structure in pn
     void getNewTree(ILogicalOperator root, JoinEnumCtx jeCtx, PlanNode plan,
-            HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
             int joinNumber) {
 
         List<PlanNode> allPlans = jeCtx.getAllPlans();
@@ -161,7 +168,7 @@
         }
 
         if (leftIndex <= size) { // leaf
-            ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getDataSourceScanOp());
+            ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getEmptyTupleSourceOp());
             joinOp.getInputs().get(0).setValue(leftInput);
         } else { // join
             totalNumberOfJoins++;
@@ -172,7 +179,7 @@
         }
 
         if (rightIndex <= size) { // leaf
-            ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getDataSourceScanOp());
+            ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getEmptyTupleSourceOp());
             joinOp.getInputs().get(1).setValue(rightInput);
         } else { // join
             totalNumberOfJoins++;
@@ -204,12 +211,13 @@
         System.out.println("---------------------------- ");
     }
 
-    void printLeafPlans(IPlanPrettyPrinter pp, HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap)
-            throws AlgebricksException {
-        Iterator<Map.Entry<DataSourceScanOperator, ILogicalOperator>> li = joinLeafInputsHashMap.entrySet().iterator();
+    void printLeafPlans(IPlanPrettyPrinter pp,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap) throws AlgebricksException {
+        Iterator<Map.Entry<EmptyTupleSourceOperator, ILogicalOperator>> li =
+                joinLeafInputsHashMap.entrySet().iterator();
         int i = 0;
         while (li.hasNext()) {
-            Map.Entry<DataSourceScanOperator, ILogicalOperator> pair = li.next();
+            Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> pair = li.next();
             ILogicalOperator element = pair.getValue();
             printPlan(pp, (AbstractLogicalOperator) element, "Printing Leaf Input" + i);
             i++;
@@ -233,8 +241,10 @@
 
         List<ILogicalOperator> joinOps = new ArrayList<>();
         List<ILogicalOperator> internalEdges = new ArrayList<>();
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
-        List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
+        //List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+        List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps = new ArrayList<>();
+        HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap = new HashMap<>();
         // The data scan operators. Will be in the order of the from clause.
         // Important for position ordering when assigning bits to join expressions.
 
@@ -245,13 +255,14 @@
         printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
 
         //findDataSourceScanVars(op, dataSourceScanVars, canTransform);
-        getJoinOpsAndLeafInputs(op, dataSourceScanOps, joinLeafInputsHashMap, internalEdges, joinOps, canTransform);
+        getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, dataSourceEmptyTupleHashMap,
+                internalEdges, joinOps, canTransform);
 
         if (canTransform.isFalse()) {
             return false;
         }
 
-        if (dataSourceScanOps.size() != joinLeafInputsHashMap.size())
+        if (emptyTupleAndDataSourceOps.size() != joinLeafInputsHashMap.size())
             return false; // if this happens, something in the input plan is not acceptable to the new code.
 
         printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan2");
@@ -264,13 +275,13 @@
             return false;
         }
 
-        int numberOfFromTerms = dataSourceScanOps.size();
+        int numberOfFromTerms = emptyTupleAndDataSourceOps.size();
 
         // jsArray, allPlans, joinConditions, cardHints are initialized
         // in the JoinEnumCtx. The jeCtx is attached to each joinStruct and planNode
         // also, so they have access to the context when needed.
-        JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, dataSourceScanOps,
-                joinLeafInputsHashMap, internalEdges, joinOps, cardHints, context);
+        JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, emptyTupleAndDataSourceOps,
+                joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps, cardHints, context);
         JoinStruct[] jsArray = jeCtx.getJsArray(); // will not use [0] element;
         JoinStruct js = jsArray[0]; // jsArray[0] is not used for join enumeration,
                                     // only used to call member method enumerateJoins()
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
index 1b6196b..3084c41 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
@@ -6,10 +6,12 @@
 
 import org.apache.asterix.common.config.CompilerProperties;
 import org.apache.asterix.metadata.declared.MetadataProvider;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
 import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
 import org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
 
@@ -19,8 +21,10 @@
     JoinStruct[] jsArray; // array of all join structs
     int jsArraySize;
     CardHints cardHints; // cardinality hints
-    List<DataSourceScanOperator> dataSourceScanOps;
-    HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap;
+    //List<DataSourceScanOperator> dataSourceScanOps;
+    List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps;
+    HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap;
+    HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap;
     List<ILogicalOperator> internalEdges;
     List<ILogicalOperator> joinOps;
     ILogicalOperator localJoinOp; // used in nestedLoopsApplicable code.
@@ -35,8 +39,9 @@
     HashMap<String, Integer> sizeMap = new HashMap<>();
 
     public JoinEnumCtx(AbstractLogicalOperator op, int numberOfFromTerms,
-            List<DataSourceScanOperator> dataSourceScanOps,
-            HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+            List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+            HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
             List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, CardHints cardHints,
             IOptimizationContext context) {
         this.joinConditions = new ArrayList<>();
@@ -53,8 +58,10 @@
         this.optCtx = context;
         this.physOptConfig = context.getPhysicalOptimizationConfig();
 
-        this.dataSourceScanOps = dataSourceScanOps;
+        //this.dataSourceScanOps = dataSourceScanOps;
+        this.emptyTupleAndDataSourceOps = emptyTupleAndDataSourceOps;
         this.joinLeafInputsHashMap = joinLeafInputsHashMap;
+        this.dataSourceEmptyTupleHashMap = dataSourceEmptyTupleHashMap;
         this.internalEdges = internalEdges;
         this.joinOps = joinOps;
         this.op = op;
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
index 6418a2c..9e44361 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
@@ -3,7 +3,11 @@
 import java.util.*;
 
 import org.apache.asterix.metadata.declared.DataSourceId;
+import org.apache.asterix.om.base.AOrderedList;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.constants.AsterixConstantValue;
 import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.optimizer.cost.Cost;
 import org.apache.asterix.optimizer.cost.CostMethods;
 import org.apache.asterix.optimizer.rules.EnumerateJoinsRule;
@@ -12,10 +16,17 @@
 import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.*;
 import org.apache.hyracks.algebricks.core.algebra.expressions.*;
 import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.*;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
 import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
 import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
@@ -54,8 +65,8 @@
     List<Integer> planIndexesArray; // indexes into the PlanNode array in enumerateJoins
     int jsIndex, level, highestDatasetId;
     List<Integer> applicableJoinConditions;
-    DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationsjip between the LVs and the dataSourceScanOps and the leafInputs.
-
+    //DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
+    EmptyTupleSourceOperator correspondingEmptyTupleSourceOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
     private final double CARDMAX = 1.0e200;
 
     public JoinStruct(JoinEnumCtx JECtx) { //empty constructor. Will fill in all the fields in the code.
@@ -75,12 +86,13 @@
 
     ILogicalOperator findLeafInput(LogicalVariable dollarDollarVar, MutableInt position) throws AlgebricksException {
 
-        List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+        List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+                jeCtx.emptyTupleAndDataSourceOps;
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
 
-        for (int pos = 0; pos < dataSourceScanVars.size(); pos++) {
-            DataSourceScanOperator dataVar = dataSourceScanVars.get(pos);
-            ILogicalOperator op = joinLeafInputsHashMap.get(dataVar);
+        for (int pos = 0; pos < emptyTupleAndDataSourceOps.size(); pos++) {
+            EmptyTupleSourceOperator emptyOp = emptyTupleAndDataSourceOps.get(pos).getFirst();
+            ILogicalOperator op = joinLeafInputsHashMap.get(emptyOp);
             HashSet<LogicalVariable> vars = new HashSet<>();
             VariableUtilities.getLiveVariables(op, vars); // this is expensive to do. So store this once and reuse
             if (vars.contains(dollarDollarVar)) {
@@ -112,12 +124,12 @@
         }
         List<JoinCondition> joinConditions = jeCtx.getJoinConditions();
 
-        ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
-                BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
-
         if (newJoinConditions.size() == 1)
             return joinConditions.get(newJoinConditions.get(0)).joinCondition;
 
+        ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
+                BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
+
         for (int i = 0; i < newJoinConditions.size(); i++) { // Need to AND all the expressions.
             int joinNum = newJoinConditions.get(i);
             andExpr.getArguments().add(new MutableObject<>(joinConditions.get(joinNum).joinCondition));
@@ -155,7 +167,7 @@
         // We need to find out which one of these is the inner joinLeafInput. So for that get the joinLeafInput using innerJs
 
         ILogicalOperator innerLeafInput =
-                jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingDataSourceScanOp);
+                jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingEmptyTupleSourceOp);
         // This must equal one of the two joinLeafInputsHashMap found above. check for sanity!!
 
         if (innerLeafInput != joinLeafInput1 && innerLeafInput != joinLeafInput0)
@@ -676,12 +688,12 @@
         PlanNode pn = new PlanNode(jeCtx);
         pn.jsIndexes[0] = index;
         pn.datasetName = jsArray[index].datasetNames.get(0);
-        pn.correspondingDataSourceScanOp = jsArray[index].correspondingDataSourceScanOp;
+        pn.correspondingEmptyTupleSourceOp = jsArray[index].correspondingEmptyTupleSourceOp;
         pn.jsIndexes[1] = 0;
         pn.planIndexes[0] = pn.planIndexes[1] = 0; // There ane no plans below this plan.
         pn.opCost = CostMethods.costFullScan(jsArray[index].origCardinality, jsArray[index].size,
                 jsArray[index].cardinality, jsArray[index].size, blockSize,
-                jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now
+                jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now.
         pn.totalCost = pn.opCost;
         pn.card = jsArray[index].cardinality;
         pn.op = PhysicalOperatorTag.DATASOURCE_SCAN;
@@ -731,18 +743,19 @@
     }
 
     private int findJoinStructIndex(LogicalVariable lv) throws AlgebricksException {
-        List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+        List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+                jeCtx.emptyTupleAndDataSourceOps;
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
 
-        for (Map.Entry<DataSourceScanOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
+        for (Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
             ILogicalOperator joinLeafInput = mapElement.getValue();
             HashSet<LogicalVariable> vars = new HashSet<>();
             // this should get the variables from the inputs only, since the join condition is itself set to null
             VariableUtilities.getLiveVariables(joinLeafInput, vars);
             if (vars.contains(lv)) {
-                DataSourceScanOperator key = mapElement.getKey();
-                for (int i = 0; i < dataSourceScanVars.size(); i++) {
-                    if (key.equals(dataSourceScanVars.get(i))) {
+                EmptyTupleSourceOperator key = mapElement.getKey();
+                for (int i = 0; i < emptyTupleAndDataSourceOps.size(); i++) {
+                    if (key.equals(emptyTupleAndDataSourceOps.get(i).getFirst())) {
                         return i;
                     }
                 }
@@ -836,8 +849,12 @@
     private double getSelectivity(DataSourceScanOperator lv) throws AlgebricksException {
         double sel = 1.0; // safe to return 1 if there is no annotation
 
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
-        ILogicalOperator op = joinLeafInputsHashMap.get(lv);
+        if (lv == null) {
+            return sel;
+        }
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+        EmptyTupleSourceOperator etso = jeCtx.dataSourceEmptyTupleHashMap.get(lv);
+        ILogicalOperator op = joinLeafInputsHashMap.get(etso);
 
         // find all the selectOperators here.
 
@@ -1016,6 +1033,32 @@
         }
     }
 
+    private double findSize(ILogicalOperator op) {
+        if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN)
+            return 1.0;
+
+        if (op.getOperatorTag() == LogicalOperatorTag.UNNEST) {
+            UnnestOperator unnestOp = (UnnestOperator) op;
+            ILogicalExpression unnestExpr = unnestOp.getExpressionRef().getValue();
+            UnnestingFunctionCallExpression unnestingFuncExpr = (UnnestingFunctionCallExpression) unnestExpr;
+
+            if (unnestingFuncExpr.getFunctionIdentifier().equals(BuiltinFunctions.SCAN_COLLECTION))
+                if (unnestingFuncExpr.getArguments().get(0).getValue().getExpressionTag()
+                        .equals(LogicalExpressionTag.CONSTANT)) {
+                    ConstantExpression constantExpr =
+                            (ConstantExpression) unnestingFuncExpr.getArguments().get(0).getValue();
+                    AsterixConstantValue constantValue = (AsterixConstantValue) constantExpr.getValue();
+                    IAObject v = (IAObject) constantValue.getObject();
+                    if (v.getType().getTypeTag().equals(ATypeTag.ARRAY)) {
+                        AOrderedList array = (AOrderedList) v;
+                        return array.size();
+                    }
+                }
+        }
+
+        return 10.0; // just a guess
+    }
+
     // main entry point in this file
     public int enumerateJoins() throws AlgebricksException {
 
@@ -1040,16 +1083,24 @@
             //jsArray[i].jsIndex = i; Fill in jsIndex later
             jsArray[i].datasetBits = 1 << (i - 1);
             jsArray[i].datasetIndexes = new ArrayList<>(Collections.singleton(i));
-            //jsArray[i].datasetNames =
-            //new ArrayList<>(Collections.singleton(jeCtx.dataSourceScanVars.get(i - 1).toString().substring(2)));
-            DataSourceId id = (DataSourceId) jeCtx.dataSourceScanOps.get(i - 1).getDataSource().getId();
-            jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
-            jsArray[i].correspondingDataSourceScanOp = jeCtx.dataSourceScanOps.get(i - 1);
-            jsArray[i].origCardinality = findCardinality(i);
-            if (cardinality >= CARDMAX) // no hint available for this dataset
-                return -1;
-            jsArray[i].cardinality = jsArray[i].origCardinality * getSelectivity(jeCtx.dataSourceScanOps.get(i - 1)); // multiply by the respective predicate selectivities
 
+            if (jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond() != null) {
+                DataSourceId id =
+                        (DataSourceId) jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond().getDataSource().getId();
+                jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
+                jsArray[i].origCardinality = findCardinality(i);
+                jsArray[i].cardinality = jsArray[i].origCardinality
+                        * getSelectivity(jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond()); // multiply by the respective predicate selectivities
+            } else {
+                jsArray[i].datasetNames = new ArrayList<>(Collections.singleton("unnestOrAssign")); // could be unnest or assign
+                EmptyTupleSourceOperator ets = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
+                ILogicalOperator logOp = jeCtx.joinLeafInputsHashMap.get(ets);
+                jsArray[i].origCardinality = jsArray[i].cardinality = findSize(logOp);
+            }
+
+            if (jsArray[i].origCardinality >= CARDMAX)
+                return -1;
+            jsArray[i].correspondingEmptyTupleSourceOp = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
             jsArray[i].highestDatasetId = i;
             jsArray[i].level = 1;
 
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
index 5c74b72..1e4912f 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
@@ -4,6 +4,7 @@
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
 
 public class PlanNode {
     private static JoinEnumCtx jeCtx;
@@ -16,6 +17,7 @@
     PhysicalOperatorTag op;
     ILogicalExpression joinExpr;
     DataSourceScanOperator correspondingDataSourceScanOp;
+    EmptyTupleSourceOperator correspondingEmptyTupleSourceOp;
 
     public PlanNode(JoinEnumCtx JECtx) {
         jeCtx = JECtx;
@@ -47,6 +49,10 @@
         return correspondingDataSourceScanOp; // This applies only to singleDataSetPlans
     }
 
+    public EmptyTupleSourceOperator getEmptyTupleSourceOp() {
+        return correspondingEmptyTupleSourceOp; // This applies only to singleDataSetPlans
+    }
+
     public Cost getOpCost() {
         return opCost;
     }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: neo
Gerrit-Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
Gerrit-Change-Number: 15424
Gerrit-PatchSet: 1
Gerrit-Owner: Vijay Sarathy <vi...@couchbase.com>
Gerrit-MessageType: newchange

Change in asterixdb[neo]: Latest changes 021422

Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Vijay Sarathy <vi...@couchbase.com>:

Vijay Sarathy has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424 )


Change subject: Latest changes 021422
......................................................................

Latest changes 021422

Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
4 files changed, 140 insertions(+), 65 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/24/15424/1

diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
index b5d83b9..f03896e 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
@@ -30,12 +30,14 @@
 import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.commons.lang3.mutable.MutableBoolean;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.*;
 import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
 import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
 import org.apache.hyracks.algebricks.core.config.AlgebricksConfig;
 import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
@@ -48,7 +50,8 @@
 public final class EnumerateJoinsRule implements IAlgebraicRewriteRule {
 
     private int totalNumberOfJoins;
-    DataSourceScanOperator leafNodeVar;
+    DataSourceScanOperator dataSourceOp;
+    EmptyTupleSourceOperator emptyTupleSourceOp;
     List<ILogicalExpression> joinConditions = new ArrayList<>();
     List<Boolean> joinConditionUsed = new ArrayList<>();
 
@@ -68,14 +71,15 @@
             return;
         }
 
+        if (op.getOperatorTag() == LogicalOperatorTag.EMPTYTUPLESOURCE) {
+            EmptyTupleSourceOperator emptyTupleSourceOperator = (EmptyTupleSourceOperator) op;
+            emptyTupleSourceOp = emptyTupleSourceOperator;
+            return;
+
+        }
         if (op.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
             DataSourceScanOperator dataSourceScanOperator = (DataSourceScanOperator) op;
-            List<LogicalVariable> variables = dataSourceScanOperator.getVariables();
-            //LogicalVariable lv = variables.get(1);
-            //String vars = lv.toString();
-            //leafNodeVar = vars.substring(2); // skip the $$
-            leafNodeVar = dataSourceScanOperator;
-            return;
+            dataSourceOp = dataSourceScanOperator;
         }
 
         for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
@@ -94,8 +98,10 @@
         return true;
     }
 
-    void getJoinOpsAndLeafInputs(ILogicalOperator op, List<DataSourceScanOperator> dataSourceScanVars,
-            HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+    void getJoinOpsAndLeafInputs(ILogicalOperator op,
+            List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+            HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
             List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, MutableBoolean canTransform) {
 
         if (canTransform.isFalse()) {
@@ -107,17 +113,17 @@
             return;
         }
         for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
-            getJoinOpsAndLeafInputs(nextOp.getValue(), dataSourceScanVars, joinLeafInputsHashMap, internalEdges,
-                    joinOps, canTransform);
+            getJoinOpsAndLeafInputs(nextOp.getValue(), emptyTupleAndDataSourceOps, joinLeafInputsHashMap,
+                    dataSourceEmptyTupleHashMap, internalEdges, joinOps, canTransform);
         }
         if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) {
             joinOps.add(op);
             // follow the inputs and see if they reach a datascan operator
             for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
                 //Mutable<String> leafNodeVar = new MutableObject<String>();
-                leafNodeVar = null;
+                emptyTupleSourceOp = null;
                 containsLeafNodeOnly(nextOp.getValue());
-                if (leafNodeVar == null) {
+                if (emptyTupleSourceOp == null) { // This means that we did not find an emptyTupleSourceOp operator. Could be an internal edge
                     if (nextOp.getValue().getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
                         if (OnlyOneAssign(nextOp)) {
                             // Currently will handle only assign statement and nothing else in an internal Edge.
@@ -128,8 +134,9 @@
                         }
                     }
                 } else {
-                    dataSourceScanVars.add(leafNodeVar);
-                    joinLeafInputsHashMap.put(leafNodeVar, nextOp.getValue()); // should not need both leafS! get rid of one later
+                    emptyTupleAndDataSourceOps.add(new Pair<>(emptyTupleSourceOp, dataSourceOp));
+                    joinLeafInputsHashMap.put(emptyTupleSourceOp, nextOp.getValue());
+                    dataSourceEmptyTupleHashMap.put(dataSourceOp, emptyTupleSourceOp);
                 }
             }
         }
@@ -137,7 +144,7 @@
 
     // we have to move the inputs in op around so that they match the tree structure in pn
     void getNewTree(ILogicalOperator root, JoinEnumCtx jeCtx, PlanNode plan,
-            HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
             int joinNumber) {
 
         List<PlanNode> allPlans = jeCtx.getAllPlans();
@@ -161,7 +168,7 @@
         }
 
         if (leftIndex <= size) { // leaf
-            ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getDataSourceScanOp());
+            ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getEmptyTupleSourceOp());
             joinOp.getInputs().get(0).setValue(leftInput);
         } else { // join
             totalNumberOfJoins++;
@@ -172,7 +179,7 @@
         }
 
         if (rightIndex <= size) { // leaf
-            ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getDataSourceScanOp());
+            ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getEmptyTupleSourceOp());
             joinOp.getInputs().get(1).setValue(rightInput);
         } else { // join
             totalNumberOfJoins++;
@@ -204,12 +211,13 @@
         System.out.println("---------------------------- ");
     }
 
-    void printLeafPlans(IPlanPrettyPrinter pp, HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap)
-            throws AlgebricksException {
-        Iterator<Map.Entry<DataSourceScanOperator, ILogicalOperator>> li = joinLeafInputsHashMap.entrySet().iterator();
+    void printLeafPlans(IPlanPrettyPrinter pp,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap) throws AlgebricksException {
+        Iterator<Map.Entry<EmptyTupleSourceOperator, ILogicalOperator>> li =
+                joinLeafInputsHashMap.entrySet().iterator();
         int i = 0;
         while (li.hasNext()) {
-            Map.Entry<DataSourceScanOperator, ILogicalOperator> pair = li.next();
+            Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> pair = li.next();
             ILogicalOperator element = pair.getValue();
             printPlan(pp, (AbstractLogicalOperator) element, "Printing Leaf Input" + i);
             i++;
@@ -233,8 +241,10 @@
 
         List<ILogicalOperator> joinOps = new ArrayList<>();
         List<ILogicalOperator> internalEdges = new ArrayList<>();
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
-        List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
+        //List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+        List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps = new ArrayList<>();
+        HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap = new HashMap<>();
         // The data scan operators. Will be in the order of the from clause.
         // Important for position ordering when assigning bits to join expressions.
 
@@ -245,13 +255,14 @@
         printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
 
         //findDataSourceScanVars(op, dataSourceScanVars, canTransform);
-        getJoinOpsAndLeafInputs(op, dataSourceScanOps, joinLeafInputsHashMap, internalEdges, joinOps, canTransform);
+        getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, dataSourceEmptyTupleHashMap,
+                internalEdges, joinOps, canTransform);
 
         if (canTransform.isFalse()) {
             return false;
         }
 
-        if (dataSourceScanOps.size() != joinLeafInputsHashMap.size())
+        if (emptyTupleAndDataSourceOps.size() != joinLeafInputsHashMap.size())
             return false; // if this happens, something in the input plan is not acceptable to the new code.
 
         printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan2");
@@ -264,13 +275,13 @@
             return false;
         }
 
-        int numberOfFromTerms = dataSourceScanOps.size();
+        int numberOfFromTerms = emptyTupleAndDataSourceOps.size();
 
         // jsArray, allPlans, joinConditions, cardHints are initialized
         // in the JoinEnumCtx. The jeCtx is attached to each joinStruct and planNode
         // also, so they have access to the context when needed.
-        JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, dataSourceScanOps,
-                joinLeafInputsHashMap, internalEdges, joinOps, cardHints, context);
+        JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, emptyTupleAndDataSourceOps,
+                joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps, cardHints, context);
         JoinStruct[] jsArray = jeCtx.getJsArray(); // will not use [0] element;
         JoinStruct js = jsArray[0]; // jsArray[0] is not used for join enumeration,
                                     // only used to call member method enumerateJoins()
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
index 1b6196b..3084c41 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
@@ -6,10 +6,12 @@
 
 import org.apache.asterix.common.config.CompilerProperties;
 import org.apache.asterix.metadata.declared.MetadataProvider;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
 import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
 import org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
 
@@ -19,8 +21,10 @@
     JoinStruct[] jsArray; // array of all join structs
     int jsArraySize;
     CardHints cardHints; // cardinality hints
-    List<DataSourceScanOperator> dataSourceScanOps;
-    HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap;
+    //List<DataSourceScanOperator> dataSourceScanOps;
+    List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps;
+    HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap;
+    HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap;
     List<ILogicalOperator> internalEdges;
     List<ILogicalOperator> joinOps;
     ILogicalOperator localJoinOp; // used in nestedLoopsApplicable code.
@@ -35,8 +39,9 @@
     HashMap<String, Integer> sizeMap = new HashMap<>();
 
     public JoinEnumCtx(AbstractLogicalOperator op, int numberOfFromTerms,
-            List<DataSourceScanOperator> dataSourceScanOps,
-            HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+            List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+            HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+            HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
             List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, CardHints cardHints,
             IOptimizationContext context) {
         this.joinConditions = new ArrayList<>();
@@ -53,8 +58,10 @@
         this.optCtx = context;
         this.physOptConfig = context.getPhysicalOptimizationConfig();
 
-        this.dataSourceScanOps = dataSourceScanOps;
+        //this.dataSourceScanOps = dataSourceScanOps;
+        this.emptyTupleAndDataSourceOps = emptyTupleAndDataSourceOps;
         this.joinLeafInputsHashMap = joinLeafInputsHashMap;
+        this.dataSourceEmptyTupleHashMap = dataSourceEmptyTupleHashMap;
         this.internalEdges = internalEdges;
         this.joinOps = joinOps;
         this.op = op;
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
index 6418a2c..9e44361 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
@@ -3,7 +3,11 @@
 import java.util.*;
 
 import org.apache.asterix.metadata.declared.DataSourceId;
+import org.apache.asterix.om.base.AOrderedList;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.constants.AsterixConstantValue;
 import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.optimizer.cost.Cost;
 import org.apache.asterix.optimizer.cost.CostMethods;
 import org.apache.asterix.optimizer.rules.EnumerateJoinsRule;
@@ -12,10 +16,17 @@
 import org.apache.commons.lang3.mutable.MutableInt;
 import org.apache.commons.lang3.mutable.MutableObject;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
 import org.apache.hyracks.algebricks.core.algebra.base.*;
 import org.apache.hyracks.algebricks.core.algebra.expressions.*;
 import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.*;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
 import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
 import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
@@ -54,8 +65,8 @@
     List<Integer> planIndexesArray; // indexes into the PlanNode array in enumerateJoins
     int jsIndex, level, highestDatasetId;
     List<Integer> applicableJoinConditions;
-    DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationsjip between the LVs and the dataSourceScanOps and the leafInputs.
-
+    //DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
+    EmptyTupleSourceOperator correspondingEmptyTupleSourceOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
     private final double CARDMAX = 1.0e200;
 
     public JoinStruct(JoinEnumCtx JECtx) { //empty constructor. Will fill in all the fields in the code.
@@ -75,12 +86,13 @@
 
     ILogicalOperator findLeafInput(LogicalVariable dollarDollarVar, MutableInt position) throws AlgebricksException {
 
-        List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+        List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+                jeCtx.emptyTupleAndDataSourceOps;
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
 
-        for (int pos = 0; pos < dataSourceScanVars.size(); pos++) {
-            DataSourceScanOperator dataVar = dataSourceScanVars.get(pos);
-            ILogicalOperator op = joinLeafInputsHashMap.get(dataVar);
+        for (int pos = 0; pos < emptyTupleAndDataSourceOps.size(); pos++) {
+            EmptyTupleSourceOperator emptyOp = emptyTupleAndDataSourceOps.get(pos).getFirst();
+            ILogicalOperator op = joinLeafInputsHashMap.get(emptyOp);
             HashSet<LogicalVariable> vars = new HashSet<>();
             VariableUtilities.getLiveVariables(op, vars); // this is expensive to do. So store this once and reuse
             if (vars.contains(dollarDollarVar)) {
@@ -112,12 +124,12 @@
         }
         List<JoinCondition> joinConditions = jeCtx.getJoinConditions();
 
-        ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
-                BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
-
         if (newJoinConditions.size() == 1)
             return joinConditions.get(newJoinConditions.get(0)).joinCondition;
 
+        ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
+                BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
+
         for (int i = 0; i < newJoinConditions.size(); i++) { // Need to AND all the expressions.
             int joinNum = newJoinConditions.get(i);
             andExpr.getArguments().add(new MutableObject<>(joinConditions.get(joinNum).joinCondition));
@@ -155,7 +167,7 @@
         // We need to find out which one of these is the inner joinLeafInput. So for that get the joinLeafInput using innerJs
 
         ILogicalOperator innerLeafInput =
-                jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingDataSourceScanOp);
+                jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingEmptyTupleSourceOp);
         // This must equal one of the two joinLeafInputsHashMap found above. check for sanity!!
 
         if (innerLeafInput != joinLeafInput1 && innerLeafInput != joinLeafInput0)
@@ -676,12 +688,12 @@
         PlanNode pn = new PlanNode(jeCtx);
         pn.jsIndexes[0] = index;
         pn.datasetName = jsArray[index].datasetNames.get(0);
-        pn.correspondingDataSourceScanOp = jsArray[index].correspondingDataSourceScanOp;
+        pn.correspondingEmptyTupleSourceOp = jsArray[index].correspondingEmptyTupleSourceOp;
         pn.jsIndexes[1] = 0;
         pn.planIndexes[0] = pn.planIndexes[1] = 0; // There ane no plans below this plan.
         pn.opCost = CostMethods.costFullScan(jsArray[index].origCardinality, jsArray[index].size,
                 jsArray[index].cardinality, jsArray[index].size, blockSize,
-                jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now
+                jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now.
         pn.totalCost = pn.opCost;
         pn.card = jsArray[index].cardinality;
         pn.op = PhysicalOperatorTag.DATASOURCE_SCAN;
@@ -731,18 +743,19 @@
     }
 
     private int findJoinStructIndex(LogicalVariable lv) throws AlgebricksException {
-        List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+        List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+                jeCtx.emptyTupleAndDataSourceOps;
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
 
-        for (Map.Entry<DataSourceScanOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
+        for (Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
             ILogicalOperator joinLeafInput = mapElement.getValue();
             HashSet<LogicalVariable> vars = new HashSet<>();
             // this should get the variables from the inputs only, since the join condition is itself set to null
             VariableUtilities.getLiveVariables(joinLeafInput, vars);
             if (vars.contains(lv)) {
-                DataSourceScanOperator key = mapElement.getKey();
-                for (int i = 0; i < dataSourceScanVars.size(); i++) {
-                    if (key.equals(dataSourceScanVars.get(i))) {
+                EmptyTupleSourceOperator key = mapElement.getKey();
+                for (int i = 0; i < emptyTupleAndDataSourceOps.size(); i++) {
+                    if (key.equals(emptyTupleAndDataSourceOps.get(i).getFirst())) {
                         return i;
                     }
                 }
@@ -836,8 +849,12 @@
     private double getSelectivity(DataSourceScanOperator lv) throws AlgebricksException {
         double sel = 1.0; // safe to return 1 if there is no annotation
 
-        HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
-        ILogicalOperator op = joinLeafInputsHashMap.get(lv);
+        if (lv == null) {
+            return sel;
+        }
+        HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+        EmptyTupleSourceOperator etso = jeCtx.dataSourceEmptyTupleHashMap.get(lv);
+        ILogicalOperator op = joinLeafInputsHashMap.get(etso);
 
         // find all the selectOperators here.
 
@@ -1016,6 +1033,32 @@
         }
     }
 
+    private double findSize(ILogicalOperator op) {
+        if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN)
+            return 1.0;
+
+        if (op.getOperatorTag() == LogicalOperatorTag.UNNEST) {
+            UnnestOperator unnestOp = (UnnestOperator) op;
+            ILogicalExpression unnestExpr = unnestOp.getExpressionRef().getValue();
+            UnnestingFunctionCallExpression unnestingFuncExpr = (UnnestingFunctionCallExpression) unnestExpr;
+
+            if (unnestingFuncExpr.getFunctionIdentifier().equals(BuiltinFunctions.SCAN_COLLECTION))
+                if (unnestingFuncExpr.getArguments().get(0).getValue().getExpressionTag()
+                        .equals(LogicalExpressionTag.CONSTANT)) {
+                    ConstantExpression constantExpr =
+                            (ConstantExpression) unnestingFuncExpr.getArguments().get(0).getValue();
+                    AsterixConstantValue constantValue = (AsterixConstantValue) constantExpr.getValue();
+                    IAObject v = (IAObject) constantValue.getObject();
+                    if (v.getType().getTypeTag().equals(ATypeTag.ARRAY)) {
+                        AOrderedList array = (AOrderedList) v;
+                        return array.size();
+                    }
+                }
+        }
+
+        return 10.0; // just a guess
+    }
+
     // main entry point in this file
     public int enumerateJoins() throws AlgebricksException {
 
@@ -1040,16 +1083,24 @@
             //jsArray[i].jsIndex = i; Fill in jsIndex later
             jsArray[i].datasetBits = 1 << (i - 1);
             jsArray[i].datasetIndexes = new ArrayList<>(Collections.singleton(i));
-            //jsArray[i].datasetNames =
-            //new ArrayList<>(Collections.singleton(jeCtx.dataSourceScanVars.get(i - 1).toString().substring(2)));
-            DataSourceId id = (DataSourceId) jeCtx.dataSourceScanOps.get(i - 1).getDataSource().getId();
-            jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
-            jsArray[i].correspondingDataSourceScanOp = jeCtx.dataSourceScanOps.get(i - 1);
-            jsArray[i].origCardinality = findCardinality(i);
-            if (cardinality >= CARDMAX) // no hint available for this dataset
-                return -1;
-            jsArray[i].cardinality = jsArray[i].origCardinality * getSelectivity(jeCtx.dataSourceScanOps.get(i - 1)); // multiply by the respective predicate selectivities
 
+            if (jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond() != null) {
+                DataSourceId id =
+                        (DataSourceId) jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond().getDataSource().getId();
+                jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
+                jsArray[i].origCardinality = findCardinality(i);
+                jsArray[i].cardinality = jsArray[i].origCardinality
+                        * getSelectivity(jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond()); // multiply by the respective predicate selectivities
+            } else {
+                jsArray[i].datasetNames = new ArrayList<>(Collections.singleton("unnestOrAssign")); // could be unnest or assign
+                EmptyTupleSourceOperator ets = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
+                ILogicalOperator logOp = jeCtx.joinLeafInputsHashMap.get(ets);
+                jsArray[i].origCardinality = jsArray[i].cardinality = findSize(logOp);
+            }
+
+            if (jsArray[i].origCardinality >= CARDMAX)
+                return -1;
+            jsArray[i].correspondingEmptyTupleSourceOp = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
             jsArray[i].highestDatasetId = i;
             jsArray[i].level = 1;
 
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
index 5c74b72..1e4912f 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
@@ -4,6 +4,7 @@
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
 import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
 
 public class PlanNode {
     private static JoinEnumCtx jeCtx;
@@ -16,6 +17,7 @@
     PhysicalOperatorTag op;
     ILogicalExpression joinExpr;
     DataSourceScanOperator correspondingDataSourceScanOp;
+    EmptyTupleSourceOperator correspondingEmptyTupleSourceOp;
 
     public PlanNode(JoinEnumCtx JECtx) {
         jeCtx = JECtx;
@@ -47,6 +49,10 @@
         return correspondingDataSourceScanOp; // This applies only to singleDataSetPlans
     }
 
+    public EmptyTupleSourceOperator getEmptyTupleSourceOp() {
+        return correspondingEmptyTupleSourceOp; // This applies only to singleDataSetPlans
+    }
+
     public Cost getOpCost() {
         return opCost;
     }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: neo
Gerrit-Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
Gerrit-Change-Number: 15424
Gerrit-PatchSet: 1
Gerrit-Owner: Vijay Sarathy <vi...@couchbase.com>
Gerrit-MessageType: newchange