You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu> on 2022/02/24 01:16:24 UTC
Change in asterixdb[neo]: Latest changes 021422
From Vijay Sarathy <vi...@couchbase.com>:
Vijay Sarathy has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424 )
Change subject: Latest changes 021422
......................................................................
Latest changes 021422
Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
4 files changed, 140 insertions(+), 65 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/24/15424/1
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
index b5d83b9..f03896e 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
@@ -30,12 +30,14 @@
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableBoolean;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.*;
import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
import org.apache.hyracks.algebricks.core.config.AlgebricksConfig;
import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
@@ -48,7 +50,8 @@
public final class EnumerateJoinsRule implements IAlgebraicRewriteRule {
private int totalNumberOfJoins;
- DataSourceScanOperator leafNodeVar;
+ DataSourceScanOperator dataSourceOp;
+ EmptyTupleSourceOperator emptyTupleSourceOp;
List<ILogicalExpression> joinConditions = new ArrayList<>();
List<Boolean> joinConditionUsed = new ArrayList<>();
@@ -68,14 +71,15 @@
return;
}
+ if (op.getOperatorTag() == LogicalOperatorTag.EMPTYTUPLESOURCE) {
+ EmptyTupleSourceOperator emptyTupleSourceOperator = (EmptyTupleSourceOperator) op;
+ emptyTupleSourceOp = emptyTupleSourceOperator;
+ return;
+
+ }
if (op.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
DataSourceScanOperator dataSourceScanOperator = (DataSourceScanOperator) op;
- List<LogicalVariable> variables = dataSourceScanOperator.getVariables();
- //LogicalVariable lv = variables.get(1);
- //String vars = lv.toString();
- //leafNodeVar = vars.substring(2); // skip the $$
- leafNodeVar = dataSourceScanOperator;
- return;
+ dataSourceOp = dataSourceScanOperator;
}
for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
@@ -94,8 +98,10 @@
return true;
}
- void getJoinOpsAndLeafInputs(ILogicalOperator op, List<DataSourceScanOperator> dataSourceScanVars,
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+ void getJoinOpsAndLeafInputs(ILogicalOperator op,
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, MutableBoolean canTransform) {
if (canTransform.isFalse()) {
@@ -107,17 +113,17 @@
return;
}
for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
- getJoinOpsAndLeafInputs(nextOp.getValue(), dataSourceScanVars, joinLeafInputsHashMap, internalEdges,
- joinOps, canTransform);
+ getJoinOpsAndLeafInputs(nextOp.getValue(), emptyTupleAndDataSourceOps, joinLeafInputsHashMap,
+ dataSourceEmptyTupleHashMap, internalEdges, joinOps, canTransform);
}
if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) {
joinOps.add(op);
// follow the inputs and see if they reach a datascan operator
for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
//Mutable<String> leafNodeVar = new MutableObject<String>();
- leafNodeVar = null;
+ emptyTupleSourceOp = null;
containsLeafNodeOnly(nextOp.getValue());
- if (leafNodeVar == null) {
+ if (emptyTupleSourceOp == null) { // No EmptyTupleSourceOperator was found under this input, so it could be an internal edge.
if (nextOp.getValue().getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
if (OnlyOneAssign(nextOp)) {
// Currently will handle only assign statement and nothing else in an internal Edge.
@@ -128,8 +134,9 @@
}
}
} else {
- dataSourceScanVars.add(leafNodeVar);
- joinLeafInputsHashMap.put(leafNodeVar, nextOp.getValue()); // should not need both leafS! get rid of one later
+ emptyTupleAndDataSourceOps.add(new Pair<>(emptyTupleSourceOp, dataSourceOp));
+ joinLeafInputsHashMap.put(emptyTupleSourceOp, nextOp.getValue());
+ dataSourceEmptyTupleHashMap.put(dataSourceOp, emptyTupleSourceOp);
}
}
}
@@ -137,7 +144,7 @@
// we have to move the inputs in op around so that they match the tree structure in pn
void getNewTree(ILogicalOperator root, JoinEnumCtx jeCtx, PlanNode plan,
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
int joinNumber) {
List<PlanNode> allPlans = jeCtx.getAllPlans();
@@ -161,7 +168,7 @@
}
if (leftIndex <= size) { // leaf
- ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getDataSourceScanOp());
+ ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getEmptyTupleSourceOp());
joinOp.getInputs().get(0).setValue(leftInput);
} else { // join
totalNumberOfJoins++;
@@ -172,7 +179,7 @@
}
if (rightIndex <= size) { // leaf
- ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getDataSourceScanOp());
+ ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getEmptyTupleSourceOp());
joinOp.getInputs().get(1).setValue(rightInput);
} else { // join
totalNumberOfJoins++;
@@ -204,12 +211,13 @@
System.out.println("---------------------------- ");
}
- void printLeafPlans(IPlanPrettyPrinter pp, HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap)
- throws AlgebricksException {
- Iterator<Map.Entry<DataSourceScanOperator, ILogicalOperator>> li = joinLeafInputsHashMap.entrySet().iterator();
+ void printLeafPlans(IPlanPrettyPrinter pp,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap) throws AlgebricksException {
+ Iterator<Map.Entry<EmptyTupleSourceOperator, ILogicalOperator>> li =
+ joinLeafInputsHashMap.entrySet().iterator();
int i = 0;
while (li.hasNext()) {
- Map.Entry<DataSourceScanOperator, ILogicalOperator> pair = li.next();
+ Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> pair = li.next();
ILogicalOperator element = pair.getValue();
printPlan(pp, (AbstractLogicalOperator) element, "Printing Leaf Input" + i);
i++;
@@ -233,8 +241,10 @@
List<ILogicalOperator> joinOps = new ArrayList<>();
List<ILogicalOperator> internalEdges = new ArrayList<>();
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
- List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
+ //List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps = new ArrayList<>();
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap = new HashMap<>();
// The data scan operators. Will be in the order of the from clause.
// Important for position ordering when assigning bits to join expressions.
@@ -245,13 +255,14 @@
printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
//findDataSourceScanVars(op, dataSourceScanVars, canTransform);
- getJoinOpsAndLeafInputs(op, dataSourceScanOps, joinLeafInputsHashMap, internalEdges, joinOps, canTransform);
+ getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, dataSourceEmptyTupleHashMap,
+ internalEdges, joinOps, canTransform);
if (canTransform.isFalse()) {
return false;
}
- if (dataSourceScanOps.size() != joinLeafInputsHashMap.size())
+ if (emptyTupleAndDataSourceOps.size() != joinLeafInputsHashMap.size())
return false; // if this happens, something in the input plan is not acceptable to the new code.
printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan2");
@@ -264,13 +275,13 @@
return false;
}
- int numberOfFromTerms = dataSourceScanOps.size();
+ int numberOfFromTerms = emptyTupleAndDataSourceOps.size();
// jsArray, allPlans, joinConditions, cardHints are initialized
// in the JoinEnumCtx. The jeCtx is attached to each joinStruct and planNode
// also, so they have access to the context when needed.
- JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, dataSourceScanOps,
- joinLeafInputsHashMap, internalEdges, joinOps, cardHints, context);
+ JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, emptyTupleAndDataSourceOps,
+ joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps, cardHints, context);
JoinStruct[] jsArray = jeCtx.getJsArray(); // will not use [0] element;
JoinStruct js = jsArray[0]; // jsArray[0] is not used for join enumeration,
// only used to call member method enumerateJoins()
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
index 1b6196b..3084c41 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
@@ -6,10 +6,12 @@
import org.apache.asterix.common.config.CompilerProperties;
import org.apache.asterix.metadata.declared.MetadataProvider;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
import org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
@@ -19,8 +21,10 @@
JoinStruct[] jsArray; // array of all join structs
int jsArraySize;
CardHints cardHints; // cardinality hints
- List<DataSourceScanOperator> dataSourceScanOps;
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap;
+ //List<DataSourceScanOperator> dataSourceScanOps;
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps;
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap;
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap;
List<ILogicalOperator> internalEdges;
List<ILogicalOperator> joinOps;
ILogicalOperator localJoinOp; // used in nestedLoopsApplicable code.
@@ -35,8 +39,9 @@
HashMap<String, Integer> sizeMap = new HashMap<>();
public JoinEnumCtx(AbstractLogicalOperator op, int numberOfFromTerms,
- List<DataSourceScanOperator> dataSourceScanOps,
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, CardHints cardHints,
IOptimizationContext context) {
this.joinConditions = new ArrayList<>();
@@ -53,8 +58,10 @@
this.optCtx = context;
this.physOptConfig = context.getPhysicalOptimizationConfig();
- this.dataSourceScanOps = dataSourceScanOps;
+ //this.dataSourceScanOps = dataSourceScanOps;
+ this.emptyTupleAndDataSourceOps = emptyTupleAndDataSourceOps;
this.joinLeafInputsHashMap = joinLeafInputsHashMap;
+ this.dataSourceEmptyTupleHashMap = dataSourceEmptyTupleHashMap;
this.internalEdges = internalEdges;
this.joinOps = joinOps;
this.op = op;
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
index 6418a2c..9e44361 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
@@ -3,7 +3,11 @@
import java.util.*;
import org.apache.asterix.metadata.declared.DataSourceId;
+import org.apache.asterix.om.base.AOrderedList;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.constants.AsterixConstantValue;
import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.optimizer.cost.Cost;
import org.apache.asterix.optimizer.cost.CostMethods;
import org.apache.asterix.optimizer.rules.EnumerateJoinsRule;
@@ -12,10 +16,17 @@
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.*;
import org.apache.hyracks.algebricks.core.algebra.expressions.*;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.*;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
@@ -54,8 +65,8 @@
List<Integer> planIndexesArray; // indexes into the PlanNode array in enumerateJoins
int jsIndex, level, highestDatasetId;
List<Integer> applicableJoinConditions;
- DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationsjip between the LVs and the dataSourceScanOps and the leafInputs.
-
+ //DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
+ EmptyTupleSourceOperator correspondingEmptyTupleSourceOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
private final double CARDMAX = 1.0e200;
public JoinStruct(JoinEnumCtx JECtx) { //empty constructor. Will fill in all the fields in the code.
@@ -75,12 +86,13 @@
ILogicalOperator findLeafInput(LogicalVariable dollarDollarVar, MutableInt position) throws AlgebricksException {
- List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+ jeCtx.emptyTupleAndDataSourceOps;
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
- for (int pos = 0; pos < dataSourceScanVars.size(); pos++) {
- DataSourceScanOperator dataVar = dataSourceScanVars.get(pos);
- ILogicalOperator op = joinLeafInputsHashMap.get(dataVar);
+ for (int pos = 0; pos < emptyTupleAndDataSourceOps.size(); pos++) {
+ EmptyTupleSourceOperator emptyOp = emptyTupleAndDataSourceOps.get(pos).getFirst();
+ ILogicalOperator op = joinLeafInputsHashMap.get(emptyOp);
HashSet<LogicalVariable> vars = new HashSet<>();
VariableUtilities.getLiveVariables(op, vars); // this is expensive to do. So store this once and reuse
if (vars.contains(dollarDollarVar)) {
@@ -112,12 +124,12 @@
}
List<JoinCondition> joinConditions = jeCtx.getJoinConditions();
- ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
- BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
-
if (newJoinConditions.size() == 1)
return joinConditions.get(newJoinConditions.get(0)).joinCondition;
+ ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
+ BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
+
for (int i = 0; i < newJoinConditions.size(); i++) { // Need to AND all the expressions.
int joinNum = newJoinConditions.get(i);
andExpr.getArguments().add(new MutableObject<>(joinConditions.get(joinNum).joinCondition));
@@ -155,7 +167,7 @@
// We need to find out which one of these is the inner joinLeafInput. So for that get the joinLeafInput using innerJs
ILogicalOperator innerLeafInput =
- jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingDataSourceScanOp);
+ jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingEmptyTupleSourceOp);
// This must equal one of the two joinLeafInputsHashMap found above. check for sanity!!
if (innerLeafInput != joinLeafInput1 && innerLeafInput != joinLeafInput0)
@@ -676,12 +688,12 @@
PlanNode pn = new PlanNode(jeCtx);
pn.jsIndexes[0] = index;
pn.datasetName = jsArray[index].datasetNames.get(0);
- pn.correspondingDataSourceScanOp = jsArray[index].correspondingDataSourceScanOp;
+ pn.correspondingEmptyTupleSourceOp = jsArray[index].correspondingEmptyTupleSourceOp;
pn.jsIndexes[1] = 0;
pn.planIndexes[0] = pn.planIndexes[1] = 0; // There ane no plans below this plan.
pn.opCost = CostMethods.costFullScan(jsArray[index].origCardinality, jsArray[index].size,
jsArray[index].cardinality, jsArray[index].size, blockSize,
- jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now
+ jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now.
pn.totalCost = pn.opCost;
pn.card = jsArray[index].cardinality;
pn.op = PhysicalOperatorTag.DATASOURCE_SCAN;
@@ -731,18 +743,19 @@
}
private int findJoinStructIndex(LogicalVariable lv) throws AlgebricksException {
- List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+ jeCtx.emptyTupleAndDataSourceOps;
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
- for (Map.Entry<DataSourceScanOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
+ for (Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
ILogicalOperator joinLeafInput = mapElement.getValue();
HashSet<LogicalVariable> vars = new HashSet<>();
// this should get the variables from the inputs only, since the join condition is itself set to null
VariableUtilities.getLiveVariables(joinLeafInput, vars);
if (vars.contains(lv)) {
- DataSourceScanOperator key = mapElement.getKey();
- for (int i = 0; i < dataSourceScanVars.size(); i++) {
- if (key.equals(dataSourceScanVars.get(i))) {
+ EmptyTupleSourceOperator key = mapElement.getKey();
+ for (int i = 0; i < emptyTupleAndDataSourceOps.size(); i++) {
+ if (key.equals(emptyTupleAndDataSourceOps.get(i).getFirst())) {
return i;
}
}
@@ -836,8 +849,12 @@
private double getSelectivity(DataSourceScanOperator lv) throws AlgebricksException {
double sel = 1.0; // safe to return 1 if there is no annotation
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
- ILogicalOperator op = joinLeafInputsHashMap.get(lv);
+ if (lv == null) {
+ return sel;
+ }
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+ EmptyTupleSourceOperator etso = jeCtx.dataSourceEmptyTupleHashMap.get(lv);
+ ILogicalOperator op = joinLeafInputsHashMap.get(etso);
// find all the selectOperators here.
@@ -1016,6 +1033,32 @@
}
}
+ private double findSize(ILogicalOperator op) {
+ if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN)
+ return 1.0;
+
+ if (op.getOperatorTag() == LogicalOperatorTag.UNNEST) {
+ UnnestOperator unnestOp = (UnnestOperator) op;
+ ILogicalExpression unnestExpr = unnestOp.getExpressionRef().getValue();
+ UnnestingFunctionCallExpression unnestingFuncExpr = (UnnestingFunctionCallExpression) unnestExpr;
+
+ if (unnestingFuncExpr.getFunctionIdentifier().equals(BuiltinFunctions.SCAN_COLLECTION))
+ if (unnestingFuncExpr.getArguments().get(0).getValue().getExpressionTag()
+ .equals(LogicalExpressionTag.CONSTANT)) {
+ ConstantExpression constantExpr =
+ (ConstantExpression) unnestingFuncExpr.getArguments().get(0).getValue();
+ AsterixConstantValue constantValue = (AsterixConstantValue) constantExpr.getValue();
+ IAObject v = (IAObject) constantValue.getObject();
+ if (v.getType().getTypeTag().equals(ATypeTag.ARRAY)) {
+ AOrderedList array = (AOrderedList) v;
+ return array.size();
+ }
+ }
+ }
+
+ return 10.0; // just a guess
+ }
+
// main entry point in this file
public int enumerateJoins() throws AlgebricksException {
@@ -1040,16 +1083,24 @@
//jsArray[i].jsIndex = i; Fill in jsIndex later
jsArray[i].datasetBits = 1 << (i - 1);
jsArray[i].datasetIndexes = new ArrayList<>(Collections.singleton(i));
- //jsArray[i].datasetNames =
- //new ArrayList<>(Collections.singleton(jeCtx.dataSourceScanVars.get(i - 1).toString().substring(2)));
- DataSourceId id = (DataSourceId) jeCtx.dataSourceScanOps.get(i - 1).getDataSource().getId();
- jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
- jsArray[i].correspondingDataSourceScanOp = jeCtx.dataSourceScanOps.get(i - 1);
- jsArray[i].origCardinality = findCardinality(i);
- if (cardinality >= CARDMAX) // no hint available for this dataset
- return -1;
- jsArray[i].cardinality = jsArray[i].origCardinality * getSelectivity(jeCtx.dataSourceScanOps.get(i - 1)); // multiply by the respective predicate selectivities
+ if (jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond() != null) {
+ DataSourceId id =
+ (DataSourceId) jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond().getDataSource().getId();
+ jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
+ jsArray[i].origCardinality = findCardinality(i);
+ jsArray[i].cardinality = jsArray[i].origCardinality
+ * getSelectivity(jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond()); // multiply by the respective predicate selectivities
+ } else {
+ jsArray[i].datasetNames = new ArrayList<>(Collections.singleton("unnestOrAssign")); // could be unnest or assign
+ EmptyTupleSourceOperator ets = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
+ ILogicalOperator logOp = jeCtx.joinLeafInputsHashMap.get(ets);
+ jsArray[i].origCardinality = jsArray[i].cardinality = findSize(logOp);
+ }
+
+ if (jsArray[i].origCardinality >= CARDMAX)
+ return -1;
+ jsArray[i].correspondingEmptyTupleSourceOp = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
jsArray[i].highestDatasetId = i;
jsArray[i].level = 1;
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
index 5c74b72..1e4912f 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
@@ -4,6 +4,7 @@
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
public class PlanNode {
private static JoinEnumCtx jeCtx;
@@ -16,6 +17,7 @@
PhysicalOperatorTag op;
ILogicalExpression joinExpr;
DataSourceScanOperator correspondingDataSourceScanOp;
+ EmptyTupleSourceOperator correspondingEmptyTupleSourceOp;
public PlanNode(JoinEnumCtx JECtx) {
jeCtx = JECtx;
@@ -47,6 +49,10 @@
return correspondingDataSourceScanOp; // This applies only to singleDataSetPlans
}
+ public EmptyTupleSourceOperator getEmptyTupleSourceOp() {
+ return correspondingEmptyTupleSourceOp; // This applies only to singleDataSetPlans
+ }
+
public Cost getOpCost() {
return opCost;
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: neo
Gerrit-Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
Gerrit-Change-Number: 15424
Gerrit-PatchSet: 1
Gerrit-Owner: Vijay Sarathy <vi...@couchbase.com>
Gerrit-MessageType: newchange
Change in asterixdb[neo]: Latest changes 021422
Posted by AsterixDB Code Review <do...@asterix-gerrit.ics.uci.edu>.
From Vijay Sarathy <vi...@couchbase.com>:
Vijay Sarathy has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424 )
Change subject: Latest changes 021422
......................................................................
Latest changes 021422
Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
---
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
M asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
4 files changed, 140 insertions(+), 65 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/24/15424/1
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
index b5d83b9..f03896e 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/EnumerateJoinsRule.java
@@ -30,12 +30,14 @@
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableBoolean;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.*;
import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
import org.apache.hyracks.algebricks.core.config.AlgebricksConfig;
import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
@@ -48,7 +50,8 @@
public final class EnumerateJoinsRule implements IAlgebraicRewriteRule {
private int totalNumberOfJoins;
- DataSourceScanOperator leafNodeVar;
+ DataSourceScanOperator dataSourceOp;
+ EmptyTupleSourceOperator emptyTupleSourceOp;
List<ILogicalExpression> joinConditions = new ArrayList<>();
List<Boolean> joinConditionUsed = new ArrayList<>();
@@ -68,14 +71,15 @@
return;
}
+ if (op.getOperatorTag() == LogicalOperatorTag.EMPTYTUPLESOURCE) {
+ EmptyTupleSourceOperator emptyTupleSourceOperator = (EmptyTupleSourceOperator) op;
+ emptyTupleSourceOp = emptyTupleSourceOperator;
+ return;
+
+ }
if (op.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) {
DataSourceScanOperator dataSourceScanOperator = (DataSourceScanOperator) op;
- List<LogicalVariable> variables = dataSourceScanOperator.getVariables();
- //LogicalVariable lv = variables.get(1);
- //String vars = lv.toString();
- //leafNodeVar = vars.substring(2); // skip the $$
- leafNodeVar = dataSourceScanOperator;
- return;
+ dataSourceOp = dataSourceScanOperator;
}
for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
@@ -94,8 +98,10 @@
return true;
}
- void getJoinOpsAndLeafInputs(ILogicalOperator op, List<DataSourceScanOperator> dataSourceScanVars,
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+ void getJoinOpsAndLeafInputs(ILogicalOperator op,
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, MutableBoolean canTransform) {
if (canTransform.isFalse()) {
@@ -107,17 +113,17 @@
return;
}
for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
- getJoinOpsAndLeafInputs(nextOp.getValue(), dataSourceScanVars, joinLeafInputsHashMap, internalEdges,
- joinOps, canTransform);
+ getJoinOpsAndLeafInputs(nextOp.getValue(), emptyTupleAndDataSourceOps, joinLeafInputsHashMap,
+ dataSourceEmptyTupleHashMap, internalEdges, joinOps, canTransform);
}
if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) {
joinOps.add(op);
// follow the inputs and see if they reach a datascan operator
for (Mutable<ILogicalOperator> nextOp : op.getInputs()) {
//Mutable<String> leafNodeVar = new MutableObject<String>();
- leafNodeVar = null;
+ emptyTupleSourceOp = null;
containsLeafNodeOnly(nextOp.getValue());
- if (leafNodeVar == null) {
+ if (emptyTupleSourceOp == null) { // No EmptyTupleSourceOperator was found under this input, so it could be an internal edge.
if (nextOp.getValue().getOperatorTag() != LogicalOperatorTag.INNERJOIN) {
if (OnlyOneAssign(nextOp)) {
// Currently will handle only assign statement and nothing else in an internal Edge.
@@ -128,8 +134,9 @@
}
}
} else {
- dataSourceScanVars.add(leafNodeVar);
- joinLeafInputsHashMap.put(leafNodeVar, nextOp.getValue()); // should not need both leafS! get rid of one later
+ emptyTupleAndDataSourceOps.add(new Pair<>(emptyTupleSourceOp, dataSourceOp));
+ joinLeafInputsHashMap.put(emptyTupleSourceOp, nextOp.getValue());
+ dataSourceEmptyTupleHashMap.put(dataSourceOp, emptyTupleSourceOp);
}
}
}
@@ -137,7 +144,7 @@
// we have to move the inputs in op around so that they match the tree structure in pn
void getNewTree(ILogicalOperator root, JoinEnumCtx jeCtx, PlanNode plan,
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, List<ILogicalOperator> joinOps,
int joinNumber) {
List<PlanNode> allPlans = jeCtx.getAllPlans();
@@ -161,7 +168,7 @@
}
if (leftIndex <= size) { // leaf
- ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getDataSourceScanOp());
+ ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getEmptyTupleSourceOp());
joinOp.getInputs().get(0).setValue(leftInput);
} else { // join
totalNumberOfJoins++;
@@ -172,7 +179,7 @@
}
if (rightIndex <= size) { // leaf
- ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getDataSourceScanOp());
+ ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getEmptyTupleSourceOp());
joinOp.getInputs().get(1).setValue(rightInput);
} else { // join
totalNumberOfJoins++;
@@ -204,12 +211,13 @@
System.out.println("---------------------------- ");
}
- void printLeafPlans(IPlanPrettyPrinter pp, HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap)
- throws AlgebricksException {
- Iterator<Map.Entry<DataSourceScanOperator, ILogicalOperator>> li = joinLeafInputsHashMap.entrySet().iterator();
+ void printLeafPlans(IPlanPrettyPrinter pp,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap) throws AlgebricksException {
+ Iterator<Map.Entry<EmptyTupleSourceOperator, ILogicalOperator>> li =
+ joinLeafInputsHashMap.entrySet().iterator();
int i = 0;
while (li.hasNext()) {
- Map.Entry<DataSourceScanOperator, ILogicalOperator> pair = li.next();
+ Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> pair = li.next();
ILogicalOperator element = pair.getValue();
printPlan(pp, (AbstractLogicalOperator) element, "Printing Leaf Input" + i);
i++;
@@ -233,8 +241,10 @@
List<ILogicalOperator> joinOps = new ArrayList<>();
List<ILogicalOperator> internalEdges = new ArrayList<>();
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
- List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>();
+ //List<DataSourceScanOperator> dataSourceScanOps = new ArrayList<>();
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps = new ArrayList<>();
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap = new HashMap<>();
// The data scan operators. Will be in the order of the from clause.
// Important for position ordering when assigning bits to join expressions.
@@ -245,13 +255,14 @@
printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1");
//findDataSourceScanVars(op, dataSourceScanVars, canTransform);
- getJoinOpsAndLeafInputs(op, dataSourceScanOps, joinLeafInputsHashMap, internalEdges, joinOps, canTransform);
+ getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, dataSourceEmptyTupleHashMap,
+ internalEdges, joinOps, canTransform);
if (canTransform.isFalse()) {
return false;
}
- if (dataSourceScanOps.size() != joinLeafInputsHashMap.size())
+ if (emptyTupleAndDataSourceOps.size() != joinLeafInputsHashMap.size())
return false; // if this happens, something in the input plan is not acceptable to the new code.
printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan2");
@@ -264,13 +275,13 @@
return false;
}
- int numberOfFromTerms = dataSourceScanOps.size();
+ int numberOfFromTerms = emptyTupleAndDataSourceOps.size();
// jsArray, allPlans, joinConditions, cardHints are initialized
// in the JoinEnumCtx. The jeCtx is attached to each joinStruct and planNode
// also, so they have access to the context when needed.
- JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, dataSourceScanOps,
- joinLeafInputsHashMap, internalEdges, joinOps, cardHints, context);
+ JoinEnumCtx jeCtx = new JoinEnumCtx((AbstractLogicalOperator) op, numberOfFromTerms, emptyTupleAndDataSourceOps,
+ joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps, cardHints, context);
JoinStruct[] jsArray = jeCtx.getJsArray(); // will not use [0] element;
JoinStruct js = jsArray[0]; // jsArray[0] is not used for join enumeration,
// only used to call member method enumerateJoins()
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
index 1b6196b..3084c41 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinEnumCtx.java
@@ -6,10 +6,12 @@
import org.apache.asterix.common.config.CompilerProperties;
import org.apache.asterix.metadata.declared.MetadataProvider;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import org.apache.hyracks.algebricks.core.rewriter.base.CardHints;
import org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
@@ -19,8 +21,10 @@
JoinStruct[] jsArray; // array of all join structs
int jsArraySize;
CardHints cardHints; // cardinality hints
- List<DataSourceScanOperator> dataSourceScanOps;
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap;
+ //List<DataSourceScanOperator> dataSourceScanOps;
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps;
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap;
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap;
List<ILogicalOperator> internalEdges;
List<ILogicalOperator> joinOps;
ILogicalOperator localJoinOp; // used in nestedLoopsApplicable code.
@@ -35,8 +39,9 @@
HashMap<String, Integer> sizeMap = new HashMap<>();
public JoinEnumCtx(AbstractLogicalOperator op, int numberOfFromTerms,
- List<DataSourceScanOperator> dataSourceScanOps,
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap,
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps,
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap,
+ HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap,
List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps, CardHints cardHints,
IOptimizationContext context) {
this.joinConditions = new ArrayList<>();
@@ -53,8 +58,10 @@
this.optCtx = context;
this.physOptConfig = context.getPhysicalOptimizationConfig();
- this.dataSourceScanOps = dataSourceScanOps;
+ //this.dataSourceScanOps = dataSourceScanOps;
+ this.emptyTupleAndDataSourceOps = emptyTupleAndDataSourceOps;
this.joinLeafInputsHashMap = joinLeafInputsHashMap;
+ this.dataSourceEmptyTupleHashMap = dataSourceEmptyTupleHashMap;
this.internalEdges = internalEdges;
this.joinOps = joinOps;
this.op = op;
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
index 6418a2c..9e44361 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/JoinStruct.java
@@ -3,7 +3,11 @@
import java.util.*;
import org.apache.asterix.metadata.declared.DataSourceId;
+import org.apache.asterix.om.base.AOrderedList;
+import org.apache.asterix.om.base.IAObject;
+import org.apache.asterix.om.constants.AsterixConstantValue;
import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.optimizer.cost.Cost;
import org.apache.asterix.optimizer.cost.CostMethods;
import org.apache.asterix.optimizer.rules.EnumerateJoinsRule;
@@ -12,10 +16,17 @@
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.*;
import org.apache.hyracks.algebricks.core.algebra.expressions.*;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
-import org.apache.hyracks.algebricks.core.algebra.operators.logical.*;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter;
import org.apache.hyracks.algebricks.core.algebra.util.OperatorManipulationUtil;
@@ -54,8 +65,8 @@
List<Integer> planIndexesArray; // indexes into the PlanNode array in enumerateJoins
int jsIndex, level, highestDatasetId;
List<Integer> applicableJoinConditions;
- DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationsjip between the LVs and the dataSourceScanOps and the leafInputs.
-
+ //DataSourceScanOperator correspondingDataSourceScanOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
+ EmptyTupleSourceOperator correspondingEmptyTupleSourceOp; // There is a 1-1 relationship between the LVs and the dataSourceScanOps and the leafInputs.
private final double CARDMAX = 1.0e200;
public JoinStruct(JoinEnumCtx JECtx) { //empty constructor. Will fill in all the fields in the code.
@@ -75,12 +86,13 @@
ILogicalOperator findLeafInput(LogicalVariable dollarDollarVar, MutableInt position) throws AlgebricksException {
- List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+ jeCtx.emptyTupleAndDataSourceOps;
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
- for (int pos = 0; pos < dataSourceScanVars.size(); pos++) {
- DataSourceScanOperator dataVar = dataSourceScanVars.get(pos);
- ILogicalOperator op = joinLeafInputsHashMap.get(dataVar);
+ for (int pos = 0; pos < emptyTupleAndDataSourceOps.size(); pos++) {
+ EmptyTupleSourceOperator emptyOp = emptyTupleAndDataSourceOps.get(pos).getFirst();
+ ILogicalOperator op = joinLeafInputsHashMap.get(emptyOp);
HashSet<LogicalVariable> vars = new HashSet<>();
VariableUtilities.getLiveVariables(op, vars); // this is expensive to do. So store this once and reuse
if (vars.contains(dollarDollarVar)) {
@@ -112,12 +124,12 @@
}
List<JoinCondition> joinConditions = jeCtx.getJoinConditions();
- ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
- BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
-
if (newJoinConditions.size() == 1)
return joinConditions.get(newJoinConditions.get(0)).joinCondition;
+ ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression(
+ BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND));
+
for (int i = 0; i < newJoinConditions.size(); i++) { // Need to AND all the expressions.
int joinNum = newJoinConditions.get(i);
andExpr.getArguments().add(new MutableObject<>(joinConditions.get(joinNum).joinCondition));
@@ -155,7 +167,7 @@
// We need to find out which one of these is the inner joinLeafInput. So for that get the joinLeafInput using innerJs
ILogicalOperator innerLeafInput =
- jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingDataSourceScanOp);
+ jeCtx.joinLeafInputsHashMap.get(jeCtx.jsArray[innerJs].correspondingEmptyTupleSourceOp);
// This must equal one of the two joinLeafInputsHashMap found above. check for sanity!!
if (innerLeafInput != joinLeafInput1 && innerLeafInput != joinLeafInput0)
@@ -676,12 +688,12 @@
PlanNode pn = new PlanNode(jeCtx);
pn.jsIndexes[0] = index;
pn.datasetName = jsArray[index].datasetNames.get(0);
- pn.correspondingDataSourceScanOp = jsArray[index].correspondingDataSourceScanOp;
+ pn.correspondingEmptyTupleSourceOp = jsArray[index].correspondingEmptyTupleSourceOp;
pn.jsIndexes[1] = 0;
pn.planIndexes[0] = pn.planIndexes[1] = 0; // There ane no plans below this plan.
pn.opCost = CostMethods.costFullScan(jsArray[index].origCardinality, jsArray[index].size,
jsArray[index].cardinality, jsArray[index].size, blockSize,
- jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now
+ jeCtx.optCtx.getComputationNodeDomain().cardinality()); // temp. cost for now.
pn.totalCost = pn.opCost;
pn.card = jsArray[index].cardinality;
pn.op = PhysicalOperatorTag.DATASOURCE_SCAN;
@@ -731,18 +743,19 @@
}
private int findJoinStructIndex(LogicalVariable lv) throws AlgebricksException {
- List<DataSourceScanOperator> dataSourceScanVars = jeCtx.dataSourceScanOps;
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+ List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps =
+ jeCtx.emptyTupleAndDataSourceOps;
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
- for (Map.Entry<DataSourceScanOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
+ for (Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) {
ILogicalOperator joinLeafInput = mapElement.getValue();
HashSet<LogicalVariable> vars = new HashSet<>();
// this should get the variables from the inputs only, since the join condition is itself set to null
VariableUtilities.getLiveVariables(joinLeafInput, vars);
if (vars.contains(lv)) {
- DataSourceScanOperator key = mapElement.getKey();
- for (int i = 0; i < dataSourceScanVars.size(); i++) {
- if (key.equals(dataSourceScanVars.get(i))) {
+ EmptyTupleSourceOperator key = mapElement.getKey();
+ for (int i = 0; i < emptyTupleAndDataSourceOps.size(); i++) {
+ if (key.equals(emptyTupleAndDataSourceOps.get(i).getFirst())) {
return i;
}
}
@@ -836,8 +849,12 @@
private double getSelectivity(DataSourceScanOperator lv) throws AlgebricksException {
double sel = 1.0; // safe to return 1 if there is no annotation
- HashMap<DataSourceScanOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
- ILogicalOperator op = joinLeafInputsHashMap.get(lv);
+ if (lv == null) {
+ return sel;
+ }
+ HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = jeCtx.joinLeafInputsHashMap;
+ EmptyTupleSourceOperator etso = jeCtx.dataSourceEmptyTupleHashMap.get(lv);
+ ILogicalOperator op = joinLeafInputsHashMap.get(etso);
// find all the selectOperators here.
@@ -1016,6 +1033,32 @@
}
}
+ private double findSize(ILogicalOperator op) {
+ if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN)
+ return 1.0;
+
+ if (op.getOperatorTag() == LogicalOperatorTag.UNNEST) {
+ UnnestOperator unnestOp = (UnnestOperator) op;
+ ILogicalExpression unnestExpr = unnestOp.getExpressionRef().getValue();
+ UnnestingFunctionCallExpression unnestingFuncExpr = (UnnestingFunctionCallExpression) unnestExpr;
+
+ if (unnestingFuncExpr.getFunctionIdentifier().equals(BuiltinFunctions.SCAN_COLLECTION))
+ if (unnestingFuncExpr.getArguments().get(0).getValue().getExpressionTag()
+ .equals(LogicalExpressionTag.CONSTANT)) {
+ ConstantExpression constantExpr =
+ (ConstantExpression) unnestingFuncExpr.getArguments().get(0).getValue();
+ AsterixConstantValue constantValue = (AsterixConstantValue) constantExpr.getValue();
+ IAObject v = (IAObject) constantValue.getObject();
+ if (v.getType().getTypeTag().equals(ATypeTag.ARRAY)) {
+ AOrderedList array = (AOrderedList) v;
+ return array.size();
+ }
+ }
+ }
+
+ return 10.0; // just a guess, used when the size cannot be read from a constant array
+ }
+
// main entry point in this file
public int enumerateJoins() throws AlgebricksException {
@@ -1040,16 +1083,24 @@
//jsArray[i].jsIndex = i; Fill in jsIndex later
jsArray[i].datasetBits = 1 << (i - 1);
jsArray[i].datasetIndexes = new ArrayList<>(Collections.singleton(i));
- //jsArray[i].datasetNames =
- //new ArrayList<>(Collections.singleton(jeCtx.dataSourceScanVars.get(i - 1).toString().substring(2)));
- DataSourceId id = (DataSourceId) jeCtx.dataSourceScanOps.get(i - 1).getDataSource().getId();
- jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
- jsArray[i].correspondingDataSourceScanOp = jeCtx.dataSourceScanOps.get(i - 1);
- jsArray[i].origCardinality = findCardinality(i);
- if (cardinality >= CARDMAX) // no hint available for this dataset
- return -1;
- jsArray[i].cardinality = jsArray[i].origCardinality * getSelectivity(jeCtx.dataSourceScanOps.get(i - 1)); // multiply by the respective predicate selectivities
+ if (jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond() != null) {
+ DataSourceId id =
+ (DataSourceId) jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond().getDataSource().getId();
+ jsArray[i].datasetNames = new ArrayList<>(Collections.singleton(id.getDatasourceName()));
+ jsArray[i].origCardinality = findCardinality(i);
+ jsArray[i].cardinality = jsArray[i].origCardinality
+ * getSelectivity(jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getSecond()); // multiply by the respective predicate selectivities
+ } else {
+ jsArray[i].datasetNames = new ArrayList<>(Collections.singleton("unnestOrAssign")); // could be unnest or assign
+ EmptyTupleSourceOperator ets = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
+ ILogicalOperator logOp = jeCtx.joinLeafInputsHashMap.get(ets);
+ jsArray[i].origCardinality = jsArray[i].cardinality = findSize(logOp);
+ }
+
+ if (jsArray[i].origCardinality >= CARDMAX)
+ return -1;
+ jsArray[i].correspondingEmptyTupleSourceOp = jeCtx.emptyTupleAndDataSourceOps.get(i - 1).getFirst();
jsArray[i].highestDatasetId = i;
jsArray[i].level = 1;
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
index 5c74b72..1e4912f 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/util/PlanNode.java
@@ -4,6 +4,7 @@
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
public class PlanNode {
private static JoinEnumCtx jeCtx;
@@ -16,6 +17,7 @@
PhysicalOperatorTag op;
ILogicalExpression joinExpr;
DataSourceScanOperator correspondingDataSourceScanOp;
+ EmptyTupleSourceOperator correspondingEmptyTupleSourceOp;
public PlanNode(JoinEnumCtx JECtx) {
jeCtx = JECtx;
@@ -47,6 +49,10 @@
return correspondingDataSourceScanOp; // This applies only to singleDataSetPlans
}
+ public EmptyTupleSourceOperator getEmptyTupleSourceOp() {
+ return correspondingEmptyTupleSourceOp; // This applies only to singleDataSetPlans
+ }
+
public Cost getOpCost() {
return opCost;
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15424
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: neo
Gerrit-Change-Id: If3b0699a904fe1d7bf2f453ddb3941748c4aa553
Gerrit-Change-Number: 15424
Gerrit-PatchSet: 1
Gerrit-Owner: Vijay Sarathy <vi...@couchbase.com>
Gerrit-MessageType: newchange