You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2014/02/24 12:30:51 UTC
git commit: TAJO-619: SELECT count(1) after joins on text keys causes
wrong plans.
Repository: incubator-tajo
Updated Branches:
refs/heads/master 245dbd208 -> 9cce80cfe
TAJO-619: SELECT count(1) after joins on text keys causes wrong plans.
Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/9cce80cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/9cce80cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/9cce80cf
Branch: refs/heads/master
Commit: 9cce80cfe585e8b93f86542e38ec5b07972d05d6
Parents: 245dbd2
Author: Hyunsik Choi <hy...@apache.org>
Authored: Mon Feb 24 20:29:55 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Mon Feb 24 20:29:55 2014 +0900
----------------------------------------------------------------------
CHANGES.txt | 9 ++++--
.../tajo/algebra/CountRowsFunctionExpr.java | 7 ++--
.../org/apache/tajo/algebra/FunctionExpr.java | 2 +-
.../tajo/algebra/GeneralSetFunctionExpr.java | 31 ++++++++++++++++--
.../java/org/apache/tajo/algebra/OpType.java | 34 ++++++++++++++++++++
.../tajo/engine/planner/ExprNormalizer.java | 32 +++++++++---------
.../tajo/engine/planner/LogicalPlanner.java | 10 +++---
.../apache/tajo/LocalTajoTestingUtility.java | 23 ++++++-------
.../tajo/engine/query/TestCaseByCases.java | 10 ++++++
.../queries/TestCaseByCases/testTAJO619Case.sql | 4 +++
.../TestCaseByCases/testTAJO619Case.result | 3 ++
11 files changed, 124 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 27f7698..c169029 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -263,13 +263,18 @@ Release 0.8.0 - unreleased
BUG FIXES
- TAJO-403: HiveQLAnalyzer should supports standard function in the GROUP BY Clause. (jaehwa)
+ TAJO-619: SELECT count(1) after joins on text keys causes wrong plans.
+ (hyunsik)
+
+ TAJO-403: HiveQLAnalyzer should supports standard function in the GROUP BY
+ Clause. (jaehwa)
TAJO-594: MySQL store doesn't work. (Yongjun Park via jaehwa)
TAJO-590: Rename HiveConverter to HiveQLAnalyzer. (jaehwa)
- TAJO-575: Worker's env.jsp has wrong URL which go to worker's index.jsp. (hyoungjunkim via jaehwa)
+ TAJO-575: Worker's env.jsp has wrong URL which go to worker's index.jsp.
+ (hyoungjunkim via jaehwa)
TAJO-609: PlannerUtil::getRelationLineage ignores PartitionedTableScanNode.
(hyunsik)
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
index cf2efb5..7b2c555 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
@@ -14,9 +14,12 @@
package org.apache.tajo.algebra;
-public class CountRowsFunctionExpr extends FunctionExpr {
+/**
+ * Describe a aggregation function count(*).
+ */
+public class CountRowsFunctionExpr extends GeneralSetFunctionExpr {
public CountRowsFunctionExpr() {
- super(OpType.CountRowsFunction, "count");
+ super(OpType.CountRowsFunction, "count", false, new Expr[] {});
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
index b007362..6635aeb 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
@@ -44,7 +44,7 @@ public class FunctionExpr extends Expr {
protected FunctionExpr(OpType type, String signature, Expr [] params) {
super(type);
- if (type != OpType.Function && type != OpType.GeneralSetFunction) {
+ if (!OpType.isFunction(type)) {
throw new IllegalArgumentException("FunctionExpr cannot accept " + type + "type");
}
this.signature = signature.toLowerCase();
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
index c45a58f..d7eb4ed 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
@@ -14,14 +14,41 @@
package org.apache.tajo.algebra;
+import com.google.common.base.Preconditions;
+
+/**
+ * Describes SQL Standard set function (e.g., sum, min, max, avg, and count)
+ */
public class GeneralSetFunctionExpr extends FunctionExpr {
private boolean distinct = false;
- public GeneralSetFunctionExpr(String signature, boolean distinct, Expr param) {
- super(OpType.GeneralSetFunction, signature, new Expr[] {param});
+ /**
+ *
+ * @param type Function type which must be one of GeneralSetFunction or CountRowFunction
+ * @param signature Function name
+ * @param distinct True if this function is a distinct aggregation function
+ * @param params An array of function parameters
+ */
+ protected GeneralSetFunctionExpr(OpType type, String signature, boolean distinct, Expr [] params) {
+ super(type, signature, params);
+ Preconditions.checkArgument(OpType.isAggregationFunction(type));
this.distinct = distinct;
}
+ /**
+ *
+ * @param signature Function name
+ * @param distinct True if this function is a distinct aggregation function
+ * @param param Function parameter
+ */
+ public GeneralSetFunctionExpr(String signature, boolean distinct, Expr param) {
+ this(OpType.GeneralSetFunction, signature, distinct, new Expr [] {param});
+ }
+
+ /**
+ *
+ * @return True if this function is a distinct aggregation function. Otherwise, it returns False.
+ */
public boolean isDistinct() {
return distinct;
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
index 91f322b..7122af8 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
@@ -131,4 +131,38 @@ public enum OpType {
return OpType.valueOf(json.getAsString());
}
}
+
+ /**
+ * Check if it is one of the literal types.
+ *
+ * @param type The type to be checked
+ * @return True if it is one of the literal types. Otherwise, it returns False.
+ */
+ public static boolean isLiteral(OpType type) {
+ return type == Literal ||
+ type == NullLiteral ||
+ type == TimeLiteral ||
+ type == DateLiteral ||
+ type == TimestampLiteral;
+ }
+
+ /**
+ * Check if it is one of function types.
+ *
+ * @param type The type to be checked
+ * @return True if it is aggregation function type. Otherwise, it returns False.
+ */
+ public static boolean isFunction(OpType type) {
+ return type == Function || isAggregationFunction(type);
+ }
+
+ /**
+ * Check if it is an aggregation function type.
+ *
+ * @param type The type to be checked
+ * @return True if it is aggregation function type. Otherwise, it returns False.
+ */
+ public static boolean isAggregationFunction(OpType type) {
+ return type == GeneralSetFunction || type == CountRowsFunction;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
index e7de03f..0dc8f82 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
@@ -105,10 +105,6 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
return exprNormalizedResult;
}
- private boolean isAggregationFunction(Expr expr) {
- return expr.getType() == OpType.GeneralSetFunction || expr.getType() == OpType.CountRowsFunction;
- }
-
@Override
public Object visitCaseWhen(ExprNormalizedResult ctx, Stack<Expr> stack, CaseWhenPredicate expr)
throws PlanningException {
@@ -117,13 +113,13 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
visit(ctx, stack, when.getCondition());
visit(ctx, stack, when.getResult());
- if (isAggregationFunction(when.getCondition())) {
+ if (OpType.isAggregationFunction(when.getCondition().getType())) {
String referenceName = ctx.block.namedExprsMgr.addExpr(when.getCondition());
ctx.aggExprs.add(new NamedExpr(when.getCondition(), referenceName));
when.setCondition(new ColumnReferenceExpr(referenceName));
}
- if (isAggregationFunction(when.getResult())) {
+ if (OpType.isAggregationFunction(when.getResult().getType())) {
String referenceName = ctx.block.namedExprsMgr.addExpr(when.getResult());
ctx.aggExprs.add(new NamedExpr(when.getResult(), referenceName));
when.setResult(new ColumnReferenceExpr(referenceName));
@@ -132,7 +128,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
if (expr.hasElseResult()) {
visit(ctx, stack, expr.getElseResult());
- if (isAggregationFunction(expr.getElseResult())) {
+ if (OpType.isAggregationFunction(expr.getElseResult().getType())) {
String referenceName = ctx.block.namedExprsMgr.addExpr(expr.getElseResult());
ctx.aggExprs.add(new NamedExpr(expr.getElseResult(), referenceName));
expr.setElseResult(new ColumnReferenceExpr(referenceName));
@@ -145,7 +141,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
@Override
public Expr visitUnaryOperator(ExprNormalizedResult ctx, Stack<Expr> stack, UnaryOperator expr) throws PlanningException {
super.visitUnaryOperator(ctx, stack, expr);
- if (isAggregationFunction(expr.getChild())) {
+ if (OpType.isAggregationFunction(expr.getChild().getType())) {
// Get an anonymous column name and replace the aggregation function by the column name
String refName = ctx.block.namedExprsMgr.addExpr(expr.getChild());
ctx.aggExprs.add(new NamedExpr(expr.getChild(), refName));
@@ -163,7 +159,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
// For Left Term
////////////////////////
- if (isAggregationFunction(expr.getLeft())) {
+ if (OpType.isAggregationFunction(expr.getLeft().getType())) {
String leftRefName = ctx.block.namedExprsMgr.addExpr(expr.getLeft());
ctx.aggExprs.add(new NamedExpr(expr.getLeft(), leftRefName));
expr.setLeft(new ColumnReferenceExpr(leftRefName));
@@ -173,7 +169,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
////////////////////////
// For Right Term
////////////////////////
- if (isAggregationFunction(expr.getRight())) {
+ if (OpType.isAggregationFunction(expr.getRight().getType())) {
String rightRefName = ctx.block.namedExprsMgr.addExpr(expr.getRight());
ctx.aggExprs.add(new NamedExpr(expr.getRight(), rightRefName));
expr.setRight(new ColumnReferenceExpr(rightRefName));
@@ -195,7 +191,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
param = expr.getParams()[i];
visit(ctx, stack, param);
- if (isAggregationFunction(param)) {
+ if (OpType.isAggregationFunction(param.getType())) {
String referenceName = ctx.plan.generateUniqueColumnName(param);
ctx.aggExprs.add(new NamedExpr(param, referenceName));
expr.getParams()[i] = new ColumnReferenceExpr(referenceName);
@@ -217,9 +213,14 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
param = expr.getParams()[i];
visit(ctx, stack, param);
- String referenceName = ctx.block.namedExprsMgr.addExpr(param);
- ctx.scalarExprs.add(new NamedExpr(param, referenceName));
- expr.getParams()[i] = new ColumnReferenceExpr(referenceName);
+
+ // If parameters are all constants, we don't need to dissect an aggregation expression into two parts:
+ // function and parameter parts.
+ if (!OpType.isLiteral(param.getType())) {
+ String referenceName = ctx.block.namedExprsMgr.addExpr(param);
+ ctx.scalarExprs.add(new NamedExpr(param, referenceName));
+ expr.getParams()[i] = new ColumnReferenceExpr(referenceName);
+ }
}
stack.pop();
return expr;
@@ -232,8 +233,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
@Override
public Expr visitCastExpr(ExprNormalizedResult ctx, Stack<Expr> stack, CastExpr expr) throws PlanningException {
super.visitCastExpr(ctx, stack, expr);
- if (expr.getChild().getType() == OpType.GeneralSetFunction
- || expr.getChild().getType() == OpType.CountRowsFunction) {
+ if (OpType.isAggregationFunction(expr.getType())) {
String referenceName = ctx.block.namedExprsMgr.addExpr(expr.getChild());
ctx.aggExprs.add(new NamedExpr(expr.getChild(), referenceName));
expr.setChild(new ColumnReferenceExpr(referenceName));
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
index 756ede4..01bacc0 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
@@ -931,12 +931,12 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex
ScanNode scanNode = block.getNodeFromExpr(expr);
updatePhysicalInfo(scanNode.getTableDesc());
- // Add additional expressions required in upper nodes, such as sort key, grouping columns,
- // column references included in filter condition
- //
- // newlyEvaluatedExprsRef keeps
+ // Find expression which can be evaluated at this relation node.
+ // Except for column references, additional expressions used in select list, where clause, order-by clauses
+ // can be evaluated here. Their reference names are kept in newlyEvaluatedExprsRef.
Set<String> newlyEvaluatedExprsReferences = new LinkedHashSet<String>();
- for (NamedExpr rawTarget : block.namedExprsMgr.getAllNamedExprs()) {
+ for (Iterator<NamedExpr> iterator = block.namedExprsMgr.getIteratorForUnevaluatedExprs(); iterator.hasNext();) {
+ NamedExpr rawTarget = iterator.next();
try {
EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr());
if (checkIfBeEvaluatedAtRelation(block, evalNode, scanNode)) {
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
index 50fed7b..144ca1b 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
@@ -23,11 +23,10 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.tajo.catalog.CatalogUtil;
-import org.apache.tajo.catalog.Options;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.catalog.TableMeta;
+import org.apache.tajo.benchmark.TPCH;
+import org.apache.tajo.catalog.*;
import org.apache.tajo.catalog.proto.CatalogProtos;
+import org.apache.tajo.catalog.statistics.TableStats;
import org.apache.tajo.client.TajoClient;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.engine.planner.global.MasterPlan;
@@ -85,15 +84,13 @@ public class LocalTajoTestingUtility {
fs.copyFromLocalFile(localPath, dfsPath);
TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV, option);
- // Enable this if you want to set pseudo stats. But, it will causes errors in some unit tests.
- // So, you just use manually it for certain unit tests.
-// TableStats stats = new TableStats();
-// stats.setNumBytes(TPCH.tableVolumes.get(names[i]));
-// TableDesc tableDesc = new TableDesc(names[i], schemas[i], meta, tablePath);
-// tableDesc.setStats(stats);
-// util.getMaster().getCatalog().addTable(tableDesc);
-
- client.createExternalTable(names[i], schemas[i], tablePath, meta);
+ // Add fake table statistic data to tables.
+ // It gives more various situations to unit tests.
+ TableStats stats = new TableStats();
+ stats.setNumBytes(TPCH.tableVolumes.get(names[i]));
+ TableDesc tableDesc = new TableDesc(names[i], schemas[i], meta, tablePath);
+ tableDesc.setStats(stats);
+ util.getMaster().getCatalog().addTable(tableDesc);
}
LOG.info("===================================================");
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
index 20b5ac5..203d113 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
@@ -37,4 +37,14 @@ public class TestCaseByCases extends QueryTestCaseBase {
assertResultSet(res);
cleanupQuery(res);
}
+
+ /**
+ * It's an unit test to reproduce TAJO-619 (https://issues.apache.org/jira/browse/TAJO-619).
+ */
+ @Test
+ public final void testTAJO619Case() throws Exception {
+ ResultSet res = executeQuery();
+ assertResultSet(res);
+ cleanupQuery(res);
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql
new file mode 100644
index 0000000..15355b8
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql
@@ -0,0 +1,4 @@
+select
+ count(1)
+from
+ lineitem as l1 join lineitem as l2 on l1.l_returnflag = l2.l_returnflag;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result
new file mode 100644
index 0000000..86249b0
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result
@@ -0,0 +1,3 @@
+?count
+-------------------------------
+13