You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2014/02/24 12:30:51 UTC

git commit: TAJO-619: SELECT count(1) after joins on text keys causes wrong plans.

Repository: incubator-tajo
Updated Branches:
  refs/heads/master 245dbd208 -> 9cce80cfe


TAJO-619: SELECT count(1) after joins on text keys causes wrong plans.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/9cce80cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/9cce80cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/9cce80cf

Branch: refs/heads/master
Commit: 9cce80cfe585e8b93f86542e38ec5b07972d05d6
Parents: 245dbd2
Author: Hyunsik Choi <hy...@apache.org>
Authored: Mon Feb 24 20:29:55 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Mon Feb 24 20:29:55 2014 +0900

----------------------------------------------------------------------
 CHANGES.txt                                     |  9 ++++--
 .../tajo/algebra/CountRowsFunctionExpr.java     |  7 ++--
 .../org/apache/tajo/algebra/FunctionExpr.java   |  2 +-
 .../tajo/algebra/GeneralSetFunctionExpr.java    | 31 ++++++++++++++++--
 .../java/org/apache/tajo/algebra/OpType.java    | 34 ++++++++++++++++++++
 .../tajo/engine/planner/ExprNormalizer.java     | 32 +++++++++---------
 .../tajo/engine/planner/LogicalPlanner.java     | 10 +++---
 .../apache/tajo/LocalTajoTestingUtility.java    | 23 ++++++-------
 .../tajo/engine/query/TestCaseByCases.java      | 10 ++++++
 .../queries/TestCaseByCases/testTAJO619Case.sql |  4 +++
 .../TestCaseByCases/testTAJO619Case.result      |  3 ++
 11 files changed, 124 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 27f7698..c169029 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -263,13 +263,18 @@ Release 0.8.0 - unreleased
 
   BUG FIXES
 
-    TAJO-403: HiveQLAnalyzer should supports standard function in the GROUP BY Clause. (jaehwa)
+    TAJO-619: SELECT count(1) after joins on text keys causes wrong plans.
+    (hyunsik)
+
+    TAJO-403: HiveQLAnalyzer should supports standard function in the GROUP BY
+    Clause. (jaehwa)
 
     TAJO-594: MySQL store doesn't work. (Yongjun Park via jaehwa)
 
     TAJO-590: Rename HiveConverter to HiveQLAnalyzer. (jaehwa)
 
-    TAJO-575: Worker's env.jsp has wrong URL which go to worker's index.jsp. (hyoungjunkim via jaehwa)
+    TAJO-575: Worker's env.jsp has wrong URL which go to worker's index.jsp.
+    (hyoungjunkim via jaehwa)
 
     TAJO-609: PlannerUtil::getRelationLineage ignores PartitionedTableScanNode.
     (hyunsik)

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
index cf2efb5..7b2c555 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/CountRowsFunctionExpr.java
@@ -14,9 +14,12 @@
 
 package org.apache.tajo.algebra;
 
-public class CountRowsFunctionExpr extends FunctionExpr {
+/**
+ * Describe a aggregation function count(*).
+ */
+public class CountRowsFunctionExpr extends GeneralSetFunctionExpr {
   public CountRowsFunctionExpr() {
-    super(OpType.CountRowsFunction, "count");
+    super(OpType.CountRowsFunction, "count", false, new Expr[] {});
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
index b007362..6635aeb 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/FunctionExpr.java
@@ -44,7 +44,7 @@ public class FunctionExpr extends Expr {
 
   protected FunctionExpr(OpType type, String signature, Expr [] params) {
     super(type);
-    if (type != OpType.Function && type != OpType.GeneralSetFunction) {
+    if (!OpType.isFunction(type)) {
       throw new IllegalArgumentException("FunctionExpr cannot accept " + type + "type");
     }
     this.signature = signature.toLowerCase();

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
index c45a58f..d7eb4ed 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/GeneralSetFunctionExpr.java
@@ -14,14 +14,41 @@
 
 package org.apache.tajo.algebra;
 
+import com.google.common.base.Preconditions;
+
+/**
+ * Describes SQL Standard set function (e.g., sum, min, max, avg, and count)
+ */
 public class GeneralSetFunctionExpr extends FunctionExpr {
   private boolean distinct = false;
 
-  public GeneralSetFunctionExpr(String signature, boolean distinct, Expr param) {
-    super(OpType.GeneralSetFunction, signature, new Expr[] {param});
+  /**
+   *
+   * @param type Function type which must be one of GeneralSetFunction or CountRowFunction
+   * @param signature Function name
+   * @param distinct True if this function is a distinct aggregation function
+   * @param params An array of function parameters
+   */
+  protected GeneralSetFunctionExpr(OpType type, String signature, boolean distinct, Expr [] params) {
+    super(type, signature, params);
+    Preconditions.checkArgument(OpType.isAggregationFunction(type));
     this.distinct = distinct;
   }
 
+  /**
+   *
+   * @param signature Function name
+   * @param distinct True if this function is a distinct aggregation function
+   * @param param Function parameter
+   */
+  public GeneralSetFunctionExpr(String signature, boolean distinct, Expr param) {
+    this(OpType.GeneralSetFunction, signature, distinct, new Expr [] {param});
+  }
+
+  /**
+   *
+   * @return True if this function is a distinct aggregation function. Otherwise, it returns False.
+   */
   public boolean isDistinct() {
     return distinct;
   }

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
----------------------------------------------------------------------
diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
index 91f322b..7122af8 100644
--- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
+++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/OpType.java
@@ -131,4 +131,38 @@ public enum OpType {
       return OpType.valueOf(json.getAsString());
     }
   }
+
+  /**
+   * Check if it is one of the literal types.
+   *
+   * @param type The type to be checked
+   * @return True if it is one of the literal types. Otherwise, it returns False.
+   */
+  public static boolean isLiteral(OpType type) {
+    return  type == Literal ||
+            type == NullLiteral ||
+            type == TimeLiteral ||
+            type == DateLiteral ||
+            type == TimestampLiteral;
+  }
+
+  /**
+   * Check if it is one of function types.
+   *
+   * @param type The type to be checked
+   * @return True if it is aggregation function type. Otherwise, it returns False.
+   */
+  public static boolean isFunction(OpType type) {
+    return type == Function || isAggregationFunction(type);
+  }
+
+  /**
+   * Check if it is an aggregation function type.
+   *
+   * @param type The type to be checked
+   * @return True if it is aggregation function type. Otherwise, it returns False.
+   */
+  public static boolean isAggregationFunction(OpType type) {
+    return type == GeneralSetFunction || type == CountRowsFunction;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
index e7de03f..0dc8f82 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/ExprNormalizer.java
@@ -105,10 +105,6 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
     return exprNormalizedResult;
   }
 
-  private boolean isAggregationFunction(Expr expr) {
-    return expr.getType() == OpType.GeneralSetFunction || expr.getType() == OpType.CountRowsFunction;
-  }
-
   @Override
   public Object visitCaseWhen(ExprNormalizedResult ctx, Stack<Expr> stack, CaseWhenPredicate expr)
       throws PlanningException {
@@ -117,13 +113,13 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
       visit(ctx, stack, when.getCondition());
       visit(ctx, stack, when.getResult());
 
-      if (isAggregationFunction(when.getCondition())) {
+      if (OpType.isAggregationFunction(when.getCondition().getType())) {
         String referenceName = ctx.block.namedExprsMgr.addExpr(when.getCondition());
         ctx.aggExprs.add(new NamedExpr(when.getCondition(), referenceName));
         when.setCondition(new ColumnReferenceExpr(referenceName));
       }
 
-      if (isAggregationFunction(when.getResult())) {
+      if (OpType.isAggregationFunction(when.getResult().getType())) {
         String referenceName = ctx.block.namedExprsMgr.addExpr(when.getResult());
         ctx.aggExprs.add(new NamedExpr(when.getResult(), referenceName));
         when.setResult(new ColumnReferenceExpr(referenceName));
@@ -132,7 +128,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
 
     if (expr.hasElseResult()) {
       visit(ctx, stack, expr.getElseResult());
-      if (isAggregationFunction(expr.getElseResult())) {
+      if (OpType.isAggregationFunction(expr.getElseResult().getType())) {
         String referenceName = ctx.block.namedExprsMgr.addExpr(expr.getElseResult());
         ctx.aggExprs.add(new NamedExpr(expr.getElseResult(), referenceName));
         expr.setElseResult(new ColumnReferenceExpr(referenceName));
@@ -145,7 +141,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
   @Override
   public Expr visitUnaryOperator(ExprNormalizedResult ctx, Stack<Expr> stack, UnaryOperator expr) throws PlanningException {
     super.visitUnaryOperator(ctx, stack, expr);
-    if (isAggregationFunction(expr.getChild())) {
+    if (OpType.isAggregationFunction(expr.getChild().getType())) {
       // Get an anonymous column name and replace the aggregation function by the column name
       String refName = ctx.block.namedExprsMgr.addExpr(expr.getChild());
       ctx.aggExprs.add(new NamedExpr(expr.getChild(), refName));
@@ -163,7 +159,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
     // For Left Term
     ////////////////////////
 
-    if (isAggregationFunction(expr.getLeft())) {
+    if (OpType.isAggregationFunction(expr.getLeft().getType())) {
       String leftRefName = ctx.block.namedExprsMgr.addExpr(expr.getLeft());
       ctx.aggExprs.add(new NamedExpr(expr.getLeft(), leftRefName));
       expr.setLeft(new ColumnReferenceExpr(leftRefName));
@@ -173,7 +169,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
     ////////////////////////
     // For Right Term
     ////////////////////////
-    if (isAggregationFunction(expr.getRight())) {
+    if (OpType.isAggregationFunction(expr.getRight().getType())) {
       String rightRefName = ctx.block.namedExprsMgr.addExpr(expr.getRight());
       ctx.aggExprs.add(new NamedExpr(expr.getRight(), rightRefName));
       expr.setRight(new ColumnReferenceExpr(rightRefName));
@@ -195,7 +191,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
       param = expr.getParams()[i];
       visit(ctx, stack, param);
 
-      if (isAggregationFunction(param)) {
+      if (OpType.isAggregationFunction(param.getType())) {
         String referenceName = ctx.plan.generateUniqueColumnName(param);
         ctx.aggExprs.add(new NamedExpr(param, referenceName));
         expr.getParams()[i] = new ColumnReferenceExpr(referenceName);
@@ -217,9 +213,14 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
       param = expr.getParams()[i];
       visit(ctx, stack, param);
 
-      String referenceName = ctx.block.namedExprsMgr.addExpr(param);
-      ctx.scalarExprs.add(new NamedExpr(param, referenceName));
-      expr.getParams()[i] = new ColumnReferenceExpr(referenceName);
+
+      // If parameters are all constants, we don't need to dissect an aggregation expression into two parts:
+      // function and parameter parts.
+      if (!OpType.isLiteral(param.getType())) {
+        String referenceName = ctx.block.namedExprsMgr.addExpr(param);
+        ctx.scalarExprs.add(new NamedExpr(param, referenceName));
+        expr.getParams()[i] = new ColumnReferenceExpr(referenceName);
+      }
     }
     stack.pop();
     return expr;
@@ -232,8 +233,7 @@ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedR
   @Override
   public Expr visitCastExpr(ExprNormalizedResult ctx, Stack<Expr> stack, CastExpr expr) throws PlanningException {
     super.visitCastExpr(ctx, stack, expr);
-    if (expr.getChild().getType() == OpType.GeneralSetFunction
-        || expr.getChild().getType() == OpType.CountRowsFunction) {
+    if (OpType.isAggregationFunction(expr.getType())) {
       String referenceName = ctx.block.namedExprsMgr.addExpr(expr.getChild());
       ctx.aggExprs.add(new NamedExpr(expr.getChild(), referenceName));
       expr.setChild(new ColumnReferenceExpr(referenceName));

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
index 756ede4..01bacc0 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/LogicalPlanner.java
@@ -931,12 +931,12 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex
     ScanNode scanNode = block.getNodeFromExpr(expr);
     updatePhysicalInfo(scanNode.getTableDesc());
 
-    // Add additional expressions required in upper nodes, such as sort key, grouping columns,
-    // column references included in filter condition
-    //
-    // newlyEvaluatedExprsRef keeps
+    // Find expression which can be evaluated at this relation node.
+    // Except for column references, additional expressions used in select list, where clause, order-by clauses
+    // can be evaluated here. Their reference names are kept in newlyEvaluatedExprsRef.
     Set<String> newlyEvaluatedExprsReferences = new LinkedHashSet<String>();
-    for (NamedExpr rawTarget : block.namedExprsMgr.getAllNamedExprs()) {
+    for (Iterator<NamedExpr> iterator = block.namedExprsMgr.getIteratorForUnevaluatedExprs(); iterator.hasNext();) {
+      NamedExpr rawTarget = iterator.next();
       try {
         EvalNode evalNode = exprAnnotator.createEvalNode(context.plan, context.queryBlock, rawTarget.getExpr());
         if (checkIfBeEvaluatedAtRelation(block, evalNode, scanNode)) {

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
index 50fed7b..144ca1b 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/LocalTajoTestingUtility.java
@@ -23,11 +23,10 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.tajo.catalog.CatalogUtil;
-import org.apache.tajo.catalog.Options;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.catalog.TableMeta;
+import org.apache.tajo.benchmark.TPCH;
+import org.apache.tajo.catalog.*;
 import org.apache.tajo.catalog.proto.CatalogProtos;
+import org.apache.tajo.catalog.statistics.TableStats;
 import org.apache.tajo.client.TajoClient;
 import org.apache.tajo.conf.TajoConf;
 import org.apache.tajo.engine.planner.global.MasterPlan;
@@ -85,15 +84,13 @@ public class LocalTajoTestingUtility {
       fs.copyFromLocalFile(localPath, dfsPath);
       TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV, option);
 
-      // Enable this if you want to set pseudo stats. But, it will causes errors in some unit tests.
-      // So, you just use manually it for certain unit tests.
-//      TableStats stats = new TableStats();
-//      stats.setNumBytes(TPCH.tableVolumes.get(names[i]));
-//      TableDesc tableDesc = new TableDesc(names[i], schemas[i], meta, tablePath);
-//      tableDesc.setStats(stats);
-//      util.getMaster().getCatalog().addTable(tableDesc);
-
-      client.createExternalTable(names[i], schemas[i], tablePath, meta);
+      // Add fake table statistic data to tables.
+      // It gives more various situations to unit tests.
+      TableStats stats = new TableStats();
+      stats.setNumBytes(TPCH.tableVolumes.get(names[i]));
+      TableDesc tableDesc = new TableDesc(names[i], schemas[i], meta, tablePath);
+      tableDesc.setStats(stats);
+      util.getMaster().getCatalog().addTable(tableDesc);
     }
 
     LOG.info("===================================================");

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
index 20b5ac5..203d113 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
@@ -37,4 +37,14 @@ public class TestCaseByCases extends QueryTestCaseBase {
     assertResultSet(res);
     cleanupQuery(res);
   }
+
+  /**
+   * It's an unit test to reproduce TAJO-619 (https://issues.apache.org/jira/browse/TAJO-619).
+   */
+  @Test
+  public final void testTAJO619Case() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql
new file mode 100644
index 0000000..15355b8
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO619Case.sql
@@ -0,0 +1,4 @@
+select
+  count(1)
+from
+  lineitem as l1 join lineitem as l2 on l1.l_returnflag = l2.l_returnflag;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/9cce80cf/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result
new file mode 100644
index 0000000..86249b0
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO619Case.result
@@ -0,0 +1,3 @@
+?count
+-------------------------------
+13