You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/08/04 22:45:11 UTC

[1/2] incubator-impala git commit: IMPALA-5725: coalesce() with outer join incorrectly rewritten

Repository: incubator-impala
Updated Branches:
  refs/heads/master dced73167 -> d5b0c6b93


IMPALA-5725: coalesce() with outer join incorrectly rewritten

A recent change, IMPALA-5016, added an expr rewrite rule to simplfy
coalesce(). This rule eliminates the coalesce() when its first
parameter (that isn't constant null) is a SlotRef pointing to a
SlotDescriptor that is non-nullable (for example because it is from
a non-nullable Kudu column or because it is from an HDFS partition
column with no null partitions), under the assumption that the SlotRef
could never have a null value.

This assumption is violated when the SlotRef is the output of an
outer join, leading to incorrect results being returned. The problem
is that the nullability of a SlotDescriptor (which determines whether
there is a null indicator bit in the tuple for that slot) is a
slightly different property than the nullability of a SlotRef pointing
to that SlotDescriptor (since the SlotRef can still be NULL if the
entire tuple is NULL).

This patch removes the portion of the rewrite rule that considers
the nullability of the SlotDescriptor. This means that we're missing
out on some optimizations opportunities and we should revisit this in
a way that works with outer joins (IMPALA-5753)

Testing:
- Updated FE tests.
- Added regression tests to exprs.test

Change-Id: I1ca6df949f9d416ab207016236dbcb5886295337
Reviewed-on: http://gerrit.cloudera.org:8080/7567
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2ae94e7e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2ae94e7e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2ae94e7e

Branch: refs/heads/master
Commit: 2ae94e7ead090b3e80b7a75fee7026f2fe8d8ca9
Parents: dced731
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Wed Aug 2 10:51:57 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Aug 4 21:51:19 2017 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/SlotDescriptor.java  |  2 ++
 .../rewrite/SimplifyConditionalsRule.java       | 30 +-------------------
 .../impala/analysis/ExprRewriteRulesTest.java   | 17 ++---------
 .../queries/QueryTest/exprs.test                | 24 +++++++++++++++-
 4 files changed, 29 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ae94e7e/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
index f67ac8b..e303a11 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
@@ -55,6 +55,8 @@ public class SlotDescriptor {
   private boolean isMaterialized_ = false;
 
   // if false, this slot cannot be NULL
+  // Note: it is still possible that a SlotRef pointing to this descriptor could have a
+  // NULL value if the entire tuple is NULL, for example as the result of an outer join.
   private boolean isNullable_ = true;
 
   // physical layout parameters

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ae94e7e/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
index 287b4be..6d0b38c 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
@@ -103,8 +103,6 @@ public class SimplifyConditionalsRule implements ExprRewriteRule {
    * Simplify COALESCE by skipping leading nulls and applying the following transformations:
    * COALESCE(null, a, b) -> COALESCE(a, b);
    * COALESCE(<literal>, a, b) -> <literal>, when literal is not NullLiteral;
-   * COALESCE(<partition-slotref>, a, b) -> <partition-slotref>,
-   * when the partition column does not contain NULL.
    */
   private Expr simplifyCoalesceFunctionCallExpr(FunctionCallExpr expr) {
     int numChildren = expr.getChildren().size();
@@ -113,7 +111,7 @@ public class SimplifyConditionalsRule implements ExprRewriteRule {
       Expr childExpr = expr.getChildren().get(i);
       // Skip leading nulls.
       if (childExpr.isNullLiteral()) continue;
-      if ((i == numChildren - 1) || canSimplifyCoalesceUsingChild(childExpr)) {
+      if ((i == numChildren - 1) || childExpr.isLiteral()) {
         result = childExpr;
       } else if (i == 0) {
         result = expr;
@@ -126,32 +124,6 @@ public class SimplifyConditionalsRule implements ExprRewriteRule {
     return result;
   }
 
-  /**
-   * Checks if the given child expr is nullable. Returns true if one of the following holds:
-   * child is a non-NULL literal;
-   * child is a possibly cast SlotRef against a non-nullable slot;
-   * child is a possible cast SlotRef against a partition column that does not contain NULL.
-   */
-  private boolean canSimplifyCoalesceUsingChild(Expr child) {
-    if (child.isLiteral() && !child.isNullLiteral()) return true;
-
-    SlotRef slotRef = child.unwrapSlotRef(false);
-    if (slotRef == null) return false;
-    SlotDescriptor slotDesc = slotRef.getDesc();
-    if (!slotDesc.getIsNullable()) return true;
-    // Check partition column using partition metadata.
-    if (slotDesc.getParent().getTable() instanceof HdfsTable
-        && slotDesc.getColumn() != null
-        && slotDesc.getParent().getTable().isClusteringColumn(slotDesc.getColumn())) {
-      HdfsTable table = (HdfsTable) slotDesc.getParent().getTable();
-      // Return true if the partition column does not have a NULL value.
-      if (table.getNullPartitionIds(slotDesc.getColumn().getPosition()).isEmpty()) {
-        return true;
-      }
-    }
-    return false;
-  }
-
   private Expr simplifyFunctionCallExpr(FunctionCallExpr expr) {
     FunctionName fnName = expr.getFnName();
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ae94e7e/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index e49c652..4ff50a3 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -398,20 +398,9 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
     RewritesOk("coalesce(1 + 2, id, year)", rules, "3");
     RewritesOk("coalesce(null is null, bool_col)", rules, "TRUE");
     RewritesOk("coalesce(10 + null, id, year)", rules, "coalesce(id, year)");
-    // If the leading parameter is partition column, try to rewrite with partition metadata.
-    RewritesOk("coalesce(year, id)", rule, "year");
-    RewritesOk("coalesce(year, bigint_col)", rule, "year");
-    RewritesOk("coalesce(cast(year as string), string_col)", rule, "CAST(year AS STRING)");
-    RewritesOk("coalesce(id, year)", rule, null);
-    RewritesOk("coalesce(null, year, id)", rule, "year");
-    // If the leading partition column has NULL value, do not rewrite.
-    RewritesOk("functional.alltypesagg", "coalesce(year, id)", rule, "year");
-    RewritesOk("functional.alltypesagg", "coalesce(day, id)", rule, null);
-    // If the leading column is not nullable, rewrite to the column.
-    RewritesOk("functional_kudu.alltypessmall", "coalesce(id, year)", rule, "id");
-    RewritesOk("functional_kudu.alltypessmall", "coalesce(cast(id as string), string_col)", rule,
-        "CAST(id AS STRING)");
-    RewritesOk("functional_kudu.alltypessmall", "coalesce(null, id, year)", rule, "id");
+    // Don't rewrite based on nullability of slots. TODO (IMPALA-5753).
+    RewritesOk("coalesce(year, id)", rule, null);
+    RewritesOk("functional_kudu.alltypessmall", "coalesce(id, year)", rule, null);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ae94e7e/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index d618a24..9b1a1a7 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2741,4 +2741,26 @@ select if (true, 0, sum(id)) from functional.alltypestiny
 0
 ---- TYPES
 BIGINT
-====
\ No newline at end of file
+====
+---- QUERY
+# IMPALA-5725: test coalesce() is not rewritten when its first parameter is a non-nullable
+# Kudu column, but can have NULL values due to an outer join.
+select coalesce(b.id, a.id), b.id, a.id
+from functional_kudu.alltypes a left join functional_kudu.alltypestiny b on a.id = b.id
+where a.id = 100
+---- RESULTS
+100,NULL,100
+---- TYPES
+INT,INT,INT
+====
+---- QUERY
+# IMPALA-5725: test coalesce() is not rewritten when its first parameter is an HDFS
+# partition col with no NULL partitions, but can have NULL values due to an outer join.
+select coalesce(b.year, a.id), b.id, a.id
+from functional.alltypes a left join functional.alltypestiny b on a.id = b.id
+where a.id = 100
+---- RESULTS
+100,NULL,100
+---- TYPES
+INT,INT,INT
+====


[2/2] incubator-impala git commit: Update .gitinore files

Posted by ta...@apache.org.
Update .gitinore files

I noticed a bunch of new things had crept in.

Change-Id: Ie6ef085357a3bf026f2b42689ee642192a7791e7
Reviewed-on: http://gerrit.cloudera.org:8080/7590
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d5b0c6b9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d5b0c6b9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d5b0c6b9

Branch: refs/heads/master
Commit: d5b0c6b9366bc54801ac0dc74c942cdbda213b67
Parents: 2ae94e7
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Fri Aug 4 12:06:27 2017 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Fri Aug 4 22:44:59 2017 +0000

----------------------------------------------------------------------
 .gitignore          | 3 +++
 be/.gitignore       | 7 +++++++
 testdata/.gitignore | 3 +++
 3 files changed, 13 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d5b0c6b9/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index ebd6bbe..51a619a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ bin/impala-config-local.sh
 
 # distcc options
 .impala_compiler_opts
+.impala_compiler_opts_v2
 
 pprof.out
 
@@ -56,5 +57,7 @@ hs_err_pid*.log
 *.jar
 
 # Vim related files
+.*.swn
+.*.swo
 .*.swp
 .vimrc

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d5b0c6b9/be/.gitignore
----------------------------------------------------------------------
diff --git a/be/.gitignore b/be/.gitignore
index 6c9804d..026df8c 100644
--- a/be/.gitignore
+++ b/be/.gitignore
@@ -1,6 +1,13 @@
 *~
 
 build
+
+# Generated sources.
 generated-sources/**/*
 src/common/version.cc
+src/kudu/util/protoc-gen-insertions
+*.pb.h
+*.pb.cc
 
+# Static libraries.
+*.a

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d5b0c6b9/testdata/.gitignore
----------------------------------------------------------------------
diff --git a/testdata/.gitignore b/testdata/.gitignore
index a33027b..ff11b82 100644
--- a/testdata/.gitignore
+++ b/testdata/.gitignore
@@ -28,3 +28,6 @@ block-ids
 
 # Generated data loading files
 load-*-hbase.create
+
+# Temporary data generated by tests.
+tmp-scanner-fuzz*