You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/11/14 20:23:04 UTC

[4/9] impala git commit: IMPALA-7805: Emit zero as "0" in toSql()

IMPALA-7805: Emit zero as "0" in toSql()

It turns out that Impala has a somewhat baroque way to represent the
value of a numeric 0.  NumericLiteral.toSql() uses the Java
BigDecimal class to convert a numeric value to a string for use in
EXPLAIN plans and in verifying expression rewrites.

The default Java behavior is to consider scale when rendering numbers,
including 0. Thus, depending on precision and scale, you may get:

0
0.0
0.00
0.000
...
0E-38

However, mathematically, zero is zero. Plans attach no special meaning
to the extra decimal points or trailing zeros.

To make testing easier, changed the behavior to always emit "0" when the
value is zero, regardless of precision or scale.

Testing: Reran the planner tests and modified captured plans that had
the 0.0, 0.00 variations of zero.

Since this change affects only EXPLAIN output, it cannot affect the
operation of queries. It may impact other tests that compare EXPLAIN
output to a "golden" copy.

Change-Id: I0b2f2f34fe5e6003de407301310ccf433841b9f1
Reviewed-on: http://gerrit.cloudera.org:8080/11878
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b2dbc0f0
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b2dbc0f0
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b2dbc0f0

Branch: refs/heads/master
Commit: b2dbc0f0bc106c89c5722ae909e067c4dabff4d1
Parents: c4c1eba
Author: Paul Rogers <pr...@cloudera.com>
Authored: Mon Nov 5 11:30:56 2018 -0800
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Wed Nov 14 10:30:30 2018 +0000

----------------------------------------------------------------------
 .../org/apache/impala/analysis/NumericLiteral.java  |  9 ++++++++-
 .../queries/PlannerTest/kudu-selectivity.test       |  2 +-
 .../queries/PlannerTest/tpch-all.test               | 16 ++++++++--------
 .../queries/PlannerTest/tpch-kudu.test              |  4 ++--
 .../queries/PlannerTest/tpch-nested.test            | 14 +++++++-------
 .../queries/PlannerTest/tpch-views.test             |  4 ++--
 6 files changed, 28 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b2dbc0f0/fe/src/main/java/org/apache/impala/analysis/NumericLiteral.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/NumericLiteral.java b/fe/src/main/java/org/apache/impala/analysis/NumericLiteral.java
index f3fea9b..4444755 100644
--- a/fe/src/main/java/org/apache/impala/analysis/NumericLiteral.java
+++ b/fe/src/main/java/org/apache/impala/analysis/NumericLiteral.java
@@ -150,8 +150,15 @@ public class NumericLiteral extends LiteralExpr {
 
   @Override
   public String toSqlImpl() { return getStringValue(); }
+
   @Override
-  public String getStringValue() { return value_.toString(); }
+  public String getStringValue() {
+    // BigDecimal returns CAST(0, DECIMAL(38, 38))
+    // as 0E-38. We want just 0.
+    return value_.compareTo(BigDecimal.ZERO) == 0
+        ? "0" : value_.toString();
+  }
+
   public double getDoubleValue() { return value_.doubleValue(); }
   public long getLongValue() { return value_.longValue(); }
   public long getIntValue() { return value_.intValue(); }

http://git-wip-us.apache.org/repos/asf/impala/blob/b2dbc0f0/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index 6ae50f3..05b1c92 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -165,7 +165,7 @@ Per-Host Resources: mem-estimate=9.75MB mem-reservation=0B thread-reservation=2
   |
   00:SCAN KUDU [functional_kudu.alltypes]
      predicates: id IN (int_col), bigint_col IN (9999999999999999999), double_col IN (CAST('inf' AS DOUBLE)), float_col IN (CAST('NaN' AS FLOAT)), int_col IN (9999999999), smallint_col IN (99999, 2), tinyint_col IN (1, 999), bool_col IN (1), string_col NOT IN ('bar')
-     kudu predicates: double_col IN (0.0), float_col IN (0.0), bigint_col IN (1, 2), int_col IN (1, 2), smallint_col IN (0, 2), string_col IN ('foo', 'foo       '), tinyint_col IN (1, 2), bool_col IN (TRUE)
+     kudu predicates: double_col IN (0), float_col IN (0), bigint_col IN (1, 2), int_col IN (1, 2), smallint_col IN (0, 2), string_col IN ('foo', 'foo       '), tinyint_col IN (1, 2), bool_col IN (TRUE)
      mem-estimate=9.75MB mem-reservation=0B thread-reservation=1
      tuple-ids=0 row-size=97B cardinality=5
      in pipelines: 00(GETNEXT)

http://git-wip-us.apache.org/repos/asf/impala/blob/b2dbc0f0/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
index f6f7672..10efec9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
@@ -2677,7 +2677,7 @@ Per-Host Resource Estimates: Memory=346MB
 PLAN-ROOT SINK
 |
 03:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
+|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: l_partkey = p_partkey
@@ -2696,12 +2696,12 @@ Per-Host Resource Estimates: Memory=364MB
 PLAN-ROOT SINK
 |
 07:AGGREGATE [FINALIZE]
-|  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum:merge(l_extendedprice * (1 - l_discount))
+|  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum:merge(l_extendedprice * (1 - l_discount))
 |
 06:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE
-|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
+|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: l_partkey = p_partkey
@@ -2724,12 +2724,12 @@ Per-Host Resource Estimates: Memory=299MB
 PLAN-ROOT SINK
 |
 07:AGGREGATE [FINALIZE]
-|  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum:merge(l_extendedprice * (1 - l_discount))
+|  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum:merge(l_extendedprice * (1 - l_discount))
 |
 06:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE
-|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
+|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: l_partkey = p_partkey
@@ -4197,7 +4197,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  01:SCAN HDFS [tpch.customer]
 |  |     partitions=1/1 files=1 size=23.08MB
-|  |     predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
+|  |     predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |  |
 |  00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
@@ -4246,7 +4246,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  01:SCAN HDFS [tpch.customer]
 |  |     partitions=1/1 files=1 size=23.08MB
-|  |     predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
+|  |     predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |  |
 |  00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB
@@ -4305,7 +4305,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  01:SCAN HDFS [tpch.customer]
 |  |     partitions=1/1 files=1 size=23.08MB
-|  |     predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
+|  |     predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |  |
 |  00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB

http://git-wip-us.apache.org/repos/asf/impala/blob/b2dbc0f0/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
index 2a5cbd7..373fa87 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test
@@ -895,7 +895,7 @@ Per-Host Resource Estimates: Memory=33MB
 PLAN-ROOT SINK
 |
 03:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
+|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: l_partkey = p_partkey
@@ -1447,7 +1447,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  01:SCAN KUDU [tpch_kudu.customer]
 |  |     predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
-|  |     kudu predicates: c_acctbal > 0.00
+|  |     kudu predicates: c_acctbal > 0
 |  |
 |  00:SCAN KUDU [tpch_kudu.customer]
 |     predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')

http://git-wip-us.apache.org/repos/asf/impala/blob/b2dbc0f0/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
index dfcdc4f..12bb1c9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
@@ -1683,7 +1683,7 @@ Per-Host Resource Estimates: Memory=402MB
 PLAN-ROOT SINK
 |
 03:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
+|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: l_partkey = p_partkey
@@ -1702,12 +1702,12 @@ Per-Host Resource Estimates: Memory=422MB
 PLAN-ROOT SINK
 |
 06:AGGREGATE [FINALIZE]
-|  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum:merge(l_extendedprice * (1 - l_discount))
+|  output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum:merge(l_extendedprice * (1 - l_discount))
 |
 05:EXCHANGE [UNPARTITIONED]
 |
 03:AGGREGATE
-|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0.0 END), sum(l_extendedprice * (1 - l_discount))
+|  output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
 |
 02:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: l_partkey = p_partkey
@@ -2636,8 +2636,8 @@ PLAN-ROOT SINK
 |  |  output: avg(c_acctbal)
 |  |
 |  01:SCAN HDFS [tpch_nested_parquet.customer c]
-|     partitions=1/1 files=4 size=288.99MB
-|     predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
+|     partitions=1/1 files=4 size=289.00MB
+|     predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    partitions=1/1 files=4 size=288.99MB
@@ -2685,8 +2685,8 @@ PLAN-ROOT SINK
 |  |  output: avg(c_acctbal)
 |  |
 |  01:SCAN HDFS [tpch_nested_parquet.customer c]
-|     partitions=1/1 files=4 size=288.99MB
-|     predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
+|     partitions=1/1 files=4 size=289.00MB
+|     predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
    partitions=1/1 files=4 size=288.99MB

http://git-wip-us.apache.org/repos/asf/impala/blob/b2dbc0f0/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
index 52af979..7bc22c3 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
@@ -935,7 +935,7 @@ where
 PLAN-ROOT SINK
 |
 03:AGGREGATE [FINALIZE]
-|  output: sum(CASE WHEN tpch.part.p_type LIKE 'PROMO%' THEN tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount) ELSE 0.0 END), sum(tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount))
+|  output: sum(CASE WHEN tpch.part.p_type LIKE 'PROMO%' THEN tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount) ELSE 0 END), sum(tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount))
 |
 02:HASH JOIN [INNER JOIN]
 |  hash predicates: tpch.lineitem.l_partkey = tpch.part.p_partkey
@@ -1507,7 +1507,7 @@ PLAN-ROOT SINK
 |  |  |
 |  |  01:SCAN HDFS [tpch.customer]
 |  |     partitions=1/1 files=1 size=23.08MB
-|  |     predicates: tpch.customer.c_acctbal > 0.00, substr(tpch.customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
+|  |     predicates: tpch.customer.c_acctbal > 0, substr(tpch.customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
 |  |
 |  00:SCAN HDFS [tpch.customer]
 |     partitions=1/1 files=1 size=23.08MB