You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mj...@apache.org on 2017/07/31 16:12:36 UTC

[1/6] incubator-impala git commit: IMPALA-5714: Add OpenSSL to bootstrap_toolchain.py

Repository: incubator-impala
Updated Branches:
  refs/heads/master 1cbfa423b -> 15e6cf8fd


IMPALA-5714: Add OpenSSL to bootstrap_toolchain.py

To support KRPC on legacy platforms with version of OpenSSL
older than 1.0.1, we may need to use libssl from the toolchain.
This change makes toolchain boostrapping to also download
OpenSSL 1.0.1p.

Testing: private packaging build.

Change-Id: I860b16d8606de1ee472db35a4d8d4e97b57b67ae
Reviewed-on: http://gerrit.cloudera.org:8080/7532
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e90afcb3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e90afcb3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e90afcb3

Branch: refs/heads/master
Commit: e90afcb36e80d11ff4d374f37b1c84b906ab7c4f
Parents: 1cbfa42
Author: Michael Ho <kw...@cloudera.com>
Authored: Wed Jul 26 20:48:53 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Jul 28 22:45:49 2017 +0000

----------------------------------------------------------------------
 bin/bootstrap_toolchain.py | 4 ++--
 bin/impala-config.sh       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e90afcb3/bin/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index afe8780..38b8d05 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -352,8 +352,8 @@ if __name__ == "__main__":
 
   packages = ["avro", "binutils", "boost", "breakpad", "bzip2", "cmake", "crcutil",
       "flatbuffers", "gcc", "gflags", "glog", "gperftools", "gtest", "kudu", "libev",
-      "llvm", ("llvm", "3.8.0-asserts-p1"), "lz4", "openldap", "protobuf", "rapidjson",
-      "re2", "snappy", "thrift", "tpc-h", "tpc-ds", "zlib"]
+      "llvm", ("llvm", "3.8.0-asserts-p1"), "lz4", "openldap", "openssl", "protobuf",
+      "rapidjson", "re2", "snappy", "thrift", "tpc-h", "tpc-ds", "zlib"]
   bootstrap(toolchain_root, packages)
 
   # Download the CDH components if necessary.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e90afcb3/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 501d1e4..fdf74ad 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -97,7 +97,7 @@ export IMPALA_LLVM_ASAN_VERSION=3.8.0-p1
 export IMPALA_LLVM_DEBUG_VERSION=3.8.0-asserts-p1
 export IMPALA_LZ4_VERSION=1.7.5
 export IMPALA_OPENLDAP_VERSION=2.4.25
-export IMPALA_OPENSSL_VERSION=0.9.8zf
+export IMPALA_OPENSSL_VERSION=1.0.1p
 export IMPALA_PROTOBUF_VERSION=2.6.1
 export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=9.0-801
 export IMPALA_RAPIDJSON_VERSION=0.11


[5/6] incubator-impala git commit: IMPALA-5630: Add Kudu client version as a common metric

Posted by mj...@apache.org.
IMPALA-5630: Add Kudu client version as a common metric

Change-Id: I8bd0e220c030822209b811703ebd0cea699e8268
Reviewed-on: http://gerrit.cloudera.org:8080/7525
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c8814262
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c8814262
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c8814262

Branch: refs/heads/master
Commit: c8814262e79a6a5d7b6695604496361d5d9bdff5
Parents: 3296064
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Thu Jul 27 15:05:01 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 29 20:24:59 2017 +0000

----------------------------------------------------------------------
 be/src/util/common-metrics.cc |  9 ++++++++-
 be/src/util/common-metrics.h  |  2 ++
 common/thrift/metrics.json    | 12 ++++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c8814262/be/src/util/common-metrics.cc
----------------------------------------------------------------------
diff --git a/be/src/util/common-metrics.cc b/be/src/util/common-metrics.cc
index ed724bb..94c32b5 100644
--- a/be/src/util/common-metrics.cc
+++ b/be/src/util/common-metrics.cc
@@ -17,15 +17,22 @@
 
 #include "util/common-metrics.h"
 #include "runtime/timestamp-value.h"
+#include <kudu/client/client.h>
 
 namespace impala {
 
 StringProperty* CommonMetrics::PROCESS_START_TIME = nullptr;
+StringProperty* CommonMetrics::KUDU_CLIENT_VERSION = nullptr;
+
 string CommonMetrics::PROCESS_START_TIME_METRIC_NAME = "process-start-time";
+string CommonMetrics::KUDU_CLIENT_VERSION_METRIC_NAME = "kudu-client.version";
 
 void CommonMetrics::InitCommonMetrics(MetricGroup* metric_group) {
   PROCESS_START_TIME = metric_group->AddProperty<string>(
-    PROCESS_START_TIME_METRIC_NAME, "");
+      PROCESS_START_TIME_METRIC_NAME, "");
+  KUDU_CLIENT_VERSION = metric_group->AddProperty<string>(
+      KUDU_CLIENT_VERSION_METRIC_NAME, kudu::client::GetShortVersionString());
+
   // TODO: IMPALA-5599: Clean up non-TIMESTAMP usages of TimestampValue
   PROCESS_START_TIME->set_value(TimestampValue::LocalTime().ToString());
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c8814262/be/src/util/common-metrics.h
----------------------------------------------------------------------
diff --git a/be/src/util/common-metrics.h b/be/src/util/common-metrics.h
index 05f0091..141fe1e 100644
--- a/be/src/util/common-metrics.h
+++ b/be/src/util/common-metrics.h
@@ -25,12 +25,14 @@ namespace impala {
 class CommonMetrics {
 public:
   static StringProperty* PROCESS_START_TIME;
+  static StringProperty* KUDU_CLIENT_VERSION;
 
   /// Registers and initializes the commnon metrics
   static void InitCommonMetrics(MetricGroup* metric_group);
 
 private:
   static string PROCESS_START_TIME_METRIC_NAME;
+  static string KUDU_CLIENT_VERSION_METRIC_NAME;
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c8814262/common/thrift/metrics.json
----------------------------------------------------------------------
diff --git a/common/thrift/metrics.json b/common/thrift/metrics.json
index 4f86363..251ea14 100644
--- a/common/thrift/metrics.json
+++ b/common/thrift/metrics.json
@@ -1438,5 +1438,17 @@
     "units": "NONE",
     "kind": "COUNTER",
     "key": "total-senders-timedout-waiting-for-recvr-creation"
+  },
+  {
+    "description": "A version string identifying the Kudu client",
+    "contexts": [
+      "IMPALAD",
+      "CATALOGSERVER",
+      "STATESTORE"
+    ],
+    "label": "Kudu Client Version",
+    "units": "NONE",
+    "kind": "PROPERTY",
+    "key": "kudu-client.version"
   }
 ]


[6/6] incubator-impala git commit: IMPALA-5529: Add additional function signatures for TRUNC()

Posted by mj...@apache.org.
IMPALA-5529: Add additional function signatures for TRUNC()

The following signatures to be added:
+--------------+----------------------------------+-------------+---------------+
| return type  | signature                        | binary type | is persistent |
+--------------+----------------------------------+-------------+---------------+
| DECIMAL(*,*) | trunc(DECIMAL(*,*))              | BUILTIN     | true          |
| DECIMAL(*,*) | trunc(DECIMAL(*,*), BIGINT)      | BUILTIN     | true          |
| DECIMAL(*,*) | trunc(DECIMAL(*,*), INT)         | BUILTIN     | true          |
| DECIMAL(*,*) | trunc(DECIMAL(*,*), SMALLINT)    | BUILTIN     | true          |
| DECIMAL(*,*) | trunc(DECIMAL(*,*), TINYINT)     | BUILTIN     | true          |
| BIGINT       | trunc(DOUBLE)                    | BUILTIN     | true          |
+--------------+----------------------------------+-------------+---------------+

Tests:
* Adds tests for the new builtin trunc()/dtrunc()

Change-Id: I856da9f817b948de3c72af60a0742b128398b4cf
Reviewed-on: http://gerrit.cloudera.org:8080/7450
Tested-by: Impala Public Jenkins
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/15e6cf8f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/15e6cf8f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/15e6cf8f

Branch: refs/heads/master
Commit: 15e6cf8fd03e3ae012435592f0bf4d6a64298099
Parents: c881426
Author: Jinchul <ji...@gmail.com>
Authored: Tue Jul 18 23:39:08 2017 +0900
Committer: Matthew Jacobs <mj...@cloudera.com>
Committed: Sat Jul 29 20:53:45 2017 +0000

----------------------------------------------------------------------
 common/function-registry/impala_functions.py    | 12 +++---
 .../impala/analysis/FunctionCallExpr.java       |  1 +
 .../impala/analysis/AnalyzeExprsTest.java       | 42 ++++++++++++++------
 3 files changed, 36 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/15e6cf8f/common/function-registry/impala_functions.py
----------------------------------------------------------------------
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 1502809..f43c7c1 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -275,7 +275,7 @@ visible_functions = [
   [['degrees'], 'DOUBLE', ['DOUBLE'], 'impala::MathFunctions::Degrees'],
   [['ceil', 'ceiling', 'dceil'], 'BIGINT', ['DOUBLE'], 'impala::MathFunctions::Ceil'],
   [['floor', 'dfloor'], 'BIGINT', ['DOUBLE'], 'impala::MathFunctions::Floor'],
-  [['truncate','dtrunc'], 'BIGINT', ['DOUBLE'], 'impala::MathFunctions::Truncate'],
+  [['truncate','dtrunc','trunc'], 'BIGINT', ['DOUBLE'], 'impala::MathFunctions::Truncate'],
   [['round','dround'], 'BIGINT', ['DOUBLE'], 'impala::MathFunctions::Round'],
   [['round','dround'], 'DOUBLE', ['DOUBLE', 'INT'], 'impala::MathFunctions::RoundUpTo'],
   [['exp', 'dexp'], 'DOUBLE', ['DOUBLE'], 'impala::MathFunctions::Exp'],
@@ -396,14 +396,14 @@ visible_functions = [
   [['round','dround'], 'DECIMAL', ['DECIMAL', 'SMALLINT'], 'impala::DecimalFunctions::RoundTo'],
   [['round','dround'], 'DECIMAL', ['DECIMAL', 'INT'], 'impala::DecimalFunctions::RoundTo'],
   [['round','dround'], 'DECIMAL', ['DECIMAL', 'BIGINT'], 'impala::DecimalFunctions::RoundTo'],
-  [['truncate','dtrunc'], 'DECIMAL', ['DECIMAL'], 'impala::DecimalFunctions::Truncate'],
-  [['truncate','dtrunc'], 'DECIMAL', ['DECIMAL', 'TINYINT'],
+  [['truncate','dtrunc','trunc'], 'DECIMAL', ['DECIMAL'], 'impala::DecimalFunctions::Truncate'],
+  [['truncate','dtrunc','trunc'], 'DECIMAL', ['DECIMAL', 'TINYINT'],
       'impala::DecimalFunctions::TruncateTo'],
-  [['truncate','dtrunc'], 'DECIMAL', ['DECIMAL', 'SMALLINT'],
+  [['truncate','dtrunc','trunc'], 'DECIMAL', ['DECIMAL', 'SMALLINT'],
       'impala::DecimalFunctions::TruncateTo'],
-  [['truncate','dtrunc'], 'DECIMAL', ['DECIMAL', 'INT'],
+  [['truncate','dtrunc','trunc'], 'DECIMAL', ['DECIMAL', 'INT'],
       'impala::DecimalFunctions::TruncateTo'],
-  [['truncate','dtrunc'], 'DECIMAL', ['DECIMAL', 'BIGINT'],
+  [['truncate','dtrunc','trunc'], 'DECIMAL', ['DECIMAL', 'BIGINT'],
       'impala::DecimalFunctions::TruncateTo'],
 
   # String builtin functions

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/15e6cf8f/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
index 20add13..9364f12 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
@@ -394,6 +394,7 @@ public class FunctionCallExpr extends Expr {
       digitsAfter = 0;
     } else if (fnName_.getFunction().equalsIgnoreCase("truncate") ||
                fnName_.getFunction().equalsIgnoreCase("dtrunc") ||
+               fnName_.getFunction().equalsIgnoreCase("trunc") ||
                fnName_.getFunction().equalsIgnoreCase("round") ||
                fnName_.getFunction().equalsIgnoreCase("dround")) {
       if (children_.size() > 1) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/15e6cf8f/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
index 6f883a3..5d843bf 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
@@ -2193,6 +2193,7 @@ public class AnalyzeExprsTest extends AnalyzerTest {
 
   @Test
   public void TestDecimalFunctions() throws AnalysisException {
+    final String [] aliasesOfTruncate = new String[]{"truncate", "dtrunc", "trunc"};
     AnalyzesOk("select abs(cast(1 as decimal))");
     AnalyzesOk("select abs(cast(-1.1 as decimal(10,3)))");
 
@@ -2205,18 +2206,25 @@ public class AnalyzeExprsTest extends AnalyzerTest {
     AnalyzesOk("select round(cast(1.123 as decimal(10,3)), 5)");
     AnalyzesOk("select round(cast(1.123 as decimal(10,3)), -2)");
 
-    AnalyzesOk("select truncate(cast(1.123 as decimal(10,3)))");
-    AnalyzesOk("select truncate(cast(1.123 as decimal(10,3)), 0)");
-    AnalyzesOk("select truncate(cast(1.123 as decimal(10,3)), 2)");
-    AnalyzesOk("select truncate(cast(1.123 as decimal(10,3)), 5)");
-    AnalyzesOk("select truncate(cast(1.123 as decimal(10,3)), -1)");
+    for (final String alias : aliasesOfTruncate) {
+        AnalyzesOk(String.format("select %s(cast(1.123 as decimal(10,3)))", alias));
+        AnalyzesOk(String.format("select %s(cast(1.123 as decimal(10,3)), 0)", alias));
+        AnalyzesOk(String.format("select %s(cast(1.123 as decimal(10,3)), 2)", alias));
+        AnalyzesOk(String.format("select %s(cast(1.123 as decimal(10,3)), 5)", alias));
+        AnalyzesOk(String.format("select %s(cast(1.123 as decimal(10,3)), -1)", alias));
+    }
 
     AnalysisError("select round(cast(1.123 as decimal(10,3)), 5.1)",
         "No matching function with signature: round(DECIMAL(10,3), DECIMAL(2,1))");
     AnalysisError("select round(cast(1.123 as decimal(30,20)), 40)",
         "Cannot round/truncate to scales greater than 38.");
-    AnalysisError("select truncate(cast(1.123 as decimal(10,3)), 40)",
-        "Cannot round/truncate to scales greater than 38.");
+    for (final String alias : aliasesOfTruncate) {
+        AnalysisError(String.format("select truncate(cast(1.123 as decimal(10,3)), 40)",
+            alias), "Cannot round/truncate to scales greater than 38.");
+        AnalyzesOk(String.format("select %s(NULL)", alias));
+        AnalysisError(String.format("select %s(NULL, 1)", alias),
+            "Cannot resolve DECIMAL precision and scale from NULL type.");
+    }
     AnalysisError("select round(cast(1.123 as decimal(10,3)), NULL)",
         "round() cannot be called with a NULL second argument.");
 
@@ -2244,12 +2252,20 @@ public class AnalyzeExprsTest extends AnalyzerTest {
     testDecimalExpr("ceil(123.45)", ScalarType.createDecimalType(4, 0));
     testDecimalExpr("floor(12.345)", ScalarType.createDecimalType(3, 0));
 
-    testDecimalExpr("truncate(1.23)", ScalarType.createDecimalType(1, 0));
-    testDecimalExpr("truncate(1.23, 1)", ScalarType.createDecimalType(2, 1));
-    testDecimalExpr("truncate(1.23, 0)", ScalarType.createDecimalType(1, 0));
-    testDecimalExpr("truncate(1.23, 3)", ScalarType.createDecimalType(4, 3));
-    testDecimalExpr("truncate(1.23, -1)", ScalarType.createDecimalType(1, 0));
-    testDecimalExpr("truncate(1.23, -2)", ScalarType.createDecimalType(1, 0));
+    for (final String alias : aliasesOfTruncate) {
+        testDecimalExpr(String.format("%s(1.23)", alias),
+            ScalarType.createDecimalType(1, 0));
+        testDecimalExpr(String.format("%s(1.23, 1)", alias),
+            ScalarType.createDecimalType(2, 1));
+        testDecimalExpr(String.format("%s(1.23, 0)", alias),
+            ScalarType.createDecimalType(1, 0));
+        testDecimalExpr(String.format("%s(1.23, 3)", alias),
+            ScalarType.createDecimalType(4, 3));
+        testDecimalExpr(String.format("%s(1.23, -1)", alias),
+            ScalarType.createDecimalType(1, 0));
+        testDecimalExpr(String.format("%s(1.23, -2)", alias),
+            ScalarType.createDecimalType(1, 0));
+    }
   }
 
   /**


[4/6] incubator-impala git commit: IMPALA-1882: Remove ORDER BY restriction from first_value()/last_value()

Posted by mj...@apache.org.
IMPALA-1882: Remove ORDER BY restriction from first_value()/last_value()

In order to conform to the ISO SQL specifications, this patch allows
the 'order by' clause to be optional for first_value() and last_value()
analytical functions.

Testing:
1. Added Analyzer tests
2. Added Planner tests which checks that a sort node is not used if both
'order by' and 'partition by' clauses are not used. Also, checks that
if only 'partition by' clause is used then the sorting is done only on
the partition column.
3. Added a query test that checks that the input is not reordered by
these analytic functions if 'order by' clause is not used

Change-Id: I5a3a56833ac062839629353ea240b361bc727d96
Reviewed-on: http://gerrit.cloudera.org:8080/7502
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/32960649
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/32960649
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/32960649

Branch: refs/heads/master
Commit: 32960649746bca7517d0981f3563efea14c51950
Parents: 8623145
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Mon Jul 24 12:50:19 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 29 20:21:40 2017 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/AnalyticExpr.java    |  19 +--
 .../impala/analysis/AnalyzeExprsTest.java       |  11 ++
 .../queries/PlannerTest/analytic-fns.test       |  70 ++++++++++
 .../queries/QueryTest/analytic-fns.test         | 134 +++++++++++++++++++
 4 files changed, 226 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/32960649/fe/src/main/java/org/apache/impala/analysis/AnalyticExpr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalyticExpr.java b/fe/src/main/java/org/apache/impala/analysis/AnalyticExpr.java
index bf0fb1b..534d09a 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AnalyticExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AnalyticExpr.java
@@ -227,6 +227,12 @@ public class AnalyticExpr extends Expr {
         isAnalyticFn(fn, ROWNUMBER);
   }
 
+  static private boolean isFirstOrLastValueFn(Function fn) {
+    return isAnalyticFn(fn, LAST_VALUE) || isAnalyticFn(fn, FIRST_VALUE) ||
+        isAnalyticFn(fn, LAST_VALUE_IGNORE_NULLS) ||
+        isAnalyticFn(fn, FIRST_VALUE_IGNORE_NULLS);
+  }
+
   /**
    * Rewrite the following analytic functions:
    * percent_rank(), cume_dist() and ntile()
@@ -447,16 +453,13 @@ public class AnalyticExpr extends Expr {
           "DISTINCT not allowed in analytic function: " + getFnCall().toSql());
     }
 
-    if (getFnCall().getParams().isIgnoreNulls()) {
-      String fnName = getFnCall().getFnName().getFunction();
-      if (!fnName.equals(LAST_VALUE) && !fnName.equals(FIRST_VALUE)) {
-        throw new AnalysisException("Function " + fnName.toUpperCase()
-            + " does not accept the keyword IGNORE NULLS.");
-      }
+    Function fn = getFnCall().getFn();
+    if (getFnCall().getParams().isIgnoreNulls() && !isFirstOrLastValueFn(fn)) {
+      throw new AnalysisException("Function " + fn.functionName().toUpperCase()
+          + " does not accept the keyword IGNORE NULLS.");
     }
 
     // check for correct composition of analytic expr
-    Function fn = getFnCall().getFn();
     if (!(fn instanceof AggregateFunction)) {
         throw new AnalysisException(
             "OVER clause requires aggregate or analytic function: "
@@ -471,7 +474,7 @@ public class AnalyticExpr extends Expr {
     }
 
     if (isAnalyticFn(fn) && !isAggregateFn(fn)) {
-      if (orderByElements_.isEmpty()) {
+      if (!isFirstOrLastValueFn(fn) && orderByElements_.isEmpty()) {
         throw new AnalysisException(
             "'" + getFnCall().toSql() + "' requires an ORDER BY clause");
       }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/32960649/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
index cc65a23..6f883a3 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
@@ -735,6 +735,17 @@ public class AnalyzeExprsTest extends AnalyzerTest {
                "from functional.alltypesagg a " +
                "where exists (select 1 from functional.alltypes b where a.id = b.id)");
 
+    // last_value/first_value without using order by
+    AnalyzesOk("select first_value(tinyint_col) over () from functional.alltypesagg");
+    AnalyzesOk("select last_value(tinyint_col) over () from functional.alltypesagg");
+    AnalyzesOk("select first_value(tinyint_col ignore nulls) over () from "
+        + "functional.alltypesagg");
+    AnalyzesOk("select last_value(tinyint_col ignore nulls) over () from "
+        + "functional.alltypesagg");
+    AnalyzesOk("select first_value(tinyint_col ignore nulls) over ()," +
+        "last_value(tinyint_col ignore nulls) over () from functional.alltypesagg a " +
+        "where exists (select 1 from functional.alltypes b where a.id = b.id)");
+
     // legal combinations of analytic and agg functions
     AnalyzesOk("select sum(count(id)) over (partition by min(int_col) "
         + "order by max(bigint_col)) from functional.alltypes group by id, tinyint_col "

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/32960649/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index c6d9c1a..2977f5d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -2364,3 +2364,73 @@ PLAN-ROOT SINK
    partitions=24/24 files=24 size=478.45KB
    runtime filters: RF000 -> t1.id
 ====
+# IMPALA-1882: Confirm that first_value function used without a partition by and order
+# by clause does not need a sort node
+select first_value(tinyint_col ignore nulls) over () from functional.alltypesagg
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:ANALYTIC
+|  functions: first_value_ignore_nulls(tinyint_col)
+|
+02:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypesagg]
+   partitions=11/11 files=11 size=814.73KB
+====
+# IMPALA-1882: Confirm that last_value function used without a partition by and order
+# by clause does not need a sort node
+select last_value(tinyint_col ignore nulls) over () from functional.alltypesagg
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+01:ANALYTIC
+|  functions: last_value_ignore_nulls(tinyint_col)
+|
+02:EXCHANGE [UNPARTITIONED]
+|
+00:SCAN HDFS [functional.alltypesagg]
+   partitions=11/11 files=11 size=814.73KB
+====
+# IMPALA-1882: Confirm that first_value function using only a partition by clause
+# sorts over partition column
+select *, first_value(id) over (partition by bool_col) first_val from
+functional.alltypessmall;
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+04:EXCHANGE [UNPARTITIONED]
+|
+02:ANALYTIC
+|  functions: first_value(id)
+|  partition by: bool_col
+|
+01:SORT
+|  order by: bool_col ASC NULLS FIRST
+|
+03:EXCHANGE [HASH(bool_col)]
+|
+00:SCAN HDFS [functional.alltypessmall]
+   partitions=4/4 files=4 size=6.32KB
+====
+# IMPALA-1882: Confirm that last_value function using only a partition by clause
+# sorts over partition column
+select *, last_value(id) over (partition by bool_col) first_val from
+functional.alltypessmall;
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+04:EXCHANGE [UNPARTITIONED]
+|
+02:ANALYTIC
+|  functions: last_value(id)
+|  partition by: bool_col
+|
+01:SORT
+|  order by: bool_col ASC NULLS FIRST
+|
+03:EXCHANGE [HASH(bool_col)]
+|
+00:SCAN HDFS [functional.alltypessmall]
+   partitions=4/4 files=4 size=6.32KB
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/32960649/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
index 0e2f6b0..e697914 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
@@ -1955,3 +1955,137 @@ TINYINT
 4
 4
 ====
+---- QUERY
+# Test for IMPALA-1882: first_value works without order by clause. Therefore, output of
+# function depends on order of input rows, which in this case is DESC
+select id, bool_col, first_value(id) over (partition by bool_col) from
+(select * from functional.alltypessmall order by id desc limit 5) as t1
+---- TYPES
+INT, BOOLEAN, INT
+---- RESULTS
+99,false,99
+97,false,99
+95,false,99
+98,true,98
+96,true,98
+====
+---- QUERY
+# Test for IMPALA-1882: first_value works without order by clause. Therefore, output of
+# function depends on order of input rows, which in this case is ASC
+select id, bool_col, first_value(id) over (partition by bool_col) from
+(select * from functional.alltypessmall order by id asc limit 5) as t1
+---- TYPES
+INT, BOOLEAN, INT
+---- RESULTS
+1,false,1
+3,false,1
+0,true,0
+2,true,0
+4,true,0
+---- QUERY
+# Test for IMPALA-1882: last_value works without order by clause. Therefore, output of
+# function depends on order of input rows, which in this case is DESC
+select id, bool_col, last_value(id) over (partition by bool_col) from
+(select * from functional.alltypessmall order by id desc limit 5) as t1
+---- TYPES
+INT, BOOLEAN, INT
+---- RESULTS
+99,false,95
+97,false,95
+95,false,95
+98,true,96
+96,true,96
+====
+---- QUERY
+# Test for IMPALA-1882: last_value works without order by clause. Therefore, output of
+# function depends on order of input rows, which in this case is ASC
+select id, bool_col, last_value(id) over (partition by bool_col) from
+(select * from functional.alltypessmall order by id asc limit 5) as t1
+---- TYPES
+INT, BOOLEAN, INT
+---- RESULTS
+1,false,3
+3,false,3
+0,true,4
+2,true,4
+4,true,4
+====
+---- QUERY
+# Test for IMPALA-1882: first_value works with ignore nulls and without order by clause.
+# Therefore, output of function depends on order of input rows, which in this case is ASC
+select bool_col, smallint_col, first_value(smallint_col ignore nulls) over
+(partition by bool_col) from (select * from functional.alltypesagg where
+id > 99 order by id asc limit 10) as t1
+---- TYPES
+BOOLEAN, SMALLINT, SMALLINT
+---- RESULTS
+false,1,1
+false,3,1
+false,5,1
+false,7,1
+true,NULL,2
+true,NULL,2
+true,2,2
+true,4,2
+true,6,2
+true,8,2
+====
+---- QUERY
+# Test for IMPALA-1882: first_value works with ignore nulls and without order by clause.
+# Therefore, output of function depends on order of input rows, which in this case is DESC
+select bool_col, smallint_col, first_value(smallint_col ignore nulls) over
+(partition by bool_col) from (select * from functional.alltypesagg where
+id < 101 order by id desc limit 10) as t1;
+---- TYPES
+BOOLEAN, SMALLINT, SMALLINT
+---- RESULTS
+false,99,99
+false,97,99
+false,95,99
+false,93,99
+true,NULL,98
+true,NULL,98
+true,98,98
+true,96,98
+true,94,98
+true,92,98
+====
+---- QUERY
+# Test for IMPALA-1882: last_value works with ignore nulls and without order by clause.
+# Therefore, output of function depends on order of input rows, which in this case is ASC
+select bool_col, smallint_col, last_value(smallint_col ignore nulls) over
+(partition by bool_col) from (select * from functional.alltypesagg where
+id > 99 order by id asc limit 10) as t1;
+---- TYPES
+BOOLEAN, SMALLINT, SMALLINT
+---- RESULTS
+false,1,7
+false,3,7
+false,5,7
+false,7,7
+true,NULL,8
+true,NULL,8
+true,2,8
+true,4,8
+true,6,8
+true,8,8
+====
+---- QUERY
+# Test for IMPALA-1882: last_value works with ignore nulls and without order by clause.
+# Therefore, output of function depends on order of input rows, which in this case is DESC
+select bool_col, smallint_col, last_value(smallint_col ignore nulls) over
+(partition by bool_col) from (select * from functional.alltypesagg where
+id < 110 order by id desc limit 10) as t1;
+---- TYPES
+BOOLEAN, SMALLINT, SMALLINT
+---- RESULTS
+false,9,1
+false,7,1
+false,5,1
+false,3,1
+false,1,1
+true,8,2
+true,6,2
+true,4,2
+true,2,2
+true,NULL,2


[2/6] incubator-impala git commit: IMPALA-4086: Add benchmark for simple scheduler

Posted by mj...@apache.org.
IMPALA-4086: Add benchmark for simple scheduler

Change-Id: I89ec1c6c1828bb0b86d1e13ce4dfc5a8df865c2e
Reviewed-on: http://gerrit.cloudera.org:8080/4554
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2ee16067
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2ee16067
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2ee16067

Branch: refs/heads/master
Commit: 2ee160679ca693777ad6764706a251755ad6ece3
Parents: e90afcb
Author: Lars Volker <lv...@cloudera.com>
Authored: Fri Sep 9 13:28:05 2016 +0200
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 29 05:43:58 2017 +0000

----------------------------------------------------------------------
 be/src/benchmarks/CMakeLists.txt         |   1 +
 be/src/benchmarks/scheduler-benchmark.cc | 171 ++++++++++++++++++++++++++
 be/src/scheduling/scheduler.h            |   8 +-
 be/src/util/debug-util.cc                |   2 +
 be/src/util/debug-util.h                 |   1 +
 5 files changed, 178 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/benchmarks/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt
index e954583..1d67d45 100644
--- a/be/src/benchmarks/CMakeLists.txt
+++ b/be/src/benchmarks/CMakeLists.txt
@@ -50,6 +50,7 @@ ADD_BE_BENCHMARK(overflow-benchmark)
 ADD_BE_BENCHMARK(parse-timestamp-benchmark)
 ADD_BE_BENCHMARK(rle-benchmark)
 ADD_BE_BENCHMARK(row-batch-serialize-benchmark)
+ADD_BE_BENCHMARK(scheduler-benchmark)
 ADD_BE_BENCHMARK(status-benchmark)
 ADD_BE_BENCHMARK(string-benchmark)
 ADD_BE_BENCHMARK(string-compare-benchmark)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/benchmarks/scheduler-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/scheduler-benchmark.cc b/be/src/benchmarks/scheduler-benchmark.cc
new file mode 100644
index 0000000..df9e981
--- /dev/null
+++ b/be/src/benchmarks/scheduler-benchmark.cc
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "gutil/strings/substitute.h"
+#include "scheduling/scheduler.h"
+#include "scheduling/scheduler-test-util.h"
+#include "util/benchmark.h"
+#include "util/cpu-info.h"
+#include "util/debug-util.h"
+#include "util/thread.h"
+
+#include "common/names.h"
+
+using namespace impala;
+using namespace impala::test;
+
+
+// This benchmark exercises the core scheduling method 'ComputeScanRangeAssignment()' of
+// the Scheduler class for various cluster and table sizes. It makes the following
+// assumptions:
+// - All nodes run local impalads. The benchmark includes suites for both DISK_LOCAL and
+//   REMOTE scheduling preferences. Having datanodes without a local impalad would result
+//   in a performance somewhere between these two.
+// - The plan only scans one table. All logic in the scheduler is built around assigning
+//   blocks to impalad backends, which scan them. Having multiple tables will merely
+//   repeat this process. The interesting metric is varying the number of blocks per
+//   table.
+// - Blocks and files are treated as the same thing from the scheduler's perspective.
+//   Scheduling happens on scan ranges, which are issued based on file blocks by the
+//   frontend.
+//
+// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
+// Cluster Size, DISK_LOCAL:  Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                             3 Hosts               13.2     13.5     13.5         1X         1X         1X
+//                            10 Hosts               12.6     12.6     12.8     0.951X     0.937X     0.951X
+//                            50 Hosts               9.34     9.52     9.53     0.705X     0.706X     0.706X
+//                           100 Hosts               8.68     8.68     8.68     0.655X     0.643X     0.643X
+//                           500 Hosts               5.57     5.67      5.7      0.42X     0.421X     0.423X
+//                          1000 Hosts               4.02      4.1      4.1     0.304X     0.304X     0.304X
+//                          3000 Hosts               1.85     1.85     1.86      0.14X     0.137X     0.138X
+//                         10000 Hosts              0.577    0.588    0.588    0.0436X    0.0436X    0.0436X
+//
+// Cluster Size, REMOTE:      Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                             3 Hosts               23.8     23.9     24.1         1X         1X         1X
+//                            10 Hosts               20.4     20.9     21.1     0.854X     0.877X     0.878X
+//                            50 Hosts                 15       15     15.2     0.628X     0.628X     0.632X
+//                           100 Hosts               12.6     12.7     12.7      0.53X     0.532X     0.528X
+//                           500 Hosts               7.38     7.55      7.6      0.31X     0.316X     0.316X
+//                          1000 Hosts                4.9     4.93     4.93     0.206X     0.207X     0.205X
+//                          3000 Hosts                1.9     1.96     1.96    0.0797X    0.0823X    0.0817X
+//                         10000 Hosts              0.577    0.588    0.588    0.0242X    0.0246X    0.0245X
+//
+// Number of Blocks:          Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                            1 Blocks               74.5     75.2     76.1         1X         1X         1X
+//                           10 Blocks               44.4     44.6     45.1     0.596X     0.593X     0.593X
+//                          100 Blocks               8.46     8.46     8.49     0.114X     0.113X     0.112X
+//                         1000 Blocks              0.981        1        1    0.0132X    0.0133X    0.0131X
+//                        10000 Blocks                0.1    0.102    0.103   0.00134X   0.00136X   0.00136X
+
+static const vector<int> CLUSTER_SIZES = {3, 10, 50, 100, 500, 1000, 3000, 10000};
+static const int DEFAULT_CLUSTER_SIZE = 100;
+static const vector<int> NUM_BLOCKS_PER_TABLE = {1, 10, 100, 1000, 10000};
+static const int DEFAULT_NUM_BLOCKS_PER_TABLE = 100;
+
+/// Members of this struct are needed to build the test fixtures and depend on each other.
+/// Since their constructors take const references they must be constructed in order,
+/// which is why we keep pointers to them here.
+struct TestCtx {
+  std::unique_ptr<Cluster> cluster;
+  std::unique_ptr<Schema> schema;
+  std::unique_ptr<Plan> plan;
+  std::unique_ptr<Result> result;
+  std::unique_ptr<SchedulerWrapper> scheduler_wrapper;
+};
+
+/// Initialize a test context for a single benchmark run.
+void InitializeTestCtx(int num_hosts, int num_blocks,
+    TReplicaPreference::type replica_preference, TestCtx* test_ctx) {
+  test_ctx->cluster.reset(new Cluster());
+  test_ctx->cluster->AddHosts(num_hosts, true, true);
+
+  test_ctx->schema.reset(new Schema(*test_ctx->cluster));
+  test_ctx->schema->AddMultiBlockTable("T0", num_blocks, ReplicaPlacement::LOCAL_ONLY, 3);
+
+  test_ctx->plan.reset(new Plan(*test_ctx->schema));
+  test_ctx->plan->SetReplicaPreference(replica_preference);
+  test_ctx->plan->SetRandomReplica(true);
+  test_ctx->plan->AddTableScan("T0");
+
+  test_ctx->result.reset(new Result(*test_ctx->plan));
+
+  test_ctx->scheduler_wrapper.reset(new SchedulerWrapper(*test_ctx->plan));
+}
+
+/// This function is passed to the test framework and executes the scheduling method
+/// repeatedly.
+void BenchmarkFunction(int num_iterations, void* data) {
+  TestCtx* test_ctx = static_cast<TestCtx*>(data);
+  for (int i = 0; i < num_iterations; ++i) {
+    test_ctx->result->Reset();
+    test_ctx->scheduler_wrapper->Compute(test_ctx->result.get());
+  }
+}
+
+/// Build and run a benchmark suite for various cluster sizes with the default number of
+/// blocks. Scheduling will be done according to the parameter 'replica_preference'.
+void RunClusterSizeBenchmark(TReplicaPreference::type replica_preference) {
+  string suite_name = strings::Substitute(
+      "Cluster Size, $0", PrintTReplicaPreference(replica_preference));
+  Benchmark suite(suite_name, false /* micro_heuristics */);
+  vector<TestCtx> test_ctx(CLUSTER_SIZES.size());
+
+  for (int i = 0; i < CLUSTER_SIZES.size(); ++i) {
+    int cluster_size = CLUSTER_SIZES[i];
+    InitializeTestCtx(
+        cluster_size, DEFAULT_NUM_BLOCKS_PER_TABLE, replica_preference, &test_ctx[i]);
+    string benchmark_name = strings::Substitute("$0 Hosts", cluster_size);
+    suite.AddBenchmark(benchmark_name, BenchmarkFunction, &test_ctx[i]);
+  }
+  cout << suite.Measure() << endl;
+}
+
+/// Build and run a benchmark suite for various table sizes with the default cluster size.
+/// Scheduling will be done according to the parameter 'replica_preference'.
+void RunNumBlocksBenchmark(TReplicaPreference::type replica_preference) {
+  Benchmark suite("Number of Blocks", false /* micro_heuristics */);
+  vector<TestCtx> test_ctx(NUM_BLOCKS_PER_TABLE.size());
+
+  for (int i = 0; i < NUM_BLOCKS_PER_TABLE.size(); ++i) {
+    int num_blocks = NUM_BLOCKS_PER_TABLE[i];
+    InitializeTestCtx(DEFAULT_CLUSTER_SIZE, num_blocks, replica_preference, &test_ctx[i]);
+    string benchmark_name = strings::Substitute("$0 Blocks", num_blocks);
+    suite.AddBenchmark(benchmark_name, BenchmarkFunction, &test_ctx[i]);
+  }
+  cout << suite.Measure() << endl;
+}
+
+int main(int argc, char** argv) {
+  google::InitGoogleLogging(argv[0]);
+  CpuInfo::Init();
+  impala::InitThreading();
+
+  cout << Benchmark::GetMachineInfo() << endl;
+  RunClusterSizeBenchmark(TReplicaPreference::DISK_LOCAL);
+  RunClusterSizeBenchmark(TReplicaPreference::REMOTE);
+  RunNumBlocksBenchmark(TReplicaPreference::DISK_LOCAL);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/scheduling/scheduler.h
----------------------------------------------------------------------
diff --git a/be/src/scheduling/scheduler.h b/be/src/scheduling/scheduler.h
index 8d57c32..043deac 100644
--- a/be/src/scheduling/scheduler.h
+++ b/be/src/scheduling/scheduler.h
@@ -65,11 +65,8 @@ class SchedulerWrapper;
 /// TODO: Inject global dependencies into the class (for example ExecEnv::GetInstance(),
 ///       RNG used during scheduling, FLAGS_*)
 ///       to make it testable.
-/// TODO: Benchmark the performance of the scheduler. The tests need to include setups
-///       with:
-///         - Small and large number of executors.
-///         - Small and large query plans.
-///         - Scheduling query plans with concurrent updates to the internal executor
+/// TODO: Extend the benchmarks of the scheduler. The tests need to include setups with:
+///         - Scheduling query plans with concurrent updates to the internal backend
 ///           configuration.
 class Scheduler {
  public:
@@ -334,6 +331,7 @@ class Scheduler {
   /// the schedule's TQueryExecRequest.plan_exec_info.
   /// Unpartitioned fragments are assigned to the coordinator. Populate the schedule's
   /// fragment_exec_params_ with the resulting scan range assignment.
+  /// We have a benchmark for this method in be/src/benchmarks/scheduler-benchmark.cc.
   Status ComputeScanRangeAssignment(QuerySchedule* schedule);
 
   /// Process the list of scan ranges of a single plan node and compute scan range

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/util/debug-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/debug-util.cc b/be/src/util/debug-util.cc
index 6a94496..48df592 100644
--- a/be/src/util/debug-util.cc
+++ b/be/src/util/debug-util.cc
@@ -78,6 +78,7 @@ THRIFT_ENUM_OUTPUT_FN(TDdlType);
 THRIFT_ENUM_OUTPUT_FN(TCatalogOpType);
 THRIFT_ENUM_OUTPUT_FN(THdfsFileFormat);
 THRIFT_ENUM_OUTPUT_FN(THdfsCompression);
+THRIFT_ENUM_OUTPUT_FN(TReplicaPreference);
 THRIFT_ENUM_OUTPUT_FN(TSessionType);
 THRIFT_ENUM_OUTPUT_FN(TStmtType);
 THRIFT_ENUM_OUTPUT_FN(QueryState);
@@ -91,6 +92,7 @@ THRIFT_ENUM_OUTPUT_FN(TImpalaQueryOptions);
 THRIFT_ENUM_PRINT_FN(TCatalogObjectType);
 THRIFT_ENUM_PRINT_FN(TDdlType);
 THRIFT_ENUM_PRINT_FN(TCatalogOpType);
+THRIFT_ENUM_PRINT_FN(TReplicaPreference);
 THRIFT_ENUM_PRINT_FN(TSessionType);
 THRIFT_ENUM_PRINT_FN(TStmtType);
 THRIFT_ENUM_PRINT_FN(QueryState);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/util/debug-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/debug-util.h b/be/src/util/debug-util.h
index fcfb9d6..29fd6bb 100644
--- a/be/src/util/debug-util.h
+++ b/be/src/util/debug-util.h
@@ -64,6 +64,7 @@ std::string PrintPlanNodeType(const TPlanNodeType::type& type);
 std::string PrintTCatalogObjectType(const TCatalogObjectType::type& type);
 std::string PrintTDdlType(const TDdlType::type& type);
 std::string PrintTCatalogOpType(const TCatalogOpType::type& type);
+std::string PrintTReplicaPreference(const TReplicaPreference::type& type);
 std::string PrintTSessionType(const TSessionType::type& type);
 std::string PrintTStmtType(const TStmtType::type& type);
 std::string PrintQueryState(const beeswax::QueryState::type& type);


[3/6] incubator-impala git commit: IMPALA-5722: Fix string to decimal cast

Posted by mj...@apache.org.
IMPALA-5722: Fix string to decimal cast

When converting a string to a decimal, we didn't handle the case where
the exponent is a large negative number. The function for computing the
divisor returns -1 when the exponent is too large. The problem is fixed
by making sure that the divisor is positive.

Testing:
-Added decimal tests.

Change-Id: I1f5eb5b64a6924af2e8eb7f9a50da67015757efe
Reviewed-on: http://gerrit.cloudera.org:8080/7517
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/86231459
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/86231459
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/86231459

Branch: refs/heads/master
Commit: 862314599614fd20f6e64ccb8600cd53f0f879d5
Parents: 2ee1606
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Wed Jul 26 18:06:21 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 29 09:43:47 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/decimal-test.cc |  9 ++++++++-
 be/src/util/decimal-util.h     |  5 +++++
 be/src/util/string-parser.h    | 23 ++++++++++++++++++-----
 3 files changed, 31 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/86231459/be/src/runtime/decimal-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-test.cc b/be/src/runtime/decimal-test.cc
index 190ee39..d81a16e 100644
--- a/be/src/runtime/decimal-test.cc
+++ b/be/src/runtime/decimal-test.cc
@@ -294,7 +294,14 @@ TEST(StringToDecimal, Basic) {
   StringToAllDecimals("1.1.0e0", 10, 0, 100, StringParser::PARSE_FAILURE);
   StringToAllDecimals("1e1.0", 10, 0, 100, StringParser::PARSE_FAILURE);
   StringToAllDecimals("1..1e1", 10, 0, 100, StringParser::PARSE_FAILURE);
-  StringToAllDecimals("1e9999999999999999999", 10, 0, 100, StringParser::PARSE_OVERFLOW);
+  StringToAllDecimals("1e9999999999999999999", 10, 0, 0, StringParser::PARSE_OVERFLOW);
+  StringToAllDecimals("1e-38", 10, 2, 0, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("1e-38", 38, 38, 1, StringParser::PARSE_SUCCESS);
+  StringToAllDecimals("-1e-38", 38, 38, -1, StringParser::PARSE_SUCCESS);
+  StringToAllDecimals("1e-39", 38, 38, 0, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("-1e-39", 38, 38, 0, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("1e-9999999999999999999", 10, 0, 0, StringParser::PARSE_UNDERFLOW);
+  StringToAllDecimals("-1e-9999999999999999999", 10, 0, 0, StringParser::PARSE_UNDERFLOW);
 
   StringToAllDecimals(" 1e0 ", 10, 2, 100, StringParser::PARSE_SUCCESS);
   StringToAllDecimals("-1e0", 10, 2, -100, StringParser::PARSE_SUCCESS);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/86231459/be/src/util/decimal-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/decimal-util.h b/be/src/util/decimal-util.h
index 9048793..288caf3 100644
--- a/be/src/util/decimal-util.h
+++ b/be/src/util/decimal-util.h
@@ -59,6 +59,11 @@ class DecimalUtil {
     DCHECK_GE(scale, 0);
     T result = 1;
     for (int i = 0; i < scale; ++i) {
+      // Verify that the result of multiplication does not overflow.
+      // TODO: This is not an ideal way to check for overflow because if T is signed, the
+      // behavior is undefined in case of overflow. Replace this with a better overflow
+      // check.
+      DCHECK_GE(result * 10, result);
       result *= 10;
     }
     return result;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/86231459/be/src/util/string-parser.h
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h
index 62e7dcc..cc9c517 100644
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -142,7 +142,7 @@ class StringParser {
       --len;
     }
 
-    // Ignore leading zeros even after a dot. This allows for differetiating between
+    // Ignore leading zeros even after a dot. This allows for differentiating between
     // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
     // overflow.
     int scale = 0;
@@ -182,7 +182,12 @@ class StringParser {
       } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
         found_exponent = true;
         exponent = StringToIntInternal<int8_t>(s + i + 1, len - i - 1, result);
-        if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) return DecimalValue<T>(0);
+        if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
+          if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
+            *result = StringParser::PARSE_UNDERFLOW;
+          }
+          return DecimalValue<T>(0);
+        }
         break;
       } else {
         *result = StringParser::PARSE_FAILURE;
@@ -216,9 +221,17 @@ class StringParser {
     } else if (UNLIKELY(scale > type_scale)) {
       *result = StringParser::PARSE_UNDERFLOW;
       int shift = scale - type_scale;
-      if (truncated_digit_count > 0) shift -= truncated_digit_count;
-      if (shift > 0) value /= DecimalUtil::GetScaleMultiplier<T>(shift);
-      DCHECK(value >= 0);
+      if (UNLIKELY(truncated_digit_count > 0)) shift -= truncated_digit_count;
+      if (shift > 0) {
+        T divisor = DecimalUtil::GetScaleMultiplier<T>(shift);
+        if (LIKELY(divisor >= 0)) {
+          value /= divisor;
+        } else {
+          DCHECK(divisor == -1); // DCHECK_EQ doesn't work with int128_t.
+          value = 0;
+        }
+      }
+      DCHECK(value >= 0); // DCHECK_GE doesn't work with int128_t.
     } else if (UNLIKELY(!found_value && !found_dot)) {
       *result = StringParser::PARSE_FAILURE;
     }