Posted to commits@impala.apache.org by mj...@apache.org on 2017/02/22 18:12:00 UTC

[1/6] incubator-impala git commit: IMPALA-4926: Upgrade LZ4 to 1.7.5

Repository: incubator-impala
Updated Branches:
  refs/heads/master 815c76f9c -> 637cc3e44


IMPALA-4926: Upgrade LZ4 to 1.7.5

LZ4 1.7.5 deprecates the functions LZ4_compress and LZ4_uncompress, so
rename those calls to their replacements, LZ4_compress_default() and
LZ4_decompress_fast() respectively. Otherwise no changes are required
for compatibility with LZ4 1.7.5.
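
For reference, a minimal sketch of the signature change (assuming an
lz4.h 1.7.5 header on the include path; the buffers and sizes below are
illustrative, not Impala's actual code):

  #include <lz4.h>

  int main() {
    const char src[] = "hello hello hello hello";
    char compressed[64];
    char decompressed[sizeof(src)];

    // Old (deprecated): LZ4_compress(src, dst, srcSize). The replacement also
    // takes the destination capacity so it cannot overrun the output buffer.
    int compressed_len = LZ4_compress_default(
        src, compressed, static_cast<int>(sizeof(src)),
        static_cast<int>(sizeof(compressed)));

    // Old (deprecated): LZ4_uncompress(src, dst, originalSize).
    // LZ4_decompress_fast() keeps that contract: it is told the original
    // (decompressed) size and returns the number of compressed bytes consumed.
    int consumed = LZ4_decompress_fast(
        compressed, decompressed, static_cast<int>(sizeof(src)));
    return (compressed_len > 0 && consumed == compressed_len) ? 0 : 1;
  }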

Change-Id: I10e4561d0e940fa15ca8248c8277acfc6dff3da3
Reviewed-on: http://gerrit.cloudera.org:8080/6030
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d2ae45b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d2ae45b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d2ae45b0

Branch: refs/heads/master
Commit: d2ae45b0a9a566e19a4bf504680f43e15bc8b8a9
Parents: 815c76f
Author: Henry Robinson <he...@cloudera.com>
Authored: Mon Feb 13 17:31:57 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Feb 18 04:33:07 2017 +0000

----------------------------------------------------------------------
 be/src/util/compress.cc   | 4 ++--
 be/src/util/decompress.cc | 4 ++--
 bin/impala-config.sh      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d2ae45b0/be/src/util/compress.cc
----------------------------------------------------------------------
diff --git a/be/src/util/compress.cc b/be/src/util/compress.cc
index 3737828..429ae66 100644
--- a/be/src/util/compress.cc
+++ b/be/src/util/compress.cc
@@ -299,7 +299,7 @@ Status Lz4Compressor::ProcessBlock(bool output_preallocated, int64_t input_lengt
   DCHECK_GE(input_length, 0);
   CHECK(output_preallocated) << "Output was not allocated for Lz4 Codec";
   if (input_length == 0) return Status::OK();
-  *output_length = LZ4_compress(reinterpret_cast<const char*>(input),
-                       reinterpret_cast<char*>(*output), input_length);
+  *output_length = LZ4_compress_default(reinterpret_cast<const char*>(input),
+      reinterpret_cast<char*>(*output), input_length, *output_length);
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d2ae45b0/be/src/util/decompress.cc
----------------------------------------------------------------------
diff --git a/be/src/util/decompress.cc b/be/src/util/decompress.cc
index 7896c33..8897e98 100644
--- a/be/src/util/decompress.cc
+++ b/be/src/util/decompress.cc
@@ -559,9 +559,9 @@ int64_t Lz4Decompressor::MaxOutputLen(int64_t input_len, const uint8_t* input) {
 Status Lz4Decompressor::ProcessBlock(bool output_preallocated, int64_t input_length,
     const uint8_t* input, int64_t* output_length, uint8_t** output) {
   DCHECK(output_preallocated) << "Lz4 Codec implementation must have allocated output";
-  // LZ4_uncompress will cause a segmentation fault if passed a NULL output.
+  // LZ4_decompress_fast will cause a segmentation fault if passed a NULL output.
   if(*output_length == 0) return Status::OK();
-  if (LZ4_uncompress(reinterpret_cast<const char*>(input),
+  if (LZ4_decompress_fast(reinterpret_cast<const char*>(input),
           reinterpret_cast<char*>(*output), *output_length) != input_length) {
     return Status("Lz4: uncompress failed");
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d2ae45b0/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 4a89432..1782577 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -72,7 +72,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=339-7dba579b79
+export IMPALA_TOOLCHAIN_BUILD_ID=344-715ee51bdc
 
 # Versions of toolchain dependencies.
 # -----------------------------------
@@ -95,7 +95,7 @@ export IMPALA_LLVM_ASAN_VERSION=3.8.0-p1
 # Debug builds should use the release+asserts build to get additional coverage.
 # Don't use the LLVM debug build because the binaries are too large to distribute.
 export IMPALA_LLVM_DEBUG_VERSION=3.8.0-asserts-p1
-export IMPALA_LZ4_VERSION=svn
+export IMPALA_LZ4_VERSION=1.7.5
 export IMPALA_OPENLDAP_VERSION=2.4.25
 export IMPALA_OPENSSL_VERSION=0.9.8zf
 export IMPALA_PROTOBUF_VERSION=2.6.1


[4/6] incubator-impala git commit: kudu: fix uninitialized variable usage warning

Posted by mj...@apache.org.
kudu: fix uninitialized variable usage warning

This fixes a trivial warning about potential use of an uninitialized
variable in release builds. If for some reason Kudu sends us an invalid
log severity level, we now fall back to INFO instead of reading an
uninitialized severity value.
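
As a sketch of the pattern this fix follows (the enum and mapping below
are assumed stand-ins, not the actual kudu-util.cc code), initializing
the severity to 0 makes any unrecognized value map to glog's INFO level:

  #include <iostream>

  // Assumed stand-in for kudu::client::KuduLogSeverity; values are illustrative.
  enum KuduLogSeverity : int { SEVERITY_INFO, SEVERITY_WARNING, SEVERITY_ERROR,
                               SEVERITY_FATAL };

  int ToGlogSeverity(KuduLogSeverity severity) {
    int glog_severity = 0;  // glog's INFO; now also the fallback value
    switch (severity) {
      case SEVERITY_INFO:    glog_severity = 0; break;
      case SEVERITY_WARNING: glog_severity = 1; break;
      case SEVERITY_ERROR:   glog_severity = 2; break;
      case SEVERITY_FATAL:   glog_severity = 3; break;
      // No default case: an out-of-range severity now maps to INFO instead of
      // leaving glog_severity uninitialized.
    }
    return glog_severity;
  }

  int main() {
    // An invalid value from the client falls back to INFO (0).
    std::cout << ToGlogSeverity(static_cast<KuduLogSeverity>(42)) << "\n";
    return 0;
  }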

Change-Id: I4f87c39cf9259bec8ba9a84a2543fb1f2e4a3422
Reviewed-on: http://gerrit.cloudera.org:8080/6098
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/441e15a3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/441e15a3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/441e15a3

Branch: refs/heads/master
Commit: 441e15a365540d52d022f475a64dc7576ae5df32
Parents: 56e8380
Author: Todd Lipcon <to...@cloudera.com>
Authored: Mon Feb 20 19:36:13 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 22 01:21:50 2017 +0000

----------------------------------------------------------------------
 be/src/exec/kudu-util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/441e15a3/be/src/exec/kudu-util.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-util.cc b/be/src/exec/kudu-util.cc
index e81baf5..ef1c74b 100644
--- a/be/src/exec/kudu-util.cc
+++ b/be/src/exec/kudu-util.cc
@@ -81,7 +81,7 @@ void LogKuduMessage(void* unused, kudu::client::KuduLogSeverity severity,
   // Note: use raw ints instead of the nice LogSeverity typedef
   // that can be found in glog/log_severity.h as it has an import
   // conflict with gutil/logging-inl.h (indirectly imported).
-  int glog_severity;
+  int glog_severity = 0;
 
   switch (severity) {
     case kudu::client::SEVERITY_INFO: glog_severity = 0; break;


[3/6] incubator-impala git commit: build: don't look in system paths for kudu client

Posted by mj...@apache.org.
build: don't look in system paths for kudu client

On my system, which had the Kudu client library installed in /usr,
Impala was picking up the wrong version of the client. This change
instructs CMake to look only in the specified toolchain directory to
find Kudu.

Change-Id: I8dad3d92e0bd07c27c3a58f6964d4da88218049c
Reviewed-on: http://gerrit.cloudera.org:8080/6097
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/56e83806
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/56e83806
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/56e83806

Branch: refs/heads/master
Commit: 56e83806bf21f58618c57245fede42a32f0b8792
Parents: 9414d53
Author: Todd Lipcon <to...@cloudera.com>
Authored: Mon Feb 20 19:35:25 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 22 01:14:01 2017 +0000

----------------------------------------------------------------------
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/56e83806/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a28143..a283097 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -337,7 +337,7 @@ else()
 endif()
 # When KUDU_IS_SUPPORTED is false, the Kudu client is expected to be a non-functional
 # stub. It's still needed to link though.
-find_package(kuduClient REQUIRED)
+find_package(kuduClient REQUIRED NO_DEFAULT_PATH)
 include_directories(SYSTEM ${KUDU_CLIENT_INCLUDE_DIR})
 
 # find jni headers and libs


[2/6] incubator-impala git commit: IMPALA-4904, IMPALA-4914: add targeted-stress to exhaustive tests

Posted by mj...@apache.org.
IMPALA-4904, IMPALA-4914: add targeted-stress to exhaustive tests

This patch allows any test suite whose workload is "targeted-stress" to
be run in so-called "exhaustive" mode. Before this patch, only suites
whose workload was "functional-query" were run exhaustively. More on
this limitation is in IMPALA-3947.

The net effects are:

1. We fix IMPALA-4904, which allows test_ddl_stress to start running
   again.
2. We also improve the situation in IMPALA-4914 by getting
   TestSpillStress to run, though we don't fix the fact that it still
   does not run concurrently.

The other mini-cluster stress tests were disabled in this commit:
  IMPALA-2605: Omit the sort and mini stress tests
so they are not directly affected here.

I also tried to clarify what "exhaustive" means in some of our shell
scripts, via help text and comments.

An exhaustive build+test run showed that test_ddl_stress and
TestSpillStress now run and pass. This adds roughly 12 minutes to a
build that is on the order of 13-14 hours.

Change-Id: Ie6bd5bbd380e636d680368e908519b042d79dfec
Reviewed-on: http://gerrit.cloudera.org:8080/6002
Tested-by: Impala Public Jenkins
Reviewed-by: Jim Apple <jb...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9414d53a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9414d53a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9414d53a

Branch: refs/heads/master
Commit: 9414d53a891448be13228f5fc63e089522e143aa
Parents: d2ae45b
Author: Michael Brown <mi...@cloudera.com>
Authored: Fri Feb 10 09:35:16 2017 -0800
Committer: Jim Apple <jb...@apache.org>
Committed: Tue Feb 21 21:12:36 2017 +0000

----------------------------------------------------------------------
 bin/run-all-tests.sh                  | 20 ++++++++++---
 buildall.sh                           | 19 +++++++-----
 tests/custom_cluster/test_spilling.py | 38 ++++++++++++++++++++----
 tests/stress/test_ddl_stress.py       | 47 ++++++++++++++++--------------
 4 files changed, 85 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/bin/run-all-tests.sh
----------------------------------------------------------------------
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 728c0e5..692b510 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -87,11 +87,23 @@ do
   esac
 done
 
-# The EXPLORATION_STRATEGY parameter should only apply to the
-# functional-query workload because the larger datasets (ex. tpch) are
-# not generated in all table formats.
+# IMPALA-3947: "Exhaustive" tests are actually based on workload. This
+# means what we colloquially call "exhaustive" tests are actually
+# "exhaustive tests whose workloads are in this set below". Not all
+# workloads are able to be exhaustively run through buildall.sh. For
+# example, the tpch workload is never run exhaustively, because the
+# relatively large size of tpch means data loading in all exhaustive
+# formats takes much longer and data load snapshots containing tpch in
+# all exhaustive formats are much larger to store and take longer to
+# load.
+#
+# XXX If you change the --workload_exploration_strategy set below,
+# please update the buildall.sh help text for -testexhaustive.
 COMMON_PYTEST_ARGS="--maxfail=${MAX_PYTEST_FAILURES} --exploration_strategy=core"`
-    `" --workload_exploration_strategy=functional-query:$EXPLORATION_STRATEGY"
+    `" --workload_exploration_strategy="`
+        `"functional-query:${EXPLORATION_STRATEGY},"`
+        `"targeted-stress:${EXPLORATION_STRATEGY}"
+
 if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
   # Only one impalad is supported when running against local filesystem.
   COMMON_PYTEST_ARGS+=" --impalad=localhost:21000"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index 291f19e..9c4385c 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -119,6 +119,8 @@ do
       ;;
     -testexhaustive)
       EXPLORATION_STRATEGY=exhaustive
+      # See bin/run-all-tests.sh and IMPALA-3947 for more information on
+      # what this means.
       ;;
     -snapshot_file)
       SNAPSHOT_FILE="${2-}"
@@ -172,7 +174,7 @@ do
       echo "[-noclean] : Omits cleaning all packages before building. Will not kill"\
            "running Hadoop services unless any -format* is True"
       echo "[-format] : Format the minicluster, metastore db, and sentry policy db"\
-           " [Default: False]"
+           "[Default: False]"
       echo "[-format_cluster] : Format the minicluster [Default: False]"
       echo "[-format_metastore] : Format the metastore db [Default: False]"
       echo "[-format_sentry_policy_db] : Format the Sentry policy db [Default: False]"
@@ -183,16 +185,17 @@ do
       echo "[-skiptests] : Skips execution of all tests"
       echo "[-notests] : Skips building and execution of all tests"
       echo "[-start_minicluster] : Start test cluster including Impala and all"\
-            " its dependencies. If already running, all services are restarted."\
-            " Regenerates test cluster config files. [Default: True if running "\
-            " tests or loading data, False otherwise]"
+           "its dependencies. If already running, all services are restarted."\
+           "Regenerates test cluster config files. [Default: True if running"\
+           "tests or loading data, False otherwise]"
       echo "[-start_impala_cluster] : Start Impala minicluster after build"\
-           " [Default: False]"
+           "[Default: False]"
       echo "[-testpairwise] : Run tests in 'pairwise' mode (increases"\
            "test execution time)"
-      echo "[-testexhaustive] : Run tests in 'exhaustive' mode (significantly increases"\
-           "test execution time)"
-      echo "[-testdata] : Loads test data. Implied as true if -snapshot_file is "\
+      echo "[-testexhaustive] : Run tests in 'exhaustive' mode, which significantly"\
+           "increases test execution time. ONLY APPLIES to suites with workloads:"\
+           "functional-query, targeted-stress"
+      echo "[-testdata] : Loads test data. Implied as true if -snapshot_file is"\
            "specified. If -snapshot_file is not specified, data will be regenerated."
       echo "[-snapshot_file <file name>] : Load test data from a snapshot file"
       echo "[-metastore_snapshot_file <file_name>]: Load the hive metastore snapshot"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/tests/custom_cluster/test_spilling.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_spilling.py b/tests/custom_cluster/test_spilling.py
index 7f90a8e..df69a67 100644
--- a/tests/custom_cluster/test_spilling.py
+++ b/tests/custom_cluster/test_spilling.py
@@ -19,28 +19,56 @@ import pytest
 from copy import deepcopy
 
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
-from tests.common.test_dimensions import (ImpalaTestDimension,
+from tests.common.test_dimensions import (
+    ImpalaTestDimension,
     create_single_exec_option_dimension,
     create_parquet_dimension)
 
+
 class TestSpillStress(CustomClusterTestSuite):
+
+  # IMPALA-4914 part 3 TODO: In the agg_stress workload file used below, it's implied
+  # that the workload needs to run concurrently. This appears to attempt to try to do
+  # that by using TEST_IDS and NUM_ITERATIONS and assuming multiple instances of the
+  # test will run concurrently. But custom cluster tests are explicitly never run
+  # concurrently. If this test ought to run concurrently, then new test infra is needed
+  # to achieve that.
+
   @classmethod
   def get_workload(self):
     return 'targeted-stress'
 
   @classmethod
   def setup_class(cls):
-    super(TestSpillStress, cls).setup_class()
+    # Don't do anything expensive if we're not running exhaustively for this workload.
+    # This check comes first.
     if cls.exploration_strategy() != 'exhaustive':
       pytest.skip('runs only in exhaustive')
     # Since test_spill_stress below runs TEST_IDS * NUM_ITERATIONS times, but we only
     # need Impala to start once, it's inefficient to use
     # @CustomClusterTestSuite.with_args() to restart Impala every time. Instead, start
-    # Impala here, once.
+    # Impala here, once. We start impalad before we try to make connections to it.
     #
     # Start with 256KB buffers to reduce data size required to force spilling.
-    cls._start_impala_cluster(['--impalad_args=--"read_size=262144"',
-        'catalogd_args="--load_catalog_in_background=false"'])
+    cls._start_impala_cluster([
+        '--impalad_args=--"read_size=262144"',
+        '--catalogd_args="--load_catalog_in_background=false"'])
+    # This super() call leapfrogs CustomClusterTestSuite.setup_class() and calls
+    # ImpalaTestSuite.setup_class(). This is done because this is an atypical custom
+    # cluster test that doesn't restart impalad per test method, but it still needs
+    # self.client (etc.) set up. IMPALA-4914 part 3 would address this.
+    # This happens last because we need impalad started above before we try to establish
+    # connections to it.
+    super(CustomClusterTestSuite, cls).setup_class()
+
+  @classmethod
+  def teardown_class(cls):
+    # This teardown leapfrog matches the setup leapfrog above, for equivalent reasons.
+    # CustomClusterTestSuite.teardown_class() overrides ImpalaTestSuite.teardown_class()
+    # and is a no-op. ImpalaTestSuite.teardown_class() stops client connections. The
+    # *next* custom cluster test that py.test chooses to run is responsible for
+    # restarting impalad.
+    super(CustomClusterTestSuite, cls).teardown_class()
 
   def setup_method(self, method):
     # We don't need CustomClusterTestSuite.setup_method() or teardown_method() here

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9414d53a/tests/stress/test_ddl_stress.py
----------------------------------------------------------------------
diff --git a/tests/stress/test_ddl_stress.py b/tests/stress/test_ddl_stress.py
index 5a55f5d..9aa6de5 100644
--- a/tests/stress/test_ddl_stress.py
+++ b/tests/stress/test_ddl_stress.py
@@ -16,21 +16,22 @@
 # under the License.
 
 import pytest
-import uuid
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfS3, SkipIfIsilon, SkipIfLocal
-from tests.common.test_vector import ImpalaTestDimension
 
 # Number of tables to create per thread
 NUM_TBLS_PER_THREAD = 10
 
 # Each client will get a different test id.
-TEST_IDS = xrange(0, 10)
+TEST_INDICES = xrange(10)
+
 
 # Simple stress test for DDL operations. Attempts to create, cache,
 # uncache, then drop many different tables in parallel.
 class TestDdlStress(ImpalaTestSuite):
+  SHARED_DATABASE = 'test_ddl_stress_db'
+
   @classmethod
   def get_workload(self):
     return 'targeted-stress'
@@ -42,38 +43,40 @@ class TestDdlStress(ImpalaTestSuite):
     if cls.exploration_strategy() != 'exhaustive':
       pytest.skip("Should only run in exhaustive due to long execution time.")
 
-    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('test_id', *TEST_IDS))
     cls.ImpalaTestMatrix.add_constraint(
-        lambda v: v.get_value('exec_option')['batch_size'] == 0)
-
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format == 'text' and\
-        v.get_value('table_format').compression_codec == 'none')
+        lambda v: (v.get_value('table_format').file_format == 'text' and
+                   v.get_value('table_format').compression_codec == 'none'))
 
   @SkipIfS3.caching
   @SkipIfIsilon.caching
   @SkipIfLocal.caching
   @pytest.mark.stress
-  def test_create_cache_many_tables(self, vector):
+  @pytest.mark.parametrize('test_index', TEST_INDICES)
+  def test_create_cache_many_tables(self, vector, testid_checksum, test_index):
     self.client.set_configuration(vector.get_value('exec_option'))
-    self.client.execute("create database if not exists ddl_stress_testdb")
-    self.client.execute("use ddl_stress_testdb")
+    # Don't use unique_database so that we issue several "create database" statements
+    # rather simultaneously on the same object.
+    self.client.execute("create database if not exists {0}".format(self.SHARED_DATABASE))
 
-    # Since this test runs concurrently, UUIDs generated by separate processes may
-    # (arguably) not be guaranteed to be unique. To be certain, we add the test ID to the
-    # tables we create.
-    test_id = vector.vector_values[0].value
-    tbl_uniquifier = "proc_%s_%s" % (test_id, str(uuid.uuid4()).replace('-', ''))
     for i in xrange(NUM_TBLS_PER_THREAD):
-      tbl_name = "tmp_%s_%s" % (tbl_uniquifier, i)
+      tbl_name = "{db}.test_{checksum}_{i}".format(
+          db=self.SHARED_DATABASE,
+          checksum=testid_checksum,
+          i=i)
+      # Because we're using a shared database, first clear potential tables from any
+      # previous test runs that failed to properly clean up
+      self.client.execute("drop table if exists {0}".format(tbl_name))
+      self.client.execute("drop table if exists {0}_part".format(tbl_name))
       # Create a partitioned and unpartitioned table
       self.client.execute("create table %s (i int)" % tbl_name)
-      self.client.execute("create table %s_part (i int) partitioned by (j int)" %\
-          tbl_name)
+      self.client.execute(
+          "create table %s_part (i int) partitioned by (j int)" % tbl_name)
       # Add some data to each
-      self.client.execute("insert overwrite table %s select int_col from "\
+      self.client.execute(
+          "insert overwrite table %s select int_col from "
           "functional.alltypestiny" % tbl_name)
-      self.client.execute("insert overwrite table %s_part partition(j) "\
+      self.client.execute(
+          "insert overwrite table %s_part partition(j) "
           "values (1, 1), (2, 2), (3, 3), (4, 4), (4, 4)" % tbl_name)
 
       # Cache the data the unpartitioned table


[6/6] incubator-impala git commit: IMPALA-4821: Update AVG() for DECIMAL_V2

Posted by mj...@apache.org.
IMPALA-4821: Update AVG() for DECIMAL_V2

This change implements the DECIMAL_V2 behavior for AVG().
The differences from DECIMAL_V1 are:

1. The output type has a minimum scale of 6. This is similar
to MS SQL's behavior, which takes the max of 6 and the input
type's scale. We deviate from MS SQL in the output's precision:
MS SQL always sets it to 38, whereas we use the smallest
precision that can store the output. A key insight is that the
output of AVG() is no wider than the inputs, so precision only
needs to be adjusted when the scale is augmented. Using a
smaller precision avoids potential loss of precision in
subsequent decimal operations (e.g. division) when AVG() is a
subexpression. Note that the output type differs from that of
SUM()/COUNT(), as the latter can have a much larger scale.

2. Because the output gets a minimum of 6 decimal places,
AVG() over decimal values whose integer part exceeds 32 digits
(e.g. DECIMAL(38,4), DECIMAL(33,0)) will always overflow once
the scale is augmented to 6. Certain decimal types that work
with AVG() in DECIMAL_V1 therefore no longer work in DECIMAL_V2
(see the sketch below).
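
To make the type rule concrete, a minimal sketch of the derivation (an
assumption-level mirror of the FunctionCallExpr.java change in the diff
below, with the result precision capped at 38, consistent with the new
expr-test cases):

  #include <algorithm>
  #include <cassert>

  struct DecimalType { int precision; int scale; };

  const int MIN_ADJUSTED_SCALE = 6;  // minimum scale AVG() gets under DECIMAL_V2
  const int MAX_PRECISION = 38;      // widest decimal Impala supports

  // Result type of AVG() over a DECIMAL(precision, scale) input under DECIMAL_V2.
  DecimalType AvgResultType(const DecimalType& in) {
    int digits_before = in.precision - in.scale;
    int result_scale = std::max(MIN_ADJUSTED_SCALE, in.scale);
    int result_precision = std::min(MAX_PRECISION, digits_before + result_scale);
    return DecimalType{result_precision, result_scale};
  }

  int main() {
    // DECIMAL(10,2) -> DECIMAL(14,6): 8 integer digits + 6 fractional digits.
    DecimalType t1 = AvgResultType(DecimalType{10, 2});
    assert(t1.precision == 14 && t1.scale == 6);
    // DECIMAL(33,0): 33 integer digits + 6 fractional digits exceed 38, so the
    // type is capped at DECIMAL(38,6) and the average always overflows.
    DecimalType t2 = AvgResultType(DecimalType{33, 0});
    assert(t2.precision == 38 && t2.scale == 6);
    return 0;
  }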

Change-Id: I28f5ef0370938440eb5b1c6d29b2f24e6f88499f
Reviewed-on: http://gerrit.cloudera.org:8080/6038
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/637cc3e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/637cc3e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/637cc3e4

Branch: refs/heads/master
Commit: 637cc3e447650a5c9bc4b8dd79ee74fd11459fa2
Parents: ed71133
Author: Michael Ho <kw...@cloudera.com>
Authored: Tue Feb 14 19:28:15 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 22 06:31:14 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/aggregate-functions-ir.cc          |  17 ++-
 be/src/exprs/expr-test.cc                       |  77 ++++++++--
 .../impala/analysis/FunctionCallExpr.java       |  16 ++-
 .../queries/QueryTest/decimal-exprs.test        | 143 ++++++++++++++++++-
 .../queries/QueryTest/decimal.test              |  58 ++------
 5 files changed, 235 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/637cc3e4/be/src/exprs/aggregate-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc
index 531598c..6c8c781 100644
--- a/be/src/exprs/aggregate-functions-ir.cc
+++ b/be/src/exprs/aggregate-functions-ir.cc
@@ -437,19 +437,18 @@ DecimalVal AggregateFunctions::DecimalAvgGetValue(FunctionContext* ctx,
     const StringVal& src) {
   DecimalAvgState* val_struct = reinterpret_cast<DecimalAvgState*>(src.ptr);
   if (val_struct->count == 0) return DecimalVal::null();
-  const FunctionContext::TypeDesc& output_desc = ctx->GetReturnType();
-  DCHECK_EQ(FunctionContext::TYPE_DECIMAL, output_desc.type);
   Decimal16Value sum(val_struct->sum.val16);
   Decimal16Value count(val_struct->count);
-  // The scale of the accumulated sum must be the same as the scale of the return type.
-  // TODO: Investigate whether this is always the right thing to do. Does the current
-  // implementation result in an unacceptable loss of output precision?
-  ColumnType sum_type = ColumnType::CreateDecimalType(38, output_desc.scale);
-  ColumnType count_type = ColumnType::CreateDecimalType(38, 0);
+
+  int output_precision =
+      ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_PRECISION);
+  int output_scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::RETURN_TYPE_SCALE);
+  // The scale of the accumulated sum is set to the scale of the input type.
+  int sum_scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::ARG_TYPE_SCALE, 0);
   bool is_nan = false;
   bool overflow = false;
-  Decimal16Value result = sum.Divide<int128_t>(sum_type, count, count_type,
-      output_desc.precision, output_desc.scale, &is_nan, &overflow);
+  Decimal16Value result = sum.Divide<int128_t>(sum_scale, count, 0 /* count's scale */,
+      output_precision, output_scale, &is_nan, &overflow);
   if (UNLIKELY(is_nan)) return DecimalVal::null();
   if (UNLIKELY(overflow)) {
     ctx->AddWarning("Avg computation overflowed, returning NULL");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/637cc3e4/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 5f3d1d0..442b64e 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -403,38 +403,50 @@ class ExprTest : public testing::Test {
         "1970-01-01 00:00:00");
   }
 
-  // Verify that 'expr' has the same precision and scale as 'expected_type'.
-  void TestDecimalResultType(const string& expr, const ColumnType& expected_type) {
-    const string typeof_expr = "typeof(" + expr + ")";
-    const string typeof_result = GetValue(typeof_expr, TYPE_STRING);
-    EXPECT_EQ(expected_type.DebugString(), typeof_result) << typeof_expr;
+  // Verify that output of 'query' has the same precision and scale as 'expected_type'.
+  // 'query' is an expression, optionally followed by a from clause which is needed
+  // for testing aggregate expressions.
+  void TestDecimalResultType(const string& query, const ColumnType& expected_type) {
+    // For the case with from clause, we need to generate the "typeof query" by first
+    // extracting the select list.
+    size_t from_offset = query.find("from");
+    string typeof_query;
+    if (from_offset != string::npos) {
+      int query_len = query.length();
+      typeof_query = "typeof(" + query.substr(0, from_offset) + ")" +
+          query.substr(from_offset, query_len - from_offset);
+    } else {
+      typeof_query = "typeof(" + query + ")";
+    }
+    const string typeof_result = GetValue(typeof_query, TYPE_STRING);
+    EXPECT_EQ(expected_type.DebugString(), typeof_result) << typeof_query;
   }
 
   // Decimals don't work with TestValue.
   // TODO: figure out what operators need to be implemented to work with EXPECT_EQ
   template<typename T>
-  void TestDecimalValue(const string& expr, const T& expected_result,
+  void TestDecimalValue(const string& query, const T& expected_result,
       const ColumnType& expected_type) {
     // Verify precision and scale of the expression match the expected type.
-    TestDecimalResultType(expr, expected_type);
+    TestDecimalResultType(query, expected_type);
     // Verify the expression result matches the expected result, for the given the
     // precision and scale.
-    const string value = GetValue(expr, expected_type);
+    const string value = GetValue(query, expected_type);
     StringParser::ParseResult result;
     // These require that we've passed the correct type to StringToDecimal(), so these
     // results are valid only when TestDecimalResultType() succeeded.
     switch (expected_type.GetByteSize()) {
       case 4:
         EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int32_t>(
-            &value[0], value.size(), expected_type, &result).value()) << expr;
+            &value[0], value.size(), expected_type, &result).value()) << query;
         break;
       case 8:
         EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int64_t>(
-            &value[0], value.size(), expected_type, &result).value()) << expr;
+            &value[0], value.size(), expected_type, &result).value()) << query;
         break;
       case 16:
         EXPECT_EQ(expected_result.value(), StringParser::StringToDecimal<int128_t>(
-            &value[0], value.size(), expected_type, &result).value()) << expr;
+            &value[0], value.size(), expected_type, &result).value()) << query;
         break;
       default:
         EXPECT_TRUE(false) << expected_type << " " << expected_type.GetByteSize();
@@ -1435,7 +1447,48 @@ DecimalTestCase decimal_cases[] = {
   // The overload greatest(decimal(*,*)) is available and should be used.
   { "greatest(0, cast('99999.1111' as decimal(30,10)))",
     {{ false, 999991111000000, 30, 10 },
-     { false, 999991111000000, 30, 10 }}}
+     { false, 999991111000000, 30, 10 }}},
+  // Test AVG() with DECIMAL
+  { "avg(d) from (values((cast(100000000000000000000000000000000.00000 as DECIMAL(38,5)) "
+    "as d))) as t",
+    {{ false, static_cast<int128_t>(10000000ll) *
+       10000000000ll * 10000000000ll * 10000000000ll, 38, 5 },
+     { true, 0, 38, 6}}},
+  { "avg(d) from (values((cast(1234567890 as DECIMAL(10,0)) as d))) as t",
+    {{false, 1234567890, 10, 0},
+     {false, 1234567890000000, 16, 6}}},
+  { "avg(d) from (values((cast(1234567.89 as DECIMAL(10,2)) as d))) as t",
+    {{false, 123456789, 10, 2},
+     {false, 1234567890000, 14, 6}}},
+  { "avg(d) from (values((cast(10000000000000000000000000000000 as DECIMAL(32,0)) "
+    "as d))) as t",
+    {{false, static_cast<int128_t>(10) *
+      10000000000ll * 10000000000ll * 10000000000ll, 32, 0},
+     {false, static_cast<int128_t>(10000000) *
+      10000000000ll * 10000000000ll * 10000000000ll, 38, 6}}},
+  { "avg(d) from (values((cast(100000000000000000000000000000000 as DECIMAL(33,0)) "
+    "as d))) as t",
+    {{false, static_cast<int128_t>(100) *
+      10000000000ll * 10000000000ll * 10000000000ll, 33, 0},
+     {true, 0, 38, 6}}},
+  { "avg(d) from (values((cast(100000000000000000000000000000000.0 as DECIMAL(34,1)) "
+    "as d))) as t",
+    {{false, static_cast<int128_t>(1000) *
+      10000000000ll * 10000000000ll * 10000000000ll, 34, 1},
+     {true, 0, 38, 6}}},
+  { "avg(d) from (values((cast(100000000000000000000000000000000.00000 as DECIMAL(38,5)) "
+    "as d))) as t",
+    {{false, static_cast<int128_t>(10000000) *
+      10000000000ll * 10000000000ll * 10000000000ll, 38, 5},
+     {true, 0, 38, 6}}},
+  { "avg(d) from (values((cast(10000000000000000000000000000000.000000 as DECIMAL(38,6)) "
+    "as d))) as t",
+    {{false, static_cast<int128_t>(10000000) *
+      10000000000ll * 10000000000ll * 10000000000ll, 38, 6}}},
+  { "avg(d) from (values((cast(0.10000000000000000000000000000000000000 as DECIMAL(38,38)) "
+    "as d))) as t",
+    {{false, static_cast<int128_t>(10000000) *
+      10000000000ll * 10000000000ll * 10000000000ll, 38, 38}}}
 };
 
 TEST_F(ExprTest, DecimalArithmeticExprs) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/637cc3e4/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
index 482a31e..a747af8 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
@@ -331,7 +331,21 @@ public class FunctionCallExpr extends Expr {
 
     int digitsBefore = childType.decimalPrecision() - childType.decimalScale();
     int digitsAfter = childType.decimalScale();
-    if (fnName_.getFunction().equalsIgnoreCase("ceil") ||
+    if (fnName_.getFunction().equalsIgnoreCase("avg") &&
+        analyzer.getQueryOptions().isDecimal_v2()) {
+      // AVG() always gets at least MIN_ADJUSTED_SCALE decimal places since it performs
+      // an implicit divide. The output type isn't always the same as SUM()/COUNT().
+      // Scale is set the same as MS SQL Server, which takes the max of the input scale
+      // and MIN_ADJUST_SCALE. For precision, MS SQL always sets it to 38. We choose to
+      // trim it down to the size that's needed because the absolute value of the result
+      // is less than the absolute value of the largest input. Using a smaller precision
+      // allows for better DECIMAL types to be chosen for the overall expression when
+      // AVG() is a subexpression. For DECIMAL_V1, we set the output type to be the same
+      // as the input type.
+      int resultScale = Math.max(ScalarType.MIN_ADJUSTED_SCALE, digitsAfter);
+      int resultPrecision = digitsBefore + resultScale;
+      return ScalarType.createAdjustedDecimalType(resultPrecision, resultScale);
+    } else if (fnName_.getFunction().equalsIgnoreCase("ceil") ||
                fnName_.getFunction().equalsIgnoreCase("ceiling") ||
                fnName_.getFunction().equals("floor") ||
                fnName_.getFunction().equals("dfloor")) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/637cc3e4/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
index 89385bd..1dff5f1 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
@@ -30,7 +30,7 @@ DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
 ---- QUERY
 # Test casting behavior without decimal_v2 query option set.
 set decimal_v2=false;
-select cast(d3 as decimal(20, 3)) from functional.decimal_tbl;
+select cast(d3 as decimal(20, 3)) from decimal_tbl;
 ---- RESULTS
 1.234
 12.345
@@ -43,7 +43,7 @@ DECIMAL
 ---- QUERY
 # Test casting behavior with decimal_v2 query option set.
 set decimal_v2=true;
-select cast(d3 as decimal(20, 3)) from functional.decimal_tbl;
+select cast(d3 as decimal(20, 3)) from decimal_tbl;
 ---- RESULTS
 1.235
 12.346
@@ -56,7 +56,7 @@ DECIMAL
 ---- QUERY
 # Test casting behavior without decimal_v2 query option set.
 set decimal_v2=false;
-select sum(cast(d3 as DECIMAL(20,2)) + cast(d5 as DECIMAL(20,4))) from functional.decimal_tbl;
+select sum(cast(d3 as DECIMAL(20,2)) + cast(d5 as DECIMAL(20,4))) from decimal_tbl;
 ---- RESULTS
 26078.2788
 ---- TYPES
@@ -65,9 +65,144 @@ DECIMAL
 ---- QUERY
 # Test casting behavior with decimal_v2 query option set.
 set decimal_v2=true;
-select sum(cast(d3 as DECIMAL(20,2)) + cast(d5 as DECIMAL(20,4))) from functional.decimal_tbl;
+select sum(cast(d3 as DECIMAL(20,2)) + cast(d5 as DECIMAL(20,4))) from decimal_tbl;
 ---- RESULTS
 26078.3189
 ---- TYPES
 DECIMAL
 ====
+---- QUERY
+# Test AVG() with DECIMAL_V1
+set decimal_v2=false;
+select avg(d1), avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl;
+---- RESULTS
+32222,666,2743.4567651580,0.12345678900000000000000000000000000000,2472.20577,1
+---- TYPES
+DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
+====
+---- QUERY
+# Test AVG() with DECIMAL_V2
+set decimal_v2=true;
+select avg(d1), avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl;
+---- RESULTS
+32222.200000,666.400000,2743.4567651580,0.12345678900000000000000000000000000000,2472.205778,1.000000
+---- TYPES
+DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
+====
+---- QUERY
+# Test AVG() with DECIMAL_V1
+set decimal_v2=false;
+select l_tax, avg(cast(l_extendedprice as decimal(38,10))), avg(l_extendedprice)
+from tpch_parquet.lineitem group by l_tax order by 1;
+---- RESULTS
+0.00,38241.5984613546,38241.59
+0.01,38283.5417664599,38283.54
+0.02,38250.4873094187,38250.48
+0.03,38259.2810374789,38259.28
+0.04,38247.1967454731,38247.19
+0.05,38234.8480874721,38234.84
+0.06,38246.4342924027,38246.43
+0.07,38281.1963710003,38281.19
+0.08,38251.6233675941,38251.62
+---- TYPES
+DECIMAL,DECIMAL,DECIMAL
+====
+---- QUERY
+# Test AVG() with DECIMAL_V2
+set decimal_v2=true;
+select l_tax, avg(cast(l_extendedprice as decimal(38,10))), avg(l_extendedprice)
+from tpch_parquet.lineitem group by l_tax order by 1;
+---- RESULTS
+0.00,38241.5984613546,38241.598461
+0.01,38283.5417664599,38283.541766
+0.02,38250.4873094187,38250.487309
+0.03,38259.2810374789,38259.281037
+0.04,38247.1967454731,38247.196745
+0.05,38234.8480874721,38234.848087
+0.06,38246.4342924027,38246.434292
+0.07,38281.1963710003,38281.196371
+0.08,38251.6233675941,38251.623367
+---- TYPES
+DECIMAL,DECIMAL,DECIMAL
+====
+---- QUERY
+# Test AVG() with DECIMAL_V1
+set decimal_v2=false;
+select avg(l_extendedprice) as a from tpch_parquet.lineitem
+group by l_tax having a > 38247.190 order by 1;
+---- RESULTS
+38250.48
+38251.62
+38259.28
+38281.19
+38283.54
+---- TYPES
+DECIMAL
+====
+---- QUERY
+# Test AVG() with DECIMAL_V2
+set decimal_v2=true;
+select avg(l_extendedprice) as a from tpch_parquet.lineitem
+group by l_tax having a > 38247.190 order by 1;
+---- RESULTS
+38247.196745
+38250.487309
+38251.623367
+38259.281037
+38281.196371
+38283.541766
+---- TYPES
+DECIMAL
+====
+---- QUERY
+# Test sum() and avg() analytic fns with start bounds (tests Remove() for decimal)
+# with DECIMAL_V1
+set decimal_v2=false;
+select
+sum(c1) over (order by c1 rows between 5 preceding and current row),
+sum(c2) over (order by c1 rows between 5 preceding and 5 following),
+sum(c3) over (order by c1 rows between 5 preceding and 2 preceding),
+avg(c1) over (order by c1 rows between 5 preceding and current row),
+avg(c2) over (order by c1 rows between 5 preceding and 5 following),
+avg(c3) over (order by c1 rows between 5 preceding and 2 preceding)
+from decimal_tiny where c2 < 112
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+0.0000,618.33330,NULL,0.0000,103.05555,NULL
+0.1111,725.66662,NULL,0.0555,103.66666,NULL
+0.3333,834.22216,0.0,0.1111,104.27777,0.0
+0.6666,943.99992,0.1,0.1666,104.88888,0.0
+1.1110,1054.99990,0.3,0.2222,105.49999,0.1
+1.6665,1054.99990,0.6,0.2777,105.49999,0.1
+2.3331,954.99990,1.0,0.3888,106.11110,0.2
+2.9997,853.77768,1.4,0.4999,106.72221,0.3
+3.6663,751.33324,1.8,0.6110,107.33332,0.4
+4.3329,647.66658,2.2,0.7221,107.94443,0.5
+---- TYPES
+DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
+====
+---- QUERY
+# Test sum() and avg() analytic fns with start bounds (tests Remove() for decimal)
+# with DECIMAL_V2
+set decimal_v2=true;
+select
+sum(c1) over (order by c1 rows between 5 preceding and current row),
+sum(c2) over (order by c1 rows between 5 preceding and 5 following),
+sum(c3) over (order by c1 rows between 5 preceding and 2 preceding),
+avg(c1) over (order by c1 rows between 5 preceding and current row),
+avg(c2) over (order by c1 rows between 5 preceding and 5 following),
+avg(c3) over (order by c1 rows between 5 preceding and 2 preceding)
+from decimal_tiny where c2 < 112
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+0.0000,618.33330,NULL,0.000000,103.055550,NULL
+0.1111,725.66662,NULL,0.055550,103.666660,NULL
+0.3333,834.22216,0.0,0.111100,104.277770,0.000000
+0.6666,943.99992,0.1,0.166650,104.888880,0.050000
+1.1110,1054.99990,0.3,0.222200,105.499990,0.100000
+1.6665,1054.99990,0.6,0.277750,105.499990,0.150000
+2.3331,954.99990,1.0,0.388850,106.111100,0.250000
+2.9997,853.77768,1.4,0.499950,106.722210,0.350000
+3.6663,751.33324,1.8,0.611050,107.333320,0.450000
+4.3329,647.66658,2.2,0.722150,107.944430,0.550000
+---- TYPES
+DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
+====
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/637cc3e4/testdata/workloads/functional-query/queries/QueryTest/decimal.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/decimal.test b/testdata/workloads/functional-query/queries/QueryTest/decimal.test
index 6e7cbae..1f0fa4c 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/decimal.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/decimal.test
@@ -134,23 +134,6 @@ order by t1.c1 desc limit 3
 DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
 ====
 ---- QUERY
-select avg(d1), avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl;
----- RESULTS
-32222,666,2743.4567651580,0.12345678900000000000000000000000000000,2472.20577,1
----- TYPES
-decimal,decimal,decimal,decimal,decimal,decimal
-====
----- QUERY
-select d1, avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl group by d1;
----- RESULTS
-132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
-2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
-1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
-12345,333,679.0123395000,0.12345678900000000000000000000000000000,5.66000,1
----- TYPES
-decimal,decimal,decimal,decimal,decimal,decimal
-====
----- QUERY
 select count(d1), count(d2), count(d3), count(d4), count(d5), count(d6) from decimal_tbl;
 ---- RESULTS
 5,5,5,5,5,5
@@ -219,18 +202,6 @@ select d1, ndv(d2), ndv(d3), ndv(d4), ndv(d5), ndv(d6) from decimal_tbl group by
 decimal,bigint,bigint,bigint,bigint,bigint
 ====
 ---- QUERY
-select cast(avg(c1) as decimal(10,4)) as c from decimal_tiny
-group by c3 having c > 5.5 order by 1
----- RESULTS
-5.5550
-5.6661
-5.7772
-5.8883
-5.9994
----- TYPES
-decimal
-====
----- QUERY
 select a.c1 from decimal_tiny a left semi join decimal_tiny b on a.c1=b.c3
 ---- RESULTS
 0.0000
@@ -344,28 +315,15 @@ select * from decimal_tiny
 DECIMAL, DECIMAL, DECIMAL
 ====
 ---- QUERY
-# Test sum() and avg() analytic fns with start bounds (tests Remove() for decimal)
-select
-sum(c1) over (order by c1 rows between 5 preceding and current row),
-sum(c2) over (order by c1 rows between 5 preceding and 5 following),
-sum(c3) over (order by c1 rows between 5 preceding and 2 preceding),
-avg(c1) over (order by c1 rows between 5 preceding and current row),
-avg(c2) over (order by c1 rows between 5 preceding and 5 following),
-avg(c3) over (order by c1 rows between 5 preceding and 2 preceding)
-from decimal_tiny where c2 < 112
----- RESULTS: VERIFY_IS_EQUAL_SORTED
-0.0000,618.33330,NULL,0.0000,103.05555,NULL
-0.1111,725.66662,NULL,0.0555,103.66666,NULL
-0.3333,834.22216,0.0,0.1111,104.27777,0.0
-0.6666,943.99992,0.1,0.1666,104.88888,0.0
-1.1110,1054.99990,0.3,0.2222,105.49999,0.1
-1.6665,1054.99990,0.6,0.2777,105.49999,0.1
-2.3331,954.99990,1.0,0.3888,106.11110,0.2
-2.9997,853.77768,1.4,0.4999,106.72221,0.3
-3.6663,751.33324,1.8,0.6110,107.33332,0.4
-4.3329,647.66658,2.2,0.7221,107.94443,0.5
+# Test group-by with decimal
+select d1, d2, sum(d3), sum(d4), sum(d5), sum(d6) from decimal_tbl group by d1,d2;
+---- RESULTS
+132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
+12345,333,1358.0246790000,0.24691357800000000000000000000000000000,11.32000,2
+1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
+2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
 ---- TYPES
-DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
+decimal,decimal,decimal,decimal,decimal,decimal
 ====
 ---- QUERY
 # IMPALA-1559: FIRST_VALUE rewrite function intermediate type not matching slot type


[5/6] incubator-impala git commit: IMPALA-4934: Disable Kudu OpenSSL initialization

Posted by mj...@apache.org.
IMPALA-4934: Disable Kudu OpenSSL initialization

Bumps the Kudu version to include the client change that allows
Impala to disable Kudu's OpenSSL initialization.

In authentication.cc, after Impala initializes OpenSSL, it now
disables Kudu's OpenSSL init.

Also fixed a Python test case that started failing after bumping
the Kudu client version.

Change-Id: I3f13f3af512c6d771979638da593685524c73086
Reviewed-on: http://gerrit.cloudera.org:8080/6056
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ed711330
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ed711330
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ed711330

Branch: refs/heads/master
Commit: ed711330fce4b8b739e9f30a28720b1c8e90fc63
Parents: 441e15a
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Fri Feb 17 09:07:56 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 22 05:06:20 2017 +0000

----------------------------------------------------------------------
 be/src/rpc/authentication.cc            | 9 ++++++++-
 bin/impala-config.sh                    | 4 ++--
 infra/python/deps/download_requirements | 2 +-
 infra/python/deps/requirements.txt      | 2 +-
 tests/query_test/test_kudu.py           | 2 +-
 5 files changed, 13 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed711330/be/src/rpc/authentication.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/authentication.cc b/be/src/rpc/authentication.cc
index 665a68c..a42a1a4 100644
--- a/be/src/rpc/authentication.cc
+++ b/be/src/rpc/authentication.cc
@@ -643,7 +643,7 @@ Status InitAuth(const string& appname) {
     // Impala's SASL initialization. This must be called before any KuduClients are
     // created to ensure that Kudu doesn't init SASL first, and this returns an error if
     // Kudu has already initialized SASL.
-    if (impala::KuduIsAvailable()) {
+    if (KuduIsAvailable()) {
       KUDU_RETURN_IF_ERROR(kudu::client::DisableSaslInitialization(),
           "Unable to disable Kudu SASL initialization.");
     }
@@ -656,7 +656,14 @@ Status InitAuth(const string& appname) {
     }
   }
 
+  // Initializes OpenSSL.
   RETURN_IF_ERROR(AuthManager::GetInstance()->Init());
+
+  // Prevent Kudu from re-initializing OpenSSL.
+  if (KuduIsAvailable()) {
+    KUDU_RETURN_IF_ERROR(kudu::client::DisableOpenSSLInitialization(),
+        "Unable to disable Kudu SSL initialization.");
+  }
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed711330/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 1782577..af76ac9 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -120,10 +120,10 @@ if [[ $OSTYPE == "darwin"* ]]; then
 fi
 
 # Kudu version in the toolchain; provides libkudu_client.so and minicluster binaries.
-export IMPALA_KUDU_VERSION=cd7b0dd
+export IMPALA_KUDU_VERSION=2b0edbe
 
 # Kudu version used to identify Java client jar from maven
-export KUDU_JAVA_VERSION=1.2.0-SNAPSHOT
+export KUDU_JAVA_VERSION=1.3.0-SNAPSHOT
 
 # Versions of Hadoop ecosystem dependencies.
 # ------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed711330/infra/python/deps/download_requirements
----------------------------------------------------------------------
diff --git a/infra/python/deps/download_requirements b/infra/python/deps/download_requirements
index 2ef8922..f610cab 100755
--- a/infra/python/deps/download_requirements
+++ b/infra/python/deps/download_requirements
@@ -29,5 +29,5 @@ PY26="$(./find_py26.py)"
 "$PY26" pip_download.py virtualenv 13.1.0
 # kudu-python is downloaded separately because pip install attempts to execute a
 # setup.py subcommand for kudu-python that can fail even if the download succeeds.
-"$PY26" pip_download.py kudu-python 1.1.0
+"$PY26" pip_download.py kudu-python 1.2.0
 popd

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed711330/infra/python/deps/requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt
index 1c3a329..1fa5a28 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -82,7 +82,7 @@ texttable == 0.8.3
 # functional and determines the expected kudu-python version. The version must be listed
 # in the format below including # and spacing. Keep this formatting! The kudu-python
 # version in download_requirements must be kept in sync with this version.
-# kudu-python==1.1.0
+# kudu-python==1.2.0
   Cython == 0.23.4
   numpy == 1.10.4
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed711330/tests/query_test/test_kudu.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 50db7ee..0359161 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -204,7 +204,7 @@ class TestKuduOperations(KuduTestSuite):
     # Add some rows
     session = kudu_client.new_session()
     for i in range(100):
-      op = table.new_insert((i, None))
+      op = table.new_insert((i, "foo"))
       session.apply(op)
     session.flush()