Posted to commits@impala.apache.org by jo...@apache.org on 2019/08/08 16:05:28 UTC

[impala] branch master updated (6d68c4f -> a0c00e5)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 6d68c4f  IMPALA-8549: Add support for scanning DEFLATE text files
     new ef79663  IMPALA-8840: Check failed: num_bytes <= sizeof(T) (5 vs. 4)
     new a0c00e5  Bump CDP_BUILD_NUMBER to 1318335

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/util/bit-packing.h         |  1 +
 be/src/util/bit-packing.inline.h  |  7 +++++--
 be/src/util/dict-test.cc          |  2 +-
 be/src/util/rle-encoding.h        | 10 ++++++----
 bin/impala-config.sh              | 10 +++++-----
 tests/custom_cluster/test_kudu.py |  2 ++
 tests/query_test/test_insert.py   | 15 +++++++++------
 7 files changed, 29 insertions(+), 18 deletions(-)


[impala] 02/02: Bump CDP_BUILD_NUMBER to 1318335

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a0c00e508f08542b55f140e4c329bf498141e921
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Aug 1 16:27:06 2019 +0200

    Bump CDP_BUILD_NUMBER to 1318335
    
    The main reason for the bump is to pick up HIVE-21838.
    The change also skips or fixes some tests.
    
    Change-Id: I432e8c02dbd349a3507bfabfef2727914537652c
    Reviewed-on: http://gerrit.cloudera.org:8080/14005
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh              | 10 +++++-----
 tests/custom_cluster/test_kudu.py |  2 ++
 tests/query_test/test_insert.py   | 15 +++++++++------
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index e5f52a2..effddc4 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -168,19 +168,19 @@ fi
 export IMPALA_TOOLCHAIN_HOST
 export CDH_MAJOR_VERSION=6
 export CDH_BUILD_NUMBER=1173663
-export CDP_BUILD_NUMBER=1268086
+export CDP_BUILD_NUMBER=1318335
 export CDH_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT
-export CDP_HADOOP_VERSION=3.1.1.7.0.0.0-280
+export CDP_HADOOP_VERSION=3.1.1.7.0.0.0-365
 export IMPALA_HBASE_VERSION=2.1.0-cdh6.x-SNAPSHOT
 export IMPALA_SENTRY_VERSION=2.1.0-cdh6.x-SNAPSHOT
-export IMPALA_RANGER_VERSION=1.2.0.7.0.0.0-280
+export IMPALA_RANGER_VERSION=1.2.0.7.0.0.0-365
 export IMPALA_PARQUET_VERSION=1.9.0-cdh6.x-SNAPSHOT
 export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
 export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
 export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
 export IMPALA_KUDU_JAVA_VERSION=1.10.0-cdh6.x-SNAPSHOT
 export CDH_HIVE_VERSION=2.1.1-cdh6.x-SNAPSHOT
-export CDP_HIVE_VERSION=3.1.0.7.0.0.0-280
+export CDP_HIVE_VERSION=3.1.0.7.0.0.0-365
 
 # When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
 # which will be substituted for the CDH platform label in bootstrap_toolchain.py
@@ -207,7 +207,7 @@ if $USE_CDP_HIVE; then
   # When USE_CDP_HIVE is set we use the CDP hive version to build as well as deploy in
   # the minicluster
   export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION}
-  export IMPALA_TEZ_VERSION=0.9.1.7.0.0.0-280
+  export IMPALA_TEZ_VERSION=0.9.1.7.0.0.0-365
   export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION}
   export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${CDP_HADOOP_VERSION}/"
 else
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index c4049ca..17ea63a 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -147,6 +147,7 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
     self.run_test_case('QueryTest/kudu_create', vector, use_db=unique_database)
 
   @pytest.mark.execute_serially
+  @SkipIfHive3.kudu_hms_notifications_not_supported
   def test_implicit_external_table_props(self, cursor, kudu_client):
     """Check that table properties added internally for external table during
        table creation are as expected.
@@ -172,6 +173,7 @@ class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
             in table_desc
 
   @pytest.mark.execute_serially
+  @SkipIfHive3.kudu_hms_notifications_not_supported
   def test_implicit_managed_table_props(self, cursor, kudu_client, unique_database):
     """Check that table properties added internally for managed table during table
        creation are as expected.
diff --git a/tests/query_test/test_insert.py b/tests/query_test/test_insert.py
index 41cce30..be298a7 100644
--- a/tests/query_test/test_insert.py
+++ b/tests/query_test/test_insert.py
@@ -147,12 +147,15 @@ class TestInsertQueries(ImpalaTestSuite):
     # need to drop and create such tables, and table properties are preserved during
     # those operations and this is enough for the tests (A table is ACID if it has the
     # relevant table properties).
-    capability_check = self.hive_client.getMetaConf("metastore.client.capability.check")
-    self.hive_client.setMetaConf("metastore.client.capability.check", "false")
-    self.run_test_case('QueryTest/acid-insert', vector,
-        multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)
-    # Reset original state.
-    self.hive_client.setMetaConf("metastore.client.capability.check", capability_check)
+    CAPABILITY_CHECK_CONF = "hive.metastore.client.capability.check"
+    capability_check = self.hive_client.getMetaConf(CAPABILITY_CHECK_CONF)
+    try:
+      self.hive_client.setMetaConf(CAPABILITY_CHECK_CONF, "false")
+      self.run_test_case('QueryTest/acid-insert', vector,
+          multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)
+    finally:
+      # Reset original state.
+      self.hive_client.setMetaConf(CAPABILITY_CHECK_CONF, capability_check)
 
   @SkipIfHive2.acid
   @UniqueDatabase.parametrize(sync_ddl=True)


[impala] 01/02: IMPALA-8840: Check failed: num_bytes <= sizeof(T) (5 vs. 4)

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ef796639451af63bc9f4dbdc2a79bfc2f5048a93
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Wed Aug 7 14:12:51 2019 +0200

    IMPALA-8840: Check failed: num_bytes <= sizeof(T) (5 vs. 4)
    
    The actual DCHECK failure was fixed by the change for IMPALA-8833.
    
    Added a DCHECK to RleBatchDecoder so that it does not accept bit widths
    higher than the width of its type parameter.
    
    A static assert also prevents UnpackAndDecodeValues from using bit
    widths higher than 32, the width of the dictionary index type. This
    also reduces compile time because the compiler no longer has to
    generate code for invalid bit widths.
    
    Change-Id: I93461ba2cabb5ec7e0b65dcd62844fcbfa597d16
    Reviewed-on: http://gerrit.cloudera.org:8080/14029
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/bit-packing.h        |  1 +
 be/src/util/bit-packing.inline.h |  7 +++++--
 be/src/util/dict-test.cc         |  2 +-
 be/src/util/rle-encoding.h       | 10 ++++++----
 4 files changed, 13 insertions(+), 7 deletions(-)
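
For illustration, here is a minimal, self-contained C++ sketch of the two
guards this commit describes: a static_assert that caps the dictionary-index
bit width at 32 at compile time, and a runtime check (standing in for
Impala's DCHECK) that rejects bit widths wider than the decoder's value
type. GuardedUnpack and GuardedReset are hypothetical names used only for
illustration; they are not Impala functions.

    #include <cassert>
    #include <cstdint>

    // Mirrors MAX_DICT_BITWIDTH = sizeof(uint32_t) * 8 from bit-packing.h.
    constexpr int kMaxDictBitWidth = sizeof(uint32_t) * 8;

    // Stand-in for UnpackAndDecode32Values: reject invalid template bit
    // widths at compile time instead of instantiating code for them.
    template <int BIT_WIDTH>
    void GuardedUnpack() {
      static_assert(BIT_WIDTH <= kMaxDictBitWidth,
          "Too high bit width for dictionary index.");
      // ... unpacking work would go here ...
    }

    // Stand-in for RleBatchDecoder<T>::Reset(): the bit width must fit both
    // the value type T and the bit reader's maximum width.
    template <typename T>
    void GuardedReset(int bit_width, int max_reader_bitwidth) {
      assert(bit_width >= 0);
      assert(bit_width <= static_cast<int>(sizeof(T) * 8));  // the new check
      assert(bit_width <= max_reader_bitwidth);
      // ... reset decoder state here ...
    }

    int main() {
      GuardedUnpack<16>();                // fine: 16 <= 32
      // GuardedUnpack<40>();             // would fail to compile
      GuardedReset<uint32_t>(24, 64);     // fine: 24 <= 32
      // GuardedReset<uint32_t>(40, 64);  // would trip the new assert
      return 0;
    }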

diff --git a/be/src/util/bit-packing.h b/be/src/util/bit-packing.h
index d449048..140525b 100644
--- a/be/src/util/bit-packing.h
+++ b/be/src/util/bit-packing.h
@@ -47,6 +47,7 @@ namespace impala {
 class BitPacking {
  public:
   static constexpr int MAX_BITWIDTH = sizeof(uint64_t) * 8;
+  static constexpr int MAX_DICT_BITWIDTH = sizeof(uint32_t) * 8;
 
   /// Unpack bit-packed values with 'bit_width' from 'in' to 'out'. Keeps unpacking until
   /// either all 'in_bytes' are read or 'num_values' values are unpacked. 'out' must have
diff --git a/be/src/util/bit-packing.inline.h b/be/src/util/bit-packing.inline.h
index 1e8d39a..a279fa7 100644
--- a/be/src/util/bit-packing.inline.h
+++ b/be/src/util/bit-packing.inline.h
@@ -105,8 +105,8 @@ std::pair<const uint8_t*, int64_t> BitPacking::UnpackAndDecodeValues(int bit_wid
         in, in_bytes, dict, dict_len, num_values, out, stride, decode_error);
 
   switch (bit_width) {
-    // Expand cases from 0 to 64.
-    BOOST_PP_REPEAT_FROM_TO(0, 65, UNPACK_VALUES_CASE, ignore);
+    // Expand cases from 0 to MAX_DICT_BITWIDTH.
+    BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
     default:
       DCHECK(false);
       return std::make_pair(nullptr, -1);
@@ -271,6 +271,9 @@ const uint8_t* BitPacking::UnpackAndDecode32Values(const uint8_t* __restrict__ i
   // TODO: this could be optimised further by using SIMD instructions.
   // https://lemire.me/blog/2016/08/25/faster-dictionary-decoding-with-simd-instructions/
 
+  static_assert(BIT_WIDTH <= MAX_DICT_BITWIDTH,
+      "Too high bit width for dictionary index.");
+
   // Call UnpackValue() and DecodeValue() for 0 <= i < 32.
 #pragma push_macro("DECODE_VALUE_CALL")
 #define DECODE_VALUE_CALL(ignore1, i, ignore2)               \
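
The switch in the hunk above uses Boost.Preprocessor to stamp out one case
label per supported bit width, so shrinking the range from 0..64 to 0..32
roughly halves the amount of generated code. A minimal sketch of the same
expansion pattern, assuming Boost is available (DOUBLE_CASE and dispatch are
made-up names, not Impala code):

    #include <boost/preprocessor/repetition/repeat_from_to.hpp>

    // Expands to: case 0: return 0 * 2; case 1: return 1 * 2; ... case 32: ...
    #define DOUBLE_CASE(z, n, data) \
      case n: return n * 2;

    // One case per bit width in [0, 32]; anything else is invalid.
    int dispatch(int bit_width) {
      switch (bit_width) {
        BOOST_PP_REPEAT_FROM_TO(0, 33, DOUBLE_CASE, ignore);
        default:
          return -1;  // mirrors the DCHECK(false) branch above
      }
    }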
diff --git a/be/src/util/dict-test.cc b/be/src/util/dict-test.cc
index 7fbdcef..875d4c3 100644
--- a/be/src/util/dict-test.cc
+++ b/be/src/util/dict-test.cc
@@ -284,7 +284,7 @@ TEST(DictTest, SetDataInvalidBitwidthFails) {
   for (int i = 0; i < high_bit_width; i++) {
     buffer[0] = i;
     Status status = decoder.SetData(buffer, 5);
-    EXPECT_TRUE(status.ok());
+    EXPECT_OK(status);
   }
 
   // Reject too high bit widths.
diff --git a/be/src/util/rle-encoding.h b/be/src/util/rle-encoding.h
index 322b5d7..4da9fd9 100644
--- a/be/src/util/rle-encoding.h
+++ b/be/src/util/rle-encoding.h
@@ -82,10 +82,11 @@ namespace impala {
 
 /// RLE decoder with a batch-oriented interface that enables fast decoding.
 /// Users of this class must first initialize the class to point to a buffer of
-/// RLE-encoded data, passed into the constructor or Reset(). Then they can
-/// decode data by checking NextNumRepeats()/NextNumLiterals() to see if the
-/// next run is a repeated or literal run, then calling GetRepeatedValue()
-/// or GetLiteralValues() respectively to read the values.
+/// RLE-encoded data, passed into the constructor or Reset(). The provided
+/// bit_width must be at most min(sizeof(T) * 8, BatchedBitReader::MAX_BITWIDTH).
+/// Then they can decode data by checking NextNumRepeats()/NextNumLiterals() to
+/// see if the next run is a repeated or literal run, then calling
+/// GetRepeatedValue() or GetLiteralValues() respectively to read the values.
 ///
 /// End-of-input is signalled by NextNumRepeats() == NextNumLiterals() == 0.
 /// Other decoding errors are signalled by functions returning false. If an
@@ -495,6 +496,7 @@ inline void RleBatchDecoder<T>::Reset(uint8_t* buffer, int buffer_len, int bit_w
   DCHECK(buffer != nullptr);
   DCHECK_GE(buffer_len, 0);
   DCHECK_GE(bit_width, 0);
+  DCHECK_LE(bit_width, sizeof(T) * 8);
   DCHECK_LE(bit_width, BatchedBitReader::MAX_BITWIDTH);
   bit_reader_.Reset(buffer, buffer_len);
   bit_width_ = bit_width;
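
Taken together, the updated class comment describes the following decoding
loop. The sketch below is a rough usage example, not verbatim Impala code:
the exact method signatures are assumptions based on that comment, and
DecodeAll is a hypothetical helper.

    #include <cstdint>
    #include <vector>

    #include "util/rle-encoding.h"  // provides impala::RleBatchDecoder<T>

    // Hypothetical helper: drain RLE-encoded uint32_t values into 'out',
    // following the protocol in the class comment. Returns false on a
    // decoding error.
    bool DecodeAll(uint8_t* buffer, int buffer_len, int bit_width,
        std::vector<uint32_t>* out) {
      // bit_width must be at most min(sizeof(T) * 8, MAX_BITWIDTH).
      impala::RleBatchDecoder<uint32_t> decoder(buffer, buffer_len, bit_width);
      while (true) {
        int32_t num_repeats = decoder.NextNumRepeats();
        if (num_repeats > 0) {
          // Repeated run: one value repeated 'num_repeats' times.
          uint32_t value = decoder.GetRepeatedValue(num_repeats);
          out->insert(out->end(), num_repeats, value);
          continue;
        }
        int32_t num_literals = decoder.NextNumLiterals();
        if (num_literals > 0) {
          // Literal run: 'num_literals' individually encoded values.
          size_t old_size = out->size();
          out->resize(old_size + num_literals);
          if (!decoder.GetLiteralValues(num_literals, out->data() + old_size)) {
            return false;  // decoding error signalled by returning false
          }
          continue;
        }
        // NextNumRepeats() == NextNumLiterals() == 0 signals end of input.
        return true;
      }
    }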