You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/02/26 17:47:43 UTC

[impala] branch master updated (1772b3b -> e9936b0)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 1772b3b  Allow CMake to find JNI libraries from JDK
     new 5a270e0  IMPALA-7979: Fix bug of SkipValues() and GetNextValues() interaction
     new e9936b0  IMPALA-8247: Fix tests missing from unified backend executable

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/testutil/rand-util.h                  |  5 ++-
 be/src/util/CMakeLists.txt                   |  4 +-
 be/src/util/dict-encoding.h                  |  2 +-
 be/src/util/dict-test.cc                     | 61 ++++++++++++++++++++++++++++
 be/src/util/encoding-test-util.h             | 60 +++++++++++++++++++++++++++
 be/src/util/rle-encoding.h                   |  7 ++--
 be/src/util/rle-test.cc                      | 56 ++++---------------------
 bin/validate-unified-backend-test-filters.py | 16 ++++++++
 8 files changed, 157 insertions(+), 54 deletions(-)
 create mode 100644 be/src/util/encoding-test-util.h


[impala] 02/02: IMPALA-8247: Fix tests missing from unified backend executable

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e9936b02cf5b9673d6dd8f3624b07e2ec81f763e
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Mon Feb 25 18:10:46 2019 -0800

    IMPALA-8247: Fix tests missing from unified backend executable
    
    When converting the tests in be/src/util to the unified backend
    test executable in IMPALA-8071, some tests did not get linked
    into the executable appropriately. This means that they stopped
    running. Specifically, the tests in bit-stream-utils-test.cc
    and system-state-info-test.cc were left out.
    
    This adds them back and modifies bin/validate-unified-backend-filters.py
    to check both directions:
     - If a filter is specified, it must match a test in the executable.
     - If the executable has a test, some filter must match it.
    I ran the backend tests on debug and ASAN with this change.
    
    Change-Id: I80d8b5779cb0ac663e32c72722e0c73b68a43d2e
    Reviewed-on: http://gerrit.cloudera.org:8080/12584
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 be/src/util/CMakeLists.txt                   |  4 +++-
 bin/validate-unified-backend-test-filters.py | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 08d3111..0a26372 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -100,6 +100,7 @@ add_library(UtilTests
   benchmark-test.cc
   bitmap-test.cc
   bit-packing-test.cc
+  bit-stream-utils-test.cc
   bit-util-test.cc
   blocking-queue-test.cc
   bloom-filter-test.cc
@@ -129,6 +130,7 @@ add_library(UtilTests
   string-util-test.cc
   symbols-util-test.cc
   sys-info-test.cc
+  system-state-info-test.cc
   thread-pool-test.cc
   time-test.cc
   uid-util-test.cc
@@ -187,7 +189,7 @@ ADD_UNIFIED_BE_LSAN_TEST(redactor-unconfigured-test "RedactorUnconfigTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(rle-test "BitArray.*:RleTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(runtime-profile-test "CountersTest.*:TimerCounterTest.*:TimeSeriesCounterTest.*:VariousNumbers/TimeSeriesCounterResampleTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(string-parser-test "StringToInt.*:StringToIntWithBase.*:StringToFloat.*:StringToBool.*")
-ADD_UNIFIED_BE_LSAN_TEST(string-util-test "TruncateDownTest.*:TruncateUpTest.*:CommandSeparatedContainsTest.*:CommaSeparatedContainsTest.*")
+ADD_UNIFIED_BE_LSAN_TEST(string-util-test "TruncateDownTest.*:TruncateUpTest.*:CommaSeparatedContainsTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(symbols-util-test "SymbolsUtil.*")
 ADD_UNIFIED_BE_LSAN_TEST(system-state-info-test "SystemStateInfoTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(sys-info-test "CpuInfoTest.*:DiskInfoTest.*")
diff --git a/bin/validate-unified-backend-test-filters.py b/bin/validate-unified-backend-test-filters.py
index c872938..62733e9 100755
--- a/bin/validate-unified-backend-test-filters.py
+++ b/bin/validate-unified-backend-test-filters.py
@@ -63,5 +63,21 @@ def main():
             options.unified_binary, options.filters))
         sys.exit(1)
 
+    # Check to see if there are any filters that do not match tests in the unified
+    # test executable. This can indicate that a test file is not included appropriately
+    # in the executable. It can also indicate a bogus filter.
+    filters_without_tests = []
+    for test_filter in options.filters.split(":"):
+        if len(test_filter) == 0: continue
+        tests = get_set_of_tests(options.unified_binary, test_filter)
+        if len(tests) == 0:
+            filters_without_tests.append(test_filter)
+    if len(filters_without_tests) > 0:
+        print("FAILED: The following test filters do not match any tests in the\n"
+              "unified test executable. This can indicate that some test has not\n"
+              "been linked appropriately into the test executable:")
+        for test_filter in filters_without_tests:
+            print(test_filter)
+        sys.exit(1)
 
 if __name__ == "__main__": main()


[impala] 01/02: IMPALA-7979: Fix bug of SkipValues() and GetNextValues() interaction

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5a270e097214624ced7814692c58b03d575e7ace
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Fri Feb 22 17:52:47 2019 +0100

    IMPALA-7979: Fix bug of SkipValues() and GetNextValues() interaction
    
    Alternating calls to DictDecoder's SkipValues() and GetNextValues()
    could result in a decoding error.
    
    RleBatchDecoder<T>::DecodeLiteralValues() returned false when
    invoked with num_literals_to_consume=0 and had buffered literals.
    Then the caller (DictDecoder::GetNextValues()) thought it failed,
    and in the end it resulted in a decoding error.
    
    It could only occur if GetNextValues() was invoked after SkipValues().
    Otherwise the RleBatchDecoder in DictDecoder could not have buffered
    literals, since they were buffered in the dictionary decoder.
    
    Since SkipValues() is not used yet, it couldn't happen in production
    code.
    
    I added a backend test to fuzz test the interaction between
    SkipValues() and GetNextValues(). I looped the test for a few hours
    without seeing an error.
    
    Change-Id: Id0426cdef54ee9bc2306a542098a92c640dc41c4
    Reviewed-on: http://gerrit.cloudera.org:8080/12555
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/testutil/rand-util.h      |  5 ++--
 be/src/util/dict-encoding.h      |  2 +-
 be/src/util/dict-test.cc         | 61 ++++++++++++++++++++++++++++++++++++++++
 be/src/util/encoding-test-util.h | 60 +++++++++++++++++++++++++++++++++++++++
 be/src/util/rle-encoding.h       |  7 +++--
 be/src/util/rle-test.cc          | 56 ++++++------------------------------
 6 files changed, 138 insertions(+), 53 deletions(-)

diff --git a/be/src/testutil/rand-util.h b/be/src/testutil/rand-util.h
index 255fcb0..4e69428 100644
--- a/be/src/testutil/rand-util.h
+++ b/be/src/testutil/rand-util.h
@@ -30,8 +30,9 @@ namespace impala {
 class RandTestUtil {
  public:
   /// Seed 'rng' with a seed either from the environment variable 'env_var' or the
-  /// current time.
-  static void SeedRng(const char* env_var, std::mt19937* rng) {
+  /// random device of the platform.
+  template <typename RandomEngine>
+  static void SeedRng(const char* env_var, RandomEngine* rng) {
     const char* seed_str = getenv(env_var);
     int64_t seed;
     if (seed_str != nullptr) {
diff --git a/be/src/util/dict-encoding.h b/be/src/util/dict-encoding.h
index 78dc42f..fa52275 100644
--- a/be/src/util/dict-encoding.h
+++ b/be/src/util/dict-encoding.h
@@ -519,7 +519,7 @@ ALWAYS_INLINE inline bool DictDecoder<T>::GetNextValues(
         count -= num_literals;
       } else { // Case 2
         uint32_t num_to_decode = BitUtil::RoundDown(count, 32);
-        if (UNLIKELY(!data_decoder_.DecodeLiteralValues(
+        if (num_to_decode > 0 && UNLIKELY(!data_decoder_.DecodeLiteralValues(
                 num_to_decode, dict_.data(), dict_.size(), &out))) {
           return false;
         }
diff --git a/be/src/util/dict-test.cc b/be/src/util/dict-test.cc
index 4f46cf2..bb533d3 100644
--- a/be/src/util/dict-test.cc
+++ b/be/src/util/dict-test.cc
@@ -25,8 +25,10 @@
 #include "runtime/string-value.inline.h"
 #include "runtime/timestamp-value.h"
 #include "testutil/gtest-util.h"
+#include "testutil/rand-util.h"
 #include "util/bit-packing.inline.h"
 #include "util/dict-encoding.h"
+#include "util/encoding-test-util.h"
 
 #include "common/names.h"
 
@@ -336,6 +338,65 @@ TEST(DictTest, DecodeErrors) {
   }
 }
 
+TEST(DictTest, TestGetNextValuesAndSkippingFuzzy) {
+  const int values_size = 8192;
+  const int rounds = 100;
+  MemTracker tracker;
+  MemTracker track_encoder;
+  MemTracker track_decoder;
+  MemPool pool(&tracker);
+  DictEncoder<int> large_dict_encoder(&pool, sizeof(int), &track_encoder);
+
+  std::default_random_engine random_eng;
+  RandTestUtil::SeedRng("DICT_TEST_SEED", &random_eng);
+
+  // Generates random number between 'bottom' and 'top' (inclusive intervals).
+  auto GetRandom = [&random_eng](int bottom, int top) {
+    std::uniform_int_distribution<int> uni_dist(bottom, top);
+    return uni_dist(random_eng);
+  };
+
+  vector<int> values = MakeRandomSequence(random_eng, values_size, 200, 10);
+  for (int val : values) {
+    large_dict_encoder.Put(val);
+  }
+
+  vector<uint8_t> data_buffer(large_dict_encoder.EstimatedDataEncodedSize() * 2);
+  int data_len = large_dict_encoder.WriteData(data_buffer.data(), data_buffer.size());
+  ASSERT_GT(data_len, 0);
+
+  vector<uint8_t> dict_buffer(large_dict_encoder.dict_encoded_size());
+  large_dict_encoder.WriteDict(dict_buffer.data());
+  large_dict_encoder.Close();
+
+  vector<int32_t> decoded_values(values.size());
+  DictDecoder<int> decoder(&track_decoder);
+  ASSERT_TRUE(decoder.template Reset<parquet::Type::INT32>(
+      dict_buffer.data(), dict_buffer.size(), sizeof(int)));
+
+  for (int round = 0; round < rounds; ++round) {
+    ASSERT_OK(decoder.SetData(data_buffer.data(), data_buffer.size()));
+    int i = 0;
+    while (i < values.size()) {
+      int length = GetRandom(1, 200);
+      if (i + length > values.size()) length = values.size() - i;
+      int skip_or_get = GetRandom(0, 1);
+      if (skip_or_get == 0) {
+        // skip values
+        ASSERT_TRUE(decoder.SkipValues(length));
+      } else {
+        // decode values
+        ASSERT_TRUE(decoder.GetNextValues(&decoded_values[i],
+                sizeof(int32_t), length));
+        for (int j = 0; j < length; ++j) {
+          EXPECT_EQ(values[i+j], decoded_values[i+j]);
+        }
+      }
+      i += length;
+    }
+  }
+}
+
 TEST(DictTest, TestSkippingValues) {
   auto ValidateSkipping = [](const vector<int32_t>& values,
       const vector<int32_t>& dict_values, int skip_at, int skip_count,
diff --git a/be/src/util/encoding-test-util.h b/be/src/util/encoding-test-util.h
new file mode 100644
index 0000000..2e1f5d0
--- /dev/null
+++ b/be/src/util/encoding-test-util.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <random>
+
+namespace impala {
+
+/// Generates a sequence that contains repeated and literal runs with random lengths.
+/// Total length of the sequence is limited by 'max_run_length'. It only generates values
+/// that can be represented on 'bit_width' size bits.
+template<typename RandomEngine>
+std::vector<int> MakeRandomSequence(RandomEngine& random_eng, int total_length,
+    int max_run_length, int bit_width) {
+  auto NextRunLength = [&]() {
+    std::uniform_int_distribution<int> uni_dist(1, max_run_length);
+    return uni_dist(random_eng);
+  };
+  auto IsNextRunRepeated = [&random_eng]() {
+    std::uniform_int_distribution<int> uni_dist(0, 1);
+    return uni_dist(random_eng) == 0;
+  };
+  auto NextVal = [bit_width](int val) {
+    if (bit_width == CHAR_BIT * sizeof(int)) return val + 1;
+    return (val + 1) % (1 << bit_width);
+  };
+
+  std::vector<int> ret;
+  int run_length = 0;
+  int val = 0;
+  int is_repeated = false;
+  while (ret.size() < total_length) {
+    if (run_length == 0) {
+      run_length = NextRunLength();
+      is_repeated = IsNextRunRepeated();
+      val = NextVal(val);
+    }
+    ret.push_back(val);
+    if (!is_repeated) val = NextVal(val);
+    --run_length;
+  }
+  return ret;
+}
+
+}
diff --git a/be/src/util/rle-encoding.h b/be/src/util/rle-encoding.h
index 21adc02..36cadbb 100644
--- a/be/src/util/rle-encoding.h
+++ b/be/src/util/rle-encoding.h
@@ -714,9 +714,11 @@ template <typename T>
 template <typename OutType>
 inline bool RleBatchDecoder<T>::DecodeLiteralValues(int32_t num_literals_to_consume,
     OutType* dict, int64_t dict_len, StrideWriter<OutType>* RESTRICT out) {
-  DCHECK_GE(num_literals_to_consume, 0);
+  DCHECK_GT(num_literals_to_consume, 0);
   DCHECK_GE(literal_count_, num_literals_to_consume);
 
+  if (num_literals_to_consume == 0) return false;
+
   int32_t num_remaining = num_literals_to_consume;
   // Decode any buffered literals left over from previous calls.
   if (HaveBufferedLiterals()) {
@@ -892,8 +894,7 @@ inline int32_t RleBatchDecoder<T>::SkipValues(int32_t num_values) {
     // Skip RLE encoded values
     int32_t num_repeats = NextNumRepeats();
     if (num_repeats > 0) {
-       int32_t num_repeats_to_consume =
-           std::min(num_repeats, num_values - num_skipped);
+       int32_t num_repeats_to_consume = std::min(num_repeats, num_values - num_skipped);
        SkipRepeatedValues(num_repeats_to_consume);
        num_skipped += num_repeats_to_consume;
        continue;
diff --git a/be/src/util/rle-test.cc b/be/src/util/rle-test.cc
index 1e3caec..3eb9087 100644
--- a/be/src/util/rle-test.cc
+++ b/be/src/util/rle-test.cc
@@ -24,8 +24,10 @@
 #include <random>
 
 #include "testutil/gtest-util.h"
+#include "testutil/rand-util.h"
 #include "util/bit-packing.inline.h"
 #include "util/bit-stream-utils.h"
+#include "util/encoding-test-util.h"
 #include "util/rle-encoding.h"
 
 #include "common/names.h"
@@ -172,14 +174,13 @@ class RleTest : public ::testing::Test {
   }
 
   int ValidateRleSkip(const vector<int>& values, int bit_width,
-      int min_repeated_run_length, int skip_at, int skip_count, unsigned int seed=0) {
+      int min_repeated_run_length, int skip_at, int skip_count) {
     stringstream ss;
     ss << "bit_width=" << bit_width
        << " min_repeated_run_length_=" << min_repeated_run_length
        << " skip_at=" << skip_at
        << " skip_count=" << skip_count
-       << " values.size()=" << values.size()
-       << " seed=" << seed;
+       << " values.size()=" << values.size();
     const string& description = ss.str();
     const int len = 64 * 1024;
     uint8_t buffer[len];
@@ -285,44 +286,6 @@ class RleTest : public ::testing::Test {
     return MakeSequenceBitWidth(values, intitial_literal_length, repeated_length,
         trailing_literal_length, 1);
   }
-
-  /// Generates a sequence that contains repeated and literal runs with random lengths.
-  /// Total length of the sequence is limited by 'max_run_length'. The random generation
-  /// is seeded by 'seed' to allow deterministic behavior.
-  vector<int> MakeRandomSequence(unsigned int seed, int total_length, int max_run_length,
-      int bit_width) {
-    std::default_random_engine random_eng(seed);
-    auto NextRunLength = [&]() {
-      std::uniform_int_distribution<int> uni_dist(1, max_run_length);
-      return uni_dist(random_eng);
-    };
-    auto IsNextRunRepeated = [&random_eng]() {
-      std::uniform_int_distribution<int> uni_dist(0, 1);
-      return uni_dist(random_eng) == 0;
-    };
-    auto NextVal = [bit_width](int val) {
-      if (bit_width == CHAR_BIT * sizeof(int)) return val + 1;
-      return (val + 1) % (1 << bit_width);
-    };
-
-    vector<int> ret;
-    int run_length = 0;
-    int val = 0;
-    int is_repeated = false;
-    while (ret.size() < total_length) {
-      if (run_length == 0) {
-        run_length = NextRunLength();
-        is_repeated = IsNextRunRepeated();
-        val = NextVal(val);
-      }
-      ret.push_back(val);
-      if (!is_repeated) {
-        val = NextVal(val);
-      }
-      --run_length;
-    }
-    return ret;
-  }
 };
 
 /// Basic test case for literal unpacking - two literals in a run.
@@ -376,9 +339,8 @@ TEST_F(RleTest, ValueSkippingFuzzy) {
   const int probe_iteration = 100;
   const int total_sequence_length = 2048;
 
-  std::random_device r;
-  unsigned int seed = r();
-  std::default_random_engine random_eng(seed);
+  std::default_random_engine random_eng;
+  RandTestUtil::SeedRng("RLE_TEST_SEED", &random_eng);
 
   // Generates random number between 'bottom' and 'top' (inclusive intervals).
   auto GetRandom = [&random_eng](int bottom, int top) {
@@ -390,12 +352,12 @@ TEST_F(RleTest, ValueSkippingFuzzy) {
     for (int i = 0; i < bitwidth_iteration; ++i) {
       int bit_width = GetRandom(1, 32);
       int max_run_length = GetRandom(5, 200);
-      vector<int> seq = MakeRandomSequence(seed, total_sequence_length, max_run_length,
-          bit_width);
+      vector<int> seq = MakeRandomSequence(random_eng, total_sequence_length,
+          max_run_length, bit_width);
       for (int j = 0; j < probe_iteration; ++j) {
         int skip_at = GetRandom(0, seq.size() - 1);
         int skip_count = GetRandom(1, seq.size() - skip_at);
-        ValidateRleSkip(seq, bit_width, min_run_length, skip_at, skip_count, seed);
+        ValidateRleSkip(seq, bit_width, min_run_length, skip_at, skip_count);
       }
     }
   }