Posted to commits@impala.apache.org by ta...@apache.org on 2020/04/16 15:47:03 UTC

[impala] 04/04: IMPALA-9596: deflake test_tpch_mem_limit_single_node

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dc410a2cf47bcf06a0f4563d05a9d0a339af5fb2
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Thu Apr 9 16:11:42 2020 -0700

    IMPALA-9596: deflake test_tpch_mem_limit_single_node
    
    This changes the test to use a debug action instead of
    trying to hit the memory limit in the right spot, which
    has tended to be flaky. This still exercises the error
    handling code in the scanner, which was the original
    point of the test (see IMPALA-2376).
    
    This revealed an actual bug in the ORC scanner, where
    it was not returning the error directly from
    AssembleCollection(). Before I fixed that, the scanner
    got stuck in an infinite loop when running the test.
    
    Change-Id: I4678963c264b7c15fbac6f71721162b38676aa21
    Reviewed-on: http://gerrit.cloudera.org:8080/15700
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Gabor Kaszab <ga...@cloudera.com>
---
 be/src/exec/hdfs-orc-scanner.cc                    |  5 ++-
 be/src/runtime/collection-value-builder-test.cc    |  5 ++-
 be/src/runtime/collection-value-builder.h          | 12 ++++++-
 be/src/runtime/row-batch-serialize-test.cc         | 41 +++++++++++++---------
 .../QueryTest/nested-types-tpch-errors.test        | 17 +++++++++
 .../nested-types-tpch-mem-limit-single-node.test   | 18 ----------
 tests/query_test/test_nested_types.py              | 17 +++------
 7 files changed, 63 insertions(+), 52 deletions(-)
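
A minimal sketch of the failpoint pattern this change relies on, shown ahead
of the patch itself. DebugAction() and RETURN_IF_ERROR are the real Impala
helpers used in the hunks below; the wrapper function and its name are made
up for illustration only.

    #include "common/status.h"
    #include "runtime/runtime-state.h"
    #include "util/debug-util.h"

    // Hypothetical wrapper around an allocation step in the scanner. When the
    // query runs with debug_action="SCANNER_COLLECTION_ALLOC:FAIL@1.0", the
    // DebugAction() call returns a non-OK Status every time this label is
    // reached, so the scanner's error path is exercised deterministically
    // instead of depending on a carefully tuned mem_limit.
    Status GrowCollectionBuffer(RuntimeState* state) {
      RETURN_IF_ERROR(DebugAction(state->query_options(), "SCANNER_COLLECTION_ALLOC"));
      // The normal buffer-doubling path would follow here.
      return Status::OK();
    }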

diff --git a/be/src/exec/hdfs-orc-scanner.cc b/be/src/exec/hdfs-orc-scanner.cc
index 05e1198..0b1a599 100644
--- a/be/src/exec/hdfs-orc-scanner.cc
+++ b/be/src/exec/hdfs-orc-scanner.cc
@@ -813,9 +813,8 @@ Status HdfsOrcScanner::AssembleCollection(
 
     int64_t num_rows;
     // We're assembling item tuples into a CollectionValue
-    parse_status_ =
-        GetCollectionMemory(coll_value_builder, &pool, &tuple, &row, &num_rows);
-    if (UNLIKELY(!parse_status_.ok())) break;
+    RETURN_IF_ERROR(
+        GetCollectionMemory(coll_value_builder, &pool, &tuple, &row, &num_rows));
     // 'num_rows' can be very high if we're writing to a large CollectionValue. Limit
     // the number of rows we read at one time so we don't spend too long in the
     // 'num_rows' loop below before checking for cancellation or limit reached.
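
The hunk above is the bug fix called out in the commit message. Condensed,
the change in pattern is the following (the call arguments are quoted from
the hunk; the explanation of the loop is a reading consistent with the
commit message, not taken from the full scanner source):

    // Before: the error was stashed in parse_status_ and only the local loop
    // was exited. AssembleCollection() itself still returned OK, so the error
    // never propagated to the caller, and the commit message reports the
    // scanner looping forever on the injected failure as a result.
    parse_status_ =
        GetCollectionMemory(coll_value_builder, &pool, &tuple, &row, &num_rows);
    if (UNLIKELY(!parse_status_.ok())) break;

    // After: a failure is returned directly from AssembleCollection().
    RETURN_IF_ERROR(
        GetCollectionMemory(coll_value_builder, &pool, &tuple, &row, &num_rows));
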
diff --git a/be/src/runtime/collection-value-builder-test.cc b/be/src/runtime/collection-value-builder-test.cc
index af710ce..c1bde10 100644
--- a/be/src/runtime/collection-value-builder-test.cc
+++ b/be/src/runtime/collection-value-builder-test.cc
@@ -33,6 +33,9 @@ static scoped_ptr<Frontend> fe;
 TEST(CollectionValueBuilderTest, MaxBufferSize) {
   TestEnv test_env;
   ASSERT_OK(test_env.Init());
+  TQueryOptions opts;
+  RuntimeState* runtime_state;
+  ASSERT_OK(test_env.CreateQueryState(1234, &opts, &runtime_state));
   ObjectPool obj_pool;
   DescriptorTblBuilder builder(fe.get(), &obj_pool);
   builder.DeclareTuple() << TYPE_TINYINT << TYPE_TINYINT << TYPE_TINYINT;
@@ -51,7 +54,7 @@ TEST(CollectionValueBuilderTest, MaxBufferSize) {
   MemTracker tracker(mem_limit);
   MemPool pool(&tracker);
   CollectionValueBuilder coll_value_builder(
-      &coll_value, tuple_desc, &pool, NULL, initial_capacity);
+      &coll_value, tuple_desc, &pool, runtime_state, initial_capacity);
   EXPECT_EQ(tracker.consumption(), initial_capacity * 4);
 
   // Attempt to double the buffer so it goes over 32-bit INT_MAX.
diff --git a/be/src/runtime/collection-value-builder.h b/be/src/runtime/collection-value-builder.h
index ba9ddd6..e7a47bb 100644
--- a/be/src/runtime/collection-value-builder.h
+++ b/be/src/runtime/collection-value-builder.h
@@ -20,6 +20,7 @@
 
 #include "runtime/collection-value.h"
 #include "runtime/mem-tracker.h"
+#include "runtime/runtime-state.h"
 #include "runtime/tuple.h"
 #include "util/debug-util.h"
 #include "util/ubsan.h"
@@ -40,7 +41,8 @@ class CollectionValueBuilder {
     : coll_value_(coll_value),
       tuple_desc_(tuple_desc),
       pool_(pool),
-      state_(state) {
+      state_(state),
+      have_debug_action_(!state->query_options().debug_action.empty()) {
     buffer_size_ = initial_tuple_capacity * tuple_desc_.byte_size();
     coll_value_->ptr = pool_->TryAllocate(buffer_size_);
     if (coll_value_->ptr == NULL) buffer_size_ = 0;
@@ -60,6 +62,10 @@ class CollectionValueBuilder {
       int64_t bytes_written = coll_value_->ByteSize(tuple_desc_);
       DCHECK_GE(buffer_size_, bytes_written);
       if (buffer_size_ == bytes_written) {
+        if (UNLIKELY(have_debug_action_)) {
+          RETURN_IF_ERROR(
+              DebugAction(state_->query_options(), "SCANNER_COLLECTION_ALLOC"));
+        }
         // Double tuple buffer
         int64_t new_buffer_size =
             std::max<int64_t>(buffer_size_ * 2, tuple_desc_.byte_size());
@@ -107,6 +113,10 @@ class CollectionValueBuilder {
   /// May be NULL. If non-NULL, used to log memory limit errors.
   RuntimeState* state_;
 
+  /// Whether 'state_' has a debug action set. Used to reduce overhead of
+  /// the check that is run once per collection.
+  const bool have_debug_action_;
+
   /// The current size of coll_value_'s buffer in bytes, including any unused space
   /// (i.e. buffer_size_ is equal to or larger than coll_value_->ByteSize()).
   int64_t buffer_size_;
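
Two details in the header change above are worth noting. First,
have_debug_action_ caches the emptiness check on the debug_action string at
construction time, so the per-collection hot path pays only for a branch on a
bool. A self-contained sketch of the same caching idea, with illustrative
names that are not Impala's:

    #include <string>

    // Compute a cheap flag from a configuration string once at construction,
    // then branch on the flag in the hot path instead of re-inspecting the
    // string on every buffer growth.
    class DebugActionGate {
     public:
      explicit DebugActionGate(const std::string& debug_action)
        : have_debug_action_(!debug_action.empty()) {}

      // Called on every allocation; the string is never touched here.
      bool ShouldRunDebugAction() const { return have_debug_action_; }

     private:
      const bool have_debug_action_;
    };

Second, the new initializer dereferences 'state' unconditionally even though
the comment on 'state_' says it may be NULL; that is presumably why the unit
tests in this patch switch from passing NULL to passing a RuntimeState
created through TestEnv::CreateQueryState().
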
diff --git a/be/src/runtime/row-batch-serialize-test.cc b/be/src/runtime/row-batch-serialize-test.cc
index fcac615..99a09ea 100644
--- a/be/src/runtime/row-batch-serialize-test.cc
+++ b/be/src/runtime/row-batch-serialize-test.cc
@@ -25,6 +25,7 @@
 #include "runtime/raw-value.h"
 #include "runtime/raw-value.inline.h"
 #include "runtime/row-batch.h"
+#include "runtime/test-env.h"
 #include "runtime/tuple-row.h"
 #include "service/fe-support.h"
 #include "service/frontend.h"
@@ -47,20 +48,28 @@ class RowBatchSerializeTest : public testing::Test {
   ObjectPool pool_;
   scoped_ptr<MemTracker> tracker_;
 
-  // For computing tuple mem layouts.
-  scoped_ptr<Frontend> fe_;
+  scoped_ptr<TestEnv> test_env_;
+  RuntimeState* runtime_state_ = nullptr;
+
+  TQueryOptions dummy_query_opts_;
 
   virtual void SetUp() {
-    fe_.reset(new Frontend());
+    test_env_.reset(new TestEnv);
+    ASSERT_OK(test_env_->Init());
     tracker_.reset(new MemTracker());
+    ASSERT_OK(test_env_->CreateQueryState(1234, &dummy_query_opts_, &runtime_state_));
   }
 
   virtual void TearDown() {
     pool_.Clear();
     tracker_.reset();
-    fe_.reset();
+    test_env_.reset();
+    runtime_state_ = nullptr;
   }
 
+  /// Helper to get frontend from 'test_env_'.
+  Frontend* frontend() const { return test_env_->exec_env()->frontend(); }
+
   // Serializes and deserializes 'batch', then checks that the deserialized batch is valid
   // and has the same contents as 'batch'. If serialization returns an error (e.g. if the
   // row batch is too large to serialize), this will return that error.
@@ -104,7 +113,7 @@ class RowBatchSerializeTest : public testing::Test {
     // tuple: (int, string, string, string)
     // This uses three strings so that this test can reach INT_MAX+1 without any
     // single string exceeding the 1GB limit on string length (see string-value.h).
-    DescriptorTblBuilder builder(fe_.get(), &pool_);
+    DescriptorTblBuilder builder(frontend(), &pool_);
     builder.DeclareTuple() << TYPE_INT << TYPE_STRING << TYPE_STRING << TYPE_STRING;
     DescriptorTbl* desc_tbl = builder.Build();
 
@@ -253,7 +262,7 @@ class RowBatchSerializeTest : public testing::Test {
         const TupleDescriptor* item_desc = slot_desc.collection_item_descriptor();
         int array_len = rand() % (MAX_ARRAY_LEN + 1);
         CollectionValue cv;
-        CollectionValueBuilder builder(&cv, *item_desc, pool, NULL, array_len);
+        CollectionValueBuilder builder(&cv, *item_desc, pool, runtime_state_, array_len);
         Tuple* tuple_mem;
         int n;
         EXPECT_OK(builder.GetFreeMemory(&tuple_mem, &n));
@@ -380,7 +389,7 @@ class RowBatchSerializeTest : public testing::Test {
 
 TEST_F(RowBatchSerializeTest, Basic) {
   // tuple: (int)
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -395,7 +404,7 @@ TEST_F(RowBatchSerializeTest, Basic) {
 
 TEST_F(RowBatchSerializeTest, String) {
   // tuple: (int, string)
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -441,7 +450,7 @@ TEST_F(RowBatchSerializeTest, BasicArray) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(TYPE_INT);
 
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -469,7 +478,7 @@ TEST_F(RowBatchSerializeTest, StringArray) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(struct_type);
 
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -510,7 +519,7 @@ TEST_F(RowBatchSerializeTest, NestedArrays) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(struct_type);
 
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -534,7 +543,7 @@ TEST_F(RowBatchSerializeTest, DupCorrectnessFull) {
 
 void RowBatchSerializeTest::TestDupCorrectness(bool full_dedup) {
   // tuples: (int), (string)
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
@@ -575,7 +584,7 @@ TEST_F(RowBatchSerializeTest, DupRemovalFull) {
 // Test that tuple deduplication results in the expected reduction in serialized size.
 void RowBatchSerializeTest::TestDupRemoval(bool full_dedup) {
   // tuples: (int, string)
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -614,7 +623,7 @@ TEST_F(RowBatchSerializeTest, ConsecutiveNullsFull) {
 // Test that deduplication handles NULL tuples correctly.
 void RowBatchSerializeTest::TestConsecutiveNulls(bool full_dedup) {
   // tuples: (int)
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
   vector<bool> nullable_tuples(1, true);
@@ -642,7 +651,7 @@ TEST_F(RowBatchSerializeTest, ZeroLengthTuplesDedup) {
 
 void RowBatchSerializeTest::TestZeroLengthTuple(bool full_dedup) {
   // tuples: (int), (string), ()
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_STRING;
   builder.DeclareTuple();
@@ -669,7 +678,7 @@ TEST_F(RowBatchSerializeTest, DedupPathologicalFull) {
   ColumnType array_type;
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(TYPE_STRING);
-  DescriptorTblBuilder builder(fe_.get(), &pool_);
+  DescriptorTblBuilder builder(frontend(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << array_type;
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch-errors.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch-errors.test
new file mode 100644
index 0000000..2f114d4
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch-errors.test
@@ -0,0 +1,17 @@
+====
+---- QUERY
+# IMPALA-2376: test error handling when hitting memory limit during allocation of
+# a collection in the scanner. Use debug action to make the failure deterministic
+# (when setting the real mem_limit, it tends to be non-deterministic where in query
+# execution the error is hit).
+set debug_action="SCANNER_COLLECTION_ALLOC:FAIL@1.0";
+select max(cnt1), max(cnt2), max(cnt3), max(cnt4), max(cnt5)
+from customer c,
+  (select count(l_returnflag) cnt1, count(l_partkey) cnt2, count(l_suppkey) cnt3,
+          count(l_linenumber) cnt4, count(l_quantity) cnt5
+   from c.c_orders.o_lineitems) v;
+---- TYPES
+BIGINT
+---- CATCH
+Debug Action: SCANNER_COLLECTION_ALLOC:FAIL@1.0
+====
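
In the debug_action value above, "FAIL@1.0" reads as "fail with probability
1.0". A toy sketch of how such a probabilistic failpoint can be interpreted
(an illustration only, not Impala's actual debug-action implementation):

    #include <random>

    // Decide whether to inject the failure for a "FAIL@p" action. The
    // distribution draws from [0.0, 1.0), so with p == 1.0 the comparison
    // always succeeds and the failure, and hence the CATCH'd error message
    // in the test above, is deterministic.
    bool ShouldInjectFailure(double p) {
      static std::mt19937 gen{std::random_device{}()};
      return std::uniform_real_distribution<double>(0.0, 1.0)(gen) < p;
    }
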
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch-mem-limit-single-node.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch-mem-limit-single-node.test
deleted file mode 100644
index 46d2cf8..0000000
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch-mem-limit-single-node.test
+++ /dev/null
@@ -1,18 +0,0 @@
-====
----- QUERY
-# IMPALA-2376: run scan that constructs large collection and set memory limit low enough
-# to get the below query to consistently fail when allocating a large collection. Set
-# num_nodes to 1 in the python test and mt_dop to 1 here in order to make the query as
-# deterministic as possible. mem_limit is tuned for a 3-node HDFS minicluster.
-set buffer_pool_limit=24m;
-set mt_dop=1;
-select max(cnt1), max(cnt2), max(cnt3), max(cnt4), max(cnt5)
-from customer c,
-  (select count(l_returnflag) cnt1, count(l_partkey) cnt2, count(l_suppkey) cnt3,
-          count(l_linenumber) cnt4, count(l_quantity) cnt5
-   from c.c_orders.o_lineitems) v;
----- TYPES
-BIGINT
----- CATCH
-row_regex: .*Memory limit exceeded: Failed to allocate [0-9]+ bytes for collection 'tpch_nested_.*.customer.c_orders.item.o_lineitems'.*
-====
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index c95e082..1f9e1a4 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -144,20 +144,11 @@ class TestNestedTypesNoMtDop(ImpalaTestSuite):
     self.run_test_case('QueryTest/nested-types-tpch-mem-limit', vector,
                        use_db='tpch_nested' + db_suffix)
 
-  @SkipIfNotHdfsMinicluster.tuned_for_minicluster
-  def test_tpch_mem_limit_single_node(self, vector):
-    """Queries over the larger nested TPCH dataset with memory limits tuned for
-    a 3-node HDFS minicluster with num_nodes=1."""
-    new_vector = deepcopy(vector)
-    new_vector.get_value('exec_option')['num_nodes'] = 1
-    if vector.get_value('table_format').file_format == 'orc':
-      # IMPALA-8336: lower memory limit for ORC
-      new_vector.get_value('exec_option')['mem_limit'] = '20M'
-    else:
-      new_vector.get_value('exec_option')['mem_limit'] = '28M'
+  def test_tpch_errors(self, vector):
+    """Queries that test error handling on the TPC-H nested data set."""
     db_suffix = vector.get_value('table_format').db_suffix()
-    self.run_test_case('QueryTest/nested-types-tpch-mem-limit-single-node',
-                       new_vector, use_db='tpch_nested' + db_suffix)
+    self.run_test_case('QueryTest/nested-types-tpch-errors',
+                       vector, use_db='tpch_nested' + db_suffix)
 
   @SkipIfEC.fix_later
   def test_parquet_stats(self, vector):