You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2022/02/04 17:56:33 UTC

[impala] branch master updated (d59ec73 -> a0e6b6b)

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from d59ec73  IMPALA-11093: Fine grained table refreshing doesn't refresh table file metadata
     new 5cfdff0  IMPALA-11095: Fix Impala-shell strict_hs2 mode inserts
     new a0e6b6b  IMPALA-11105: Impala crashes in PhjBuilder::Close() when Prepare() fails

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exec/hash-table.cc                          |  5 ++++
 be/src/exec/partitioned-hash-join-builder.cc       | 33 ++++++++++++----------
 shell/impala_client.py                             |  4 ++-
 shell/impala_shell.py                              |  8 ++++--
 .../functional-query/queries/QueryTest/joins.test  |  8 ++++++
 tests/shell/test_shell_commandline.py              | 10 +++----
 6 files changed, 45 insertions(+), 23 deletions(-)

[impala] 01/02: IMPALA-11095: Fix Impala-shell strict_hs2 mode inserts

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5cfdff03f78fb06a50669fa31acf55e5f8a3d0d6
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Fri Jan 28 14:41:42 2022 -0800

    IMPALA-11095: Fix Impala-shell strict_hs2 mode inserts
    
    The insert command was broken for impala-shell in the strict_hs2
    mode. The return parameter for close_dml should return two parameters.
    
    The parameters returned by close_dml are rows returned and error
    rows. These are not supported by strict hs2 mode since the close
    does not return the TDmlResult structure. So the message to
    the end user also had to be changed.
    
    Change-Id: Ibe837c99e54d68d1e27b97f0025e17faf0a2cb9f
    Reviewed-on: http://gerrit.cloudera.org:8080/18176
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
---
 shell/impala_client.py                |  4 +++-
 shell/impala_shell.py                 |  8 ++++++--
 tests/shell/test_shell_commandline.py | 10 +++++-----
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/shell/impala_client.py b/shell/impala_client.py
index 7232faa..c0c5815 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -1089,7 +1089,8 @@ class StrictHS2Client(ImpalaHS2Client):
     super(StrictHS2Client, self).__init__(*args, **kwargs)
 
   def close_dml(self, last_query_handle):
-    return self.close_query(last_query_handle)
+    self.close_query(last_query_handle)
+    return (None, None)
 
   def close_query(self, last_query_handle):
     # Set a member in the handle to make sure that it is idempotent
@@ -1118,6 +1119,7 @@ class StrictHS2Client(ImpalaHS2Client):
   def _populate_query_options(self):
     return
 
+
 class ImpalaBeeswaxClient(ImpalaClient):
   """Legacy Beeswax client. Uses the Beeswax protocol plus Impala-specific extensions.
   TODO: remove once we've phased out beeswax."""
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index e9b660c..c6ab51c 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1325,8 +1325,12 @@ class ImpalaShell(cmd.Cmd, object):
       else:
         error_report = ""
 
-      self._print_if_verbose("%s %d row(s)%s in %2.2fs" %\
-          (verb, num_rows, error_report, time_elapsed))
+      if num_rows is not None:
+        self._print_if_verbose("%s %d row(s)%s in %2.2fs" %
+            (verb, num_rows, error_report, time_elapsed))
+      else:
+        self._print_if_verbose("Time elapsed: %2.2fs" %
+            (time_elapsed))
 
       if not is_dml:
         self.imp_client.close_query(self.last_query_handle)
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index cde373f..01d87dd 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -755,8 +755,6 @@ class TestImpalaShell(ImpalaTestSuite):
         assert msg not in result_stderr, result_stderr
 
   def test_query_time_and_link_message(self, vector, unique_database):
-    if vector.get_value('strict_hs2_protocol'):
-      pytest.skip("IMPALA-10827: Messages not sent back is strict hs2 mode.")
     shell_messages = ["Query submitted at: ", "(Coordinator: ",
         "Query progress can be monitored at: "]
     # CREATE statements should not print query time and webserver address.
@@ -797,13 +795,15 @@ class TestImpalaShell(ImpalaTestSuite):
     self._validate_shell_messages(results.stderr, shell_messages, should_exist=False)
 
   def test_insert_status(self, vector, unique_database):
-    if vector.get_value('strict_hs2_protocol'):
-      pytest.skip("No message sent back in strict hs2 mode.")
     run_impala_shell_cmd(
         vector, ['--query=create table %s.insert_test (id int)' % unique_database])
     results = run_impala_shell_cmd(
         vector, ['--query=insert into %s.insert_test values (1)' % unique_database])
-    assert "Modified 1 row(s)" in results.stderr
+
+    if vector.get_value('strict_hs2_protocol'):
+      assert "Time elapsed" in results.stderr
+    else:
+      assert "Modified 1 row(s)" in results.stderr
 
   def _validate_dml_stmt(self, vector, stmt, expected_rows_modified, expected_row_errors):
     results = run_impala_shell_cmd(vector, ['--query=%s' % stmt])

[impala] 02/02: IMPALA-11105: Impala crashes in PhjBuilder::Close() when Prepare() fails

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a0e6b6b618ba9996bc2bc9bd644a340c2cad8b79
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Thu Feb 3 17:33:15 2022 +0100

    IMPALA-11105: Impala crashes in PhjBuilder::Close() when Prepare() fails
    
    In PhjBuilder::Close() we invoke
    'ht_ctx_->StatsCountersAdd(ht_stats_profile_.get())' when 'ht_ctx_' is
    not null. But in Prepare we create 'ht_ctx_' first, then after a couple
    operations which might fail we create 'ht_stats_profile_'. This means if
    an operation fails in Prepare(), between the creation of 'ht_ctx_' and
    'ht_stast_profile_', then later we'll get a SEGFAULT in Close().
    
    This patch restructures the code in PhjBuilder::Prepare(), so at first
    it creates the counters and profile, then it creates 'ht_ctx_',
    similarly to what we do in grouping-aggregator.cc. It also modifies
    HashTableCtx::StatsCountersAdd(), so in release mode it is a no-op
    if 'profile' is null.
    
    Testing:
     * added a debug action that fails PhjBuilder::Prepare() after the
       creation of 'ht_ctx_'
    
    Change-Id: Id41b0c45d9693cb3433e02737048cb9f50ba59c1
    Reviewed-on: http://gerrit.cloudera.org:8080/18195
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/hash-table.cc                          |  5 ++++
 be/src/exec/partitioned-hash-join-builder.cc       | 33 ++++++++++++----------
 .../functional-query/queries/QueryTest/joins.test  |  8 ++++++
 3 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index da2e2b1..47a1d4e 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -226,6 +226,11 @@ string HashTableCtx::PrintStats() const {
 }
 
 void HashTableCtx::StatsCountersAdd(HashTableStatsProfile* profile) {
+  DCHECK(profile != nullptr);
+  if (profile == nullptr) {
+    LOG(WARNING) << "In HashTableCtx::StatsCountersAdd() 'profile is NULL.'";
+    return;
+  }
   COUNTER_ADD(profile->num_hash_collisions_, num_hash_collisions_);
   COUNTER_ADD(profile->num_hash_probes_, num_probes_);
   COUNTER_ADD(profile->num_hash_travels_, travel_length_);
diff --git a/be/src/exec/partitioned-hash-join-builder.cc b/be/src/exec/partitioned-hash-join-builder.cc
index ebbf61a..8187331 100644
--- a/be/src/exec/partitioned-hash-join-builder.cc
+++ b/be/src/exec/partitioned-hash-join-builder.cc
@@ -226,6 +226,22 @@ PhjBuilder::~PhjBuilder() {}
 
 Status PhjBuilder::Prepare(RuntimeState* state, MemTracker* parent_mem_tracker) {
   RETURN_IF_ERROR(DataSink::Prepare(state, parent_mem_tracker));
+
+  partitions_created_ = ADD_COUNTER(profile(), "PartitionsCreated", TUnit::UNIT);
+  largest_partition_percent_ =
+      profile()->AddHighWaterMarkCounter("LargestPartitionPercent", TUnit::UNIT);
+  max_partition_level_ =
+      profile()->AddHighWaterMarkCounter("MaxPartitionLevel", TUnit::UNIT);
+  num_build_rows_ = ADD_COUNTER(profile(), "BuildRows", TUnit::UNIT);
+  ht_stats_profile_ = HashTable::AddHashTableCounters(profile());
+  num_spilled_partitions_ = ADD_COUNTER(profile(), "SpilledPartitions", TUnit::UNIT);
+  num_repartitions_ = ADD_COUNTER(profile(), "NumRepartitions", TUnit::UNIT);
+  partition_build_rows_timer_ = ADD_TIMER(profile(), "BuildRowsPartitionTime");
+  build_hash_table_timer_ = ADD_TIMER(profile(), "HashTablesBuildTime");
+  num_hash_table_builds_skipped_ =
+      ADD_COUNTER(profile(), "NumHashTableBuildsSkipped", TUnit::UNIT);
+  repartition_timer_ = ADD_TIMER(profile(), "RepartitionTime");
+
   if (is_separate_build_) {
     const TDebugOptions& instance_debug_options = state->instance_ctx().debug_options;
     bool debug_option_enabled = instance_debug_options.node_id == -1
@@ -241,26 +257,13 @@ Status PhjBuilder::Prepare(RuntimeState* state, MemTracker* parent_mem_tracker)
       MAX_PARTITION_DEPTH, row_desc_->tuple_descriptors().size(), expr_perm_pool_.get(),
       expr_results_pool_.get(), expr_results_pool_.get(), &ht_ctx_));
 
+  RETURN_IF_ERROR(DebugAction(state->query_options(), "PHJ_BUILDER_PREPARE"));
+
   DCHECK_EQ(filter_exprs_.size(), filter_ctxs_.size());
   for (int i = 0; i < filter_exprs_.size(); ++i) {
     RETURN_IF_ERROR(ScalarExprEvaluator::Create(*filter_exprs_[i], state, &obj_pool_,
         expr_perm_pool_.get(), expr_results_pool_.get(), &filter_ctxs_[i].expr_eval));
   }
-
-  partitions_created_ = ADD_COUNTER(profile(), "PartitionsCreated", TUnit::UNIT);
-  largest_partition_percent_ =
-      profile()->AddHighWaterMarkCounter("LargestPartitionPercent", TUnit::UNIT);
-  max_partition_level_ =
-      profile()->AddHighWaterMarkCounter("MaxPartitionLevel", TUnit::UNIT);
-  num_build_rows_ = ADD_COUNTER(profile(), "BuildRows", TUnit::UNIT);
-  ht_stats_profile_ = HashTable::AddHashTableCounters(profile());
-  num_spilled_partitions_ = ADD_COUNTER(profile(), "SpilledPartitions", TUnit::UNIT);
-  num_repartitions_ = ADD_COUNTER(profile(), "NumRepartitions", TUnit::UNIT);
-  partition_build_rows_timer_ = ADD_TIMER(profile(), "BuildRowsPartitionTime");
-  build_hash_table_timer_ = ADD_TIMER(profile(), "HashTablesBuildTime");
-  num_hash_table_builds_skipped_ =
-      ADD_COUNTER(profile(), "NumHashTableBuildsSkipped", TUnit::UNIT);
-  repartition_timer_ = ADD_TIMER(profile(), "RepartitionTime");
   return Status::OK();
 }
 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test b/testdata/workloads/functional-query/queries/QueryTest/joins.test
index b18b56c..af6f918 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/joins.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test
@@ -849,3 +849,11 @@ select 1 from alltypestiny a join alltypestiny b on a.id != b.id;
 ---- CATCH
 Debug Action: FAIL
 ====
+---- QUERY
+# IMPALA-11105: If PhjBuilder::Prepare() failed after creating the hash table context,
+# Impala crashed.
+set debug_action="PHJ_BUILDER_PREPARE:FAIL@1.0";
+select * from alltypestiny a join alltypestiny b on a.id = b.id;
+---- CATCH
+Debug Action: PHJ_BUILDER_PREPARE:FAIL@1.0
+====