You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2023/07/19 03:40:52 UTC

[impala] branch master updated: IMPALA-12295: Statestore crashed when restarting catalogd

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 97e44c119 IMPALA-12295: Statestore crashed when restarting catalogd
97e44c119 is described below

commit 97e44c11923f3d28e08aba1b5dd66b8a35465deb
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Tue Jul 18 12:27:36 2023 -0700

    IMPALA-12295: Statestore crashed when restarting catalogd
    
    Statestore hit a DCHECK when catalogd re-registered while CatalogD
    HA was not enabled. The number of registered catalogds should not be
    increased when catalogd re-registers.
    The issue can be reproduced with the test case
    test_restart_services.py::TestRestart::test_restart_catalogd by
    increasing the value of statestore_heartbeat_frequency_ms.
    
    Testing:
     - Verified the issue does not happen for the test case
       test_restart_services.py::TestRestart::test_restart_catalogd.
     - Passed core tests.
    
    Change-Id: I031f0c6d895601e7ea8b15005a3ad52bd3254e7c
    Reviewed-on: http://gerrit.cloudera.org:8080/20217
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
---
 be/src/statestore/statestore-catalogd-mgr.cc  | 2 +-
 tests/custom_cluster/test_restart_services.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/be/src/statestore/statestore-catalogd-mgr.cc b/be/src/statestore/statestore-catalogd-mgr.cc
index 8af3b5244..fb2ba6663 100644
--- a/be/src/statestore/statestore-catalogd-mgr.cc
+++ b/be/src/statestore/statestore-catalogd-mgr.cc
@@ -54,7 +54,7 @@ bool StatestoreCatalogdMgr::RegisterCatalogd(bool is_reregistering,
   std::lock_guard<std::mutex> l(catalog_mgr_lock_);
   if (!enable_catalogd_ha_) {
     // CatalogD HA is not enabled.
-    num_registered_catalogd_++;
+    if (!is_reregistering) num_registered_catalogd_++;
     DCHECK(num_registered_catalogd_ < 2);
     is_active_catalogd_assigned_ = true;
     COPY_CATALOGD_REGISTRATION_FROM_LOCAL_VARIABLES(active);
diff --git a/tests/custom_cluster/test_restart_services.py b/tests/custom_cluster/test_restart_services.py
index f8afe450c..1e1dae001 100644
--- a/tests/custom_cluster/test_restart_services.py
+++ b/tests/custom_cluster/test_restart_services.py
@@ -167,7 +167,8 @@ class TestRestart(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
-    statestored_args="--statestore_update_frequency_ms=5000")
+    statestored_args="--statestore_update_frequency_ms=5000 "
+                     "--statestore_heartbeat_frequency_ms=10000")
   def test_restart_catalogd(self):
     self.execute_query_expect_success(self.client, "drop table if exists join_aa")
     self.execute_query_expect_success(self.client, "create table join_aa(id int)")