You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/09/09 05:21:41 UTC
[impala] 02/03: IMPALA-7864: (Addendum) Deflake test_replan_limit by postponing catalog fetches
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2aeb6013fa44e53031d82b7e7ca59d771037d60a
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Wed May 18 16:51:37 2022 +0800
IMPALA-7864: (Addendum) Deflake test_replan_limit by postponing catalog fetches
TestLocalCatalogRetries.test_replan_limit runs REFRESH and SELECT
queries concurrently on a table, and expects at least one of the queries
to hit inconsistent metadata.
This patch increases the chance of inconsistent metadata by injecting
a latency (500ms) before each catalog fetch. So it's more likely that a
request is fetching stale metadata. Also bump up the timeout of
thread.join() so we can try out all the attempts.
Test
- Run test_replan_limit 1000 times without any error.
- Run all tests of TestLocalCatalogRetries 100 times without any error.
Change-Id: Ia5bdca7402039f1f24b7bf19595c2541fa32d0ad
Reviewed-on: http://gerrit.cloudera.org:8080/18537
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/18951
Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
Tested-by: Quanlong Huang <hu...@gmail.com>
---
be/src/exec/catalog-op-executor.cc | 5 +++++
tests/custom_cluster/test_local_catalog.py | 12 ++++++++----
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/be/src/exec/catalog-op-executor.cc b/be/src/exec/catalog-op-executor.cc
index 646e6aa52..c6c245428 100644
--- a/be/src/exec/catalog-op-executor.cc
+++ b/be/src/exec/catalog-op-executor.cc
@@ -55,6 +55,8 @@ DECLARE_int32(catalog_client_connection_num_retries);
DECLARE_int32(catalog_client_rpc_timeout_ms);
DECLARE_int32(catalog_client_rpc_retry_interval_ms);
+DEFINE_int32_hidden(inject_latency_before_catalog_fetch_ms, 0,
+ "Latency (ms) to be injected before fetching catalog data from the catalogd");
DEFINE_int32_hidden(inject_latency_after_catalog_fetch_ms, 0,
"Latency (ms) to be injected after fetching catalog data from the catalogd");
@@ -366,6 +368,9 @@ Status CatalogOpExecutor::GetPartialCatalogObject(
DCHECK(FLAGS_use_local_catalog || TestInfo::is_test());
const TNetworkAddress& address =
MakeNetworkAddress(FLAGS_catalog_service_host, FLAGS_catalog_service_port);
+ if (FLAGS_inject_latency_before_catalog_fetch_ms > 0) {
+ SleepForMs(FLAGS_inject_latency_before_catalog_fetch_ms);
+ }
int attempt = 0; // Used for debug action only.
CatalogServiceConnection::RpcStatus rpc_status =
CatalogServiceConnection::DoRpcWithRetry(env_->catalogd_client_cache(), address,
diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py
index 63b0cbb91..6e74a4de0 100644
--- a/tests/custom_cluster/test_local_catalog.py
+++ b/tests/custom_cluster/test_local_catalog.py
@@ -273,8 +273,9 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
q = random.choice(queries)
attempt += 1
try:
+ print 'Attempt', attempt, 'client', str(client)
ret = self.execute_query_unchecked(client, q)
- except Exception, e:
+ except Exception as e:
if 'InconsistentMetadataFetchException' in str(e):
with inconsistent_seen_lock:
inconsistent_seen[0] += 1
@@ -287,7 +288,8 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
t.start()
for t in threads:
# When there are failures, they're observed quickly.
- t.join(30)
+ # 600s is enough for 200 attempts.
+ t.join(600)
assert failed_queries.empty(),\
"Failed query count non zero: %s" % list(failed_queries.queue)
@@ -318,7 +320,8 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
- impalad_args="--use_local_catalog=true --local_catalog_max_fetch_retries=0",
+ impalad_args="--use_local_catalog=true --local_catalog_max_fetch_retries=0"
+ " --inject_latency_before_catalog_fetch_ms=500",
catalogd_args="--catalog_topic_mode=minimal")
def test_replan_limit(self):
"""
@@ -326,7 +329,8 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
an inconsistent metadata exception when running concurrent reads/writes
is seen. With the max retries set to 0, no retries are expected and with
the concurrent read/write workload, an inconsistent metadata exception is
- expected.
+ expected. Setting inject_latency_before_catalog_fetch_ms increases the
+ possibility of a stale request which throws the expected exception.
"""
queries = [
'refresh functional.alltypes',