You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by la...@apache.org on 2021/02/17 12:25:15 UTC
[impala] branch master updated: IMPALA-10497: Fix flakiness in
test_no_fd_caching_on_cached_data.
This is an automated email from the ASF dual-hosted git repository.
laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 490aff5 IMPALA-10497: Fix flakiness in test_no_fd_caching_on_cached_data.
490aff5 is described below
commit 490aff51b9e3289f2225d3918734821cab7f28c2
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Tue Feb 9 23:23:05 2021 -0800
IMPALA-10497: Fix flakiness in test_no_fd_caching_on_cached_data.
test_no_fd_caching_on_cached_data has been flaky because not all of the
data is fully cached during the warm-up phase. There is a limit on
concurrency in writing to the cache, so we may fail to cache data
the first time we read it. This patch fixes the test by repeating the
warm-up query 5 times. This patch also adds proper start_args to the
test so that each impalad writes its data cache file in its own
directory.
Testing:
- Looped the test manually 100 times and saw no more failures.
Change-Id: I774f9dfea7dcc107c3c7f2b76db3aaf4b2dd7952
Reviewed-on: http://gerrit.cloudera.org:8080/17054
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/custom_cluster/test_hdfs_fd_caching.py | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py
index 42c6ca6..07fdd55 100644
--- a/tests/custom_cluster/test_hdfs_fd_caching.py
+++ b/tests/custom_cluster/test_hdfs_fd_caching.py
@@ -164,7 +164,8 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5 " +
- "--data_cache=/tmp:500MB --always_use_data_cache=true",
+ "--always_use_data_cache=true",
+ start_args="--data_cache_dir=/tmp --data_cache_size=500MB",
catalogd_args="--load_catalog_in_background=false")
def test_no_fd_caching_on_cached_data(self, vector):
"""IMPALA-10147: Test that no file handle should be opened nor cached again if data
@@ -183,10 +184,12 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
# The table has one file. If caching is expected, there should be one more
# handle cached after the first select. If caching is not expected, the
# number of handles should not change from the initial number.
- self.execute_query("select * from cachefd.simple", vector=vector)
- num_handles_after = self.cached_handles()
- assert self.max_cached_handles() <= cache_capacity
- assert num_handles_after == (num_handles_start + 1)
+ # Read 5 times to make sure the data cache is fully warmed up.
+ for x in range(5):
+ self.execute_query("select * from cachefd.simple", vector=vector)
+ num_handles_after = self.cached_handles()
+ assert self.max_cached_handles() <= cache_capacity
+ assert num_handles_after == (num_handles_start + 1)
# No open handles if scanning is finished.
assert self.outstanding_handles() == 0