You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by la...@apache.org on 2021/02/17 12:25:15 UTC

[impala] branch master updated: IMPALA-10497: Fix flakiness in test_no_fd_caching_on_cached_data.

This is an automated email from the ASF dual-hosted git repository.

laszlog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 490aff5  IMPALA-10497: Fix flakiness in test_no_fd_caching_on_cached_data.
490aff5 is described below

commit 490aff51b9e3289f2225d3918734821cab7f28c2
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Tue Feb 9 23:23:05 2021 -0800

    IMPALA-10497: Fix flakiness in test_no_fd_caching_on_cached_data.
    
    test_no_fd_caching_on_cached_data has been flaky because not all of
    the data is fully cached in the warm-up phase. There is a limit on
    concurrency in writing to the cache, so we may fail to cache data
    the first time we read it. This patch fixes the test by repeating the
    warm-up query 5 times. This patch also adds proper start_args to the
    test so that each impalad writes its data cache file in its own
    directory.
    
    Testing:
    - Looped the test manually 100 times and saw no more failures.
    
    Change-Id: I774f9dfea7dcc107c3c7f2b76db3aaf4b2dd7952
    Reviewed-on: http://gerrit.cloudera.org:8080/17054
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_hdfs_fd_caching.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py
index 42c6ca6..07fdd55 100644
--- a/tests/custom_cluster/test_hdfs_fd_caching.py
+++ b/tests/custom_cluster/test_hdfs_fd_caching.py
@@ -164,7 +164,8 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5 " +
-                   "--data_cache=/tmp:500MB --always_use_data_cache=true",
+                   "--always_use_data_cache=true",
+      start_args="--data_cache_dir=/tmp --data_cache_size=500MB",
       catalogd_args="--load_catalog_in_background=false")
   def test_no_fd_caching_on_cached_data(self, vector):
     """IMPALA-10147: Test that no file handle should be opened nor cached again if data
@@ -183,10 +184,12 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
     # The table has one file. If caching is expected, there should be one more
     # handle cached after the first select. If caching is not expected, the
     # number of handles should not change from the initial number.
-    self.execute_query("select * from cachefd.simple", vector=vector)
-    num_handles_after = self.cached_handles()
-    assert self.max_cached_handles() <= cache_capacity
-    assert num_handles_after == (num_handles_start + 1)
+    # Read 5 times to make sure the data cache is fully warmed up.
+    for x in range(5):
+      self.execute_query("select * from cachefd.simple", vector=vector)
+      num_handles_after = self.cached_handles()
+      assert self.max_cached_handles() <= cache_capacity
+      assert num_handles_after == (num_handles_start + 1)
 
     # No open handles if scanning is finished.
     assert self.outstanding_handles() == 0