You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/03/26 03:15:44 UTC

[impala] 01/03: IMPALA-10598: Deflake test_cache_reload_validation

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5b27b7ca7232a17d2a099f8567553004248989f2
Author: Vihang Karajgaonkar <vi...@apache.org>
AuthorDate: Fri Mar 19 14:00:19 2021 -0700

    IMPALA-10598: Deflake test_cache_reload_validation
    
    This patch deflakes the test_cache_reload_validation e2e test in
    test_hdfs_caching.py. The util method that the test relies on to get the
    number of cache directives, by parsing the output of the command
    "hdfs cacheadmin -listDirectives -stats", does not account for trailing
    newlines or header lines in that output. Hence the test fails because the
    expected number of cache directives does not match the number of lines
    in the output.
    
    The fix parses the line "Found <int> entries" in the output when available
    and returns the count from that line. If the line is not found, it falls
    back to the earlier implementation of using the number of lines.
    
    Testing:
    1. The test was failing for me when run individually. After the patch, I looped
    the test 10 times without any errors.
    
    Change-Id: I2d491e90af461d5db3575a5840958d17ca90901c
    Reviewed-on: http://gerrit.cloudera.org:8080/17210
    Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_hdfs_caching.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py
index 6a7eda3..0815e58 100644
--- a/tests/query_test/test_hdfs_caching.py
+++ b/tests/query_test/test_hdfs_caching.py
@@ -345,13 +345,27 @@ def get_num_cache_requests():
   def get_num_cache_requests_util():
     rc, stdout, stderr = exec_process("hdfs cacheadmin -listDirectives -stats")
     assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout, stderr)
-    return len(stdout.split('\n'))
+    # remove blank new lines from output count
+    lines = [line for line in stdout.split('\n') if line.strip()]
+    count = None
+    for line in lines:
+      if line.startswith("Found "):
+        # the line should say "Found <int> entries"
+        # if we find this line we parse the number of entries
+        # from this line.
+        count = int(re.search(r'\d+', line).group())
+        break
+    # if count is available we return it else we just
+    # return the total number of lines
+    if count is not None:
+      return count
+    else:
+      return len(stdout.split('\n'))
 
   # IMPALA-3040: This can take time, especially under slow builds like ASAN.
   wait_time_in_sec = build_flavor_timeout(5, slow_build_timeout=20)
   num_stabilization_attempts = 0
   max_num_stabilization_attempts = 10
-  new_requests = None
   num_requests = None
   LOG.info("{0} Entered get_num_cache_requests()".format(time.time()))
   while num_stabilization_attempts < max_num_stabilization_attempts: