You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by to...@apache.org on 2018/07/12 02:47:19 UTC

[3/3] impala git commit: IMPALA-7279: Fix flakiness in test_rows_availability

IMPALA-7279: Fix flakiness in test_rows_availability

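This patch fixes flakiness in test_rows_availability caused by its time
string parsing method, which failed on duration strings that include a
microsecond ("us") component. The per-test parsers in
test_rows_availability and test_hash_join_timer are replaced by a shared
helper, parse_duration_string_ms, in tests/util/parse_util.py.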

Change-Id: If7634869823d8cc4059048dd5d3c3a984744f3be
Reviewed-on: http://gerrit.cloudera.org:8080/10922
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e8a669bf
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e8a669bf
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e8a669bf

Branch: refs/heads/master
Commit: e8a669bf918e8d1c7ec158acdbacdaf6d1755549
Parents: 9df9efc
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Wed Jul 11 10:57:23 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Jul 12 02:42:00 2018 +0000

----------------------------------------------------------------------
 tests/query_test/test_hash_join_timer.py   | 28 +++++-------------------
 tests/query_test/test_rows_availability.py | 29 +++----------------------
 tests/util/parse_util.py                   | 13 +++++++++++
 3 files changed, 22 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/e8a669bf/tests/query_test/test_hash_join_timer.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_hash_join_timer.py b/tests/query_test/test_hash_join_timer.py
index c111c03..c1b0f0f 100644
--- a/tests/query_test/test_hash_join_timer.py
+++ b/tests/query_test/test_hash_join_timer.py
@@ -21,6 +21,7 @@ import re
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.test_vector import ImpalaTestDimension
+from tests.util.parse_util import parse_duration_string_ms
 from tests.verifiers.metric_verifier import MetricVerifier
 
 
@@ -154,27 +155,10 @@ class TestHashJoinTimer(ImpalaTestSuite):
         "Unable to verify ExecSummary: {0}".format(profile)
 
   def __verify_join_time(self, duration, comment):
-    duration_ms = self.__parse_duration_ms(duration)
+    duration_ms = parse_duration_string_ms(duration)
     if (duration_ms > self.HASH_JOIN_UPPER_BOUND_MS):
-        assert False, "Hash join timing too high for %s: %s %s" %(comment, duration, duration_ms)
+      assert False, "Hash join timing too high for %s: %s %s" % (
+        comment, duration, duration_ms)
     if (duration_ms < self.HASH_JOIN_LOWER_BOUND_MS):
-        assert False, "Hash join timing too low for %s: %s %s" %(comment, duration, duration_ms)
-
-  def __parse_duration_ms(self, duration):
-    """Parses a duration string of the form 1h2h3m4s5.6ms into milliseconds."""
-    matches = re.findall(r'(?P<value>[0-9]+(\.[0-9]+)?)(?P<units>\D+)', duration)
-    assert matches, 'Failed to parse duration string %s' % duration
-    hours = 0
-    minutes = 0
-    seconds = 0
-    milliseconds = 0
-    for match in matches:
-      if (match[2] == 'h'):
-        hours = float(match[0])
-      elif (match[2] == 'm'):
-        minutes = float(match[0])
-      elif (match[2] == 's'):
-        seconds = float(match[0])
-      elif (match[2] == 'ms'):
-        milliseconds = float(match[0])
-    return hours * 60 * 60 * 1000 + minutes * 60 * 1000 + seconds * 1000 + milliseconds
+      assert False, "Hash join timing too low for %s: %s %s" % (
+        comment, duration, duration_ms)

http://git-wip-us.apache.org/repos/asf/impala/blob/e8a669bf/tests/query_test/test_rows_availability.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_rows_availability.py b/tests/query_test/test_rows_availability.py
index 06b4d65..0bd65df 100644
--- a/tests/query_test/test_rows_availability.py
+++ b/tests/query_test/test_rows_availability.py
@@ -19,6 +19,7 @@ import pytest
 import re
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.test_vector import ImpalaTestDimension
+from tests.util.parse_util import parse_duration_string_ms
 
 class TestRowsAvailability(ImpalaTestSuite):
   """Tests that the 'Rows available' timeline event is marked only after rows are
@@ -76,9 +77,9 @@ class TestRowsAvailability(ImpalaTestSuite):
     rows_avail_time_ms = None
     for line in profile.split("\n"):
       if "Ready to start on" in line:
-        start_time_ms = self.__parse_time_ms(self.__find_time(line))
+        start_time_ms = parse_duration_string_ms(self.__find_time(line))
       elif "Rows available:" in line:
-        rows_avail_time_ms = self.__parse_time_ms(self.__find_time(line))
+        rows_avail_time_ms = parse_duration_string_ms(self.__find_time(line))
 
     if start_time_ms is None:
       assert False, "Failed to find the 'Ready to start' timeline event in the " \
@@ -102,27 +103,3 @@ class TestRowsAvailability(ImpalaTestSuite):
     if match is None:
       assert False, "Failed to find time in runtime profile"
     return match.group(1)
-
-  @staticmethod
-  def __parse_time_ms(duration):
-    """Parses a duration string of the form 1h2h3m4s5.6ms7.8ns into milliseconds."""
-    matches = re.findall(r'([0-9]+h)?([0-9]+m)?([0-9]+s)?'\
-                         '([0-9]+(\.[0-9]+)?ms)?([0-9]+(\.[0-9]+)?ns)?',
-                         duration)
-    # Expect exactly two matches because all groups are optional in the regex.
-    if matches is None or len(matches) != 2:
-      assert False, 'Failed to parse duration string %s' % duration
-    hours = 0
-    minutes = 0
-    seconds = 0
-    milliseconds = 0
-    if matches[0][0]:
-      hours = int(matches[0][0][:-1])
-    if matches[0][1]:
-      minutes = int(matches[0][1][:-1])
-    if matches[0][2]:
-      seconds = int(matches[0][2][:-1])
-    if matches[0][3]:
-      # Truncate fractional milliseconds.
-      milliseconds = int(float(matches[0][3][:-2]))
-    return hours * 60 * 60 * 1000 + minutes * 60 * 1000 + seconds * 1000 + milliseconds

http://git-wip-us.apache.org/repos/asf/impala/blob/e8a669bf/tests/util/parse_util.py
----------------------------------------------------------------------
diff --git a/tests/util/parse_util.py b/tests/util/parse_util.py
index 6869489..592716b 100644
--- a/tests/util/parse_util.py
+++ b/tests/util/parse_util.py
@@ -70,3 +70,16 @@ def parse_mem_to_mb(mem, units):
   else:
     raise Exception('Unexpected memory unit "%s"' % units)
   return int(mem)
+
+def parse_duration_string_ms(duration):
+  """Parses a duration string of the form 1h2h3m4s5.6ms4.5us7.8ns into milliseconds."""
+  pattern = r'(?P<value>[0-9]+\.?[0-9]*?)(?P<units>\D+)'
+  matches = list(re.finditer(pattern, duration))
+  assert matches, 'Failed to parse duration string %s' % duration
+
+  times = {'h': 0, 'm': 0, 's': 0, 'ms': 0}
+  for match in matches:
+    parsed = match.groupdict()
+    times[parsed['units']] = float(parsed['value'])
+
+  return (times['h'] * 60 * 60 + times['m'] * 60 + times['s']) * 1000 + times['ms']