You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/04/26 16:39:37 UTC
[5/5] impala git commit: IMPALA-6905: support regexes with more verifiers

IMPALA-6905: support regexes with more verifiers

Support row_regex and other non-literal lines in the subset and superset
verifiers, which previously assumed that lines in the actual and expected
results had to match exactly.

Use in test_stats_extrapolation to make the test more robust to
irrelevant changes in the explain plan.

Testing:
Manually modified a superset and a subset test to check that tests fail
as expected.

Change-Id: Ia7a28d421c8e7cd84b14d07fcb71b76449156409
Reviewed-on: http://gerrit.cloudera.org:8080/10155
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/4c285b98
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/4c285b98
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/4c285b98

Branch: refs/heads/2.x
Commit: 4c285b98eec32e854a35fc1649bd13085e7eecd7
Parents: 81bed7a
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Apr 23 10:31:39 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Apr 26 04:33:16 2018 +0000

----------------------------------------------------------------------
 .../queries/QueryTest/stats-extrapolation.test  | 26 +++++-----
 tests/common/test_result_verifier.py            | 52 +++++++++++++++-----
 2 files changed, 53 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/4c285b98/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test b/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
index 7da7baf..42f4d50 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
@@ -14,8 +14,7 @@ explain select id from alltypes;
 '     table: rows=unavailable size=unavailable'
 '     partitions: 0/12 rows=unavailable'
 '     columns: unavailable'
-'   extrapolated-rows=unavailable'
-'   mem-estimate=16.00MB mem-reservation=0B'
+row_regex:.* extrapolated-rows=unavailable.*
 '   tuple-ids=0 row-size=4B cardinality=unavailable'
 ---- TYPES
 STRING
@@ -68,8 +67,8 @@ row_regex:.*partitions=12/12 files=12 size=.*
 row_regex:.*table: rows=3650 size=.*
 '     partitions: 0/12 rows=unavailable'
 '     columns: all'
-'   extrapolated-rows=3650'
-'   mem-estimate=16.00MB mem-reservation=0B'
+row_regex:.* extrapolated-rows=3650.*
+row_regex:.*mem-estimate=.* mem-reservation=.*
 '   tuple-ids=0 row-size=4B cardinality=3650'
 ---- TYPES
 STRING
@@ -93,8 +92,8 @@ row_regex:.*partitions=3/12 files=3 size=.*
 row_regex:.*table: rows=3650 size=.*
 '     partitions: 0/3 rows=unavailable'
 '     columns: all'
-'   extrapolated-rows=904'
-'   mem-estimate=16.00MB mem-reservation=0B'
+row_regex:.* extrapolated-rows=904.*
+row_regex:.*mem-estimate=.* mem-reservation=.*
 '   tuple-ids=0 row-size=4B cardinality=904'
 ---- TYPES
 STRING
@@ -119,8 +118,8 @@ row_regex:.*partitions=12/12 files=24 size=.*
 row_regex:.*table: rows=3650 size=.*
 '     partitions: 0/12 rows=unavailable'
 '     columns: all'
-'   extrapolated-rows=7300'
-'   mem-estimate=16.00MB mem-reservation=0B'
+row_regex:.* extrapolated-rows=7300.*
+row_regex:.*mem-estimate=.* mem-reservation=.*
 '   tuple-ids=0 row-size=4B cardinality=7300'
 ---- TYPES
 STRING
@@ -146,8 +145,8 @@ row_regex:.*partitions=12/24 files=12 size=.*
 row_regex:.*table: rows=3650 size=.*
 '     partitions: 0/12 rows=unavailable'
 '     columns: all'
-'   extrapolated-rows=3651'
-'   mem-estimate=16.00MB mem-reservation=0B'
+row_regex:.* extrapolated-rows=3651.*
+row_regex:.*mem-estimate=.* mem-reservation=.*
 '   tuple-ids=0 row-size=4B cardinality=3651'
 ---- TYPES
 STRING
@@ -172,8 +171,8 @@ row_regex:.*partitions=12/24 files=12 size=.*
 row_regex:.*table: rows=10950 size=.*
 '     partitions: 0/12 rows=unavailable'
 '     columns: all'
-'   extrapolated-rows=3651'
-'   mem-estimate=16.00MB mem-reservation=0B'
+row_regex:.* extrapolated-rows=3651
+row_regex:.*mem-estimate=.* mem-reservation=.*
 '   tuple-ids=0 row-size=4B cardinality=3651'
 ---- TYPES
 STRING
@@ -187,8 +186,7 @@ explain select id from alltypes;
 '     table: rows=unavailable size=unavailable'
 '     partitions: 0/24 rows=unavailable'
 '     columns: unavailable'
-'   extrapolated-rows=unavailable'
-'   mem-estimate=16.00MB mem-reservation=0B'
+row_regex:.* extrapolated-rows=unavailable.*
 '   tuple-ids=0 row-size=4B cardinality=unavailable'
 ---- TYPES
 STRING

http://git-wip-us.apache.org/repos/asf/impala/blob/4c285b98/tests/common/test_result_verifier.py
----------------------------------------------------------------------
diff --git a/tests/common/test_result_verifier.py b/tests/common/test_result_verifier.py
index a22e0d4..2e8bbb1 100644
--- a/tests/common/test_result_verifier.py
+++ b/tests/common/test_result_verifier.py
@@ -63,6 +63,18 @@ class QueryTestResult(object):
   def __str__(self):
     return '\n'.join(['%s' % row for row in self.rows])
 
+  def separate_rows(self):
+    """Returns rows that are literal rows and rows that are not literals (e.g. regex)
+    in two lists."""
+    literal_rows = []
+    non_literal_rows = []
+    for row in self.rows:
+      if row.regex is None:
+        literal_rows.append(row)
+      else:
+        non_literal_rows.append(row)
+    return (literal_rows, non_literal_rows)
+
 
 # Represents a row in a result set
 class ResultRow(object):
@@ -207,24 +219,42 @@ def assert_args_not_none(*args):
   for arg in args:
     assert arg is not None
 
-def convert_results_to_sets(expected_results, actual_results):
-  assert_args_not_none(expected_results, actual_results)
-  expected_set = set(map(str, expected_results.rows))
-  actual_set = set(map(str, actual_results.rows))
-  return expected_set, actual_set
-
 def verify_query_result_is_subset(expected_results, actual_results):
   """Check whether the results in expected_results are a subset of the results in
   actual_results. This uses set semantics, i.e. any duplicates are ignored."""
-  expected_set, actual_set = convert_results_to_sets(expected_results, actual_results)
-  assert expected_set <= actual_set
+  expected_literals, expected_non_literals = expected_results.separate_rows()
+  expected_literal_strings = set([str(row) for row in expected_literals])
+  actual_literal_strings = set([str(row) for row in actual_results.rows])
+  # Expected literal strings must all be present in the actual strings.
+  assert expected_literal_strings <= actual_literal_strings
+  # Expected patterns must be present in the actual strings.
+  for expected_row in expected_non_literals:
+    matched = False
+    for actual_row in actual_results.rows:
+      if actual_row == expected_row:
+        matched = True
+        break
+    assert matched, "Could not find expected row {0} in actual rows:\n{1}".format(
+        str(expected_row), str(actual_results))
 
 def verify_query_result_is_superset(expected_results, actual_results):
   """Check whether the results in expected_results are a superset of the results in
   actual_results. This uses set semantics, i.e. any duplicates are ignored."""
-  expected_set, actual_set = convert_results_to_sets(expected_results, actual_results)
-  assert expected_set >= actual_set
-
+  expected_literals, expected_non_literals = expected_results.separate_rows()
+  expected_literal_strings = set([str(row) for row in expected_literals])
+  # Check that all actual rows are present in either expected_literal_strings or
+  # expected_non_literals.
+  for actual_row in actual_results.rows:
+    if str(actual_row) in expected_literal_strings:
+      # Matched to a literal string
+      continue
+    matched = False
+    for expected_row in expected_non_literals:
+      if actual_row == expected_row:
+        matched = True
+        break
+    assert matched, "Could not find actual row {0} in expected rows:\n{1}".format(
+        str(actual_row), str(expected_results))
 
 def verify_query_result_is_equal(expected_results, actual_results):
   assert_args_not_none(expected_results, actual_results)