You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/12/15 23:01:11 UTC
[45/50] [abbrv] incubator-impala git commit: IMPALA-4659: fuzz test fixes

IMPALA-4659: fuzz test fixes

* Apply a 512m mem_limit to all fuzz tests. This limits aggregate memory
  consumption to ~5GB per daemon (assuming 10 concurrent tests).
* Refactor the exec option handling to use the exec_option dimension.
  This avoids executing the test multiple times redundantly.
* Remove the xfails to reduce noise, since there is no immediate plan to
  fix the product bugs. Instead just pass the tests.

Testing:
Ran in a loop for ~1h to flush out flakiness.

Change-Id: Ie1942ceef252ec3e6171a0a54722b66a7d9abbd7
Reviewed-on: http://gerrit.cloudera.org:8080/5502
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/246acba0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/246acba0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/246acba0

Branch: refs/heads/hadoop-next
Commit: 246acba0b37f61e708c17ea55f2300ef96908fd1
Parents: fc4ee65
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Dec 14 11:04:20 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Dec 15 01:31:28 2016 +0000

----------------------------------------------------------------------
 tests/query_test/test_scanners_fuzz.py | 66 ++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/246acba0/tests/query_test/test_scanners_fuzz.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners_fuzz.py b/tests/query_test/test_scanners_fuzz.py
index 76cc62a..81d20a4 100644
--- a/tests/query_test/test_scanners_fuzz.py
+++ b/tests/query_test/test_scanners_fuzz.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from copy import copy
+import itertools
 import os
 import pytest
 import random
@@ -22,12 +24,25 @@ import shutil
 import tempfile
 import time
 from subprocess import check_call
+from tests.common.test_dimensions import create_exec_option_dimension_from_dict
 from tests.common.impala_test_suite import ImpalaTestSuite, LOG
 from tests.util.filesystem_utils import WAREHOUSE, get_fs_path
 
 # Random fuzz testing of HDFS scanners. Existing tables for any HDFS file format
 # are corrupted in random ways to flush out bugs with handling of corrupted data.
 class TestScannersFuzzing(ImpalaTestSuite):
+  # Use abort_on_error = False to ensure we scan all the files.
+  ABORT_ON_ERROR_VALUES = [False]
+
+  # Only run on all nodes - num_nodes=1 would not provide additional coverage.
+  NUM_NODES_VALUES = [0]
+
+  # Limit memory to avoid causing other concurrent tests to fail.
+  MEM_LIMITS = ['512m']
+
+  # Test the codegen and non-codegen paths.
+  DISABLE_CODEGEN_VALUES = [True, False]
+
   # Test a range of batch sizes to exercise different corner cases.
   BATCH_SIZES = [0, 1, 16, 10000]
 
@@ -38,6 +53,11 @@ class TestScannersFuzzing(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestScannersFuzzing, cls).add_test_dimensions()
+    cls.TestMatrix.add_dimension(
+        create_exec_option_dimension_from_dict({
+          'abort_on_error' : cls.ABORT_ON_ERROR_VALUES,
+          'num_nodes' : cls.NUM_NODES_VALUES,
+          'mem_limit' : cls.MEM_LIMITS}))
     # TODO: enable for more table formats once they consistently pass the fuzz test.
     cls.TestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ('avro', 'parquet') or
@@ -120,36 +140,32 @@ class TestScannersFuzzing(ImpalaTestSuite):
     # Execute a query that tries to read all the columns and rows in the file.
     # Also execute a count(*) that materializes no columns, since different code
     # paths are exercised.
-    # Use abort_on_error=0 to ensure we scan all the files.
     queries = [
         'select count(*) from (select distinct * from {0}.{1}) q'.format(
             unique_database, table),
         'select count(*) from {0}.{1} q'.format(unique_database, table)]
 
-    xfail_msgs = []
-    for query in queries:
-      for batch_size in self.BATCH_SIZES:
-        query_options = {'abort_on_error': '0', 'batch_size': batch_size}
-        try:
-          result = self.execute_query(query, query_options = query_options)
-          LOG.info('\n'.join(result.log))
-        except Exception as e:
-          if 'memory limit exceeded' in str(e).lower():
-            # Memory limit error should fail query.
-            continue
-          msg = "Should not throw error when abort_on_error=0: '{0}'".format(e)
-          LOG.error(msg)
-          # Parquet and compressed text can fail the query for some parse errors.
-          # E.g. corrupt Parquet footer (IMPALA-3773) or a corrupt LZO index file
-          # (IMPALA-4013).
-          if table_format.file_format == 'parquet' or \
-              (table_format.file_format == 'text' and
-              table_format.compression_codec != 'none'):
-            xfail_msgs.append(msg)
-          else:
-            raise
-    if len(xfail_msgs) != 0:
-      pytest.xfail('\n'.join(xfail_msgs))
+    for query, batch_size, disable_codegen in \
+        itertools.product(queries, self.BATCH_SIZES, self.DISABLE_CODEGEN_VALUES):
+      query_options = copy(vector.get_value('exec_option'))
+      query_options['batch_size'] = batch_size
+      query_options['disable_codegen'] = disable_codegen
+      try:
+        result = self.execute_query(query, query_options = query_options)
+        LOG.info('\n'.join(result.log))
+      except Exception as e:
+        if 'memory limit exceeded' in str(e).lower():
+          # Memory limit error should fail query.
+          continue
+        msg = "Should not throw error when abort_on_error=0: '{0}'".format(e)
+        LOG.error(msg)
+        # Parquet and compressed text can fail the query for some parse errors.
+        # E.g. corrupt Parquet footer (IMPALA-3773) or a corrupt LZO index file
+        # (IMPALA-4013).
+        if table_format.file_format != 'parquet' \
+            and not (table_format.file_format == 'text' and
+            table_format.compression_codec != 'none'):
+          raise
 
   def walk_and_corrupt_table_data(self, tmp_table_dir, num_copies, rng):
     """ Walks a local copy of a HDFS table directory. Returns a list of partitions, each