You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/04/28 23:41:58 UTC

[07/15] impala git commit: IMPALA-4835: switch I/O buffers to buffer pool

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/common/test_dimensions.py
----------------------------------------------------------------------
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 434b884..a9ba7a8 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -132,13 +132,13 @@ SINGLE_NODE_ONLY = [1]
 ALL_NODES_ONLY = [0]
 ALL_DISABLE_CODEGEN_OPTIONS = [True, False]
 
-def create_single_exec_option_dimension():
+def create_single_exec_option_dimension(num_nodes=0, disable_codegen_rows_threshold=5000):
   """Creates an exec_option dimension that will produce a single test vector"""
-  return create_exec_option_dimension(cluster_sizes=ALL_NODES_ONLY,
-                                      disable_codegen_options=[False],
-                                      # Make sure codegen kicks in for functional.alltypes.
-                                      disable_codegen_rows_threshold_options=[5000],
-                                      batch_sizes=[0])
+  return create_exec_option_dimension(cluster_sizes=[num_nodes],
+      disable_codegen_options=[False],
+      # The default of 5000 makes sure codegen kicks in for functional.alltypes.
+      disable_codegen_rows_threshold_options=[disable_codegen_rows_threshold],
+      batch_sizes=[0])
 
 def create_exec_option_dimension(cluster_sizes=ALL_CLUSTER_SIZES,
                                  disable_codegen_options=ALL_DISABLE_CODEGEN_OPTIONS,
@@ -146,13 +146,15 @@ def create_exec_option_dimension(cluster_sizes=ALL_CLUSTER_SIZES,
                                  sync_ddl=None, exec_single_node_option=[0],
                                  # We already run with codegen on and off explicitly -
                                  # don't need automatic toggling.
-                                 disable_codegen_rows_threshold_options=[0]):
+                                 disable_codegen_rows_threshold_options=[0],
+                                 debug_action_options=[None]):
   exec_option_dimensions = {
       'abort_on_error': [1],
       'exec_single_node_rows_threshold': exec_single_node_option,
       'batch_size': batch_sizes,
       'disable_codegen': disable_codegen_options,
       'disable_codegen_rows_threshold': disable_codegen_rows_threshold_options,
+      'debug_action': debug_action_options,
       'num_nodes': cluster_sizes}
 
   if sync_ddl is not None:

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/custom_cluster/test_scratch_disk.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py
index bd3c7e4..65bde66 100644
--- a/tests/custom_cluster/test_scratch_disk.py
+++ b/tests/custom_cluster/test_scratch_disk.py
@@ -39,7 +39,7 @@ class TestScratchDir(CustomClusterTestSuite):
       """
   # Buffer pool limit that is low enough to force Impala to spill to disk when executing
   # spill_query.
-  buffer_pool_limit = "32m"
+  buffer_pool_limit = "45m"
 
   def count_nonempty_dirs(self, dirs):
     count = 0

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/query_test/test_mem_usage_scaling.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py
index 419298b..f8cf301 100644
--- a/tests/query_test/test_mem_usage_scaling.py
+++ b/tests/query_test/test_mem_usage_scaling.py
@@ -96,7 +96,7 @@ class TestExprMemUsage(ImpalaTestSuite):
 class TestLowMemoryLimits(ImpalaTestSuite):
   '''Super class for the memory limit tests with the TPC-H and TPC-DS queries'''
 
-  def low_memory_limit_test(self, vector, tpch_query, limit, xfail_mem_limit=None):
+  def low_memory_limit_test(self, vector, tpch_query, limit):
     mem = vector.get_value('mem_limit')
     # Mem consumption can be +-30MBs, depending on how many scanner threads are
     # running. Adding this extra mem in order to reduce false negatives in the tests.
@@ -113,13 +113,11 @@ class TestLowMemoryLimits(ImpalaTestSuite):
     try:
       self.run_test_case(tpch_query, new_vector)
     except ImpalaBeeswaxException as e:
-      if not expects_error and not xfail_mem_limit: raise
+      if not expects_error: raise
       found_expected_error = False
       for error_msg in MEM_LIMIT_ERROR_MSGS:
         if error_msg in str(e): found_expected_error = True
       assert found_expected_error, str(e)
-      if not expects_error and xfail_mem_limit:
-        pytest.xfail(xfail_mem_limit)
 
 
 @SkipIfLocal.mem_usage_different
@@ -134,7 +132,7 @@ class TestTpchMemLimitError(TestLowMemoryLimits):
                        'Q6' : 25, 'Q7' : 200, 'Q8' : 125, 'Q9' : 200, 'Q10' : 162,\
                        'Q11' : 112, 'Q12' : 150, 'Q13' : 125, 'Q14' : 125, 'Q15' : 125,\
                        'Q16' : 137, 'Q17' : 137, 'Q18' : 196, 'Q19' : 112, 'Q20' : 162,\
-                       'Q21' : 187, 'Q22' : 125}
+                       'Q21' : 300, 'Q22' : 125}
 
   @classmethod
   def get_workload(self):
@@ -177,8 +175,7 @@ class TestTpchMemLimitError(TestLowMemoryLimits):
     self.low_memory_limit_test(vector, 'tpch-q8', self.MIN_MEM_FOR_TPCH['Q8'])
 
   def test_low_mem_limit_q9(self, vector):
-    self.low_memory_limit_test(vector, 'tpch-q9', self.MIN_MEM_FOR_TPCH['Q9'],
-            xfail_mem_limit="IMPALA-3328: TPC-H Q9 memory limit test is flaky")
+    self.low_memory_limit_test(vector, 'tpch-q9', self.MIN_MEM_FOR_TPCH['Q9'])
 
   def test_low_mem_limit_q10(self, vector):
     self.low_memory_limit_test(vector, 'tpch-q10', self.MIN_MEM_FOR_TPCH['Q10'])

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/query_test/test_query_mem_limit.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_query_mem_limit.py b/tests/query_test/test_query_mem_limit.py
index 2fdd6eb..17ea9f5 100644
--- a/tests/query_test/test_query_mem_limit.py
+++ b/tests/query_test/test_query_mem_limit.py
@@ -118,7 +118,9 @@ class TestCodegenMemLimit(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestCodegenMemLimit, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
+    # Run with num_nodes=1 to avoid races between fragments allocating memory.
+    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension(
+        num_nodes=1, disable_codegen_rows_threshold=0))
     # Only run the query for parquet
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'parquet')

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 89da580..dbbca22 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -37,7 +37,9 @@ from tests.common.skip import (
     SkipIfADLS,
     SkipIfIsilon,
     SkipIfLocal)
-from tests.common.test_dimensions import create_single_exec_option_dimension
+from tests.common.test_dimensions import (
+    create_single_exec_option_dimension,
+    create_exec_option_dimension)
 from tests.common.test_result_verifier import (
     parse_column_types,
     parse_column_labels,
@@ -49,6 +51,11 @@ from tests.util.hdfs_util import NAMENODE
 from tests.util.get_parquet_metadata import get_parquet_metadata
 from tests.util.test_file_parser import QueryTestSectionReader
 
+# Test scanners with denial of reservations at varying frequency. This will affect the
+# number of scanner threads that can be spun up.
+DEBUG_ACTION_DIMS = [None,
+  '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@0.5',
+  '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@1.0']
 
 class TestScannersAllTableFormats(ImpalaTestSuite):
   BATCH_SIZES = [0, 1, 16]
@@ -66,18 +73,20 @@ class TestScannersAllTableFormats(ImpalaTestSuite):
       cls.ImpalaTestMatrix.add_dimension(cls.create_table_info_dimension('pairwise'))
     cls.ImpalaTestMatrix.add_dimension(
         ImpalaTestDimension('batch_size', *TestScannersAllTableFormats.BATCH_SIZES))
+    cls.ImpalaTestMatrix.add_dimension(
+        ImpalaTestDimension('debug_action', *DEBUG_ACTION_DIMS))
 
   def test_scanners(self, vector):
     new_vector = deepcopy(vector)
     new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size')
+    new_vector.get_value('exec_option')['debug_action'] = vector.get_value('debug_action')
     self.run_test_case('QueryTest/scanners', new_vector)
 
   def test_hdfs_scanner_profile(self, vector):
-    if vector.get_value('table_format').file_format in ('kudu', 'hbase'):
+    if vector.get_value('table_format').file_format in ('kudu', 'hbase') or \
+       vector.get_value('exec_option')['num_nodes'] != 0:
       pytest.skip()
-    new_vector = deepcopy(vector)
-    new_vector.get_value('exec_option')['num_nodes'] = 0
-    self.run_test_case('QueryTest/hdfs_scanner_profile', new_vector)
+    self.run_test_case('QueryTest/hdfs_scanner_profile', vector)
 
 # Test all the scanners with a simple limit clause. The limit clause triggers
 # cancellation in the scanner code paths.
@@ -171,6 +180,8 @@ class TestWideRow(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestWideRow, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS))
     # I can't figure out how to load a huge row into hbase
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format != 'hbase')
@@ -202,6 +213,8 @@ class TestWideTable(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestWideTable, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS))
     cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension("num_cols", *cls.NUM_COLS))
     # To cut down on test execution time, only run in exhaustive.
     if cls.exploration_strategy() != 'exhaustive':
@@ -244,6 +257,8 @@ class TestParquet(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestParquet, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(debug_action_options=DEBUG_ACTION_DIMS))
     cls.ImpalaTestMatrix.add_constraint(
       lambda v: v.get_value('table_format').file_format == 'parquet')
 

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/query_test/test_scanners_fuzz.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners_fuzz.py b/tests/query_test/test_scanners_fuzz.py
index 791c343..2cdb4f6 100644
--- a/tests/query_test/test_scanners_fuzz.py
+++ b/tests/query_test/test_scanners_fuzz.py
@@ -48,6 +48,12 @@ class TestScannersFuzzing(ImpalaTestSuite):
   # Test a range of batch sizes to exercise different corner cases.
   BATCH_SIZES = [0, 1, 16, 10000]
 
+  # Test with denial of reservations at varying frequency. This will affect the number
+  # of scanner threads that can be spun up.
+  DEBUG_ACTION_VALUES = [None,
+    '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@0.5',
+    '-1:OPEN:SET_DENY_RESERVATION_PROBABILITY@1.0']
+
   @classmethod
   def get_workload(cls):
     return 'functional-query'
@@ -59,7 +65,8 @@ class TestScannersFuzzing(ImpalaTestSuite):
         create_exec_option_dimension_from_dict({
           'abort_on_error' : cls.ABORT_ON_ERROR_VALUES,
           'num_nodes' : cls.NUM_NODES_VALUES,
-          'mem_limit' : cls.MEM_LIMITS}))
+          'mem_limit' : cls.MEM_LIMITS,
+          'debug_action' : cls.DEBUG_ACTION_VALUES}))
     # TODO: enable for more table formats once they consistently pass the fuzz test.
     # TODO(IMPALA-6772): enable for ORC formats once a new version after release-1.4.3
     # of ORC library is released.

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/query_test/test_sort.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_sort.py b/tests/query_test/test_sort.py
index e44d086..9024cfb 100644
--- a/tests/query_test/test_sort.py
+++ b/tests/query_test/test_sort.py
@@ -68,14 +68,23 @@ class TestQueryFullSort(ImpalaTestSuite):
       order by o_orderdate"""
     exec_option = copy(vector.get_value('exec_option'))
     table_format = vector.get_value('table_format')
+    exec_option['default_spillable_buffer_size'] = '8M'
+
+    # Minimum memory for different parts of the plan.
+    sort_reservation_mb = 48
+    if table_format.file_format == 'parquet':
+      scan_reservation_mb = 24
+    else:
+      scan_reservation_mb = 8
+    total_reservation_mb = sort_reservation_mb + scan_reservation_mb
 
     # The below memory value assume 8M pages.
-    exec_option['default_spillable_buffer_size'] = '8M'
-    buffer_pool_limit_values = ['-1', '48M'] # Unlimited and minimum memory.
+    # Test with unlimited and minimum memory for all file formats.
+    buffer_pool_limit_values = ['-1', '{0}M'.format(total_reservation_mb)]
     if self.exploration_strategy() == 'exhaustive' and \
         table_format.file_format == 'parquet':
       # Test some intermediate values for parquet on exhaustive.
-      buffer_pool_limit_values += ['64M', '128M', '256M']
+      buffer_pool_limit_values += ['128M', '256M']
     for buffer_pool_limit in buffer_pool_limit_values:
       exec_option['buffer_pool_limit'] = buffer_pool_limit
       result = transpose_results(self.execute_query(
@@ -84,7 +93,6 @@ class TestQueryFullSort(ImpalaTestSuite):
 
   def test_sort_join(self, vector):
     """With 200m memory limit this should be a 2-phase sort"""
-
     query = """select o1.o_orderdate, o2.o_custkey, o1.o_comment from orders o1 join
     orders o2 on (o1.o_orderkey = o2.o_orderkey) order by o1.o_orderdate limit 100000"""
 

http://git-wip-us.apache.org/repos/asf/impala/blob/fb5dc9eb/tests/query_test/test_spilling.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_spilling.py b/tests/query_test/test_spilling.py
index 8550f96..8629853 100644
--- a/tests/query_test/test_spilling.py
+++ b/tests/query_test/test_spilling.py
@@ -104,3 +104,8 @@ class TestSpillingNoDebugActionDimensions(ImpalaTestSuite):
       setting debug_action to alternative values via query options."""
     self.run_test_case('QueryTest/spilling-query-options', vector)
 
+  def test_spilling_no_debug_action(self, vector):
+    """Spilling tests that will not succeed if run with an arbitrary debug action.
+       These tests either run with no debug action set or set their own debug action."""
+    self.run_test_case('QueryTest/spilling-no-debug-action', vector)
+