You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by db...@apache.org on 2023/07/28 08:54:35 UTC
[impala] branch master updated: IMPALA-12292: TestCodegenCache.{test_codegen_cache_with_asm_module_dir,test_codegen_cache_with_perf_map} fail in builds

This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 66b701f80 IMPALA-12292: TestCodegenCache.{test_codegen_cache_with_asm_module_dir,test_codegen_cache_with_perf_map} fail in builds
66b701f80 is described below

commit 66b701f80612a1396ba10aa46d4912ac92ec21d8
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Wed Jul 19 16:09:45 2023 +0200

    IMPALA-12292: TestCodegenCache.{test_codegen_cache_with_asm_module_dir,test_codegen_cache_with_perf_map} fail in builds
    
    The above codegen cache tests were introduced by IMPALA-12260. They run
    two queries and the first query produces two codegen cache entries. The
    tests aim to bring about the following scenario:
    
    1. both codegen cache entries from the first query fit in the cache
    AND
    2. both entries from the first query are evicted during the second
       query.
    
    The parameters that can be tuned are the following:
    1. the size of the codegen cache entries of the first query
    2. the size of the codegen cache entries of the second query
    3. the size of the codegen cache.
    
    If the parameters are chosen badly or the sizes of the codegen cache
    entries change because of other Impala changes (e.g. codegen
    optimisations), the conditions may not be satisfied and the tests may
    fail like they did now.
    
    This change makes the tests more robust by
     - increasing the cache footprint of the second query (from 487.40 KB to
       663.68 KB)
     - choosing the size of the codegen cache so as to leave as much margin
       on each side as possible. At present
         - the minimal codegen cache size so that both entries from the
           first query fit the cache is around 2.4 MB
         - the maximal cache size so that both entries from the first query
           are evicted during the second query is around 4.1 MB
       Therefore we choose a cache size of 3.25 MB, which lies in the middle.
    
    Experience has shown that this setup is fragile and breaks easily when
    new commits are added to Impala. Therefore this change relaxes some of
    the assertions in the tests as a temporary measure to prevent build
    failures. For this and other reasons IMPALA-12306 was opened to make
    these tests more robust.
    
    Change-Id: I15320b8c0d06f4d93927b19731c11bd4e15b3690
    Reviewed-on: http://gerrit.cloudera.org:8080/20224
    Reviewed-by: Yida Wu <wy...@gmail.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_codegen_cache.py | 31 +++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/tests/custom_cluster/test_codegen_cache.py b/tests/custom_cluster/test_codegen_cache.py
index 5712dbc27..19cea803a 100644
--- a/tests/custom_cluster/test_codegen_cache.py
+++ b/tests/custom_cluster/test_codegen_cache.py
@@ -140,16 +140,20 @@ class TestCodegenCache(CustomClusterTestSuite):
     self._test_codegen_cache(vector,
       "select sum(identity(bigint_col)) from functional.alltypes", False)
 
+  CODEGEN_CACHE_CAPACITY_IN_SYMBOL_EMITTER_TESTS = "3.25MB"
+
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(cluster_size=1,
-          impalad_args="--codegen_cache_capacity=2.5MB --asm_module_dir=/dev/null")
+          impalad_args="--codegen_cache_capacity={} --asm_module_dir=/dev/null".format(
+              CODEGEN_CACHE_CAPACITY_IN_SYMBOL_EMITTER_TESTS))
   # Regression test for IMPALA-12260.
   def test_codegen_cache_with_asm_module_dir(self, vector):
     self._test_codegen_cache_with_symbol_emitter(vector)
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(cluster_size=1,
-          impalad_args="--codegen_cache_capacity=2.5MB --perf_map")
+          impalad_args="--codegen_cache_capacity={} --perf_map".format(
+              CODEGEN_CACHE_CAPACITY_IN_SYMBOL_EMITTER_TESTS))
   # Regression test for IMPALA-12260.
   def test_codegen_cache_with_perf_map(self, vector):
     self._test_codegen_cache_with_symbol_emitter(vector)
@@ -169,23 +173,24 @@ class TestCodegenCache(CustomClusterTestSuite):
 
     q1 = """select int_col, tinyint_col from functional_parquet.alltypessmall
         order by int_col desc limit 20"""
-    q2 = """select bool_col, year, month
-        from functional_parquet.alltypes
-        group by id, bool_col, smallint_col, bigint_col, float_col, double_col,
-            date_string_col, string_col, timestamp_col, year, month
-        order by id, bool_col, smallint_col, bigint_col, float_col, double_col,
-            date_string_col, string_col, timestamp_col, year, month
-        limit 20"""
+    q2 = """select t1.bool_col, t1.year, t1.month
+         from functional_parquet.alltypes t1
+         inner join functional_parquet.alltypessmall t2 on t1.year = t2.year
+         group by t1.id, t1.bool_col, t1.smallint_col, t1.bigint_col, t1.float_col,
+             t1.double_col, t1.date_string_col, t1.string_col, t1.timestamp_col, t1.year,
+             t1.month
+         order by t1.id, t1.bool_col, t1.smallint_col, t1.bigint_col, t1.float_col,
+             t1.double_col, t1.date_string_col, t1.string_col, t1.timestamp_col, t1.year,
+             t1.month"""
 
     self._check_metric_expect_init()
     self.execute_query_expect_success(self.client, q1, exec_options)
-    assert self.get_metric('impala.codegen-cache.entries-in-use') == 2
-    assert self.get_metric('impala.codegen-cache.entries-evicted') == 0
+    cache_entries_in_use = self.get_metric('impala.codegen-cache.entries-in-use')
+    assert cache_entries_in_use > 0
     assert self.get_metric('impala.codegen-cache.hits') == 0
 
     self.execute_query_expect_success(self.client, q2, exec_options)
-    assert self.get_metric('impala.codegen-cache.entries-in-use') == 3
-    assert self.get_metric('impala.codegen-cache.entries-evicted') == 2
+    assert self.get_metric('impala.codegen-cache.entries-evicted') >= cache_entries_in_use
     assert self.get_metric('impala.codegen-cache.hits') == 0
 
   def _check_metric_expect_init(self):