You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by db...@apache.org on 2023/08/09 12:32:45 UTC

[impala] branch master updated: IMPALA-12306: (Part 1) Make codegen cache tests with symbol emitter more robust

This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 3435baba6 IMPALA-12306: (Part 1) Make codegen cache tests with symbol emitter more robust
3435baba6 is described below

commit 3435baba6782227befcd7b091a186ede0f1a2480
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Thu Aug 3 17:31:23 2023 +0200

    IMPALA-12306: (Part 1) Make codegen cache tests with symbol emitter more robust
    
    The codegen cache tests that include having a symbol emitter (previously
    TestCodegenCache.{test_codegen_cache_with_asm_module_dir,test_codegen_cache_with_perf_map})
    introduced by IMPALA-12260 were added to ensure we don't produce a
    use-after-free.
    
    There are two problems with these tests:
      1. Setting the codegen cache size correctly in the tests has proved to
         be difficult because new commits and different build types (debug
         vs. release) have a huge effect on what sizes are appropriate. We
         have had many build failures because of this.
    
      2. Use-after-free is undefined behaviour and does not guarantee a
         crash but the tests rely on the crash to catch the bug described in
         IMPALA-12260.
    
    This commit solves the first problem. We use the
    '--codegen_cache_entry_bytes_charge_overhead' startup flag to
    artificially assign a higher size (memory charge) to the cache entries,
    compared to which the real size, and therefore also changes in the real
    size, are insignificant.
    
    Change-Id: If801ae6d3d9f5286ed886b1d06c37a32bc1d2c54
    Reviewed-on: http://gerrit.cloudera.org:8080/20304
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/codegen/llvm-codegen-cache.cc       |  6 +++-
 tests/custom_cluster/test_codegen_cache.py | 45 ++++++++++++++++++++----------
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/be/src/codegen/llvm-codegen-cache.cc b/be/src/codegen/llvm-codegen-cache.cc
index 3199e0ad4..b0d27a522 100644
--- a/be/src/codegen/llvm-codegen-cache.cc
+++ b/be/src/codegen/llvm-codegen-cache.cc
@@ -23,6 +23,10 @@
 using namespace std;
 using strings::Substitute;
 
+DEFINE_int32_hidden(codegen_cache_entry_bytes_charge_overhead, 0,
+    "Value (in bytes) that is added to the memory charge of codegen cache entries. "
+    "Used for testing.");
+
 namespace impala {
 
 // Maximum codegen cache entry size for the purposes of histogram sizing in stats
@@ -148,7 +152,7 @@ Status CodeGenCache::StoreInternal(const CodeGenCacheKey& cache_key,
   }
   // Memory charge includes both key and entry size.
   int64_t mem_charge = codegen->memory_manager_->bytes_allocated() + key.size()
-      + sizeof(CodeGenCacheEntry);
+      + sizeof(CodeGenCacheEntry) + FLAGS_codegen_cache_entry_bytes_charge_overhead;
   Cache::UniquePendingHandle pending_handle(
       cache_->Allocate(key, sizeof(CodeGenCacheEntry), mem_charge));
   if (pending_handle == nullptr) {
diff --git a/tests/custom_cluster/test_codegen_cache.py b/tests/custom_cluster/test_codegen_cache.py
index c0cafa5f8..a10685ec9 100644
--- a/tests/custom_cluster/test_codegen_cache.py
+++ b/tests/custom_cluster/test_codegen_cache.py
@@ -140,34 +140,51 @@ class TestCodegenCache(CustomClusterTestSuite):
     self._test_codegen_cache(vector,
       "select sum(identity(bigint_col)) from functional.alltypes", False)
 
-  CODEGEN_CACHE_CAPACITY_IN_SYMBOL_EMITTER_TESTS = "3.25MB"
+  SYMBOL_EMITTER_TESTS_IMPALAD_ARGS = "--cache_force_single_shard=1 \
+      --codegen_cache_entry_bytes_charge_overhead=10000000 --codegen_cache_capacity=25MB "
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(cluster_size=1,
-          impalad_args="--codegen_cache_capacity={} --asm_module_dir=/dev/null".format(
-              CODEGEN_CACHE_CAPACITY_IN_SYMBOL_EMITTER_TESTS))
+          impalad_args=SYMBOL_EMITTER_TESTS_IMPALAD_ARGS + "--asm_module_dir=/dev/null")
   # Regression test for IMPALA-12260.
   def test_codegen_cache_with_asm_module_dir(self, vector):
     self._test_codegen_cache_with_symbol_emitter(vector)
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(cluster_size=1,
-          impalad_args="--codegen_cache_capacity={} --perf_map".format(
-              CODEGEN_CACHE_CAPACITY_IN_SYMBOL_EMITTER_TESTS))
+          impalad_args=SYMBOL_EMITTER_TESTS_IMPALAD_ARGS + "--perf_map")
   # Regression test for IMPALA-12260.
   def test_codegen_cache_with_perf_map(self, vector):
     self._test_codegen_cache_with_symbol_emitter(vector)
 
   def _test_codegen_cache_with_symbol_emitter(self, vector):
-    # Regression test for IMPALA-12260. The first query produces two entries in the cache,
-    # and one of them has a 'CodegenSymbolEmitter' as an event listener because of the
-    # '--asm_module_dir' or '--perf_map' flag. The second query inserts new entries in the
-    # cache - the size of the cache is chosen such that both entries from the first query
-    # fit in it but both are evicted during the second query. When an
-    # 'llvm::ExecutionEngine', which is part of the cache entry, is destroyed, it notifies
-    # its listeners, so the listeners should be alive at this time. Prior to IMPALA-12260
-    # the 'CodegenSymbolEmitter' was destroyed at the end of the first query, causing a
-    # crash during the second one.
+    """Regression test for IMPALA-12260. In the test we run two queries. The first query
+    produces two entries in the cache, and they both have a 'CodegenSymbolEmitter' as
+    event listeners because of the '--asm_module_dir' or '--perf_map' startup flag. The
+    second query inserts new entries in the cache - the size of the cache should be such
+    that both entries from the first query fit in it but both are evicted during the
+    second query.
+
+    When an 'llvm::ExecutionEngine', which is part of the cache entry, is destroyed, it
+    frees any remaining object files and notifies the listeners about this, so the
+    listeners should be alive at this time. Prior to IMPALA-12260 the
+    'CodegenSymbolEmitter's of the cached fragment instances were destroyed at the end of
+    the first query, causing a use-after-free (sometimes leading to a crash) during the
+    second one.
+
+    The choice of the size of the cache is based on the following:
+      - the first query imposes a lower bound on the cache size (both cache entries should
+        fit in the cache) AND
+      - the second query imposes an upper bound (the cache entries of the first query
+        should be evicted during the second query).
+    The acceptable values are in the intersection of these two intervals.
+    However, code changes and the difference between debug and release builds
+    can have a huge effect on the acceptable range. To get around this, we use
+    the '--codegen_cache_entry_bytes_charge_overhead' startup flag to
+    artificially assign a higher size to the cache entries, compared to which
+    the real size, and therefore also changes in the real size, are
+    insignificant."""
+
     exec_options = copy(vector.get_value('exec_option'))
     exec_options['exec_single_node_rows_threshold'] = 0