You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/03/24 16:19:44 UTC

[impala] 01/02: IMPALA-11966: Enable cache_ozone_file_handles by default

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5e694568d5e837e6f6648bed573bb3b60c7d5a92
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Thu Mar 2 15:47:48 2023 -0800

    IMPALA-11966: Enable cache_ozone_file_handles by default
    
    Updates the Ozone dependency to 1.3.0 to address HDDS-7135 and enables
    cache_ozone_file_handles by default for a ~10% improvement in TPC-DS
    query time.
    
    Updates the Ozone CDP dependency for HDDS-8095. The fix for that issue
    will be available in Ozone 1.4.0, so testing with TDE currently requires
    the CDP build.
    
    Testing:
    - ran backend, e2e, and custom cluster test suites with Ozone
    
    Change-Id: Icc66551f9b87af785a1c30b516ac39f4640638fe
    Reviewed-on: http://gerrit.cloudera.org:8080/19573
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/io/disk-io-mgr.cc             |  2 +-
 bin/impala-config.sh                         | 24 ++++++++++++------------
 tests/custom_cluster/test_hdfs_fd_caching.py | 16 +++++++---------
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index 29babb3e0..fae86e948 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -199,7 +199,7 @@ DEFINE_bool(cache_s3_file_handles, true, "Enable the file handle cache for "
 DEFINE_bool(cache_abfs_file_handles, true, "Enable the file handle cache for "
     "ABFS files.");
 
-DEFINE_bool(cache_ozone_file_handles, false, "Enable the file handle cache for Ozone "
+DEFINE_bool(cache_ozone_file_handles, true, "Enable the file handle cache for Ozone "
     "files.");
 
 DECLARE_int64(min_buffer_size);
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 2470ebb15..acfd43210 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -213,26 +213,26 @@ fi
 : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
 export IMPALA_TOOLCHAIN_HOST
 
-export CDP_BUILD_NUMBER=38235009
+export CDP_BUILD_NUMBER=39127492
 export CDP_MAVEN_REPOSITORY=\
 "https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven"
-export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-127
-export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-127
-export CDP_HBASE_VERSION=2.4.6.7.2.17.0-127
-export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-127
-export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-127
-export CDP_KNOX_VERSION=1.3.0.7.2.17.0-127
-export CDP_OZONE_VERSION=1.3.0.7.2.17.0-127
-export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-127
-export CDP_RANGER_VERSION=2.3.0.7.2.17.0-127
-export CDP_TEZ_VERSION=0.9.1.7.2.17.0-127
+export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-160
+export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-160
+export CDP_HBASE_VERSION=2.4.6.7.2.17.0-160
+export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-160
+export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-160
+export CDP_KNOX_VERSION=1.3.0.7.2.17.0-160
+export CDP_OZONE_VERSION=1.3.0.7.2.17.0-160
+export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-160
+export CDP_RANGER_VERSION=2.3.0.7.2.17.0-160
+export CDP_TEZ_VERSION=0.9.1.7.2.17.0-160
 
 # Ref: https://infra.apache.org/release-download-pages.html#closer
 : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
 export APACHE_MIRROR
 export APACHE_HIVE_VERSION=3.1.3
 export APACHE_HIVE_STORAGE_API_VERSION=2.7.0
-export APACHE_OZONE_VERSION=1.2.1
+export APACHE_OZONE_VERSION=1.3.0
 
 export ARCH_NAME=$(uname -p)
 
diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py
index b5e5db5e8..9cb6936a2 100644
--- a/tests/custom_cluster/test_hdfs_fd_caching.py
+++ b/tests/custom_cluster/test_hdfs_fd_caching.py
@@ -125,8 +125,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--max_cached_file_handles=16"
-                   " --unused_file_handle_timeout_sec=18446744073709551600"
-                   " --cache_ozone_file_handles=true",
+                   " --unused_file_handle_timeout_sec=18446744073709551600",
       catalogd_args="--load_catalog_in_background=false")
   def test_caching_enabled(self, vector):
     """
@@ -146,8 +145,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
-      impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5"
-                   " --cache_ozone_file_handles=true",
+      impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5",
       catalogd_args="--load_catalog_in_background=false")
   def test_caching_with_eviction(self, vector):
     """Test of the HDFS file handle cache with unused file handle eviction enabled"""
@@ -162,7 +160,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
-      impalad_args="--max_cached_file_handles=0 --cache_ozone_file_handles=true",
+      impalad_args="--max_cached_file_handles=0",
       catalogd_args="--load_catalog_in_background=false")
   def test_caching_disabled_by_param(self, vector):
     """Test that the HDFS file handle cache is disabled when the parameter is zero"""
@@ -173,7 +171,8 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--cache_remote_file_handles=false --cache_s3_file_handles=false "
-                   "--cache_abfs_file_handles=false --hostname=" + get_external_ip(),
+                   "--cache_abfs_file_handles=false --cache_ozone_file_handles=false "
+                   "--hostname=" + get_external_ip(),
       catalogd_args="--load_catalog_in_background=false")
   def test_remote_caching_disabled_by_param(self, vector):
     """Test that the file handle cache is disabled for remote files when disabled"""
@@ -183,8 +182,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
-      impalad_args="--max_cached_file_handles=0 --cache_ozone_file_handles=true "
-                   "--hostname=" + get_external_ip(),
+      impalad_args="--max_cached_file_handles=0 --hostname=" + get_external_ip(),
       catalogd_args="--load_catalog_in_background=false")
   def test_remote_caching_disabled_by_global_param(self, vector):
     """Test that the file handle cache is disabled for remote files when all caching is
@@ -196,7 +194,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
       impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5 "
-                   "--always_use_data_cache=true --cache_ozone_file_handles=true",
+                   "--always_use_data_cache=true",
       start_args="--data_cache_dir=/tmp --data_cache_size=500MB",
       catalogd_args="--load_catalog_in_background=false")
   def test_no_fd_caching_on_cached_data(self, vector):