You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/03/24 16:19:44 UTC
[impala] 01/02: IMPALA-11966: Enable cache_ozone_file_handles by default
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 5e694568d5e837e6f6648bed573bb3b60c7d5a92
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Thu Mar 2 15:47:48 2023 -0800
IMPALA-11966: Enable cache_ozone_file_handles by default
Updates the Apache Ozone dependency to 1.3.0 to address HDDS-7135, and
enables cache_ozone_file_handles by default for a ~10% improvement in
TPC-DS query time.
Also updates the Ozone CDP dependency to pick up the fix for HDDS-8095.
Upstream, that fix will only be available in Ozone 1.4.0, so testing with
TDE currently requires the CDP build.
Testing:
- ran backend, e2e, and custom cluster test suites with Ozone
Change-Id: Icc66551f9b87af785a1c30b516ac39f4640638fe
Reviewed-on: http://gerrit.cloudera.org:8080/19573
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/runtime/io/disk-io-mgr.cc | 2 +-
bin/impala-config.sh | 24 ++++++++++++------------
tests/custom_cluster/test_hdfs_fd_caching.py | 16 +++++++---------
3 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index 29babb3e0..fae86e948 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -199,7 +199,7 @@ DEFINE_bool(cache_s3_file_handles, true, "Enable the file handle cache for "
DEFINE_bool(cache_abfs_file_handles, true, "Enable the file handle cache for "
"ABFS files.");
-DEFINE_bool(cache_ozone_file_handles, false, "Enable the file handle cache for Ozone "
+DEFINE_bool(cache_ozone_file_handles, true, "Enable the file handle cache for Ozone "
"files.");
DECLARE_int64(min_buffer_size);
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 2470ebb15..acfd43210 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -213,26 +213,26 @@ fi
: ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
export IMPALA_TOOLCHAIN_HOST
-export CDP_BUILD_NUMBER=38235009
+export CDP_BUILD_NUMBER=39127492
export CDP_MAVEN_REPOSITORY=\
"https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven"
-export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-127
-export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-127
-export CDP_HBASE_VERSION=2.4.6.7.2.17.0-127
-export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-127
-export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-127
-export CDP_KNOX_VERSION=1.3.0.7.2.17.0-127
-export CDP_OZONE_VERSION=1.3.0.7.2.17.0-127
-export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-127
-export CDP_RANGER_VERSION=2.3.0.7.2.17.0-127
-export CDP_TEZ_VERSION=0.9.1.7.2.17.0-127
+export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-160
+export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-160
+export CDP_HBASE_VERSION=2.4.6.7.2.17.0-160
+export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-160
+export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-160
+export CDP_KNOX_VERSION=1.3.0.7.2.17.0-160
+export CDP_OZONE_VERSION=1.3.0.7.2.17.0-160
+export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-160
+export CDP_RANGER_VERSION=2.3.0.7.2.17.0-160
+export CDP_TEZ_VERSION=0.9.1.7.2.17.0-160
# Ref: https://infra.apache.org/release-download-pages.html#closer
: ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
export APACHE_MIRROR
export APACHE_HIVE_VERSION=3.1.3
export APACHE_HIVE_STORAGE_API_VERSION=2.7.0
-export APACHE_OZONE_VERSION=1.2.1
+export APACHE_OZONE_VERSION=1.3.0
export ARCH_NAME=$(uname -p)
diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py
index b5e5db5e8..9cb6936a2 100644
--- a/tests/custom_cluster/test_hdfs_fd_caching.py
+++ b/tests/custom_cluster/test_hdfs_fd_caching.py
@@ -125,8 +125,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--max_cached_file_handles=16"
- " --unused_file_handle_timeout_sec=18446744073709551600"
- " --cache_ozone_file_handles=true",
+ " --unused_file_handle_timeout_sec=18446744073709551600",
catalogd_args="--load_catalog_in_background=false")
def test_caching_enabled(self, vector):
"""
@@ -146,8 +145,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
- impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5"
- " --cache_ozone_file_handles=true",
+ impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5",
catalogd_args="--load_catalog_in_background=false")
def test_caching_with_eviction(self, vector):
"""Test of the HDFS file handle cache with unused file handle eviction enabled"""
@@ -162,7 +160,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
- impalad_args="--max_cached_file_handles=0 --cache_ozone_file_handles=true",
+ impalad_args="--max_cached_file_handles=0",
catalogd_args="--load_catalog_in_background=false")
def test_caching_disabled_by_param(self, vector):
"""Test that the HDFS file handle cache is disabled when the parameter is zero"""
@@ -173,7 +171,8 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--cache_remote_file_handles=false --cache_s3_file_handles=false "
- "--cache_abfs_file_handles=false --hostname=" + get_external_ip(),
+ "--cache_abfs_file_handles=false --cache_ozone_file_handles=false "
+ "--hostname=" + get_external_ip(),
catalogd_args="--load_catalog_in_background=false")
def test_remote_caching_disabled_by_param(self, vector):
"""Test that the file handle cache is disabled for remote files when disabled"""
@@ -183,8 +182,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
- impalad_args="--max_cached_file_handles=0 --cache_ozone_file_handles=true "
- "--hostname=" + get_external_ip(),
+ impalad_args="--max_cached_file_handles=0 --hostname=" + get_external_ip(),
catalogd_args="--load_catalog_in_background=false")
def test_remote_caching_disabled_by_global_param(self, vector):
"""Test that the file handle cache is disabled for remote files when all caching is
@@ -196,7 +194,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5 "
- "--always_use_data_cache=true --cache_ozone_file_handles=true",
+ "--always_use_data_cache=true",
start_args="--data_cache_dir=/tmp --data_cache_size=500MB",
catalogd_args="--load_catalog_in_background=false")
def test_no_fd_caching_on_cached_data(self, vector):