You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/02/07 04:47:25 UTC
[impala] branch master updated (cbddda4 -> 255ec46)
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.
from cbddda4 IMPALA-8162: Add memory reserved and admitted to the backends debug page
new 2cf66cf IMPALA-8169: small changes to Leopard
new 255ec46 IMPALA-7265: Enable caching of remote file handles by default
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
be/src/runtime/io/disk-io-mgr.cc | 7 +++----
tests/comparison/leopard/controller.py | 12 ++++++------
tests/comparison/leopard/impala_docker_env.py | 7 ++++---
3 files changed, 13 insertions(+), 13 deletions(-)
[impala] 02/02: IMPALA-7265: Enable caching of remote file handles by default
Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 255ec4687ebe6195b20e5566394f3692c07e3b7f
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Wed Feb 6 12:41:23 2019 -0800
IMPALA-7265: Enable caching of remote file handles by default
This changes the default value of cache_remote_file_handles
from false to true. Testing shows that this setting has a
major impact on performance for clusters that do remote HDFS
reads. Hand testing of the cache did not reveal any problems
with the semantics of caching remote file handles.
Change-Id: I2fc4a69c6bf721017f4adcdc302db9eace5135a4
Reviewed-on: http://gerrit.cloudera.org:8080/12387
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/runtime/io/disk-io-mgr.cc | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index cad2e65..ce56be0 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -127,10 +127,9 @@ DEFINE_uint64(unused_file_handle_timeout_sec, 21600, "Maximum time, in seconds,
DEFINE_uint64(num_file_handle_cache_partitions, 16, "Number of partitions used by the "
"file handle cache.");
-// Given the extra complexity of remote accesses and semantics, caching for remote HDFS
-// file handles is currently not enabled by default. This parameter enables caching
-// for remote HDFS file handles. It does not impact S3, ADLS, or ABFS file handles.
-DEFINE_bool(cache_remote_file_handles, false, "Enable the file handle cache for "
+// This parameter controls whether remote HDFS file handles are cached. It does not impact
+// S3, ADLS, or ABFS file handles. This is enabled by default.
+DEFINE_bool(cache_remote_file_handles, true, "Enable the file handle cache for "
"remote HDFS files.");
AtomicInt32 DiskIoMgr::next_disk_id_;
[impala] 01/02: IMPALA-8169: small changes to Leopard
Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2cf66cfc49bb53f044289258e741ce4dd5ded6a9
Author: Michael Brown <mi...@cloudera.com>
AuthorDate: Wed Feb 6 11:44:18 2019 -0800
IMPALA-8169: small changes to Leopard
- Fix a bug in which rsync --chown doesn't work on CentOS 7.
- Update HOST_TESTDATA_EXTERNAL_VOLUME_PATH (for the minicluster data):
most runs are now on EC2 and similar hosts, which already need a large
volume for docker images, so just keep the cluster data there, too.
- Reduce extremely verbose logging.
- Default to a database that's part of dataload (tpch_kudu).
- Change some of the controller variables to my preferred defaults.
Change-Id: I169f60dad53d2e4980ed6bd1f350fb0dcf274306
Testing: Regular downstream runs for months.
Reviewed-on: http://gerrit.cloudera.org:8080/12386
Reviewed-by: David Knupp <dk...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/comparison/leopard/controller.py | 12 ++++++------
tests/comparison/leopard/impala_docker_env.py | 7 ++++---
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/tests/comparison/leopard/controller.py b/tests/comparison/leopard/controller.py
index 5ac041d..24e18ce 100755
--- a/tests/comparison/leopard/controller.py
+++ b/tests/comparison/leopard/controller.py
@@ -31,8 +31,8 @@ PATH_TO_REPORTS = '/tmp/query_gen/reports'
PATH_TO_FINISHED_JOBS = '/tmp/query_gen/completed_jobs'
PATH_TO_LOG = '/tmp/query_gen/log'
RUN_TIME_LIMIT = 12 * 3600
-GENERATION_FREQUENCY = RUN_TIME_LIMIT
-MAX_CONCURRENCY = 2
+GENERATION_FREQUENCY = 300 + RUN_TIME_LIMIT
+MAX_CONCURRENCY = 1
DEFAULT_RUN_NAME = 'AUTO_RUN'
SLEEP_LENGTH = 3
@@ -40,9 +40,9 @@ NESTED_TYPES_MODE = False
DELETE_SCHEDULE_ITEMS_ON_STARTUP = True
SHOULD_BUILD_IMPALA = True
SHOULD_LOAD_DATA = False
-SHOULD_PULL_DOCKER_IMAGE = True
-DATABASE_NAME = 'randomness'
-POSTGRES_DATABASE_NAME = 'randomness'
+SHOULD_PULL_DOCKER_IMAGE = False
+DATABASE_NAME = 'tpch_kudu'
+POSTGRES_DATABASE_NAME = 'tpch_kudu'
LOG = logging.getLogger('Controller')
@@ -161,7 +161,7 @@ class Controller(object):
if __name__ == '__main__':
controller = Controller()
- logging.basicConfig(level=logging.DEBUG,
+ logging.basicConfig(level=logging.INFO,
filename=PATH_TO_LOG,
format='%(asctime)s %(threadName)s:%(module)s[%(lineno)s]:%(message)s',
datefmt='%H:%M:%S')
diff --git a/tests/comparison/leopard/impala_docker_env.py b/tests/comparison/leopard/impala_docker_env.py
index a837c00..cc5fdc8 100755
--- a/tests/comparison/leopard/impala_docker_env.py
+++ b/tests/comparison/leopard/impala_docker_env.py
@@ -50,7 +50,7 @@ DOCKER_IMPALA_USER_GID = int(os.environ.get(
HOST_TESTDATA_EXTERNAL_VOLUME_PATH = normpath(os.environ.get(
'HOST_TESTDATA_EXTERNAL_VOLUME_PATH',
- os.path.sep + join_path('data', '1', 'dockervols', 'cluster')))
+ os.path.sep + join_path('var', 'lib', 'docker', 'scratch', 'cluster')))
DEFAULT_DOCKER_TESTDATA_VOLUME_PATH = os.path.sep + join_path(
'home', DOCKER_USER_NAME, 'Impala', 'testdata', 'cluster')
@@ -312,8 +312,9 @@ class ImpalaDockerEnv(object):
'mkdir -p {host_testdata_path} && '
'rsync -e "ssh -i {priv_key} -o StrictHostKeyChecking=no '
'' '-o UserKnownHostsFile=/dev/null -p {ssh_port}" '
- '--delete --archive --verbose --progress --chown={uid}:{gid} '
- '{user}@127.0.0.1:{container_testdata_path} {host_testdata_path}'.format(
+ '--delete --archive --verbose --progress '
+ '{user}@127.0.0.1:{container_testdata_path} {host_testdata_path} && '
+ 'chown -R {uid}:{gid} {host_testdata_path}'.format(
host_testdata_path=HOST_TESTDATA_EXTERNAL_VOLUME_PATH,
priv_key=HOST_TO_DOCKER_SSH_KEY,
ssh_port=self.ssh_port,