You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/02/07 04:47:25 UTC

[impala] branch master updated (cbddda4 -> 255ec46)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from cbddda4  IMPALA-8162: Add memory reserved and admitted to the backends debug page
     new 2cf66cf  IMPALA-8169: small changes to Leopard
     new 255ec46  IMPALA-7265: Enable caching of remote file handles by default

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/runtime/io/disk-io-mgr.cc              |  7 +++----
 tests/comparison/leopard/controller.py        | 12 ++++++------
 tests/comparison/leopard/impala_docker_env.py |  7 ++++---
 3 files changed, 13 insertions(+), 13 deletions(-)


[impala] 02/02: IMPALA-7265: Enable caching of remote file handles by default

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 255ec4687ebe6195b20e5566394f3692c07e3b7f
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Wed Feb 6 12:41:23 2019 -0800

    IMPALA-7265: Enable caching of remote file handles by default
    
    This changes the default value of cache_remote_file_handles
    from false to true. Testing shows that this setting has a
    major impact on performance for clusters that do remote HDFS
    reads. Hand testing of the cache did not reveal any problems
    with the semantics of caching remote file handles.
    
    Change-Id: I2fc4a69c6bf721017f4adcdc302db9eace5135a4
    Reviewed-on: http://gerrit.cloudera.org:8080/12387
    Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/io/disk-io-mgr.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index cad2e65..ce56be0 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -127,10 +127,9 @@ DEFINE_uint64(unused_file_handle_timeout_sec, 21600, "Maximum time, in seconds,
 DEFINE_uint64(num_file_handle_cache_partitions, 16, "Number of partitions used by the "
     "file handle cache.");
 
-// Given the extra complexity of remote accesses and semantics, caching for remote HDFS
-// file handles is currently not enabled by default. This parameter enables caching
-// for remote HDFS file handles. It does not impact S3, ADLS, or ABFS file handles.
-DEFINE_bool(cache_remote_file_handles, false, "Enable the file handle cache for "
+// This parameter controls whether remote HDFS file handles are cached. It does not impact
+// S3, ADLS, or ABFS file handles. This is enabled by default.
+DEFINE_bool(cache_remote_file_handles, true, "Enable the file handle cache for "
     "remote HDFS files.");
 
 AtomicInt32 DiskIoMgr::next_disk_id_;


[impala] 01/02: IMPALA-8169: small changes to Leopard

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2cf66cfc49bb53f044289258e741ce4dd5ded6a9
Author: Michael Brown <mi...@cloudera.com>
AuthorDate: Wed Feb 6 11:44:18 2019 -0800

    IMPALA-8169: small changes to Leopard
    
    - Fix a bug in which rsync --chown doesn't work on CentOS 7.
    
    - Update HOST_TESTDATA_EXTERNAL_VOLUME_PATH (for the minicluster data):
      most runs are now on EC2 and similar hosts, which already need a large
      volume for Docker images, so keep the cluster data there, too.
    
    - Reduce extremely verbose logging.
    
    - Default to a database that's part of dataload (tpch_kudu).
    
    - Change some of the controller variables to my preferred defaults.
    
    Change-Id: I169f60dad53d2e4980ed6bd1f350fb0dcf274306
    Testing: Regular downstream runs for months.
    Reviewed-on: http://gerrit.cloudera.org:8080/12386
    Reviewed-by: David Knupp <dk...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/comparison/leopard/controller.py        | 12 ++++++------
 tests/comparison/leopard/impala_docker_env.py |  7 ++++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/tests/comparison/leopard/controller.py b/tests/comparison/leopard/controller.py
index 5ac041d..24e18ce 100755
--- a/tests/comparison/leopard/controller.py
+++ b/tests/comparison/leopard/controller.py
@@ -31,8 +31,8 @@ PATH_TO_REPORTS = '/tmp/query_gen/reports'
 PATH_TO_FINISHED_JOBS = '/tmp/query_gen/completed_jobs'
 PATH_TO_LOG = '/tmp/query_gen/log'
 RUN_TIME_LIMIT = 12 * 3600
-GENERATION_FREQUENCY = RUN_TIME_LIMIT
-MAX_CONCURRENCY = 2
+GENERATION_FREQUENCY = 300 + RUN_TIME_LIMIT
+MAX_CONCURRENCY = 1
 DEFAULT_RUN_NAME = 'AUTO_RUN'
 SLEEP_LENGTH = 3
 
@@ -40,9 +40,9 @@ NESTED_TYPES_MODE = False
 DELETE_SCHEDULE_ITEMS_ON_STARTUP = True
 SHOULD_BUILD_IMPALA = True
 SHOULD_LOAD_DATA = False
-SHOULD_PULL_DOCKER_IMAGE = True
-DATABASE_NAME = 'randomness'
-POSTGRES_DATABASE_NAME = 'randomness'
+SHOULD_PULL_DOCKER_IMAGE = False
+DATABASE_NAME = 'tpch_kudu'
+POSTGRES_DATABASE_NAME = 'tpch_kudu'
 
 LOG = logging.getLogger('Controller')
 
@@ -161,7 +161,7 @@ class Controller(object):
 
 if __name__ == '__main__':
   controller = Controller()
-  logging.basicConfig(level=logging.DEBUG,
+  logging.basicConfig(level=logging.INFO,
       filename=PATH_TO_LOG,
       format='%(asctime)s %(threadName)s:%(module)s[%(lineno)s]:%(message)s',
       datefmt='%H:%M:%S')
diff --git a/tests/comparison/leopard/impala_docker_env.py b/tests/comparison/leopard/impala_docker_env.py
index a837c00..cc5fdc8 100755
--- a/tests/comparison/leopard/impala_docker_env.py
+++ b/tests/comparison/leopard/impala_docker_env.py
@@ -50,7 +50,7 @@ DOCKER_IMPALA_USER_GID = int(os.environ.get(
 
 HOST_TESTDATA_EXTERNAL_VOLUME_PATH = normpath(os.environ.get(
     'HOST_TESTDATA_EXTERNAL_VOLUME_PATH',
-    os.path.sep + join_path('data', '1', 'dockervols', 'cluster')))
+    os.path.sep + join_path('var', 'lib', 'docker', 'scratch', 'cluster')))
 
 DEFAULT_DOCKER_TESTDATA_VOLUME_PATH = os.path.sep + join_path(
     'home', DOCKER_USER_NAME, 'Impala', 'testdata', 'cluster')
@@ -312,8 +312,9 @@ class ImpalaDockerEnv(object):
             'mkdir -p {host_testdata_path} && '
             'rsync -e "ssh -i {priv_key} -o StrictHostKeyChecking=no '
             ''         '-o UserKnownHostsFile=/dev/null -p {ssh_port}" '
-            '--delete --archive --verbose --progress --chown={uid}:{gid} '
-            '{user}@127.0.0.1:{container_testdata_path} {host_testdata_path}'.format(
+            '--delete --archive --verbose --progress '
+            '{user}@127.0.0.1:{container_testdata_path} {host_testdata_path} && '
+            'chown -R {uid}:{gid} {host_testdata_path}'.format(
                 host_testdata_path=HOST_TESTDATA_EXTERNAL_VOLUME_PATH,
                 priv_key=HOST_TO_DOCKER_SSH_KEY,
                 ssh_port=self.ssh_port,