You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2019/05/10 12:04:38 UTC

[impala] 01/02: Drop statestore update frequency during data loading

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9075099c27f68e4a9fd35c6db76d36dae3301643
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Tue May 7 00:33:21 2019 -0700

    Drop statestore update frequency during data loading
    
    The statestore update interval (--statestore_update_frequency_ms) is
    the limiting factor in most DDL statements. Lowering it to 50 ms while
    loading data improved the speed of an incremental data load of the
    functional dataset by roughly 5-10x on my machine in the case where
    data had previously been loaded.
    
    Change-Id: I8931a88aa04e0b4e8ef26a92bfe50a539a3c2505
    Reviewed-on: http://gerrit.cloudera.org:8080/13260
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 testdata/bin/create-load-data.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 9796d66..c2122d0 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -142,7 +142,8 @@ echo "REMOTE_LOAD=${REMOTE_LOAD:-}"
 
 function start-impala {
   : ${START_CLUSTER_ARGS=""}
-  START_CLUSTER_ARGS_INT=""
+  # Use a fast statestore update so that DDL operations run faster.
+  START_CLUSTER_ARGS_INT="--state_store_args=--statestore_update_frequency_ms=50"
   if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
     START_CLUSTER_ARGS_INT+=("--impalad_args=--abort_on_config_error=false -s 1")
   else