You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/10/15 23:34:30 UTC

[impala] branch master updated: IMPALA-9034: fix distcc+ccache

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ae5f98  IMPALA-9034: fix distcc+ccache
3ae5f98 is described below

commit 3ae5f98420d685365e6fee6880732ee825492fd2
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Wed Oct 9 17:18:49 2019 -0700

    IMPALA-9034: fix distcc+ccache
    
    According to the ccache manpage, the correct way to compose it with
    distcc is to use CCACHE_PREFIX. I think this explains why local
    ccache wasn't working for me.
    
    I updated the distcc wrapper scripts to do this instead and
    confirmed that it works - after doing a clean and rebuilding
    the same branch, it is much faster and "ccache -s" shows a
    bunch of cache hit.
    
    ccache on the distcc server side still works.
    
    Change-Id: Ie0b080709bd765056b9296d3deb805038fc01e5d
    Reviewed-on: http://gerrit.cloudera.org:8080/14408
    Reviewed-by: Andrew Sherman <as...@cloudera.com>
    Tested-by: Tim Armstrong <ta...@cloudera.com>
---
 be/CMakeLists.txt        |  6 -----
 bin/distcc/distcc.sh     | 10 +++++++-
 bin/distcc/distcc_env.sh | 67 ++++++++++++++++++++++++++----------------------
 3 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 81151b6..978cae3 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -199,12 +199,6 @@ if (CCACHE AND NOT DEFINED ENV{DISABLE_CCACHE})
   endif()
 endif()
 
-if(DEFINED ENV{IMPALA_DISTCC_ENABLED})
-  if ($ENV{IMPALA_DISTCC_ENABLED} STREQUAL "true")
-    SET(RULE_LAUNCH_PREFIX "${RULE_LAUNCH_PREFIX} $ENV{IMPALA_HOME}/bin/distcc/distcc.sh")
-  endif()
-endif()
-
 set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${RULE_LAUNCH_PREFIX})
 set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${RULE_LAUNCH_PREFIX})
 
diff --git a/bin/distcc/distcc.sh b/bin/distcc/distcc.sh
index bfe7fc1..eb5728c 100755
--- a/bin/distcc/distcc.sh
+++ b/bin/distcc/distcc.sh
@@ -35,6 +35,13 @@ if [[ -z "$DISTCC_HOSTS" || -z "$CXX_COMPILER" ]]; then
   exit 1
 fi
 
+EXPECTED_VERSION=3
+if [[ -z "${IMPALA_DISTCC_ENV_VERSION:-}" || \
+      "$IMPALA_DISTCC_ENV_VERSION" -ne $EXPECTED_VERSION ]]; then
+  echo "Expected IMPALA_DISTCC_ENV_VERSION=$EXPECTED_VERSION, re-source distcc_env.sh"
+  exit 1
+fi
+
 REMOTE_TOOLCHAIN_DIR=/opt/Impala-Toolchain
 
 if $IMPALA_DISTCC_LOCAL; then
@@ -47,4 +54,5 @@ else
   CMD="$CXX_COMPILER"
 fi
 
-exec $CMD "$@"
+# Disable CCACHE_PREFIX to avoid recursive distcc invocations.
+CCACHE_PREFIX="" exec $CMD "$@"
diff --git a/bin/distcc/distcc_env.sh b/bin/distcc/distcc_env.sh
index 92e27a7..ebbac65 100644
--- a/bin/distcc/distcc_env.sh
+++ b/bin/distcc/distcc_env.sh
@@ -48,6 +48,16 @@ if ! cmd_exists distcc; then
   fi
 fi
 
+# Cleans old CMake files, this is required after installing ccache, since we need
+# CMake to insert ccache into all the command lines.
+function clean_cmake_files {
+  if [[ -z "$IMPALA_HOME" || ! -d "$IMPALA_HOME" ]]; then
+    echo IMPALA_HOME=$IMPALA_HOME is not valid. 1>&2
+    return 1
+  fi
+  $IMPALA_HOME/bin/clean-cmake.sh
+}
+
 # Install CCache if necessary.
 if ! cmd_exists ccache; then
   echo "ccache command not found, attempting installation"
@@ -55,6 +65,12 @@ if ! cmd_exists ccache; then
     echo "Unable to automatically install ccache"
     return 1
   fi
+  if ! clean_cmake_files; then
+    echo Failed to clean cmake files. 1>&2
+    return 1
+  fi
+  echo "distcc is not fully enabled, run 'buildall.sh' to complete the change." \
+    "Run 'disable_distcc' or 'switch_compiler local' to disable."
 fi
 
 # Don't include localhost in the list. It is already the slowest part of the build because
@@ -66,66 +82,55 @@ DISTCC_HOSTS+=" --localslots_cpp=$(nproc)"
 DISTCC_HOSTS+=" --randomize"
 DISTCC_HOSTS+=" ${BUILD_FARM}"
 
-# Set to true to enable the distcc.sh wrapper script. Takes effect when CMake is next run.
-export IMPALA_DISTCC_ENABLED=true
-
 # Set to false to make distcc.sh use local compilation instead of distcc. Takes effect
 # immediately if the distcc.sh wrapper script is used.
 : ${IMPALA_DISTCC_LOCAL=}
 export IMPALA_DISTCC_LOCAL
 
+# This is picked up by ccache as the command it should prefix all compiler invocations
+# with. We only support enabling distcc if ccache is available, so pointing this at
+# our distcc wrapper script is sufficient to enable distcc.
+export CCACHE_PREFIX=""
+
 # Even after generating make files, some state about compiler options would only exist in
 # environment vars. Any such vars should be saved to this file so they can be restored.
 if [[ -z "$IMPALA_HOME" ]]; then
   echo '$IMPALA_HOME must be set before sourcing this file.' 1>&2
   return 1
 fi
-IMPALA_COMPILER_CONFIG_FILE="$IMPALA_HOME/.impala_compiler_opts_v2"
+
+export IMPALA_DISTCC_ENV_VERSION=3
+IMPALA_COMPILER_CONFIG_FILE="$IMPALA_HOME/.impala_compiler_opts_v${IMPALA_DISTCC_ENV_VERSION}"
 
 # Completely disable anything that could have been setup using this script and clean
 # the make files.
 function disable_distcc {
-  export IMPALA_DISTCC_ENABLED=false
   export IMPALA_BUILD_THREADS=$(nproc)
   save_compiler_opts
-  if ! clean_cmake_files; then
-    echo Failed to clean cmake files. 1>&2
-    return 1
-  fi
-  echo "distcc is not fully disabled, run 'buildall.sh' to complete the change." \
-    "Run 'enable_distcc' to enable."
 }
 
 function enable_distcc {
-  export IMPALA_DISTCC_ENABLED=true
   switch_compiler distcc
-  if ! clean_cmake_files; then
-    echo Failed to clean cmake files. 1>&2
-    return 1
-  fi
-  echo "distcc is not fully enabled, run 'buildall.sh' to complete the change." \
-    "Run 'disable_distcc' or 'switch_compiler local' to disable."
-}
-
-# Cleans old CMake files, this is required when switching between distcc.sh and direct
-# compilation.
-function clean_cmake_files {
-  if [[ -z "$IMPALA_HOME" || ! -d "$IMPALA_HOME" ]]; then
-    echo IMPALA_HOME=$IMPALA_HOME is not valid. 1>&2
-    return 1
-  fi
-  $IMPALA_HOME/bin/clean-cmake.sh
 }
 
 function switch_compiler {
   for ARG in "$@"; do
     case "$ARG" in
       "local")
+        export
         IMPALA_DISTCC_LOCAL=false
-        IMPALA_BUILD_THREADS=$(nproc);;
+        IMPALA_BUILD_THREADS=$(nproc)
+        CCACHE_PREFIX=""
+        ;;
       distcc)
         IMPALA_DISTCC_LOCAL=true
-        IMPALA_BUILD_THREADS=$(distcc -j);;
+        echo HERE
+        local DISTCC_SLOT_COUNT=$(distcc -j)
+        # Set the parallelism based on the number of distcc slots, but cap it to avoid
+        # overwhelming this host  if a large distcc cluster is configured.
+        IMPALA_BUILD_THREADS=$((DISTCC_SLOT_COUNT < 128 ? DISTCC_SLOT_COUNT : 128))
+        CCACHE_PREFIX="${IMPALA_HOME}/bin/distcc/distcc.sh"
+        ;;
       *) echo "Valid compiler options are:
     'local'  - Don't use distcc and set -j value to $(nproc).
     'distcc' - Use distcc and set -j value to $(distcc -j)." 2>&1
@@ -138,7 +143,7 @@ function switch_compiler {
 function save_compiler_opts {
   rm -f "$IMPALA_COMPILER_CONFIG_FILE"
   cat <<EOF > "$IMPALA_COMPILER_CONFIG_FILE"
-IMPALA_DISTCC_ENABLED=$IMPALA_DISTCC_ENABLED
+CCACHE_PREFIX=$CCACHE_PREFIX
 IMPALA_BUILD_THREADS=$IMPALA_BUILD_THREADS
 IMPALA_DISTCC_LOCAL=$IMPALA_DISTCC_LOCAL
 EOF