You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aw...@apache.org on 2015/05/28 19:36:49 UTC

hadoop git commit: HADOOP-11406. xargs -P is not portable (Kengo Seki via aw)

Repository: hadoop
Updated Branches:
  refs/heads/trunk 3077c299d -> 5504a261f


HADOOP-11406. xargs -P is not portable (Kengo Seki via aw)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5504a261
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5504a261
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5504a261

Branch: refs/heads/trunk
Commit: 5504a261f829cf2e7b70970246bf5a55c172be84
Parents: 3077c29
Author: Allen Wittenauer <aw...@apache.org>
Authored: Thu May 28 10:36:40 2015 -0700
Committer: Allen Wittenauer <aw...@apache.org>
Committed: Thu May 28 10:36:40 2015 -0700

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  2 +
 .../src/main/bin/hadoop-functions.sh            | 42 +++++++++++---------
 .../main/conf/hadoop-user-functions.sh.example  | 29 +++++++++++++-
 3 files changed, 54 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5504a261/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index eb1db29..7da02ed 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -466,6 +466,8 @@ Trunk (Unreleased)
     HADOOP-9891. CLIMiniCluster instructions fail with MiniYarnCluster
     ClassNotFoundException (Darrell Taylor via aw)
 
+    HADOOP-11406. xargs -P is not portable (Kengo Seki via aw)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5504a261/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
index 67e8870..5556f2f 100644
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
@@ -461,27 +461,33 @@ function hadoop_connect_to_hosts
     if [[ -z "${SLAVE_NAMES}" ]]; then
       SLAVE_NAMES=$(sed 's/#.*$//;/^$/d' "${SLAVE_FILE}")
     fi
-
-    # quoting here gets tricky. it's easier to push it into a function
-    # so that we don't have to deal with it. However...
-    # xargs can't use a function so instead we'll export it out
-    # and force it into a subshell
-    # moral of the story: just use pdsh.
-    export -f hadoop_actual_ssh
-    export HADOOP_SSH_OPTS
-
-    # xargs is used with option -I to replace the placeholder in arguments
-    # list with each hostname read from stdin/pipe. But it consider one
-    # line as one argument while reading from stdin/pipe. So place each
-    # hostname in different lines while passing via pipe.
-    SLAVE_NAMES=$(echo "$SLAVE_NAMES" | tr ' ' '\n' )
-    echo "${SLAVE_NAMES}" | \
-    xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
-    -I {} bash -c --  "hadoop_actual_ssh {} ${params}"
-    wait
+    hadoop_connect_to_hosts_without_pdsh "${params}"
   fi
 }
 
+## @description  Connect to each host in ${SLAVE_NAMES} and execute a command
+## @description  in an environment that does not support pdsh.
+## @audience     private
+## @stability    evolving
+## @replaceable  yes
+## @param        command
+## @param        [...]
+function hadoop_connect_to_hosts_without_pdsh
+{
+  # shellcheck disable=SC2124
+  local params="$@"
+  local slaves=(${SLAVE_NAMES})
+  for (( i = 0; i < ${#slaves[@]}; i++ ))
+  do
+    if (( i != 0 && i % HADOOP_SSH_PARALLEL == 0 )); then
+      wait
+    fi
+    # shellcheck disable=SC2086
+    hadoop_actual_ssh "${slaves[$i]}" ${params} &
+  done
+  wait
+}
+
 ## @description  Utility routine to handle --slaves mode
 ## @audience     private
 ## @stability    evolving

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5504a261/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
index b2f78f8..3cf5776 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
@@ -50,7 +50,7 @@
 #
 
 #
-# Another example:  finding java
+# Example:  finding java
 #
 # By default, Hadoop assumes that $JAVA_HOME is always defined
 # outside of its configuration. Eons ago, Apple standardized
@@ -85,3 +85,30 @@
 #    exit 1
 #  fi
 #}
+
+#
+# Example:  efficient command execution for the slaves
+#
+# To improve performance, you can use xargs -P
+# instead of the for loop, if supported.
+#
+#function hadoop_connect_to_hosts_without_pdsh
+#{
+#  # quoting here gets tricky. it's easier to push it into a function
+#  # so that we don't have to deal with it. However...
+#  # xargs can't use a function so instead we'll export it out
+#  # and force it into a subshell
+#  # moral of the story: just use pdsh.
+#  export -f hadoop_actual_ssh
+#  export HADOOP_SSH_OPTS
+#
+#  # xargs is used with option -I to replace the placeholder in the argument
+#  # list with each hostname read from stdin/pipe. But it considers one
+#  # line as one argument while reading from stdin/pipe. So place each
+#  # hostname on its own line when passing them via the pipe.
+#  SLAVE_NAMES=$(echo "$SLAVE_NAMES" | tr ' ' '\n' )
+#  echo "${SLAVE_NAMES}" | \
+#  xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
+#  -I {} bash -c --  "hadoop_actual_ssh {} ${params}"
+#  wait
+#}