You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aw...@apache.org on 2015/05/28 19:36:49 UTC
hadoop git commit: HADOOP-11406. xargs -P is not portable (Kengo Seki via aw)
Repository: hadoop
Updated Branches:
refs/heads/trunk 3077c299d -> 5504a261f
HADOOP-11406. xargs -P is not portable (Kengo Seki via aw)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5504a261
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5504a261
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5504a261
Branch: refs/heads/trunk
Commit: 5504a261f829cf2e7b70970246bf5a55c172be84
Parents: 3077c29
Author: Allen Wittenauer <aw...@apache.org>
Authored: Thu May 28 10:36:40 2015 -0700
Committer: Allen Wittenauer <aw...@apache.org>
Committed: Thu May 28 10:36:40 2015 -0700
----------------------------------------------------------------------
hadoop-common-project/hadoop-common/CHANGES.txt | 2 +
.../src/main/bin/hadoop-functions.sh | 42 +++++++++++---------
.../main/conf/hadoop-user-functions.sh.example | 29 +++++++++++++-
3 files changed, 54 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5504a261/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index eb1db29..7da02ed 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -466,6 +466,8 @@ Trunk (Unreleased)
HADOOP-9891. CLIMiniCluster instructions fail with MiniYarnCluster
ClassNotFoundException (Darrell Taylor via aw)
+ HADOOP-11406. xargs -P is not portable (Kengo Seki via aw)
+
OPTIMIZATIONS
HADOOP-7761. Improve the performance of raw comparisons. (todd)
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5504a261/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
index 67e8870..5556f2f 100644
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
@@ -461,27 +461,33 @@ function hadoop_connect_to_hosts
if [[ -z "${SLAVE_NAMES}" ]]; then
SLAVE_NAMES=$(sed 's/#.*$//;/^$/d' "${SLAVE_FILE}")
fi
-
- # quoting here gets tricky. it's easier to push it into a function
- # so that we don't have to deal with it. However...
- # xargs can't use a function so instead we'll export it out
- # and force it into a subshell
- # moral of the story: just use pdsh.
- export -f hadoop_actual_ssh
- export HADOOP_SSH_OPTS
-
- # xargs is used with option -I to replace the placeholder in arguments
- # list with each hostname read from stdin/pipe. But it consider one
- # line as one argument while reading from stdin/pipe. So place each
- # hostname in different lines while passing via pipe.
- SLAVE_NAMES=$(echo "$SLAVE_NAMES" | tr ' ' '\n' )
- echo "${SLAVE_NAMES}" | \
- xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
- -I {} bash -c -- "hadoop_actual_ssh {} ${params}"
- wait
+ hadoop_connect_to_hosts_without_pdsh "${params}"
fi
}
+## @description Connect to ${SLAVE_NAMES} and execute command
+## @description under the environment which does not support pdsh.
+## @audience private
+## @stability evolving
+## @replaceable yes
+## @param command
+## @param [...]
+function hadoop_connect_to_hosts_without_pdsh
+{
+ # shellcheck disable=SC2124
+ local params="$@"
+ local slaves=(${SLAVE_NAMES})
+ for (( i = 0; i < ${#slaves[@]}; i++ ))
+ do
+ if (( i != 0 && i % HADOOP_SSH_PARALLEL == 0 )); then
+ wait
+ fi
+ # shellcheck disable=SC2086
+ hadoop_actual_ssh "${slaves[$i]}" ${params} &
+ done
+ wait
+}
+
## @description Utility routine to handle --slaves mode
## @audience private
## @stability evolving
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5504a261/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
index b2f78f8..3cf5776 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
@@ -50,7 +50,7 @@
#
#
-# Another example: finding java
+# Example: finding java
#
# By default, Hadoop assumes that $JAVA_HOME is always defined
# outside of its configuration. Eons ago, Apple standardized
@@ -85,3 +85,30 @@
# exit 1
# fi
#}
+
+#
+# Example: efficient command execution for the slaves
+#
+# To improve performance, you can use xargs -P (a non-portable option)
+# instead of the default batched for loop, if your xargs supports it.
+#
+#function hadoop_connect_to_hosts_without_pdsh
+#{
+# # quoting here gets tricky. it's easier to push it into a function
+# # so that we don't have to deal with it. However...
+# # xargs can't use a function so instead we'll export it out
+# # and force it into a subshell
+# # moral of the story: just use pdsh.
+# export -f hadoop_actual_ssh
+# export HADOOP_SSH_OPTS
+#
+# # xargs is used with option -I to replace the placeholder in the argument
+# # list with each hostname read from stdin/pipe. But it considers each
+# # line to be one argument when reading from stdin/pipe, so place each
+# # hostname on its own line when passing the list via the pipe.
+# SLAVE_NAMES=$(echo "$SLAVE_NAMES" | tr ' ' '\n' )
+# echo "${SLAVE_NAMES}" | \
+# xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
+# -I {} bash -c -- "hadoop_actual_ssh {} ${params}"
+# wait
+#}