You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by dl...@apache.org on 2016/06/08 12:22:47 UTC

accumulo git commit: ACCUMULO-4328: Modify scripts to allow for multiple tablet servers per host

Repository: accumulo
Updated Branches:
  refs/heads/1.8 7b8a11bf7 -> 60591c833


ACCUMULO-4328: Modify scripts to allow for multiple tablet servers per host


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/60591c83
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/60591c83
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/60591c83

Branch: refs/heads/1.8
Commit: 60591c833f05f2ab4613651ebdf5a57b653efb7b
Parents: 7b8a11b
Author: Dave Marion <dl...@apache.org>
Authored: Wed Jun 8 08:21:34 2016 -0400
Committer: Dave Marion <dl...@apache.org>
Committed: Wed Jun 8 08:21:34 2016 -0400

----------------------------------------------------------------------
 assemble/bin/accumulo                           | 15 ++++-
 assemble/bin/config.sh                          | 14 ++++
 assemble/bin/start-daemon.sh                    | 71 +++++++++++++++-----
 assemble/bin/stop-server.sh                     | 25 ++++---
 assemble/conf/templates/accumulo-env.sh         | 12 ++++
 assemble/conf/templates/generic_logger.xml      |  4 +-
 .../main/asciidoc/chapters/administration.txt   | 49 ++++++--------
 7 files changed, 130 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/60591c83/assemble/bin/accumulo
----------------------------------------------------------------------
diff --git a/assemble/bin/accumulo b/assemble/bin/accumulo
index 1e29017..91298d1 100755
--- a/assemble/bin/accumulo
+++ b/assemble/bin/accumulo
@@ -105,7 +105,7 @@ fi
 case "$1" in
 master)  export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_MASTER_OPTS}" ;;
 gc)      export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_GC_OPTS}" ;;
-tserver) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_TSERVER_OPTS}" ;;
+tserver*) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_TSERVER_OPTS}" ;;
 monitor) export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_MONITOR_OPTS}" ;;
 shell)   export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_SHELL_OPTS}" ;;
 *)       export ACCUMULO_OPTS="${ACCUMULO_GENERAL_OPTS} ${ACCUMULO_OTHER_OPTS}" ;;
@@ -151,10 +151,23 @@ fi
 # Export the variables just in case they are not exported
 # This makes them available to java
 export JAVA_HOME HADOOP_PREFIX ZOOKEEPER_HOME LD_LIBRARY_PATH DYLD_LIBRARY_PATH
+
+# Strip the instance from $1
+APP=$1
+INSTANCE="1"
+if [[ "$1" =~ .*-.* ]]; then
+  APP=`echo $1 | cut -d"-" -f1`
+  INSTANCE=`echo $1 | cut -d"-" -f2`
+
+  # Rewrite the positional arguments so that $1 is the app name without the instance suffix
+  set -- "$APP" "${@:2}"
+fi
+
 #
 # app isn't used anywhere, but it makes the process easier to spot when ps/top/snmp truncate the command line
 JAVA="${JAVA_HOME}/bin/java"
 exec "$JAVA" "-Dapp=$1" \
+   "-Dinstance=$INSTANCE" \
    $ACCUMULO_OPTS \
    -classpath "${CLASSPATH}" \
    -XX:OnOutOfMemoryError="${ACCUMULO_KILL_CMD:-kill -9 %p}" \

http://git-wip-us.apache.org/repos/asf/accumulo/blob/60591c83/assemble/bin/config.sh
----------------------------------------------------------------------
diff --git a/assemble/bin/config.sh b/assemble/bin/config.sh
index 2299a12..ae4b4ef 100755
--- a/assemble/bin/config.sh
+++ b/assemble/bin/config.sh
@@ -118,6 +118,20 @@ else
   export NUMA_CMD=""
 fi
 
+# Sanity checks for NUM_TSERVERS and TSERVER_NUMA_OPTIONS
+if [[ -z $NUM_TSERVERS ]]; then
+   echo "NUM_TSERVERS is missing in accumulo-env.sh, please check your configuration."
+   exit 1
+fi
+if [[ $NUM_TSERVERS -eq 1 && -n $TSERVER_NUMA_OPTIONS ]]; then
+   echo "TSERVER_NUMA_OPTIONS declared when NUM_TSERVERS is 1, use ACCUMULO_NUMACTL_OPTIONS instead"
+   exit 1
+fi
+if [[ $NUM_TSERVERS -gt 1 && -n $TSERVER_NUMA_OPTIONS && ${#TSERVER_NUMA_OPTIONS[*]} -ne $NUM_TSERVERS ]]; then
+   echo "TSERVER_NUMA_OPTIONS is declared, but not the same size as NUM_TSERVERS"
+   exit 1
+fi
+
 export HADOOP_HOME=$HADOOP_PREFIX
 export HADOOP_HOME_WARN_SUPPRESS=true
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/60591c83/assemble/bin/start-daemon.sh
----------------------------------------------------------------------
diff --git a/assemble/bin/start-daemon.sh b/assemble/bin/start-daemon.sh
index 54fb55f..4df228e 100755
--- a/assemble/bin/start-daemon.sh
+++ b/assemble/bin/start-daemon.sh
@@ -78,19 +78,6 @@ if [[ ${SERVICE} == "monitor" && ${ACCUMULO_MONITOR_BIND_ALL} == "true" ]]; then
    ADDRESS="0.0.0.0"
 fi
 
-# Check the pid file to figure out if its already running.
-PID_FILE="${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${SERVICE}.pid"
-if [ -f ${PID_FILE} ]; then
-   PID=`cat ${PID_FILE}`
-   if kill -0 $PID 2>/dev/null; then
-      # Starting an already-started service shouldn't be an error per LSB
-      echo "$HOST : $SERVICE already running (${PID})"
-      exit 0
-   fi
-else
-   echo "Starting $SERVICE on $HOST"
-fi
-
 COMMAND="${bin}/accumulo"
 if [ "${ACCUMULO_WATCHER}" = "true" ]; then
    COMMAND="${bin}/accumulo_watcher.sh ${LOGHOST}"
@@ -103,9 +90,61 @@ ERRFILE="${ACCUMULO_LOG_DIR}/${SERVICE}_${LOGHOST}.err"
 rotate_log "$OUTFILE" ${ACCUMULO_NUM_OUT_FILES}
 rotate_log "$ERRFILE" ${ACCUMULO_NUM_OUT_FILES}
 
-# Fork the process, store the pid
-nohup ${NUMA_CMD} "$COMMAND" "${SERVICE}" --address "${ADDRESS}" >"$OUTFILE" 2>"$ERRFILE" < /dev/null &
-echo $! > ${PID_FILE}
+if [[ "$SERVICE" != "tserver" || $NUM_TSERVERS -eq 1 ]]; then
+   # Check the pid file to figure out if it's already running.
+   PID_FILE="${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${SERVICE}.pid"
+   if [ -f ${PID_FILE} ]; then
+      PID=`cat ${PID_FILE}`
+      if kill -0 $PID 2>/dev/null; then
+         # Starting an already-started service shouldn't be an error per LSB
+         echo "$HOST : $SERVICE already running (${PID})"
+         exit 0
+      fi
+   fi
+   echo "Starting $SERVICE on $HOST"
+
+   # Fork the process, store the pid
+   nohup ${NUMA_CMD} "$COMMAND" "${SERVICE}" --address "${ADDRESS}" >"$OUTFILE" 2>"$ERRFILE" < /dev/null &
+   echo $! > ${PID_FILE}
+
+else
+
+   S="$SERVICE"
+   for (( t=1; t<=$NUM_TSERVERS; t++)); do
+
+      SERVICE="$S-$t"
+
+      # Check the pid file to figure out if it's already running.
+      PID_FILE="${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${SERVICE}.pid"
+      if [ -f ${PID_FILE} ]; then
+         PID=`cat ${PID_FILE}`
+         if kill -0 $PID 2>/dev/null; then
+            # Starting an already-started service shouldn't be an error per LSB
+            echo "$HOST : $SERVICE already running (${PID})"
+            continue
+         fi
+      fi
+      echo "Starting $SERVICE on $HOST"
+
+      ACCUMULO_NUMACTL_OPTIONS=${ACCUMULO_NUMACTL_OPTIONS:-"--interleave=all"}
+      ACCUMULO_NUMACTL_OPTIONS=${TSERVER_NUMA_OPTIONS[$t]}
+      if [[ "$ACCUMULO_ENABLE_NUMACTL" == "true" ]]; then
+         NUMA=`which numactl 2>/dev/null`
+         NUMACTL_EXISTS=$?
+         if [[ ( ${NUMACTL_EXISTS} -eq 0 ) ]]; then
+            export NUMA_CMD="${NUMA} ${ACCUMULO_NUMACTL_OPTIONS}"
+         else
+            export NUMA_CMD=""
+         fi
+      fi
+
+      # Fork the process, store the pid
+      nohup ${NUMA_CMD} "$COMMAND" "${SERVICE}" --address "${ADDRESS}" >"${ACCUMULO_LOG_DIR}/${SERVICE}_${LOGHOST}.out" 2>"${ACCUMULO_LOG_DIR}/${SERVICE}_${LOGHOST}.err" < /dev/null &
+      echo $! > ${PID_FILE}
+
+   done
+
+fi
 
 # Check the max open files limit and selectively warn
 MAX_FILES_OPEN=$(ulimit -n)

http://git-wip-us.apache.org/repos/asf/accumulo/blob/60591c83/assemble/bin/stop-server.sh
----------------------------------------------------------------------
diff --git a/assemble/bin/stop-server.sh b/assemble/bin/stop-server.sh
index 7053ff7..bba0f1e 100755
--- a/assemble/bin/stop-server.sh
+++ b/assemble/bin/stop-server.sh
@@ -40,17 +40,20 @@ then
 fi
 
 # only stop if there's not one already running
-PID_FILE="${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${3}.pid"
 if [[ $HOST == localhost || $HOST = "$(hostname -s)" || $HOST = "$(hostname -f)" || $HOST = "$IP" ]] ; then
-   if [ -f ${PID_FILE} ]; then
-      echo "Stopping $3 on $1";
-      kill -s "$4" `cat ${PID_FILE}` 2>/dev/null
-      rm -f ${PID_FILE} 2>/dev/null
-   fi;
+   for PID_FILE in ${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${3}*.pid; do
+      if [[ -f ${PID_FILE} ]]; then
+         echo "Stopping $3 on $1";
+         kill -s "$4" `cat ${PID_FILE}` 2>/dev/null
+         rm -f ${PID_FILE} 2>/dev/null
+      fi;
+   done
 else
-   PID=$(ssh -q -o 'ConnectTimeout 8' "$1" cat "${PID_FILE}" 2>/dev/null)
-   if [[ ! -z $PID ]]; then
-      echo "Stopping $3 on $1";
-      ssh -q -o 'ConnectTimeout 8' "$1" "kill -s $4 $PID 2>/dev/null; rm -f ${PID_FILE} 2>/dev/null"
-   fi
+   for PID_FILE in $(ssh -q -o 'ConnectTimeout 8' "$1" ls "${ACCUMULO_PID_DIR}/accumulo-${ACCUMULO_IDENT_STRING}-${3}*.pid" 2>/dev/null); do
+      PID=$(ssh -q -o 'ConnectTimeout 8' "$1" cat "${PID_FILE}" 2>/dev/null)
+      if [[ ! -z $PID ]]; then
+         echo "Stopping $3 on $1";
+         ssh -q -o 'ConnectTimeout 8' "$1" "kill -s $4 $PID 2>/dev/null; rm -f ${PID_FILE} 2>/dev/null"
+      fi
+   done
 fi

http://git-wip-us.apache.org/repos/asf/accumulo/blob/60591c83/assemble/conf/templates/accumulo-env.sh
----------------------------------------------------------------------
diff --git a/assemble/conf/templates/accumulo-env.sh b/assemble/conf/templates/accumulo-env.sh
index 2ad753a..217465b 100644
--- a/assemble/conf/templates/accumulo-env.sh
+++ b/assemble/conf/templates/accumulo-env.sh
@@ -79,3 +79,15 @@ export ZKLOCK_RETRIES="5"
 
 # The number of .out and .err files per process to retain
 # export ACCUMULO_NUM_OUT_FILES=5
+
+export NUM_TSERVERS=1
+
+### Example for configuring multiple tservers per host. Note that the ACCUMULO_NUMACTL_OPTIONS
+### environment variable is used when NUM_TSERVERS is 1 to preserve backwards compatibility.
+### If NUM_TSERVERS is greater than 1, then the TSERVER_NUMA_OPTIONS array is used if defined.
+### If TSERVER_NUMA_OPTIONS is declared but not the correct size, then the service will not start.
+###
+### export NUM_TSERVERS=2
+### declare -a TSERVER_NUMA_OPTIONS
+### TSERVER_NUMA_OPTIONS[1]="--cpunodebind 0"
+### TSERVER_NUMA_OPTIONS[2]="--cpunodebind 1"

http://git-wip-us.apache.org/repos/asf/accumulo/blob/60591c83/assemble/conf/templates/generic_logger.xml
----------------------------------------------------------------------
diff --git a/assemble/conf/templates/generic_logger.xml b/assemble/conf/templates/generic_logger.xml
index db79efe..833df17 100644
--- a/assemble/conf/templates/generic_logger.xml
+++ b/assemble/conf/templates/generic_logger.xml
@@ -20,7 +20,7 @@
 
   <!-- Write out everything at the DEBUG level to the debug log -->
   <appender name="A2" class="org.apache.log4j.RollingFileAppender">
-     <param name="File"           value="${org.apache.accumulo.core.dir.log}/${org.apache.accumulo.core.application}_${org.apache.accumulo.core.ip.localhost.hostname}.debug.log"/>
+     <param name="File"           value="${org.apache.accumulo.core.dir.log}/${org.apache.accumulo.core.application}_${instance}_${org.apache.accumulo.core.ip.localhost.hostname}.debug.log"/>
      <param name="MaxFileSize"    value="1000MB"/>
      <param name="MaxBackupIndex" value="10"/>
      <param name="Threshold"      value="DEBUG"/>
@@ -31,7 +31,7 @@
 
   <!--  Write out INFO and higher to the regular log -->
   <appender name="A3" class="org.apache.log4j.RollingFileAppender">
-     <param name="File"           value="${org.apache.accumulo.core.dir.log}/${org.apache.accumulo.core.application}_${org.apache.accumulo.core.ip.localhost.hostname}.log"/>
+     <param name="File"           value="${org.apache.accumulo.core.dir.log}/${org.apache.accumulo.core.application}_${instance}_${org.apache.accumulo.core.ip.localhost.hostname}.log"/>
      <param name="MaxFileSize"    value="1000MB"/>
      <param name="MaxBackupIndex" value="10"/>
      <param name="Threshold"      value="INFO"/>

http://git-wip-us.apache.org/repos/asf/accumulo/blob/60591c83/docs/src/main/asciidoc/chapters/administration.txt
----------------------------------------------------------------------
diff --git a/docs/src/main/asciidoc/chapters/administration.txt b/docs/src/main/asciidoc/chapters/administration.txt
index 622c2d3..d75c601 100644
--- a/docs/src/main/asciidoc/chapters/administration.txt
+++ b/docs/src/main/asciidoc/chapters/administration.txt
@@ -479,40 +479,29 @@ With very powerful nodes, it may be beneficial to run more than one TabletServer
 node. This decision should be made carefully and with much deliberation as Accumulo is designed
 to be able to scale to using 10's of GB of RAM and 10's of CPU cores.
 
-To run multiple TabletServers on a single host, it is necessary to create multiple Accumulo configuration
-directories. Ensuring that these properties are appropriately set (and remain consistent) are an exercise
-for the user.
+To run multiple TabletServers on a single host, you will need to change the +NUM_TSERVERS+ property
+in the +accumulo-env.sh+ file from 1 to the number of TabletServers that you want to run. On NUMA
+hardware, with numactl installed, the TabletServer will interleave its memory allocations across
+the NUMA nodes and the processes will be scheduled on all the NUMA cores without restriction. To
+change this behavior, you can uncomment the +TSERVER_NUMA_OPTIONS+ example in +accumulo-env.sh+ and
+set the numactl options for each TabletServer.
 
 Accumulo TabletServers bind certain ports on the host to accommodate remote procedure calls to/from
-other nodes. This requires additional configuration values in +accumulo-site.xml+:
+other nodes. Running more than one TabletServer on a host requires that you set the following
+properties in +accumulo-site.xml+:
 
-* +tserver.port.client+
-* +replication.receipt.service.port+
-
-Normally, setting a value of +0+ for these configuration properties is sufficient. In some
-environment, the ports used by Accumulo must be well-known for security reasons and require a
-separate copy of the configuration files to use a static port for each TabletServer instance.
-
-It is also necessary to update the following exported variables in +accumulo-env.sh+.
-
-* +ACCUMULO_LOG_DIR+
-* +ACCUMULO_PID_DIR+
-
-The values for these properties are left up to the user to define; there are no constraints
-other than ensuring that the directory exists and the user running Accumulo has the permission
-to read/write into that directory.
-
-Accumulo's provided scripts for stopping a cluster operate under the assumption that one process
-is running per host. As such, starting and stopping multiple TabletServers on one host requires
-more effort on the user. It is important to ensure that +ACCUMULO_CONF_DIR+ is correctly
-set for the instance of the TabletServer being started.
-
-  $ACCUMULO_CONF_DIR=$ACCUMULO_HOME/conf $ACCUMULO_HOME/bin/accumulo tserver --address <your_server_ip> &
-
-To stop TabletServers, the normal +stop-all.sh+ will stop all instances of TabletServers across all nodes.
-Using the provided +kill+ command by your operation system is an option to stop a single instance on
-a single node. +stop-server.sh+ can be used to stop all TabletServers on a single node.
+  <property>
+    <name>tserver.port.client</name>
+    <value>0</value>
+  </property>
+  <property>
+    <name>replication.receipt.service.port</name>
+    <value>0</value>
+  </property>
 
+Accumulo's provided scripts for starting and stopping the cluster should work normally with multiple
+TabletServers on a host. Sanity checks are provided in the scripts and will output an error when there
+is a configuration mismatch.
 
 [[monitoring]]
 === Monitoring