You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:00:58 UTC

svn commit: r1181349 - in /hbase/branches/0.89: bin/hbase-daemon.sh bin/regionservers.sh bin/rolling-restart.sh conf/hadoop-metrics.properties conf/hbase-site.xml

Author: nspiegelberg
Date: Tue Oct 11 02:00:58 2011
New Revision: 1181349

URL: http://svn.apache.org/viewvc?rev=1181349&view=rev
Log:
HBASE-1660: script to handle rolling restarts

Summary:
1. added 'restart' option to hbase-daemon.sh for unit-level restarting
2. added rolling-restart.sh script to perform system-level rolling restarts

Test Plan:
./bin/start-hbase.sh
./bin/rolling-restart.sh
./bin/stop-hbase.sh

Reviewed in public trunk: https://review.cloudera.org/r/632

Added:
    hbase/branches/0.89/bin/rolling-restart.sh
Modified:
    hbase/branches/0.89/bin/hbase-daemon.sh
    hbase/branches/0.89/bin/regionservers.sh
    hbase/branches/0.89/conf/hadoop-metrics.properties
    hbase/branches/0.89/conf/hbase-site.xml

Modified: hbase/branches/0.89/bin/hbase-daemon.sh
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/bin/hbase-daemon.sh?rev=1181349&r1=1181348&r2=1181349&view=diff
==============================================================================
--- hbase/branches/0.89/bin/hbase-daemon.sh (original)
+++ hbase/branches/0.89/bin/hbase-daemon.sh Tue Oct 11 02:00:58 2011
@@ -33,7 +33,7 @@
 # Modelled after $HADOOP_HOME/bin/hadoop-daemon.sh
 
 usage="Usage: hbase-daemon.sh [--config <conf-dir>]\
- (start|stop) <hbase-command> \
+ (start|stop|restart) <hbase-command> \
  <args...>"
 
 # if no args specified, show usage
@@ -71,6 +71,24 @@ hbase_rotate_log ()
     fi
 }
 
+wait_until_done ()
+{
+    p=$1
+    cnt=${HBASE_SLAVE_TIMEOUT:-60}
+    origcnt=$cnt
+    while kill -0 $p > /dev/null 2>&1; do
+      if [ $cnt -gt 1 ]; then
+        cnt=`expr $cnt - 1`
+        sleep 1
+      else
+        echo "Process did not complete after $origcnt seconds, killing."
+        kill -9 $p
+        exit 1
+      fi
+    done
+    return 0
+}
+
 # get log directory
 if [ "$HBASE_LOG_DIR" = "" ]; then
   export HBASE_LOG_DIR="$HBASE_HOME/logs"
@@ -156,6 +174,22 @@ case $startStop in
     fi
     ;;
 
+  (restart)
+    thiscmd=$0
+    args=$@
+    # stop the command
+    $thiscmd --config "${HBASE_CONF_DIR}" stop $command $args &
+    wait_until_done $!
+    # wait a user-specified sleep period
+    sp=${HBASE_SLAVE_SLEEP:-3}
+    if [ $sp -gt 0 ]; then
+      sleep $sp
+    fi
+    # start the command
+    $thiscmd --config "${HBASE_CONF_DIR}" start $command $args &
+    wait_until_done $!
+    ;;
+
   (*)
     echo $usage
     exit 1

Modified: hbase/branches/0.89/bin/regionservers.sh
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/bin/regionservers.sh?rev=1181349&r1=1181348&r2=1181349&view=diff
==============================================================================
--- hbase/branches/0.89/bin/regionservers.sh (original)
+++ hbase/branches/0.89/bin/regionservers.sh Tue Oct 11 02:00:58 2011
@@ -60,11 +60,16 @@ if [ "$HOSTLIST" = "" ]; then
 fi
 
 for regionserver in `cat "$HOSTLIST"`; do
- ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
-   2>&1 | sed "s/^/$regionserver: /" &
- if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
-   sleep $HBASE_SLAVE_SLEEP
- fi
+  if ${HBASE_SLAVE_PARALLEL:-true}; then
+    ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
+      2>&1 | sed "s/^/$regionserver: /" &
+  else # run each command serially
+    ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
+      2>&1 | sed "s/^/$regionserver: /"
+  fi
+  if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
+    sleep $HBASE_SLAVE_SLEEP
+  fi
 done
 
 wait

Added: hbase/branches/0.89/bin/rolling-restart.sh
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/bin/rolling-restart.sh?rev=1181349&view=auto
==============================================================================
--- hbase/branches/0.89/bin/rolling-restart.sh (added)
+++ hbase/branches/0.89/bin/rolling-restart.sh Tue Oct 11 02:00:58 2011
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+#
+#/**
+# * Copyright 2010 The Apache Software Foundation
+# *
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements.  See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership.  The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+#
+# Perform a rolling restart of the HBase master(s) and all regionservers.
+#
+# Environment Variables
+#
+#   HBASE_REGIONSERVERS    File naming remote hosts.
+#     Default is ${HBASE_CONF_DIR}/regionservers
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#   HBASE_CONF_DIR  Alternate hbase conf dir. Default is ${HBASE_HOME}/conf.
+#   HBASE_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+#   HBASE_SLAVE_TIMEOUT Seconds to wait for timing out a remote command.
+#   HBASE_SSH_OPTS Options passed to ssh when running remote commands.
+#
+# Modelled after $HADOOP_HOME/bin/slaves.sh.
+
+usage="Usage: $0 [--config <hbase-confdir>] commands..."
+
+bin=`dirname "$0"`
+bin=`cd "$bin">/dev/null; pwd`
+
+. "$bin"/hbase-config.sh
+
+# abort if hbase-config.sh failed to load the environment
+errCode=$?
+if [ $errCode -ne 0 ]
+then
+  exit $errCode
+fi
+
+# quick way to read a value out of the HBase config file
+distMode=`$bin/hbase org.apache.hadoop.hbase.HBaseConfTool hbase.cluster.distributed`
+if [ "$distMode" == 'false' ]; then
+  "$bin"/hbase-daemon.sh restart master
+else
+  # stop all masters before re-start to avoid races for master znode
+  "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" stop master
+  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
+    --hosts "${HBASE_BACKUP_MASTERS}" stop master-backup
+
+  # make sure the master znode has been deleted before continuing
+  zparent=`$bin/hbase org.apache.hadoop.hbase.HBaseConfTool zookeeper.znode.parent`
+  if [ "$zparent" == "null" ]; then zparent="/hbase"; fi
+  zmaster=`$bin/hbase org.apache.hadoop.hbase.HBaseConfTool zookeeper.znode.master`
+  if [ "$zmaster" == "null" ]; then zmaster="master"; fi
+  zmaster=$zparent/$zmaster
+  echo -n "Waiting for Master ZNode to expire"
+  while "$bin"/hbase zkcli stat "$zmaster" >/dev/null 2>&1; do
+    echo -n "."
+    sleep 1
+  done
+  echo #force a newline
+
+  # all masters are down, now restart
+  "$bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master
+  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
+    --hosts "${HBASE_BACKUP_MASTERS}" start master-backup
+
+  # unlike the masters, roll all regionservers one-at-a-time
+  export HBASE_SLAVE_PARALLEL=false
+  "$bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" \
+    --hosts "${HBASE_REGIONSERVERS}" restart regionserver
+
+fi

Modified: hbase/branches/0.89/conf/hadoop-metrics.properties
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/conf/hadoop-metrics.properties?rev=1181349&r1=1181348&r2=1181349&view=diff
==============================================================================
--- hbase/branches/0.89/conf/hadoop-metrics.properties (original)
+++ hbase/branches/0.89/conf/hadoop-metrics.properties Tue Oct 11 02:00:58 2011
@@ -1,54 +1,29 @@
-# See http://wiki.apache.org/hadoop/GangliaMetrics
-# Make sure you know whether you are using ganglia 3.0 or 3.1.
-# If 3.1, you will have to patch your hadoop instance with HADOOP-4675
-# And, yes, this file is named hadoop-metrics.properties rather than
-# hbase-metrics.properties because we're leveraging the hadoop metrics
-# package and hadoop-metrics.properties is an hardcoded-name, at least
-# for the moment.
-#
-# See also http://hadoop.apache.org/hbase/docs/current/metrics.html
-
-# Configuration of the "hbase" context for null
-hbase.class=org.apache.hadoop.metrics.spi.NullContext
-
-# Configuration of the "hbase" context for file
-# hbase.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
-# hbase.period=10
-# hbase.fileName=/tmp/metrics_hbase.log
-
-# Configuration of the "hbase" context for ganglia
-# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
-# hbase.class=org.apache.hadoop.metrics.ganglia.GangliaContext
-# hbase.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
-# hbase.period=10
-# hbase.servers=GMETADHOST_IP:8649
+# DEFAULT METRICS CONFIGURATION
+# - Configuration used in production tiers. Allows only JMX stats collection
 
-# Configuration of the "jvm" context for null
+# Configuration of the "dfs" context
+dfs.class=org.apache.hadoop.metrics.jmx.JMXContext
+dfs.period=10
+
+# Configuration of the "mapred" context
+mapred.class=org.apache.hadoop.metrics.jmx.JMXContext
+mapred.period=10
+jmx_records=jobtracker,tasktracker
+
+# Configuration of the "jvm" context
 jvm.class=org.apache.hadoop.metrics.spi.NullContext
 
-# Configuration of the "jvm" context for file
-# jvm.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
-# jvm.period=10
-# jvm.fileName=/tmp/metrics_jvm.log
-
-# Configuration of the "jvm" context for ganglia
-# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
-# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
-# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
-# jvm.period=10
-# jvm.servers=GMETADHOST_IP:8649
-
-# Configuration of the "rpc" context for null
-rpc.class=org.apache.hadoop.metrics.spi.NullContext
-
-# Configuration of the "rpc" context for file
-# rpc.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
-# rpc.period=10
-# rpc.fileName=/tmp/metrics_rpc.log
-
-# Configuration of the "rpc" context for ganglia
-# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
-# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
-# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
-# rpc.period=10
-# rpc.servers=GMETADHOST_IP:8649
+
+###########################################################
+###########################################################
+
+# Beta tiers may want to use CompositeContext to enable both JMX + File export
+# Example configuration given below...
+
+#dfs.class=org.apache.hadoop.metrics.spi.CompositeContext
+#dfs.arity=2
+#dfs.sub1.class=org.apache.hadoop.metrics.file.FileContext
+#dfs.sub1.period=10
+#dfs.fileName=/usr/local/hadoop/logs/DFS1/dfs_metrics.log
+#dfs.sub2.class=org.apache.hadoop.metrics.jmx.JMXContext
+#dfs.sub2.period=10

Modified: hbase/branches/0.89/conf/hbase-site.xml
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/conf/hbase-site.xml?rev=1181349&r1=1181348&r2=1181349&view=diff
==============================================================================
--- hbase/branches/0.89/conf/hbase-site.xml (original)
+++ hbase/branches/0.89/conf/hbase-site.xml Tue Oct 11 02:00:58 2011
@@ -22,4 +22,66 @@
  */
 -->
 <configuration>
+
+<!-- NEEDED WHETHER OR NOT YOU ARE RUNNING OVER HDFS -->
+<property>
+  <name>hbase.cluster.distributed</name>
+  <value>true</value>
+  <description>For pseudo-distributed, you want to set this to true.
+  false means that HBase tries to put Master + RegionServers in one process.
+  Pseudo-distributed = separate processes/pids</description>
+</property> <property>
+  <name>hbase.regionserver.hlog.replication</name>
+  <value>1</value>
+  <description>For HBase to offer good data durability, we roll logs if
+  filesystem replication falls below a certain amount.  In pseudo-distributed
+  mode, you normally only have the local filesystem or 1 HDFS DataNode, so you
+  don't want to roll logs constantly.</description>
+</property>
+<property>
+  <name>hbase.tmp.dir</name>
+  <value>/tmp/hbase-testing</value>
+  <description>Temporary directory on the local filesystem.</description>
+</property>
+<property>
+  <name>hbase.regionserver.port</name>
+  <value>50020</value>
+  <description>50020</description>
+</property>
+<property>
+  <name>hbase.regionserver.info.port</name>
+  <value>50030</value>
+  <description>50030</description>
+</property>
+
+<!-- DEFAULT = use local filesystem, not HDFS
+     ADD THESE LINES if you have a copy of HDFS source and want to run HBase
+     pseudo-distributed over a pseudo-distributed HDFS cluster.
+     For HDFS pseudo-distributed setup, see their documentation:
+
+     http://hadoop.apache.org/common/docs/r0.20.2/quickstart.html#PseudoDistributed
+
+
+<property>
+  <name>hbase.rootdir</name>
+  <value>hdfs://localhost:9000/hbase-testing</value>
+  <description>The directory shared by region servers.
+  Should be fully-qualified to include the filesystem to use.
+  E.g: hdfs://NAMENODE_SERVER:PORT/HBASE_ROOTDIR
+  </description>
+</property>
+-->
+
+<!-- OPTIONAL: You might want to add these options depending upon your use case
+
+
+<property>
+  <name>dfs.support.append</name>
+  <value>true</value>
+  <description>Allow append support (if you want to test data durability with HDFS)
+  </description>
+</property>
+-->
+
+
 </configuration>