You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2013/05/16 21:20:15 UTC
svn commit: r1483521 - in /hbase/branches/0.89-fb: bin/rolling_restart_v2
src/main/java/org/apache/hadoop/hbase/util/RollingRestart.java
Author: liyin
Date: Thu May 16 19:20:14 2013
New Revision: 1483521
URL: http://svn.apache.org/r1483521
Log:
[HBASE-8500] Minor Changes in RollingRestart + a basic wrapper around the utility
Author: rshroff
Summary:
Made small changes in the RollingRestart class so that it can remotely
restart a region server.
The change also adds a very basic wrapper (a Groovy script) around the RollingRestart utility
to perform the rolling restart for all/selected region servers in the cluster.
Test Plan: tested it multiple times on TSH025
Reviewers: aaiyer, liyintang, paultuckfield
Reviewed By: aaiyer
CC: hbase-eng@
Differential Revision: https://phabricator.fb.com/D807373
Task ID: 2229110
Added:
hbase/branches/0.89-fb/bin/rolling_restart_v2
Modified:
hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/RollingRestart.java
Added: hbase/branches/0.89-fb/bin/rolling_restart_v2
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/bin/rolling_restart_v2?rev=1483521&view=auto
==============================================================================
--- hbase/branches/0.89-fb/bin/rolling_restart_v2 (added)
+++ hbase/branches/0.89-fb/bin/rolling_restart_v2 Thu May 16 19:20:14 2013
@@ -0,0 +1,155 @@
+#!/bin/env /opt/hbase/bin/hbase-groovy
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.MetaScanner;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.*;
+import org.apache.hadoop.hbase.master.*;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.util.Bytes;
+import java.io.ByteArrayInputStream;
+import org.apache.log4j.*
+import org.apache.commons.cli.*
+// Log levels for this script: ZooKeeper only reports errors, HBase stays at INFO.
+def logLevels = ["org.apache.zookeeper": Level.ERROR, "org.apache.hadoop.hbase": Level.INFO];
+logLevels.each { loggerName, level -> Logger.getLogger(loggerName).setLevel(level) }
+
+// Runs the RollingRestart utility for one region server and captures the tool's output in
+// <logsFolder>/rolling_restart_<host>.txt. rollingRestartArgs is appended verbatim to the
+// command line. Throws an Exception when the server's start code (divided by 1000) has not
+// increased after the run, which is taken to mean the server was not restarted.
+def restartRegionServer(HBaseAdmin admin, HServerInfo regionserver,
+    Configuration conf, String rollingRestartArgs, String logsFolder) {
+  long startCode = regionserver.getStartCode()/1000;
+  String host = regionserver.getHostname();
+  String logPath = logsFolder + "/rolling_restart_" + host + ".txt";
+  String cmd ="/usr/local/hadoop/" + conf.get("titan.cell.name") +
+    "-HBASE/bin/hbase org.apache.hadoop.hbase.util.RollingRestart" +
+    rollingRestartArgs + " -s " + host;
+
+  println "Performing rolling restart for host " + host;
+  println cmd;
+
+  // FileOutputStream creates the log file itself; finally closes it even if the launch fails.
+  FileOutputStream ostream = new FileOutputStream(logPath);
+  try {
+    println "Check the output at " + logPath;
+    def proc = cmd.execute()
+    proc.consumeProcessOutput(ostream, ostream);
+    proc.waitForOrKill(30 * 60 * 1000) // give up on the tool after 30 minutes
+  } finally {
+    ostream.close();
+  }
+  long newStartCode = admin.getConnection().
+    getHRegionConnection(regionserver.getServerAddress()).getHServerInfo().getStartCode()/1000;
+  if (newStartCode <= startCode) {
+    throw new Exception("Rolling Restart failed for Regionserver " + host);
+  }
+}
+
+// START
+
+Options options = new Options();
+// -n/-a choose the targets and -f the log folder; the other options are forwarded to RollingRestart.
+options.addOption("n", "nodes", true,
+    "Name of the region servers to restart");
+options.addOption("a", "all", false,
+    "Restart all the region servers in the cluster");
+options.addOption("r", "sleep_after_restart", true,
+    "time interval after which the region server should be started assigning regions. Default : 10000ms");
+options.addOption("b", "sleep_before_restart", true,
+    "time interval after which the region server should be restarted after draining. Default : 10000ms");
+options.addOption("d", "region_drain_interval", true,
+    "time interval between region movements while draining. Default : 1000ms");
+options.addOption("u", "region_undrain_interval", true,
+    "time interval between region movements while undraining. Default : 10000ms");
+options.addOption("g", "get_request_frequency", true,
+    "frequency at which region checker will check for region availability. Default : 1000ms");
+options.addOption("c", "clear", false,
+    "Clear all the regionserver from blacklist. Default : false");
+options.addOption("h", "dont_use_hadoopctl", false,
+    "Don't use hadoopctl to restart the regionserver. Default : true"); // NOTE(review): confirm what "Default : true" refers to
+options.addOption("f", "log_file_folder", true,
+    "Default location where logs for rolling restart should be stored. Default : /tmp");
+
+// No arguments at all: print the usage text and stop.
+if (args.length == 0) {
+  HelpFormatter formatter = new HelpFormatter();
+  formatter.printHelp("rolling_restart", options, true);
+  return;
+}
+
+CommandLineParser parser = new PosixParser();
+CommandLine cmd = parser.parse(options, args);
+
+// Extra arguments forwarded to RollingRestart; each piece is added with a leading space.
+String rollingRestartArgs = "";
+String logsFolder = "/tmp";
+hosts = null;
+
+// Options declared with an argument are forwarded together with their value.
+for (String opt in ['r', 'b', 'd', 'u', 'g']) {
+  if (cmd.hasOption(opt)) {
+    rollingRestartArgs += " -" + opt + " " + cmd.getOptionValue(opt);
+  }
+}
+
+// -c and -h are declared without an argument, so getOptionValue() yields null for them.
+// Forward only the flag itself instead of appending the literal text "null".
+// NOTE(review): the RollingRestart change in this same commit returns right after
+// clearAll() when -c is given, so forwarding -c presumably skips the restart - confirm.
+for (String flag in ['c', 'h']) {
+  if (cmd.hasOption(flag)) {
+    rollingRestartArgs += " -" + flag;
+  }
+}
+
+if (cmd.hasOption('f')) {
+  logsFolder = cmd.getOptionValue('f');
+}
+
+// -n (comma-separated host names) takes precedence over -a; without either one
+// the usage text is printed and the script stops.
+if (cmd.hasOption("n")) {
+  hosts = cmd.getOptionValue("n").split(',');
+  println hosts;
+} else if (cmd.hasOption('a')) {
+  println "Performing restart on all regionservers";
+} else {
+  HelpFormatter formatter = new HelpFormatter();
+  formatter.printHelp("rolling_restart", options, true);
+  return;
+}
+
+conf = HBaseConfiguration.create();
+admin = new HBaseAdmin(conf);
+// Names of the servers whose restart failed are collected, one per line, in this file.
+PrintWriter errFileWriter = new PrintWriter(logsFolder + "/rolling_restart_failed_nodes");
+try {
+  if (hosts != null) {
+    for (String server in hosts) {
+      try {
+        HServerInfo info = admin.getConnection().
+          getHRegionConnection(new HServerAddress(server, 60020)).getHServerInfo();
+        restartRegionServer(admin, info, conf, rollingRestartArgs, logsFolder);
+      } catch (Exception e) {
+        e.printStackTrace();
+        println "\nERROR: "+ server + " Failed.";
+        // The server info may never have been fetched, so record the requested name.
+        errFileWriter.println(server);
+      }
+    }
+  } else {
+    for (host in admin.getClusterStatus().getServerInfo()) {
+      try {
+        restartRegionServer(admin, host, conf, rollingRestartArgs, logsFolder);
+      } catch (Exception e) {
+        e.printStackTrace();
+        println "\nERROR: "+ host + " Failed.";
+        errFileWriter.println(host.getHostname());
+      }
+    }
+  }
+} finally {
+  errFileWriter.close();
+}
Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/RollingRestart.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/RollingRestart.java?rev=1483521&r1=1483520&r2=1483521&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/RollingRestart.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/RollingRestart.java Thu May 16 19:20:14 2013
@@ -85,7 +85,6 @@ public class RollingRestart {
currentState = STAGE.FAIL;
return;
}
-
this.serverAddr = new HServerAddress(serverName, 60020);
currentState = STAGE.SETUP;
@@ -164,22 +163,50 @@ public class RollingRestart {
* @throws InterruptedException
*/
void restart() throws IOException, InterruptedException {
- System.out.println("Shutting down the region server");
+ System.out.println("Shutting down the region server after sleep of " +
+ this.sleepIntervalBeforeRestart);
Thread.sleep(this.sleepIntervalBeforeRestart);
String cellName = conf.get("titan.cell.name");
- try {
+ String sshCmd = "ssh hadoop@" + serverAddr.getHostname();
+ try {
if (this.useHadoopCtl) {
- Process p = Runtime.getRuntime().exec("/usr/local/bin/hadoopctl restart regionserver");
+ sshCmd += " hadoopctl restart regionserver";
+ LOG.info("Executing " + sshCmd);
+ Process p = Runtime.getRuntime().exec(sshCmd);
+
p.waitFor();
+
+ LOG.info("Exit value for the region server restart " + p.exitValue());
+
+ if (p.exitValue() != 0) {
+ LOG.error("Failed to restart. regionserver. Aborting..");
+ throw new IOException("Failed to restart regionserver. Aborting..");
+ }
} else {
- Process p = Runtime.getRuntime().exec("/usr/local/hadoop/" +
- cellName + "-HBASE/bin/hbase-daemon.sh stop regionserver");
+ String sshCmdToStopRS = sshCmd + " /usr/local/hadoop/" +
+ cellName + "-HBASE/bin/hbase-daemon.sh stop regionserver";
+ LOG.info("Executing " + sshCmd);
+ Process p = Runtime.getRuntime().exec(sshCmdToStopRS);
p.waitFor();
- p = Runtime.getRuntime().exec("/usr/local/hadoop/" +
- cellName + "-HBASE/bin/hbase-daemon.sh start regionserver");
+
+ LOG.info("Exit value for the region server stop " + p.exitValue());
+
+ if (p.exitValue() != 0) {
+ LOG.error("Failed to stop regionserver. Aborting..");
+ throw new IOException("Failed to stop regionserver. Aborting..");
+ }
+ String sshCmdToStartRS = sshCmd + " /usr/local/hadoop/" +
+ cellName + "-HBASE/bin/hbase-daemon.sh start regionserver ";
+ p = Runtime.getRuntime().exec(sshCmdToStartRS);
p.waitFor();
- LOG.info("Exit value for the restarter " + p.exitValue());
+
+ LOG.info("Exit value for the region server start " + p.exitValue());
+
+ if (p.exitValue() != 0) {
+ LOG.error("Failed to start regionserver. Aborting..");
+ throw new IOException("Failed to start regionserver. Aborting..");
+ }
}
} catch (IOException e1) {
@@ -210,6 +237,9 @@ public class RollingRestart {
List<HServerAddress> serversForRegion = plan.getAssignment(region);
+ if (serversForRegion == null) {
+ return null;
+ }
// Get the preferred region server from the Assignment Plan
for (HServerAddress server : serversForRegion) {
if (!server.equals(serverAddr)) {
@@ -402,7 +432,7 @@ public class RollingRestart {
"Name of the region server to restart");
options.addOption("r", "sleep_after_restart", true,
"time interval after which the region server should be started assigning regions. Default : 10000ms");
- options.addOption("r", "sleep_before_restart", true,
+ options.addOption("b", "sleep_before_restart", true,
"time interval after which the region server should be restarted after draining. Default : 10000ms");
options.addOption("d", "region_drain_interval", true,
"time interval between region movements while draining. Default : 1000ms");
@@ -434,6 +464,7 @@ public class RollingRestart {
if (cmd.hasOption("c")) {
RollingRestart.clearAll();
+ return;
}
if (!cmd.hasOption("s")) {