You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by el...@apache.org on 2018/09/04 02:48:44 UTC
[2/3] hbase git commit: HBASE-21126 Configurable number of allowed
failures for ZooKeeper Canary
HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary
Signed-off-by: Josh Elser <el...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/58365c8d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/58365c8d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/58365c8d
Branch: refs/heads/branch-2
Commit: 58365c8dd2fa8c913039d0d0249f6b54af252585
Parents: 68c5313
Author: David Manning <da...@salesforce.com>
Authored: Wed Aug 29 12:06:59 2018 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Mon Sep 3 19:35:28 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/tool/Canary.java | 52 +++++++++++++++-----
.../hadoop/hbase/tool/TestCanaryTool.java | 35 ++++++++-----
2 files changed, 63 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/58365c8d/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
index ae6324f..7a549fc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
@@ -586,6 +586,7 @@ public final class Canary implements Tool {
private boolean failOnError = true;
private boolean regionServerMode = false;
private boolean zookeeperMode = false;
+ private long permittedFailures = 0;
private boolean regionServerAllRegions = false;
private boolean writeSniffing = false;
private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
@@ -729,6 +730,19 @@ public final class Canary implements Tool {
}
this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal);
}
+ } else if (cmd.equals("-permittedZookeeperFailures")) {
+ i++;
+
+ if (i == args.length) {
+ System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+ printUsageAndExit();
+ }
+ try {
+ this.permittedFailures = Long.parseLong(args[i]);
+ } catch (NumberFormatException e) {
+ System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+ printUsageAndExit();
+ }
} else {
// no options match
System.err.println(cmd + " options is invalid.");
@@ -750,6 +764,10 @@ public final class Canary implements Tool {
printUsageAndExit();
}
}
+ if (this.permittedFailures != 0 && !this.zookeeperMode) {
+ System.err.println("-permittedZookeeperFailures requires -zookeeper mode.");
+ printUsageAndExit();
+ }
if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) {
System.err.println("-readTableTimeouts can only be configured in region mode.");
printUsageAndExit();
@@ -847,6 +865,8 @@ public final class Canary implements Tool {
System.err.println(" only works in regionserver mode.");
System.err.println(" -zookeeper Tries to grab zookeeper.znode.parent ");
System.err.println(" on each zookeeper instance");
+ System.err.println(" -permittedZookeeperFailures <N> Ignore first N failures when attempting to ");
+ System.err.println(" connect to individual zookeeper nodes in the ensemble");
System.err.println(" -daemon Continuous check at defined intervals.");
System.err.println(" -interval <N> Interval between checks (sec)");
System.err.println(" -e Use table/regionserver as regular expression");
@@ -889,17 +909,18 @@ public final class Canary implements Tool {
monitor =
new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
(StdOutSink) this.sink, this.executor, this.regionServerAllRegions,
- this.treatFailureAsError);
+ this.treatFailureAsError, this.permittedFailures);
} else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) {
monitor =
new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
- (StdOutSink) this.sink, this.executor, this.treatFailureAsError);
+ (StdOutSink) this.sink, this.executor, this.treatFailureAsError,
+ this.permittedFailures);
} else {
monitor =
new RegionMonitor(connection, monitorTargets, this.useRegExp,
(StdOutSink) this.sink, this.executor, this.writeSniffing,
this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts,
- this.configuredWriteTableTimeout);
+ this.configuredWriteTableTimeout, this.permittedFailures);
}
return monitor;
}
@@ -916,6 +937,7 @@ public final class Canary implements Tool {
protected boolean done = false;
protected int errorCode = 0;
+ protected long allowedFailures = 0;
protected Sink sink;
protected ExecutorService executor;
@@ -932,7 +954,8 @@ public final class Canary implements Tool {
return true;
}
if (treatFailureAsError &&
- (sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) {
+ (sink.getReadFailureCount() > allowedFailures || sink.getWriteFailureCount() > allowedFailures)) {
+ LOG.error("Too many failures detected, treating failure as error, failing the Canary.");
errorCode = FAILURE_EXIT_CODE;
return true;
}
@@ -945,7 +968,7 @@ public final class Canary implements Tool {
}
protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
- ExecutorService executor, boolean treatFailureAsError) {
+ ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
if (null == connection) throw new IllegalArgumentException("connection shall not be null");
this.connection = connection;
@@ -954,6 +977,7 @@ public final class Canary implements Tool {
this.treatFailureAsError = treatFailureAsError;
this.sink = sink;
this.executor = executor;
+ this.allowedFailures = allowedFailures;
}
@Override
@@ -995,8 +1019,9 @@ public final class Canary implements Tool {
public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
- boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout) {
- super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+ boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout,
+ long allowedFailures) {
+ super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
Configuration conf = connection.getConfiguration();
this.writeSniffing = writeSniffing;
this.writeTableName = writeTableName;
@@ -1289,8 +1314,8 @@ public final class Canary implements Tool {
private final int timeout;
protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
- StdOutSink sink, ExecutorService executor, boolean treatFailureAsError) {
- super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+ StdOutSink sink, ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
+ super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
Configuration configuration = connection.getConfiguration();
znode =
configuration.get(ZOOKEEPER_ZNODE_PARENT,
@@ -1303,6 +1328,11 @@ public final class Canary implements Tool {
for (InetSocketAddress server : parser.getServerAddresses()) {
hosts.add(server.toString());
}
+ if (allowedFailures > (hosts.size() - 1) / 2) {
+ LOG.warn("Confirm allowable number of failed ZooKeeper nodes, as quorum will " +
+ "already be lost. Setting of {} failures is unexpected for {} ensemble size.",
+ allowedFailures, hosts.size());
+ }
}
@Override public void run() {
@@ -1351,8 +1381,8 @@ public final class Canary implements Tool {
public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
StdOutSink sink, ExecutorService executor, boolean allRegions,
- boolean treatFailureAsError) {
- super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+ boolean treatFailureAsError, long allowedFailures) {
+ super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
this.allRegions = allRegions;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/58365c8d/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
index e713a5a..cdbf426 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
@@ -90,20 +90,14 @@ public class TestCanaryTool {
@Test
public void testBasicZookeeperCanaryWorks() throws Exception {
- Integer port =
- Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
- testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
- "localhost:" + port + "/hbase");
- ExecutorService executor = new ScheduledThreadPoolExecutor(2);
- Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
- Canary canary = new Canary(executor, sink);
- String[] args = { "-t", "10000", "-zookeeper" };
- assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+ final String[] args = { "-t", "10000", "-zookeeper" };
+ testZookeeperCanaryWithArgs(args);
+ }
- String baseZnode = testingUtility.getConfiguration()
- .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
- verify(sink, atLeastOnce())
- .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+ @Test
+ public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception {
+ final String[] args = { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" };
+ testZookeeperCanaryWithArgs(args);
}
@Test
@@ -250,4 +244,19 @@ public class TestCanaryTool {
assertEquals("verify no read error count", 0, canary.getReadFailures().size());
}
+ private void testZookeeperCanaryWithArgs(String[] args) throws Exception {
+ Integer port =
+ Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
+ testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
+ "localhost:" + port + "/hbase");
+ ExecutorService executor = new ScheduledThreadPoolExecutor(2);
+ Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
+ Canary canary = new Canary(executor, sink);
+ assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+
+ String baseZnode = testingUtility.getConfiguration()
+ .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
+ verify(sink, atLeastOnce())
+ .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+ }
}