You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by el...@apache.org on 2018/09/04 02:48:43 UTC

[1/3] hbase git commit: HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary

Repository: hbase
Updated Branches:
  refs/heads/branch-1 329a7176f -> 484a1d804
  refs/heads/branch-2 68c5313ca -> 58365c8dd
  refs/heads/master dc7902996 -> 5cca61c4d


HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary

Signed-off-by: Josh Elser <el...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5cca61c4
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5cca61c4
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5cca61c4

Branch: refs/heads/master
Commit: 5cca61c4d0e7f1cf03fb7a80c7611bde4a49214b
Parents: dc79029
Author: David Manning <da...@salesforce.com>
Authored: Wed Aug 29 12:06:59 2018 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Mon Sep 3 19:27:23 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/tool/Canary.java    | 52 +++++++++++++++-----
 .../hadoop/hbase/tool/TestCanaryTool.java       | 35 ++++++++-----
 2 files changed, 63 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5cca61c4/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
index ae6324f..7a549fc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
@@ -586,6 +586,7 @@ public final class Canary implements Tool {
   private boolean failOnError = true;
   private boolean regionServerMode = false;
   private boolean zookeeperMode = false;
+  private long permittedFailures = 0;
   private boolean regionServerAllRegions = false;
   private boolean writeSniffing = false;
   private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
@@ -729,6 +730,19 @@ public final class Canary implements Tool {
             }
             this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal);
           }
+        } else if (cmd.equals("-permittedZookeeperFailures")) {
+          i++;
+
+          if (i == args.length) {
+            System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+            printUsageAndExit();
+          }
+          try {
+            this.permittedFailures = Long.parseLong(args[i]);
+          } catch (NumberFormatException e) {
+            System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+            printUsageAndExit();
+          }
         } else {
           // no options match
           System.err.println(cmd + " options is invalid.");
@@ -750,6 +764,10 @@ public final class Canary implements Tool {
         printUsageAndExit();
       }
     }
+    if (this.permittedFailures != 0 && !this.zookeeperMode) {
+      System.err.println("-permittedZookeeperFailures requires -zookeeper mode.");
+      printUsageAndExit();
+    }
     if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) {
       System.err.println("-readTableTimeouts can only be configured in region mode.");
       printUsageAndExit();
@@ -847,6 +865,8 @@ public final class Canary implements Tool {
     System.err.println("      only works in regionserver mode.");
     System.err.println("   -zookeeper    Tries to grab zookeeper.znode.parent ");
     System.err.println("      on each zookeeper instance");
+    System.err.println("   -permittedZookeeperFailures <N>    Ignore first N failures when attempting to ");
+    System.err.println("      connect to individual zookeeper nodes in the ensemble");
     System.err.println("   -daemon        Continuous check at defined intervals.");
     System.err.println("   -interval <N>  Interval between checks (sec)");
     System.err.println("   -e             Use table/regionserver as regular expression");
@@ -889,17 +909,18 @@ public final class Canary implements Tool {
       monitor =
           new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
               (StdOutSink) this.sink, this.executor, this.regionServerAllRegions,
-              this.treatFailureAsError);
+              this.treatFailureAsError, this.permittedFailures);
     } else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) {
       monitor =
           new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
-              (StdOutSink) this.sink, this.executor, this.treatFailureAsError);
+              (StdOutSink) this.sink, this.executor, this.treatFailureAsError,
+              this.permittedFailures);
     } else {
       monitor =
           new RegionMonitor(connection, monitorTargets, this.useRegExp,
               (StdOutSink) this.sink, this.executor, this.writeSniffing,
               this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts,
-              this.configuredWriteTableTimeout);
+              this.configuredWriteTableTimeout, this.permittedFailures);
     }
     return monitor;
   }
@@ -916,6 +937,7 @@ public final class Canary implements Tool {
 
     protected boolean done = false;
     protected int errorCode = 0;
+    protected long allowedFailures = 0;
     protected Sink sink;
     protected ExecutorService executor;
 
@@ -932,7 +954,8 @@ public final class Canary implements Tool {
         return true;
       }
       if (treatFailureAsError &&
-          (sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) {
+          (sink.getReadFailureCount() > allowedFailures || sink.getWriteFailureCount() > allowedFailures)) {
+        LOG.error("Too many failures detected, treating failure as error, failing the Canary.");
         errorCode = FAILURE_EXIT_CODE;
         return true;
       }
@@ -945,7 +968,7 @@ public final class Canary implements Tool {
     }
 
     protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
-        ExecutorService executor, boolean treatFailureAsError) {
+        ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
       if (null == connection) throw new IllegalArgumentException("connection shall not be null");
 
       this.connection = connection;
@@ -954,6 +977,7 @@ public final class Canary implements Tool {
       this.treatFailureAsError = treatFailureAsError;
       this.sink = sink;
       this.executor = executor;
+      this.allowedFailures = allowedFailures;
     }
 
     @Override
@@ -995,8 +1019,9 @@ public final class Canary implements Tool {
 
     public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
         StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
-        boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout) {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout,
+        long allowedFailures) {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       Configuration conf = connection.getConfiguration();
       this.writeSniffing = writeSniffing;
       this.writeTableName = writeTableName;
@@ -1289,8 +1314,8 @@ public final class Canary implements Tool {
     private final int timeout;
 
     protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
-        StdOutSink sink, ExecutorService executor, boolean treatFailureAsError)  {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        StdOutSink sink, ExecutorService executor, boolean treatFailureAsError, long allowedFailures)  {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       Configuration configuration = connection.getConfiguration();
       znode =
           configuration.get(ZOOKEEPER_ZNODE_PARENT,
@@ -1303,6 +1328,11 @@ public final class Canary implements Tool {
       for (InetSocketAddress server : parser.getServerAddresses()) {
         hosts.add(server.toString());
       }
+      if (allowedFailures > (hosts.size() - 1) / 2) {
+        LOG.warn("Confirm allowable number of failed ZooKeeper nodes, as quorum will " +
+                        "already be lost. Setting of {} failures is unexpected for {} ensemble size.",
+                allowedFailures, hosts.size());
+      }
     }
 
     @Override public void run() {
@@ -1351,8 +1381,8 @@ public final class Canary implements Tool {
 
     public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
         StdOutSink sink, ExecutorService executor, boolean allRegions,
-        boolean treatFailureAsError) {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        boolean treatFailureAsError, long allowedFailures) {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       this.allRegions = allRegions;
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/5cca61c4/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
index e713a5a..cdbf426 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
@@ -90,20 +90,14 @@ public class TestCanaryTool {
 
   @Test
   public void testBasicZookeeperCanaryWorks() throws Exception {
-    Integer port =
-        Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
-    testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
-        "localhost:" + port + "/hbase");
-    ExecutorService executor = new ScheduledThreadPoolExecutor(2);
-    Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
-    Canary canary = new Canary(executor, sink);
-    String[] args = { "-t", "10000", "-zookeeper" };
-    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+    final String[] args = { "-t", "10000", "-zookeeper" };
+    testZookeeperCanaryWithArgs(args);
+  }
 
-    String baseZnode = testingUtility.getConfiguration()
-        .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
-    verify(sink, atLeastOnce())
-        .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+  @Test
+  public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception {
+    final String[] args = { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" };
+    testZookeeperCanaryWithArgs(args);
   }
 
   @Test
@@ -250,4 +244,19 @@ public class TestCanaryTool {
     assertEquals("verify no read error count", 0, canary.getReadFailures().size());
   }
 
+  private void testZookeeperCanaryWithArgs(String[] args) throws Exception {
+    Integer port =
+      Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
+    testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
+      "localhost:" + port + "/hbase");
+    ExecutorService executor = new ScheduledThreadPoolExecutor(2);
+    Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
+    Canary canary = new Canary(executor, sink);
+    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+
+    String baseZnode = testingUtility.getConfiguration()
+      .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
+    verify(sink, atLeastOnce())
+      .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+  }
 }


[2/3] hbase git commit: HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary

Posted by el...@apache.org.
HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary

Signed-off-by: Josh Elser <el...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/58365c8d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/58365c8d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/58365c8d

Branch: refs/heads/branch-2
Commit: 58365c8dd2fa8c913039d0d0249f6b54af252585
Parents: 68c5313
Author: David Manning <da...@salesforce.com>
Authored: Wed Aug 29 12:06:59 2018 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Mon Sep 3 19:35:28 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/tool/Canary.java    | 52 +++++++++++++++-----
 .../hadoop/hbase/tool/TestCanaryTool.java       | 35 ++++++++-----
 2 files changed, 63 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/58365c8d/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
index ae6324f..7a549fc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
@@ -586,6 +586,7 @@ public final class Canary implements Tool {
   private boolean failOnError = true;
   private boolean regionServerMode = false;
   private boolean zookeeperMode = false;
+  private long permittedFailures = 0;
   private boolean regionServerAllRegions = false;
   private boolean writeSniffing = false;
   private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
@@ -729,6 +730,19 @@ public final class Canary implements Tool {
             }
             this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal);
           }
+        } else if (cmd.equals("-permittedZookeeperFailures")) {
+          i++;
+
+          if (i == args.length) {
+            System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+            printUsageAndExit();
+          }
+          try {
+            this.permittedFailures = Long.parseLong(args[i]);
+          } catch (NumberFormatException e) {
+            System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+            printUsageAndExit();
+          }
         } else {
           // no options match
           System.err.println(cmd + " options is invalid.");
@@ -750,6 +764,10 @@ public final class Canary implements Tool {
         printUsageAndExit();
       }
     }
+    if (this.permittedFailures != 0 && !this.zookeeperMode) {
+      System.err.println("-permittedZookeeperFailures requires -zookeeper mode.");
+      printUsageAndExit();
+    }
     if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) {
       System.err.println("-readTableTimeouts can only be configured in region mode.");
       printUsageAndExit();
@@ -847,6 +865,8 @@ public final class Canary implements Tool {
     System.err.println("      only works in regionserver mode.");
     System.err.println("   -zookeeper    Tries to grab zookeeper.znode.parent ");
     System.err.println("      on each zookeeper instance");
+    System.err.println("   -permittedZookeeperFailures <N>    Ignore first N failures when attempting to ");
+    System.err.println("      connect to individual zookeeper nodes in the ensemble");
     System.err.println("   -daemon        Continuous check at defined intervals.");
     System.err.println("   -interval <N>  Interval between checks (sec)");
     System.err.println("   -e             Use table/regionserver as regular expression");
@@ -889,17 +909,18 @@ public final class Canary implements Tool {
       monitor =
           new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
               (StdOutSink) this.sink, this.executor, this.regionServerAllRegions,
-              this.treatFailureAsError);
+              this.treatFailureAsError, this.permittedFailures);
     } else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) {
       monitor =
           new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
-              (StdOutSink) this.sink, this.executor, this.treatFailureAsError);
+              (StdOutSink) this.sink, this.executor, this.treatFailureAsError,
+              this.permittedFailures);
     } else {
       monitor =
           new RegionMonitor(connection, monitorTargets, this.useRegExp,
               (StdOutSink) this.sink, this.executor, this.writeSniffing,
               this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts,
-              this.configuredWriteTableTimeout);
+              this.configuredWriteTableTimeout, this.permittedFailures);
     }
     return monitor;
   }
@@ -916,6 +937,7 @@ public final class Canary implements Tool {
 
     protected boolean done = false;
     protected int errorCode = 0;
+    protected long allowedFailures = 0;
     protected Sink sink;
     protected ExecutorService executor;
 
@@ -932,7 +954,8 @@ public final class Canary implements Tool {
         return true;
       }
       if (treatFailureAsError &&
-          (sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) {
+          (sink.getReadFailureCount() > allowedFailures || sink.getWriteFailureCount() > allowedFailures)) {
+        LOG.error("Too many failures detected, treating failure as error, failing the Canary.");
         errorCode = FAILURE_EXIT_CODE;
         return true;
       }
@@ -945,7 +968,7 @@ public final class Canary implements Tool {
     }
 
     protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
-        ExecutorService executor, boolean treatFailureAsError) {
+        ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
       if (null == connection) throw new IllegalArgumentException("connection shall not be null");
 
       this.connection = connection;
@@ -954,6 +977,7 @@ public final class Canary implements Tool {
       this.treatFailureAsError = treatFailureAsError;
       this.sink = sink;
       this.executor = executor;
+      this.allowedFailures = allowedFailures;
     }
 
     @Override
@@ -995,8 +1019,9 @@ public final class Canary implements Tool {
 
     public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
         StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
-        boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout) {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts, long configuredWriteTableTimeout,
+        long allowedFailures) {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       Configuration conf = connection.getConfiguration();
       this.writeSniffing = writeSniffing;
       this.writeTableName = writeTableName;
@@ -1289,8 +1314,8 @@ public final class Canary implements Tool {
     private final int timeout;
 
     protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
-        StdOutSink sink, ExecutorService executor, boolean treatFailureAsError)  {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        StdOutSink sink, ExecutorService executor, boolean treatFailureAsError, long allowedFailures)  {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       Configuration configuration = connection.getConfiguration();
       znode =
           configuration.get(ZOOKEEPER_ZNODE_PARENT,
@@ -1303,6 +1328,11 @@ public final class Canary implements Tool {
       for (InetSocketAddress server : parser.getServerAddresses()) {
         hosts.add(server.toString());
       }
+      if (allowedFailures > (hosts.size() - 1) / 2) {
+        LOG.warn("Confirm allowable number of failed ZooKeeper nodes, as quorum will " +
+                        "already be lost. Setting of {} failures is unexpected for {} ensemble size.",
+                allowedFailures, hosts.size());
+      }
     }
 
     @Override public void run() {
@@ -1351,8 +1381,8 @@ public final class Canary implements Tool {
 
     public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
         StdOutSink sink, ExecutorService executor, boolean allRegions,
-        boolean treatFailureAsError) {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        boolean treatFailureAsError, long allowedFailures) {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       this.allRegions = allRegions;
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/58365c8d/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
index e713a5a..cdbf426 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
@@ -90,20 +90,14 @@ public class TestCanaryTool {
 
   @Test
   public void testBasicZookeeperCanaryWorks() throws Exception {
-    Integer port =
-        Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
-    testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
-        "localhost:" + port + "/hbase");
-    ExecutorService executor = new ScheduledThreadPoolExecutor(2);
-    Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
-    Canary canary = new Canary(executor, sink);
-    String[] args = { "-t", "10000", "-zookeeper" };
-    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+    final String[] args = { "-t", "10000", "-zookeeper" };
+    testZookeeperCanaryWithArgs(args);
+  }
 
-    String baseZnode = testingUtility.getConfiguration()
-        .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
-    verify(sink, atLeastOnce())
-        .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+  @Test
+  public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception {
+    final String[] args = { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" };
+    testZookeeperCanaryWithArgs(args);
   }
 
   @Test
@@ -250,4 +244,19 @@ public class TestCanaryTool {
     assertEquals("verify no read error count", 0, canary.getReadFailures().size());
   }
 
+  private void testZookeeperCanaryWithArgs(String[] args) throws Exception {
+    Integer port =
+      Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
+    testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
+      "localhost:" + port + "/hbase");
+    ExecutorService executor = new ScheduledThreadPoolExecutor(2);
+    Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
+    Canary canary = new Canary(executor, sink);
+    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+
+    String baseZnode = testingUtility.getConfiguration()
+      .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
+    verify(sink, atLeastOnce())
+      .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+  }
 }


[3/3] hbase git commit: HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary

Posted by el...@apache.org.
HBASE-21126 Configurable number of allowed failures for ZooKeeper Canary

Signed-off-by: Josh Elser <el...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/484a1d80
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/484a1d80
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/484a1d80

Branch: refs/heads/branch-1
Commit: 484a1d8049de0a354bc7eeeaf0fa2f8b10b72319
Parents: 329a717
Author: David Manning <da...@salesforce.com>
Authored: Fri Aug 31 18:32:15 2018 -0700
Committer: Josh Elser <el...@apache.org>
Committed: Mon Sep 3 19:44:17 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/tool/Canary.java    | 51 +++++++++++++++-----
 .../hadoop/hbase/tool/TestCanaryTool.java       | 35 +++++++++-----
 2 files changed, 62 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/484a1d80/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
index 1d119a2..e2d5919 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/Canary.java
@@ -580,6 +580,7 @@ public final class Canary implements Tool {
   private boolean failOnError = true;
   private boolean regionServerMode = false;
   private boolean zookeeperMode = false;
+  private long permittedFailures = 0;
   private boolean regionServerAllRegions = false;
   private boolean writeSniffing = false;
   private long configuredWriteTableTimeout = DEFAULT_TIMEOUT;
@@ -723,6 +724,19 @@ public final class Canary implements Tool {
             }
             this.configuredReadTableTimeouts.put(nameTimeout[0], timeoutVal);
           }
+        } else if (cmd.equals("-permittedZookeeperFailures")) {
+          i++;
+
+          if (i == args.length) {
+            System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+            printUsageAndExit();
+          }
+          try {
+            this.permittedFailures = Long.parseLong(args[i]);
+          } catch (NumberFormatException e) {
+            System.err.println("-permittedZookeeperFailures needs a numeric value argument.");
+            printUsageAndExit();
+          }
         } else {
           // no options match
           System.err.println(cmd + " options is invalid.");
@@ -744,6 +758,10 @@ public final class Canary implements Tool {
         printUsageAndExit();
       }
     }
+    if (this.permittedFailures != 0 && !this.zookeeperMode) {
+      System.err.println("-permittedZookeeperFailures requires -zookeeper mode.");
+      printUsageAndExit();
+    }
     if (!this.configuredReadTableTimeouts.isEmpty() && (this.regionServerMode || this.zookeeperMode)) {
       System.err.println("-readTableTimeouts can only be configured in region mode.");
       printUsageAndExit();
@@ -842,6 +860,8 @@ public final class Canary implements Tool {
     System.err.println("      only works in regionserver mode.");
     System.err.println("   -zookeeper    Tries to grab zookeeper.znode.parent ");
     System.err.println("      on each zookeeper instance");
+    System.err.println("   -permittedZookeeperFailures <N>    Ignore first N failures when attempting to ");
+    System.err.println("      connect to individual zookeeper nodes in the ensemble");
     System.err.println("   -daemon        Continuous check at defined intervals.");
     System.err.println("   -interval <N>  Interval between checks (sec)");
     System.err.println("   -e             Use table/regionserver as regular expression");
@@ -884,17 +904,18 @@ public final class Canary implements Tool {
       monitor =
           new RegionServerMonitor(connection, monitorTargets, this.useRegExp,
               (StdOutSink) this.sink, this.executor, this.regionServerAllRegions,
-              this.treatFailureAsError);
+              this.treatFailureAsError, this.permittedFailures);
     } else if (this.sink instanceof ZookeeperStdOutSink || this.zookeeperMode) {
       monitor =
           new ZookeeperMonitor(connection, monitorTargets, this.useRegExp,
-              (StdOutSink) this.sink, this.executor, this.treatFailureAsError);
+              (StdOutSink) this.sink, this.executor, this.treatFailureAsError,
+              this.permittedFailures);
     } else {
       monitor =
           new RegionMonitor(connection, monitorTargets, this.useRegExp,
               (StdOutSink) this.sink, this.executor, this.writeSniffing,
               this.writeTableName, this.treatFailureAsError, this.configuredReadTableTimeouts,
-              this.configuredWriteTableTimeout);
+              this.configuredWriteTableTimeout, this.permittedFailures);
     }
     return monitor;
   }
@@ -911,6 +932,7 @@ public final class Canary implements Tool {
 
     protected boolean done = false;
     protected int errorCode = 0;
+    protected long allowedFailures = 0;
     protected Sink sink;
     protected ExecutorService executor;
 
@@ -927,7 +949,8 @@ public final class Canary implements Tool {
         return true;
       }
       if (treatFailureAsError &&
-          (sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) {
+          (sink.getReadFailureCount() > allowedFailures || sink.getWriteFailureCount() > allowedFailures)) {
+        LOG.error("Too many failures detected, treating failure as error, failing the Canary.");
         errorCode = FAILURE_EXIT_CODE;
         return true;
       }
@@ -940,7 +963,7 @@ public final class Canary implements Tool {
     }
 
     protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink,
-        ExecutorService executor, boolean treatFailureAsError) {
+        ExecutorService executor, boolean treatFailureAsError, long allowedFailures) {
       if (null == connection) throw new IllegalArgumentException("connection shall not be null");
 
       this.connection = connection;
@@ -949,6 +972,7 @@ public final class Canary implements Tool {
       this.treatFailureAsError = treatFailureAsError;
       this.sink = sink;
       this.executor = executor;
+      this.allowedFailures = allowedFailures;
     }
 
     @Override
@@ -991,8 +1015,8 @@ public final class Canary implements Tool {
     public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
         StdOutSink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName,
         boolean treatFailureAsError, HashMap<String, Long> configuredReadTableTimeouts,
-        long configuredWriteTableTimeout) {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        long configuredWriteTableTimeout, long allowedFailures) {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       Configuration conf = connection.getConfiguration();
       this.writeSniffing = writeSniffing;
       this.writeTableName = writeTableName;
@@ -1287,8 +1311,8 @@ public final class Canary implements Tool {
     private final int timeout;
 
     protected ZookeeperMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
-        StdOutSink sink, ExecutorService executor, boolean treatFailureAsError)  {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        StdOutSink sink, ExecutorService executor, boolean treatFailureAsError, long allowedFailures)  {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       Configuration configuration = connection.getConfiguration();
       znode =
           configuration.get(ZOOKEEPER_ZNODE_PARENT,
@@ -1301,6 +1325,11 @@ public final class Canary implements Tool {
       for (InetSocketAddress server : parser.getServerAddresses()) {
         hosts.add(server.toString());
       }
+      if (allowedFailures > (hosts.size() - 1) / 2) {
+        LOG.warn(String.format("Confirm allowable number of failed ZooKeeper nodes, as quorum will " +
+                        "already be lost. Setting of %d failures is unexpected for %d ensemble size.",
+                allowedFailures, hosts.size()));
+      }
     }
 
     @Override public void run() {
@@ -1349,8 +1378,8 @@ public final class Canary implements Tool {
 
     public RegionServerMonitor(Connection connection, String[] monitorTargets, boolean useRegExp,
         StdOutSink sink, ExecutorService executor, boolean allRegions,
-        boolean treatFailureAsError) {
-      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError);
+        boolean treatFailureAsError, long allowedFailures) {
+      super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError, allowedFailures);
       this.allRegions = allRegions;
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/484a1d80/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
index 2206a16..976f02a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryTool.java
@@ -82,20 +82,14 @@ public class TestCanaryTool {
 
   @Test
   public void testBasicZookeeperCanaryWorks() throws Exception {
-    Integer port =
-        Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
-    testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
-        "localhost:" + port + "/hbase");
-    ExecutorService executor = new ScheduledThreadPoolExecutor(2);
-    Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
-    Canary canary = new Canary(executor, sink);
-    String[] args = { "-t", "10000", "-zookeeper" };
-    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+    final String[] args = { "-t", "10000", "-zookeeper" };
+    testZookeeperCanaryWithArgs(args);
+  }
 
-    String baseZnode = testingUtility.getConfiguration()
-        .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
-    verify(sink, atLeastOnce())
-        .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+  @Test
+  public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception {
+    final String[] args = { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" };
+    testZookeeperCanaryWithArgs(args);
   }
 
   @Test
@@ -238,4 +232,19 @@ public class TestCanaryTool {
     assertEquals("verify no read error count", 0, canary.getReadFailures().size());
   }
 
+  private void testZookeeperCanaryWithArgs(String[] args) throws Exception {
+    Integer port =
+      Iterables.getOnlyElement(testingUtility.getZkCluster().getClientPortList(), null);
+    testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM,
+      "localhost:" + port + "/hbase");
+    ExecutorService executor = new ScheduledThreadPoolExecutor(2);
+    Canary.ZookeeperStdOutSink sink = spy(new Canary.ZookeeperStdOutSink());
+    Canary canary = new Canary(executor, sink);
+    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
+
+    String baseZnode = testingUtility.getConfiguration()
+      .get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
+    verify(sink, atLeastOnce())
+      .publishReadTiming(eq(baseZnode), eq("localhost:" + port), anyLong());
+  }
 }
\ No newline at end of file