You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2015/05/13 06:29:52 UTC

[1/4] hbase git commit: HBASE-13618 ReplicationSource is too eager to remove sinks.

Repository: hbase
Updated Branches:
  refs/heads/branch-1 e42e7ed11 -> 34327408c
  refs/heads/branch-1.0 1b319ec2e -> 2ea7394c9
  refs/heads/branch-1.1 c349ff19e -> 45c91e214
  refs/heads/master befb46c4d -> 220ac141b


HBASE-13618 ReplicationSource is too eager to remove sinks.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/220ac141
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/220ac141
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/220ac141

Branch: refs/heads/master
Commit: 220ac141bfcea7798faa5f73295ec61d8b173af9
Parents: befb46c
Author: Lars Hofhansl <la...@apache.org>
Authored: Tue May 12 21:27:55 2015 -0700
Committer: Lars Hofhansl <la...@apache.org>
Committed: Tue May 12 21:27:55 2015 -0700

----------------------------------------------------------------------
 .../HBaseInterClusterReplicationEndpoint.java   |  1 +
 .../regionserver/ReplicationSinkManager.java    | 10 +++++++
 .../TestReplicationSinkManager.java             | 30 ++++++++++++++++++--
 3 files changed, 39 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/220ac141/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
index 1a53c24..884bce1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
@@ -163,6 +163,7 @@ public class HBaseInterClusterReplicationEndpoint extends HBaseReplicationEndpoi
 
         // update metrics
         this.metrics.setAgeOfLastShippedOp(entries.get(entries.size()-1).getKey().getWriteTime());
+        replicationSinkMgr.reportSinkSuccess(sinkPeer);
         return true;
 
       } catch (IOException ioe) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/220ac141/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
index b186e08..76fa6c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
@@ -140,6 +140,16 @@ public class ReplicationSinkManager {
     }
   }
 
+  /**
+   * Report that a {@code SinkPeer} successfully replicated a chunk of data.
+   *
+   * @param sinkPeer
+   *          The SinkPeer that had a failed replication attempt on it
+   */
+  public void reportSinkSuccess(SinkPeer sinkPeer) {
+    badReportCounts.remove(sinkPeer.getServerName());
+  }
+
   void chooseSinks() {
     List<ServerName> slaveAddresses = endpoint.getRegionServers();
     Collections.shuffle(slaveAddresses, random);

http://git-wip-us.apache.org/repos/asf/hbase/blob/220ac141/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
index a2ea258..57c3196 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
@@ -111,7 +111,7 @@ public class TestReplicationSinkManager {
   @Test
   public void testReportBadSink_PastThreshold() {
     List<ServerName> serverNames = Lists.newArrayList();
-    for (int i = 0; i < 20; i++) {
+    for (int i = 0; i < 30; i++) {
       serverNames.add(mock(ServerName.class));
     }
     when(replicationEndpoint.getRegionServers())
@@ -120,18 +120,44 @@ public class TestReplicationSinkManager {
 
     sinkManager.chooseSinks();
     // Sanity check
-    assertEquals(2, sinkManager.getSinks().size());
+    assertEquals(3, sinkManager.getSinks().size());
 
     ServerName serverName = sinkManager.getSinks().get(0);
 
     SinkPeer sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
 
+    sinkManager.reportSinkSuccess(sinkPeer); // has no effect, counter does not go negative
     for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD; i++) {
       sinkManager.reportBadSink(sinkPeer);
     }
 
     // Reporting a bad sink more than the threshold count should remove it
     // from the list of potential sinks
+    assertEquals(2, sinkManager.getSinks().size());
+
+    //
+    // now try a sink that has some successes
+    //
+    serverName = sinkManager.getSinks().get(0);
+
+    sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-1; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    sinkManager.reportSinkSuccess(sinkPeer); // one success
+    sinkManager.reportBadSink(sinkPeer);
+
+    // did not remove the sink, since we had one successful try
+    assertEquals(2, sinkManager.getSinks().size());
+
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-2; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    // still not remove, since the success reset the counter
+    assertEquals(2, sinkManager.getSinks().size());
+
+    sinkManager.reportBadSink(sinkPeer);
+    // but we exhausted the tries
     assertEquals(1, sinkManager.getSinks().size());
   }
 


[2/4] hbase git commit: HBASE-13618 ReplicationSource is too eager to remove sinks.

Posted by la...@apache.org.
HBASE-13618 ReplicationSource is too eager to remove sinks.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/34327408
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/34327408
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/34327408

Branch: refs/heads/branch-1
Commit: 34327408c1fa5080a2f3c2ae45784cf0af911c4c
Parents: e42e7ed
Author: Lars Hofhansl <la...@apache.org>
Authored: Tue May 12 21:27:55 2015 -0700
Committer: Lars Hofhansl <la...@apache.org>
Committed: Tue May 12 21:28:19 2015 -0700

----------------------------------------------------------------------
 .../HBaseInterClusterReplicationEndpoint.java   |  1 +
 .../regionserver/ReplicationSinkManager.java    | 10 +++++++
 .../TestReplicationSinkManager.java             | 30 ++++++++++++++++++--
 3 files changed, 39 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/34327408/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
index 95c253d..735ad55 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
@@ -163,6 +163,7 @@ public class HBaseInterClusterReplicationEndpoint extends HBaseReplicationEndpoi
 
         // update metrics
         this.metrics.setAgeOfLastShippedOp(entries.get(entries.size()-1).getKey().getWriteTime());
+        replicationSinkMgr.reportSinkSuccess(sinkPeer);
         return true;
 
       } catch (IOException ioe) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/34327408/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
index b186e08..76fa6c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
@@ -140,6 +140,16 @@ public class ReplicationSinkManager {
     }
   }
 
+  /**
+   * Report that a {@code SinkPeer} successfully replicated a chunk of data.
+   *
+   * @param sinkPeer
+   *          The SinkPeer that had a failed replication attempt on it
+   */
+  public void reportSinkSuccess(SinkPeer sinkPeer) {
+    badReportCounts.remove(sinkPeer.getServerName());
+  }
+
   void chooseSinks() {
     List<ServerName> slaveAddresses = endpoint.getRegionServers();
     Collections.shuffle(slaveAddresses, random);

http://git-wip-us.apache.org/repos/asf/hbase/blob/34327408/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
index d725d21..4eb7f51 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
@@ -110,7 +110,7 @@ public class TestReplicationSinkManager {
   @Test
   public void testReportBadSink_PastThreshold() {
     List<ServerName> serverNames = Lists.newArrayList();
-    for (int i = 0; i < 20; i++) {
+    for (int i = 0; i < 30; i++) {
       serverNames.add(mock(ServerName.class));
     }
     when(replicationEndpoint.getRegionServers())
@@ -119,18 +119,44 @@ public class TestReplicationSinkManager {
 
     sinkManager.chooseSinks();
     // Sanity check
-    assertEquals(2, sinkManager.getSinks().size());
+    assertEquals(3, sinkManager.getSinks().size());
 
     ServerName serverName = sinkManager.getSinks().get(0);
 
     SinkPeer sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
 
+    sinkManager.reportSinkSuccess(sinkPeer); // has no effect, counter does not go negative
     for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD; i++) {
       sinkManager.reportBadSink(sinkPeer);
     }
 
     // Reporting a bad sink more than the threshold count should remove it
     // from the list of potential sinks
+    assertEquals(2, sinkManager.getSinks().size());
+
+    //
+    // now try a sink that has some successes
+    //
+    serverName = sinkManager.getSinks().get(0);
+
+    sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-1; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    sinkManager.reportSinkSuccess(sinkPeer); // one success
+    sinkManager.reportBadSink(sinkPeer);
+
+    // did not remove the sink, since we had one successful try
+    assertEquals(2, sinkManager.getSinks().size());
+
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-2; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    // still not remove, since the success reset the counter
+    assertEquals(2, sinkManager.getSinks().size());
+
+    sinkManager.reportBadSink(sinkPeer);
+    // but we exhausted the tries
     assertEquals(1, sinkManager.getSinks().size());
   }
 


[4/4] hbase git commit: HBASE-13618 ReplicationSource is too eager to remove sinks.

Posted by la...@apache.org.
HBASE-13618 ReplicationSource is too eager to remove sinks.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2ea7394c
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2ea7394c
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2ea7394c

Branch: refs/heads/branch-1.0
Commit: 2ea7394c91db5e48b1164119f8d94954f545f73b
Parents: 1b319ec
Author: Lars Hofhansl <la...@apache.org>
Authored: Tue May 12 21:27:55 2015 -0700
Committer: Lars Hofhansl <la...@apache.org>
Committed: Tue May 12 21:30:10 2015 -0700

----------------------------------------------------------------------
 .../HBaseInterClusterReplicationEndpoint.java   |  1 +
 .../regionserver/ReplicationSinkManager.java    | 10 +++++++
 .../TestReplicationSinkManager.java             | 30 ++++++++++++++++++--
 3 files changed, 39 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/2ea7394c/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
index 95c253d..735ad55 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
@@ -163,6 +163,7 @@ public class HBaseInterClusterReplicationEndpoint extends HBaseReplicationEndpoi
 
         // update metrics
         this.metrics.setAgeOfLastShippedOp(entries.get(entries.size()-1).getKey().getWriteTime());
+        replicationSinkMgr.reportSinkSuccess(sinkPeer);
         return true;
 
       } catch (IOException ioe) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/2ea7394c/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
index b186e08..76fa6c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
@@ -140,6 +140,16 @@ public class ReplicationSinkManager {
     }
   }
 
+  /**
+   * Report that a {@code SinkPeer} successfully replicated a chunk of data.
+   *
+   * @param sinkPeer
+   *          The SinkPeer that had a failed replication attempt on it
+   */
+  public void reportSinkSuccess(SinkPeer sinkPeer) {
+    badReportCounts.remove(sinkPeer.getServerName());
+  }
+
   void chooseSinks() {
     List<ServerName> slaveAddresses = endpoint.getRegionServers();
     Collections.shuffle(slaveAddresses, random);

http://git-wip-us.apache.org/repos/asf/hbase/blob/2ea7394c/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
index d725d21..4eb7f51 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
@@ -110,7 +110,7 @@ public class TestReplicationSinkManager {
   @Test
   public void testReportBadSink_PastThreshold() {
     List<ServerName> serverNames = Lists.newArrayList();
-    for (int i = 0; i < 20; i++) {
+    for (int i = 0; i < 30; i++) {
       serverNames.add(mock(ServerName.class));
     }
     when(replicationEndpoint.getRegionServers())
@@ -119,18 +119,44 @@ public class TestReplicationSinkManager {
 
     sinkManager.chooseSinks();
     // Sanity check
-    assertEquals(2, sinkManager.getSinks().size());
+    assertEquals(3, sinkManager.getSinks().size());
 
     ServerName serverName = sinkManager.getSinks().get(0);
 
     SinkPeer sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
 
+    sinkManager.reportSinkSuccess(sinkPeer); // has no effect, counter does not go negative
     for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD; i++) {
       sinkManager.reportBadSink(sinkPeer);
     }
 
     // Reporting a bad sink more than the threshold count should remove it
     // from the list of potential sinks
+    assertEquals(2, sinkManager.getSinks().size());
+
+    //
+    // now try a sink that has some successes
+    //
+    serverName = sinkManager.getSinks().get(0);
+
+    sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-1; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    sinkManager.reportSinkSuccess(sinkPeer); // one success
+    sinkManager.reportBadSink(sinkPeer);
+
+    // did not remove the sink, since we had one successful try
+    assertEquals(2, sinkManager.getSinks().size());
+
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-2; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    // still not remove, since the success reset the counter
+    assertEquals(2, sinkManager.getSinks().size());
+
+    sinkManager.reportBadSink(sinkPeer);
+    // but we exhausted the tries
     assertEquals(1, sinkManager.getSinks().size());
   }
 


[3/4] hbase git commit: HBASE-13618 ReplicationSource is too eager to remove sinks.

Posted by la...@apache.org.
HBASE-13618 ReplicationSource is too eager to remove sinks.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/45c91e21
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/45c91e21
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/45c91e21

Branch: refs/heads/branch-1.1
Commit: 45c91e2149e7ae4e2f594ddd91c1cfd6acc3037b
Parents: c349ff1
Author: Lars Hofhansl <la...@apache.org>
Authored: Tue May 12 21:27:55 2015 -0700
Committer: Lars Hofhansl <la...@apache.org>
Committed: Tue May 12 21:29:27 2015 -0700

----------------------------------------------------------------------
 .../HBaseInterClusterReplicationEndpoint.java   |  1 +
 .../regionserver/ReplicationSinkManager.java    | 10 +++++++
 .../TestReplicationSinkManager.java             | 30 ++++++++++++++++++--
 3 files changed, 39 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/45c91e21/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
index 95c253d..735ad55 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
@@ -163,6 +163,7 @@ public class HBaseInterClusterReplicationEndpoint extends HBaseReplicationEndpoi
 
         // update metrics
         this.metrics.setAgeOfLastShippedOp(entries.get(entries.size()-1).getKey().getWriteTime());
+        replicationSinkMgr.reportSinkSuccess(sinkPeer);
         return true;
 
       } catch (IOException ioe) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/45c91e21/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
index b186e08..76fa6c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSinkManager.java
@@ -140,6 +140,16 @@ public class ReplicationSinkManager {
     }
   }
 
+  /**
+   * Report that a {@code SinkPeer} successfully replicated a chunk of data.
+   *
+   * @param sinkPeer
+   *          The SinkPeer that had a failed replication attempt on it
+   */
+  public void reportSinkSuccess(SinkPeer sinkPeer) {
+    badReportCounts.remove(sinkPeer.getServerName());
+  }
+
   void chooseSinks() {
     List<ServerName> slaveAddresses = endpoint.getRegionServers();
     Collections.shuffle(slaveAddresses, random);

http://git-wip-us.apache.org/repos/asf/hbase/blob/45c91e21/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
index d725d21..4eb7f51 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSinkManager.java
@@ -110,7 +110,7 @@ public class TestReplicationSinkManager {
   @Test
   public void testReportBadSink_PastThreshold() {
     List<ServerName> serverNames = Lists.newArrayList();
-    for (int i = 0; i < 20; i++) {
+    for (int i = 0; i < 30; i++) {
       serverNames.add(mock(ServerName.class));
     }
     when(replicationEndpoint.getRegionServers())
@@ -119,18 +119,44 @@ public class TestReplicationSinkManager {
 
     sinkManager.chooseSinks();
     // Sanity check
-    assertEquals(2, sinkManager.getSinks().size());
+    assertEquals(3, sinkManager.getSinks().size());
 
     ServerName serverName = sinkManager.getSinks().get(0);
 
     SinkPeer sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
 
+    sinkManager.reportSinkSuccess(sinkPeer); // has no effect, counter does not go negative
     for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD; i++) {
       sinkManager.reportBadSink(sinkPeer);
     }
 
     // Reporting a bad sink more than the threshold count should remove it
     // from the list of potential sinks
+    assertEquals(2, sinkManager.getSinks().size());
+
+    //
+    // now try a sink that has some successes
+    //
+    serverName = sinkManager.getSinks().get(0);
+
+    sinkPeer = new SinkPeer(serverName, mock(AdminService.BlockingInterface.class));
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-1; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    sinkManager.reportSinkSuccess(sinkPeer); // one success
+    sinkManager.reportBadSink(sinkPeer);
+
+    // did not remove the sink, since we had one successful try
+    assertEquals(2, sinkManager.getSinks().size());
+
+    for (int i = 0; i <= ReplicationSinkManager.DEFAULT_BAD_SINK_THRESHOLD-2; i++) {
+      sinkManager.reportBadSink(sinkPeer);
+    }
+    // still not remove, since the success reset the counter
+    assertEquals(2, sinkManager.getSinks().size());
+
+    sinkManager.reportBadSink(sinkPeer);
+    // but we exhausted the tries
     assertEquals(1, sinkManager.getSinks().size());
   }