You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2013/05/15 17:09:37 UTC

svn commit: r1482890 - in /hbase/trunk/hbase-client/src: main/java/org/apache/hadoop/hbase/client/ main/java/org/apache/hadoop/hbase/exceptions/ test/java/org/apache/hadoop/hbase/client/

Author: stack
Date: Wed May 15 15:09:37 2013
New Revision: 1482890

URL: http://svn.apache.org/r1482890
Log:
HBASE-8531 TestZooKeeper fails in trunk/0.95 builds

Modified:
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/RegionServerStoppedException.java
    hbase/trunk/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java?rev=1482890&r1=1482889&r2=1482890&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java Wed May 15 15:09:37 2013
@@ -285,11 +285,13 @@ public class ClientScanner extends Abstr
             values = callable.withRetries();
             retryAfterOutOfOrderException  = true;
           } catch (DoNotRetryIOException e) {
+            // DNRIOEs are thrown to make us break out of retries.  Some types of DNRIOEs want us
+            // to reset the scanner and come back in again.
             if (e instanceof UnknownScannerException) {
               long timeout = lastNext + scannerTimeout;
-              // If we are over the timeout, throw this exception to the client
-              // Else, it's because the region moved and we used the old id
-              // against the new region server; reset the scanner.
+              // If we are over the timeout, throw this exception to the client wrapped in
+              // a ScannerTimeoutException. Else, it's because the region moved and we used the old
+              // id against the new region server; reset the scanner.
               if (timeout < System.currentTimeMillis()) {
                 long elapsed = System.currentTimeMillis() - lastNext;
                 ScannerTimeoutException ex = new ScannerTimeoutException(
@@ -299,16 +301,20 @@ public class ClientScanner extends Abstr
                 throw ex;
               }
             } else {
+              // If exception is any but the list below throw it back to the client; else setup
+              // the scanner and retry.
               Throwable cause = e.getCause();
-              if ((cause == null || (!(cause instanceof NotServingRegionException)
-                  && !(cause instanceof RegionServerStoppedException))) &&
-                  !(e instanceof RegionServerStoppedException) &&
-                  !(e instanceof OutOfOrderScannerNextException)) {
+              if ((cause != null && cause instanceof NotServingRegionException) ||
+                (cause != null && cause instanceof RegionServerStoppedException) ||
+                e instanceof OutOfOrderScannerNextException) {
+                // Pass
+                // It is easier writing the if loop test as list of what is allowed rather than
+                // as a list of what is not allowed... so if in here, it means we do not throw.
+              } else {
                 throw e;
               }
             }
-            // Else, its signal from depths of ScannerCallable that we got an
-            // NSRE on a next and that we need to reset the scanner.
+            // Else, its signal from depths of ScannerCallable that we need to reset the scanner.
             if (this.lastResult != null) {
               this.scan.setStartRow(this.lastResult.getRow());
               // Skip first row returned.  We already let it out on previous
@@ -319,13 +325,17 @@ public class ClientScanner extends Abstr
               if (retryAfterOutOfOrderException) {
                 retryAfterOutOfOrderException = false;
               } else {
+                // TODO: Why wrap this in a DNRIOE when it already is a DNRIOE?
                 throw new DoNotRetryIOException("Failed after retry of " +
                   "OutOfOrderScannerNextException: was there a rpc timeout?", e);
               }
             }
-            // Clear region
+            // Clear region.
             this.currentRegion = null;
+            // Set this to zero so we don't try and do an rpc and close on remote server when
+            // the exception we got was UnknownScanner or the Server is going down.
             callable = null;
+            // This continue will take us to while at end of loop where we will set up new scanner.
             continue;
           }
           long currentTime = System.currentTimeMillis();

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java?rev=1482890&r1=1482889&r2=1482890&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java Wed May 15 15:09:37 2013
@@ -140,8 +140,7 @@ public class ScannerCallable extends Ser
         ScanRequest request = null;
         try {
           incRPCcallsMetrics();
-          request =
-            RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq);
+          request = RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq);
           ScanResponse response = null;
           try {
             response = stub.scan(null, request);
@@ -194,14 +193,24 @@ public class ScannerCallable extends Ser
               LOG.info("Failed to relocate region", t);
             }
           }
+          // The below convertion of exceptions into DoNotRetryExceptions is a little strange.
+          // Why not just have these exceptions implment DNRIOE you ask?  Well, usually we want
+          // ServerCallable#withRetries to just retry when it gets these exceptions.  In here in
+          // a scan when doing a next in particular, we want to break out and get the scanner to
+          // reset itself up again.  Throwing a DNRIOE is how we signal this to happen (its ugly,
+          // yeah and hard to follow and in need of a refactor).
           if (ioe instanceof NotServingRegionException) {
             // Throw a DNRE so that we break out of cycle of calling NSRE
             // when what we need is to open scanner against new location.
-            // Attach NSRE to signal client that it needs to resetup scanner.
+            // Attach NSRE to signal client that it needs to re-setup scanner.
             if (this.scanMetrics != null) {
               this.scanMetrics.countOfNSRE.incrementAndGet();
             }
             throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
+          } else if (ioe instanceof RegionServerStoppedException) {
+            // Throw a DNRE so that we break out of cycle of the retries and instead go and
+            // open scanner against new location.
+            throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
           } else {
             // The outer layers will retry
             throw ioe;

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/RegionServerStoppedException.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/RegionServerStoppedException.java?rev=1482890&r1=1482889&r2=1482890&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/RegionServerStoppedException.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/RegionServerStoppedException.java Wed May 15 15:09:37 2013
@@ -18,6 +18,8 @@
  */
 package org.apache.hadoop.hbase.exceptions;
 
+import java.io.IOException;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 
 /**
@@ -25,8 +27,7 @@ import org.apache.hadoop.classification.
  */
 @SuppressWarnings("serial")
 @InterfaceAudience.Private
-public class RegionServerStoppedException extends DoNotRetryIOException {
-
+public class RegionServerStoppedException extends IOException {
   public RegionServerStoppedException(String s) {
     super(s);
   }

Modified: hbase/trunk/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java?rev=1482890&r1=1482889&r2=1482890&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java (original)
+++ hbase/trunk/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java Wed May 15 15:09:37 2013
@@ -52,7 +52,6 @@ public class TestClientNoCluster {
     // Run my HConnection overrides.  Use my little HConnectionImplementation below which
     // allows me insert mocks and also use my Registry below rather than the default zk based
     // one so tests run faster and don't have zk dependency.
-    this.conf.set("hbase.client.connection.impl", NoClusterConnection.class.getName());
     this.conf.set("hbase.client.registry.impl", SimpleRegistry.class.getName());
   }
 
@@ -90,11 +89,15 @@ public class TestClientNoCluster {
 
   @Test
   public void testDoNotRetryMetaScanner() throws IOException {
+    this.conf.set("hbase.client.connection.impl",
+      RegionServerStoppedOnScannerOpenConnection.class.getName());
     MetaScanner.metaScan(this.conf, null);
   }
 
   @Test
-  public void testDoNotRetryOnScan() throws IOException {
+  public void testDoNotRetryOnScanNext() throws IOException {
+    this.conf.set("hbase.client.connection.impl",
+      RegionServerStoppedOnScannerOpenConnection.class.getName());
     // Go against meta else we will try to find first region for the table on construction which
     // means we'll have to do a bunch more mocking.  Tests that go against meta only should be
     // good for a bit of testing.
@@ -111,13 +114,67 @@ public class TestClientNoCluster {
     }
   }
 
+  @Test
+  public void testRegionServerStoppedOnScannerOpen() throws IOException {
+    this.conf.set("hbase.client.connection.impl",
+      RegionServerStoppedOnScannerOpenConnection.class.getName());
+    // Go against meta else we will try to find first region for the table on construction which
+    // means we'll have to do a bunch more mocking.  Tests that go against meta only should be
+    // good for a bit of testing.
+    HTable table = new HTable(this.conf, HConstants.META_TABLE_NAME);
+    ResultScanner scanner = table.getScanner(HConstants.CATALOG_FAMILY);
+    try {
+      Result result = null;
+      while ((result = scanner.next()) != null) {
+        LOG.info(result);
+      }
+    } finally {
+      scanner.close();
+      table.close();
+    }
+  }
+
+  /**
+   * Override to shutdown going to zookeeper for cluster id and meta location.
+   */
+  static class ScanOpenNextThenExceptionThenRecoverConnection
+  extends HConnectionManager.HConnectionImplementation {
+    final ClientService.BlockingInterface stub;
+
+    ScanOpenNextThenExceptionThenRecoverConnection(Configuration conf,
+        boolean managed) throws IOException {
+      super(conf, managed);
+      // Mock up my stub so open scanner returns a scanner id and then on next, we throw
+      // exceptions for three times and then after that, we return no more to scan.
+      this.stub = Mockito.mock(ClientService.BlockingInterface.class);
+      long sid = 12345L;
+      try {
+        Mockito.when(stub.scan((RpcController)Mockito.any(),
+            (ClientProtos.ScanRequest)Mockito.any())).
+          thenReturn(ClientProtos.ScanResponse.newBuilder().setScannerId(sid).build()).
+          thenThrow(new ServiceException(new RegionServerStoppedException("From Mockito"))).
+          thenReturn(ClientProtos.ScanResponse.newBuilder().setScannerId(sid).
+            setMoreResults(false).build());
+      } catch (ServiceException e) {
+        throw new IOException(e);
+      }
+    }
+
+    @Override
+    public BlockingInterface getClient(ServerName sn) throws IOException {
+      return this.stub;
+    }
+  }
+
   /**
    * Override to shutdown going to zookeeper for cluster id and meta location.
    */
-  static class NoClusterConnection extends HConnectionManager.HConnectionImplementation {
+  static class RegionServerStoppedOnScannerOpenConnection
+  extends HConnectionManager.HConnectionImplementation {
     final ClientService.BlockingInterface stub;
 
-    NoClusterConnection(Configuration conf, boolean managed) throws IOException {
+    RegionServerStoppedOnScannerOpenConnection(Configuration conf, boolean managed)
+    throws IOException {
       super(conf, managed);
       // Mock up my stub so open scanner returns a scanner id and then on next, we throw
       // exceptions for three times and then after that, we return no more to scan.