You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2012/01/13 22:17:31 UTC

svn commit: r1231302 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/master/handler/ src/main/java/org/apache/hadoop/hbase/regionserver/

Author: tedyu
Date: Fri Jan 13 21:17:30 2012
New Revision: 1231302

URL: http://svn.apache.org/viewvc?rev=1231302&view=rev
Log:
HBASE-5196  Failure in region split after PONR could cause region hole (Jimmy Xiang)

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1231302&r1=1231301&r2=1231302&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Fri Jan 13 21:17:30 2012
@@ -478,6 +478,7 @@ Release 0.92.0 - Unreleased
    HBASE-5137  MasterFileSystem.splitLog() should abort even if waitOnSafeMode() throws IOException(Ted)
    HBASE-5121  MajorCompaction may affect scan's correctness (chunhui shen and Lars H)
    HBASE-5143  Fix config typo in pluggable load balancer factory (Harsh J)
+   HBASE-5196  Failure in region split after PONR could cause region hole (Jimmy Xiang)
 
   TESTS
    HBASE-4450  test for number of blocks read: to serve as baseline for expected

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1231302&r1=1231301&r2=1231302&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Fri Jan 13 21:17:30 2012
@@ -25,6 +25,7 @@ import java.lang.reflect.InvocationTarge
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -68,11 +69,13 @@ import org.apache.hadoop.hbase.ipc.HMast
 import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
 import org.apache.hadoop.hbase.ipc.ProtocolSignature;
 import org.apache.hadoop.hbase.ipc.RpcServer;
+import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator;
 import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
 import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
 import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
+import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
 import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
@@ -527,7 +530,11 @@ implements HMasterInterface, HMasterRegi
 
     this.balancer.setClusterStatus(getClusterStatus());
     this.balancer.setMasterServices(this);
-    
+
+    // Fixing up missing daughters if any
+    status.setStatus("Fixing up missing daughters");
+    fixupDaughters(status);
+
     // Start balancer and meta catalog janitor after meta and regions have
     // been assigned.
     status.setStatus("Starting balancer and catalog janitor");
@@ -622,6 +629,39 @@ implements HMasterInterface, HMasterRegi
     return assigned;
   }
 
+  void fixupDaughters(final MonitoredTask status) throws IOException {
+    final Map<HRegionInfo, Result> offlineSplitParents =
+      new HashMap<HRegionInfo, Result>(); // Only the values are read below
+    // Visitor that collects every offline split parent row found in .META.
+    MetaReader.Visitor visitor = new MetaReader.Visitor() {
+      @Override
+      public boolean visit(Result r) throws IOException {
+        if (r == null || r.isEmpty()) return true; // Empty row; keep scanning
+        HRegionInfo info =
+          MetaReader.parseHRegionInfoFromCatalogResult(
+            r, HConstants.REGIONINFO_QUALIFIER);
+        if (info == null) return true; // No region info parsed; keep scanning
+        if (info.isOffline() && info.isSplit()) {
+          offlineSplitParents.put(info, r);
+        }
+        // Always return true, i.e. "keep scanning"
+        return true;
+      }
+    };
+    // Run a full scan of the .META. catalog table with the visitor above
+    MetaReader.fullScan(this.catalogTracker, visitor);
+    // For each parent found, fix up its daughters and total the fix count
+    int fixups = 0;
+    for (Map.Entry<HRegionInfo, Result> e : offlineSplitParents.entrySet()) {
+      fixups += ServerShutdownHandler.fixupDaughters(
+          e.getValue(), assignmentManager, catalogTracker);
+    }
+    if (fixups != 0) { // Log only when at least one daughter was fixed up
+      LOG.info("Scanned the catalog and fixed up " + fixups +
+        " missing daughter region(s)");
+    }
+  }
+
   /**
    * Expire a server if we find it is one of the online servers set.
    * @param sn ServerName to check.

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1231302&r1=1231301&r2=1231302&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri Jan 13 21:17:30 2012
@@ -342,31 +342,34 @@ public class ServerShutdownHandler exten
    * Check that daughter regions are up in .META. and if not, add them.
    * @param hris All regions for this server in meta.
    * @param result The contents of the parent row in .META.
+   * @return the number of daughters missing and fixed
    * @throws IOException
    */
-  static void fixupDaughters(final Result result,
+  public static int fixupDaughters(final Result result,
       final AssignmentManager assignmentManager,
       final CatalogTracker catalogTracker)
   throws IOException {
-    fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
-      catalogTracker);
-    fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
-      catalogTracker);
+    int fixedA = fixupDaughter(result, HConstants.SPLITA_QUALIFIER,
+      assignmentManager, catalogTracker);
+    int fixedB = fixupDaughter(result, HConstants.SPLITB_QUALIFIER,
+      assignmentManager, catalogTracker);
+    return fixedA + fixedB;
   }
 
   /**
    * Check individual daughter is up in .META.; fixup if its not.
    * @param result The contents of the parent row in .META.
    * @param qualifier Which daughter to check for.
+   * @return 1 if the daughter was missing and has been fixed up; otherwise 0
    * @throws IOException
    */
-  static void fixupDaughter(final Result result, final byte [] qualifier,
+  static int fixupDaughter(final Result result, final byte [] qualifier,
       final AssignmentManager assignmentManager,
       final CatalogTracker catalogTracker)
   throws IOException {
     HRegionInfo daughter =
       MetaReader.parseHRegionInfoFromCatalogResult(result, qualifier);
-    if (daughter == null) return;
+    if (daughter == null) return 0;
     if (isDaughterMissing(catalogTracker, daughter)) {
       LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
       MetaEditor.addDaughter(catalogTracker, daughter, null);
@@ -377,9 +380,11 @@ public class ServerShutdownHandler exten
 
       // And assign it.
       assignmentManager.assign(daughter, true);
+      return 1;
     } else {
       LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
     }
+    return 0;
   }
 
   /**

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java?rev=1231302&r1=1231301&r2=1231302&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java Fri Jan 13 21:17:30 2012
@@ -68,7 +68,7 @@ class SplitRequest implements Runnable {
       } catch (Exception e) {
         try {
           LOG.info("Running rollback/cleanup of failed split of " +
-            parent.getRegionNameAsString() + "; " + e.getMessage());
+            parent.getRegionNameAsString() + "; " + e.getMessage(), e);
           if (st.rollback(this.server, this.server)) {
             LOG.info("Successful rollback of failed split of " +
               parent.getRegionNameAsString());
@@ -95,4 +95,4 @@ class SplitRequest implements Runnable {
       server.checkFileSystem();
     }
   }
-}
\ No newline at end of file
+}