You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2012/01/13 22:16:28 UTC
svn commit: r1231300 - in /hbase/branches/0.92: ./
src/main/java/org/apache/hadoop/hbase/master/
src/main/java/org/apache/hadoop/hbase/master/handler/
src/main/java/org/apache/hadoop/hbase/regionserver/
Author: tedyu
Date: Fri Jan 13 21:16:28 2012
New Revision: 1231300
URL: http://svn.apache.org/viewvc?rev=1231300&view=rev
Log:
HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang)
Modified:
hbase/branches/0.92/CHANGES.txt
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1231300&r1=1231299&r2=1231300&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Fri Jan 13 21:16:28 2012
@@ -520,6 +520,7 @@ Release 0.92.0 - Unreleased
HBASE-5163 TestLogRolling#testLogRollOnDatanodeDeath fails sometimes on Jenkins or hadoop QA
("The directory is already locked.") (N Keywal)
HBASE-5143 Fix config typo in pluggable load balancer factory (Harsh J)
+ HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang)
TESTS
HBASE-4492 TestRollingRestart fails intermittently
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1231300&r1=1231299&r2=1231300&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Fri Jan 13 21:16:28 2012
@@ -25,6 +25,7 @@ import java.lang.reflect.InvocationTarge
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -67,11 +68,13 @@ import org.apache.hadoop.hbase.ipc.HMast
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
import org.apache.hadoop.hbase.ipc.ProtocolSignature;
import org.apache.hadoop.hbase.ipc.RpcServer;
+import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator;
import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
+import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
@@ -499,7 +502,11 @@ implements HMasterInterface, HMasterRegi
this.balancer.setClusterStatus(getClusterStatus());
this.balancer.setMasterServices(this);
-
+
+ // Fixing up missing daughters if any
+ status.setStatus("Fixing up missing daughters");
+ fixupDaughters(status);
+
// Start balancer and meta catalog janitor after meta and regions have
// been assigned.
status.setStatus("Starting balancer and catalog janitor");
@@ -591,6 +598,39 @@ implements HMasterInterface, HMasterRegi
return assigned;
}
+ void fixupDaughters(final MonitoredTask status) throws IOException { // NOTE: 'status' is accepted but not read in this body
+ final Map<HRegionInfo, Result> offlineSplitParents =
+ new HashMap<HRegionInfo, Result>();
+ // Visitor that collects every .META. row whose region info is both offline and split (a split parent)
+ MetaReader.Visitor visitor = new MetaReader.Visitor() {
+ @Override
+ public boolean visit(Result r) throws IOException {
+ if (r == null || r.isEmpty()) return true; // Nothing in this row; keep scanning
+ HRegionInfo info =
+ MetaReader.parseHRegionInfoFromCatalogResult(
+ r, HConstants.REGIONINFO_QUALIFIER);
+ if (info == null) return true; // No region info parsed from this row; keep scanning
+ if (info.isOffline() && info.isSplit()) {
+ offlineSplitParents.put(info, r); // Keep the whole row; only the Result value is consumed below
+ }
+ // Returning true means "keep scanning"; this visitor never stops the scan early
+ return true;
+ }
+ };
+ // Run full scan of .META. catalog table passing in our custom visitor
+ MetaReader.fullScan(this.catalogTracker, visitor);
+ // Now work on the split parents found: check each one's daughters and fix up any that are missing.
+ int fixups = 0;
+ for (Map.Entry<HRegionInfo, Result> e : offlineSplitParents.entrySet()) {
+ fixups += ServerShutdownHandler.fixupDaughters( // Returns the number of daughters that were missing and fixed
+ e.getValue(), assignmentManager, catalogTracker);
+ }
+ if (fixups != 0) { // Only log when at least one daughter was actually fixed up
+ LOG.info("Scanned the catalog and fixed up " + fixups +
+ " missing daughter region(s)");
+ }
+ }
+
/**
* Expire a server if we find it is one of the online servers set.
* @param sn ServerName to check.
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1231300&r1=1231299&r2=1231300&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri Jan 13 21:16:28 2012
@@ -341,31 +341,34 @@ public class ServerShutdownHandler exten
* Check that daughter regions are up in .META. and if not, add them.
* @param hris All regions for this server in meta.
* @param result The contents of the parent row in .META.
+ * @return the number of daughters missing and fixed
* @throws IOException
*/
- static void fixupDaughters(final Result result,
+ public static int fixupDaughters(final Result result,
final AssignmentManager assignmentManager,
final CatalogTracker catalogTracker)
throws IOException {
- fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
- catalogTracker);
- fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
- catalogTracker);
+ int fixedA = fixupDaughter(result, HConstants.SPLITA_QUALIFIER,
+ assignmentManager, catalogTracker);
+ int fixedB = fixupDaughter(result, HConstants.SPLITB_QUALIFIER,
+ assignmentManager, catalogTracker);
+ return fixedA + fixedB;
}
/**
* Check individual daughter is up in .META.; fixup if it's not.
* @param result The contents of the parent row in .META.
* @param qualifier Which daughter to check for.
+ * @return 1 if the daughter is missing and fixed. Otherwise 0
* @throws IOException
*/
- static void fixupDaughter(final Result result, final byte [] qualifier,
+ static int fixupDaughter(final Result result, final byte [] qualifier,
final AssignmentManager assignmentManager,
final CatalogTracker catalogTracker)
throws IOException {
HRegionInfo daughter =
MetaReader.parseHRegionInfoFromCatalogResult(result, qualifier);
- if (daughter == null) return;
+ if (daughter == null) return 0;
if (isDaughterMissing(catalogTracker, daughter)) {
LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
MetaEditor.addDaughter(catalogTracker, daughter, null);
@@ -376,9 +379,11 @@ public class ServerShutdownHandler exten
// And assign it.
assignmentManager.assign(daughter, true);
+ return 1;
} else {
LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present");
}
+ return 0;
}
/**
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java?rev=1231300&r1=1231299&r2=1231300&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java Fri Jan 13 21:16:28 2012
@@ -68,7 +68,7 @@ class SplitRequest implements Runnable {
} catch (Exception e) {
try {
LOG.info("Running rollback/cleanup of failed split of " +
- parent.getRegionNameAsString() + "; " + e.getMessage());
+ parent.getRegionNameAsString() + "; " + e.getMessage(), e);
if (st.rollback(this.server, this.server)) {
LOG.info("Successful rollback of failed split of " +
parent.getRegionNameAsString());
@@ -95,4 +95,4 @@ class SplitRequest implements Runnable {
server.checkFileSystem();
}
}
-}
\ No newline at end of file
+}