You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2012/06/17 23:20:10 UTC
svn commit: r1351169 - in /hbase/trunk/hbase-server/src:
main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Author: jxiang
Date: Sun Jun 17 21:20:10 2012
New Revision: 1351169
URL: http://svn.apache.org/viewvc?rev=1351169&view=rev
Log:
HBASE-5360 [uberhbck] Add options for how to handle offline split parents.
Modified:
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1351169&r1=1351168&r2=1351169&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Sun Jun 17 21:20:10 2012
@@ -73,6 +73,7 @@ import org.apache.hadoop.hbase.client.Me
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -172,6 +173,7 @@ public class HBaseFsck {
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
+ private boolean fixSplitParents = false; // fix lingering split parents
// limit checking/fixes to listed tables, if empty attempt to check/fix all
// -ROOT- and .META. are always checked
@@ -1182,6 +1184,29 @@ public class HBaseFsck {
}
/**
+ * Reset the split parent region info in meta table
+ */
+ private void resetSplitParent(HbckInfo hi) throws IOException { // hi: region whose META row gets rewritten
+ RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName()); // delete and put below target this one row
+ Delete d = new Delete(hi.metaEntry.getRegionName());
+ d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER); // remove the splitA column from the row
+ d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER); // remove the splitB column from the row
+ mutations.add(d);
+
+ Put p = new Put(hi.metaEntry.getRegionName());
+ HRegionInfo hri = new HRegionInfo(hi.metaEntry); // new HRegionInfo built from the meta entry; flags cleared below
+ hri.setOffline(false);
+ hri.setSplit(false);
+ p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, // overwrite regioninfo with the cleared flags
+ Writables.getBytes(hri));
+ mutations.add(p);
+
+ meta.mutateRow(mutations); // submit the delete and the put together as one RowMutations
+ meta.flushCommits();
+ LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
+ }
+
+ /**
* This backwards-compatibility wrapper for permanently offlining a region
* that should not be alive. If the region server does not support the
* "offline" method, it will use the closest unassign method instead. This
@@ -1320,9 +1345,6 @@ public class HBaseFsck {
}
if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
return;
- } else if (inMeta && inHdfs && !isDeployed && splitParent) {
- LOG.warn("Region " + descriptiveName + " is a split parent in META and in HDFS");
- return;
} else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
"tabled that is not deployed");
@@ -1379,6 +1401,14 @@ public class HBaseFsck {
}
// ========== Cases where the region is in META =============
+ } else if (inMeta && inHdfs && !isDeployed && splitParent) {
+ errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
+ + descriptiveName + " is a split parent in META, in HDFS, "
+ + "and not deployed on any region server. This could be transient.");
+ if (shouldFixSplitParents()) {
+ setShouldRerun();
+ resetSplitParent(hbi);
+ }
} else if (inMeta && !inHdfs && !isDeployed) {
errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
+ descriptiveName + " found in META, but not in HDFS "
@@ -2505,7 +2535,7 @@ public class HBaseFsck {
MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
FIRST_REGION_STARTKEY_NOT_EMPTY, DUPE_STARTKEYS,
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
- ORPHAN_HDFS_REGION
+ ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT
}
public void clear();
public void report(String message);
@@ -2908,6 +2938,14 @@ public class HBaseFsck {
return sidelineBigOverlaps;
}
+ public void setFixSplitParents(boolean shouldFix) { // true: reset lingering split parents when found
+ this.fixSplitParents = shouldFix;
+ }
+
+ boolean shouldFixSplitParents() { // value last given to setFixSplitParents; false by default
+ return this.fixSplitParents;
+ }
+
/**
* @param mm maximum number of regions to merge into a single region.
*/
@@ -2972,6 +3010,7 @@ public class HBaseFsck {
System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
System.err.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps");
System.err.println(" -maxOverlapsToSideline <n> When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
+ System.err.println(" -fixSplitParents Try to force offline split parents to be online.");
System.err.println("");
System.err.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
@@ -3046,6 +3085,8 @@ public class HBaseFsck {
fsck.setFixVersionFile(true);
} else if (cmd.equals("-sidelineBigOverlaps")) {
fsck.setSidelineBigOverlaps(true);
+ } else if (cmd.equals("-fixSplitParents")) {
+ fsck.setFixSplitParents(true);
} else if (cmd.equals("-repair")) {
// this attempts to merge overlapping hdfs regions, needs testing
// under load
@@ -3056,6 +3097,7 @@ public class HBaseFsck {
fsck.setFixHdfsOverlaps(true);
fsck.setFixVersionFile(true);
fsck.setSidelineBigOverlaps(true);
+ fsck.setFixSplitParents(false);
} else if (cmd.equals("-repairHoles")) {
// this will make all missing hdfs regions available but may lose data
fsck.setFixHdfsHoles(true);
@@ -3064,6 +3106,7 @@ public class HBaseFsck {
fsck.setFixAssignments(true);
fsck.setFixHdfsOverlaps(false);
fsck.setSidelineBigOverlaps(false);
+ fsck.setFixSplitParents(false);
} else if (cmd.equals("-maxOverlapsToSideline")) {
if (i == args.length - 1) {
System.err.println("-maxOverlapsToSideline needs a numeric value argument.");
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1351169&r1=1351168&r2=1351169&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Sun Jun 17 21:20:10 2012
@@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.HBaseTest
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
@@ -52,6 +53,7 @@ import org.apache.hadoop.hbase.RegionTra
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.AdminProtocol;
import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable;
@@ -1012,6 +1014,85 @@ public class TestHBaseFsck {
deleteTable(table2);
}
}
+ /**
+ * A split parent in meta, in hdfs, and not deployed is reported, and only reset when fixSplitParents is set.
+ */
+ @Test
+ public void testLingeringSplitParent() throws Exception {
+ String table = "testLingeringSplitParent";
+ try {
+ setupTable(table);
+ assertEquals(ROWKEYS.length, countRows());
+
+ // make sure data in regions, if in hlog only there is no data loss
+ TEST_UTIL.getHBaseAdmin().flush(table);
+ HRegionLocation location = tbl.getRegionLocation("B");
+
+ // Delete one region from meta, but not hdfs, unassign it.
+ deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+ Bytes.toBytes("C"), true, true, false);
+
+ // Create a new meta entry to fake it as a split parent with two daughters.
+ HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
+ HRegionInfo hri = location.getRegionInfo();
+
+ HRegionInfo a = new HRegionInfo(tbl.getTableName(),
+ Bytes.toBytes("B"), Bytes.toBytes("BM"));
+ HRegionInfo b = new HRegionInfo(tbl.getTableName(),
+ Bytes.toBytes("BM"), Bytes.toBytes("C"));
+ Put p = new Put(hri.getRegionName());
+ hri.setOffline(true);
+ hri.setSplit(true);
+ p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
+ Writables.getBytes(hri));
+ p.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER,
+ Writables.getBytes(a));
+ p.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER, // daughter b belongs in splitB, not a second splitA
+ Writables.getBytes(b));
+ meta.put(p);
+ meta.flushCommits();
+ TEST_UTIL.getHBaseAdmin().flush(HConstants.META_TABLE_NAME);
+
+ HBaseFsck hbck = doFsck(conf, false);
+ assertErrors(hbck, new ERROR_CODE[] {
+ ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+ // regular repair cannot fix lingering split parent
+ hbck = doFsck(conf, true);
+ assertErrors(hbck, new ERROR_CODE[] {
+ ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+ assertFalse(hbck.shouldRerun());
+ hbck = doFsck(conf, false);
+ assertErrors(hbck, new ERROR_CODE[] {
+ ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+ // fix lingering split parent
+ hbck = new HBaseFsck(conf);
+ hbck.connect();
+ hbck.setDisplayFullReport(); // i.e. -details
+ hbck.setTimeLag(0);
+ hbck.setFixSplitParents(true);
+ hbck.onlineHbck();
+ assertTrue(hbck.shouldRerun());
+
+ // both daughter columns were written above, so both must be gone after the reset
+ Get get = new Get(hri.getRegionName());
+ Result result = meta.get(get);
+ assertTrue(result.getColumn(HConstants.CATALOG_FAMILY,
+ HConstants.SPLITA_QUALIFIER).isEmpty());
+ assertTrue(result.getColumn(HConstants.CATALOG_FAMILY,
+ HConstants.SPLITB_QUALIFIER).isEmpty());
+ TEST_UTIL.getHBaseAdmin().flush(HConstants.META_TABLE_NAME);
+
+ // fix other issues
+ doFsck(conf, true);
+
+ // check that all are fixed
+ assertNoErrors(doFsck(conf, false));
+ assertEquals(ROWKEYS.length, countRows());
+ } finally {
+ deleteTable(table);
+ }
+ }
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =