You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2012/06/18 01:35:27 UTC

svn commit: r1351183 - in /hbase/branches/0.92: CHANGES.txt src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Author: jxiang
Date: Sun Jun 17 23:35:26 2012
New Revision: 1351183

URL: http://svn.apache.org/viewvc?rev=1351183&view=rev
Log:
HBASE-5360 [uberhbck] Add options for how to handle offline split parents. 

Modified:
    hbase/branches/0.92/CHANGES.txt
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1351183&r1=1351182&r2=1351183&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Sun Jun 17 23:35:26 2012
@@ -108,6 +108,7 @@ Release 0.92.2 - Unreleased
    HBASE-5892  [hbck] Refactor parallel WorkItem* to Futures (Andrew Wang)
    HBASE-6067  HBase won't start when hbase.rootdir uses ViewFileSystem
    HBASE-6173  hbck check specified tables only
+   HBASE-5360  [uberhbck] Add options for how to handle offline split parents. 
 
   NEW FEATURE
    HBASE-5128  [uber hbck] Online automated repair of table integrity and region consistency problems

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1351183&r1=1351182&r2=1351183&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Sun Jun 17 23:35:26 2012
@@ -167,6 +167,7 @@ public class HBaseFsck {
   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
   private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
+  private boolean fixSplitParents = false; // fix lingering split parents
 
   // limit checking/fixes to listed tables, if empty attempt to check/fix all
   // -ROOT- and .META. are always checked
@@ -1179,6 +1180,27 @@ public class HBaseFsck {
   }
 
   /**
+   * Reset a split parent in META: delete its splitA/splitB columns and rewrite its regioninfo with the offline and split flags cleared.
+   */
+  private void resetSplitParent(HbckInfo hi) throws IOException {
+    Delete d = new Delete(hi.metaEntry.getRegionName());
+    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
+    d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
+    meta.delete(d);
+
+    Put p = new Put(hi.metaEntry.getRegionName());
+    HRegionInfo hri = new HRegionInfo(hi.metaEntry);
+    hri.setOffline(false);
+    hri.setSplit(false);
+    p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
+      Writables.getBytes(hri));
+    meta.put(p);
+
+    meta.flushCommits();
+    LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
+  }
+
+  /**
    * This backwards-compatibility wrapper for permanently offlining a region
    * that should not be alive.  If the region server does not support the
    * "offline" method, it will use the closest unassign method instead.  This
@@ -1317,9 +1339,6 @@ public class HBaseFsck {
     }
     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
       return;
-    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
-      LOG.warn("Region " + descriptiveName + " is a split parent in META and in HDFS");
-      return;
     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
         "tabled that is not deployed");
@@ -1376,6 +1395,14 @@ public class HBaseFsck {
       }
 
     // ========== Cases where the region is in META =============
+    } else if (inMeta && inHdfs && !isDeployed && splitParent) {
+      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
+          + descriptiveName + " is a split parent in META, in HDFS, "
+          + "and not deployed on any region server. This could be transient.");
+      if (shouldFixSplitParents()) {
+        setShouldRerun();
+        resetSplitParent(hbi);
+      }
     } else if (inMeta && !inHdfs && !isDeployed) {
       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
           + descriptiveName + " found in META, but not in HDFS "
@@ -2502,7 +2529,7 @@ public class HBaseFsck {
       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
       FIRST_REGION_STARTKEY_NOT_EMPTY, DUPE_STARTKEYS,
       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
-      ORPHAN_HDFS_REGION
+      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT
     }
     public void clear();
     public void report(String message);
@@ -2904,6 +2931,14 @@ public class HBaseFsck {
     return sidelineBigOverlaps;
   }
 
+  public void setFixSplitParents(boolean shouldFix) { // set to true by the -fixSplitParents command line option
+    fixSplitParents = shouldFix;
+  }
+
+  boolean shouldFixSplitParents() { // when true, a reported LINGERING_SPLIT_PARENT is reset in META
+    return fixSplitParents;
+  }
+
   /**
    * @param mm maximum number of regions to merge into a single region.
    */
@@ -2968,6 +3003,7 @@ public class HBaseFsck {
     System.err.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
     System.err.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
     System.err.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
+    System.err.println("   -fixSplitParents  Try to force offline split parents to be online.");
     System.err.println("");
     System.err.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
     System.err.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");
@@ -3041,6 +3077,8 @@ public class HBaseFsck {
         fsck.setFixVersionFile(true);
       } else if (cmd.equals("-sidelineBigOverlaps")) {
         fsck.setSidelineBigOverlaps(true);
+      } else if (cmd.equals("-fixSplitParents")) {
+        fsck.setFixSplitParents(true);
       } else if (cmd.equals("-repair")) {
         // this attempts to merge overlapping hdfs regions, needs testing
         // under load
@@ -3051,6 +3089,7 @@ public class HBaseFsck {
         fsck.setFixHdfsOverlaps(true);
         fsck.setFixVersionFile(true);
         fsck.setSidelineBigOverlaps(true);
+        fsck.setFixSplitParents(false);
       } else if (cmd.equals("-repairHoles")) {
         // this will make all missing hdfs regions available but may lose data
         fsck.setFixHdfsHoles(true);
@@ -3059,6 +3098,7 @@ public class HBaseFsck {
         fsck.setFixAssignments(true);
         fsck.setFixHdfsOverlaps(false);
         fsck.setSidelineBigOverlaps(false);
+        fsck.setFixSplitParents(false);
       } else if (cmd.equals("-maxOverlapsToSideline")) {
         if (i == args.length - 1) {
           System.err.println("-maxOverlapsToSideline needs a numeric value argument.");

Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1351183&r1=1351182&r2=1351183&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Sun Jun 17 23:35:26 2012
@@ -45,10 +45,12 @@ import org.apache.hadoop.hbase.HBaseTest
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HTable;
@@ -229,7 +231,7 @@ public class TestHBaseFsck {
 
         if (unassign) {
           LOG.info("Undeploying region " + hri + " from server " + hsa);
-          undeployRegion(new HBaseAdmin(conf), hsa, hri);
+          undeployRegion(new HBaseAdmin(conf), hsa, new HRegionInfo(hri));
         }
 
         if (regionInfoOnly) {
@@ -1003,4 +1005,84 @@ public class TestHBaseFsck {
       deleteTable(table2);
     }
   }
+
+  /**
+   * A split parent (with splitA and splitB daughters) in meta, in hdfs, and not deployed
+   */
+  @Test
+  public void testLingeringSplitParent() throws Exception {
+    String table = "testLingeringSplitParent";
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // make sure data in regions, if in hlog only there is no data loss
+      TEST_UTIL.getHBaseAdmin().flush(table);
+      HRegionLocation location = tbl.getRegionLocation("B");
+
+      // Delete one region from meta, but not hdfs, unassign it.
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
+        Bytes.toBytes("C"), true, true, false);
+
+      // Re-create the meta row as an offline split parent with both daughter columns.
+      HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
+      HRegionInfo hri = location.getRegionInfo();
+
+      HRegionInfo a = new HRegionInfo(tbl.getTableName(),
+        Bytes.toBytes("B"), Bytes.toBytes("BM"));
+      HRegionInfo b = new HRegionInfo(tbl.getTableName(),
+        Bytes.toBytes("BM"), Bytes.toBytes("C"));
+      Put p = new Put(hri.getRegionName());
+      hri.setOffline(true);
+      hri.setSplit(true);
+      p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
+        Writables.getBytes(hri));
+      p.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER,
+        Writables.getBytes(a));
+      p.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER,
+        Writables.getBytes(b));
+      meta.put(p);
+      meta.flushCommits();
+      TEST_UTIL.getHBaseAdmin().flush(HConstants.META_TABLE_NAME);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {
+        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // regular repair cannot fix lingering split parent
+      hbck = doFsck(conf, true);
+      assertErrors(hbck, new ERROR_CODE[] {
+        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+      assertFalse(hbck.shouldRerun());
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {
+        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // fix lingering split parent
+      hbck = new HBaseFsck(conf);
+      hbck.connect();
+      hbck.setDisplayFullReport(); // i.e. -details
+      hbck.setTimeLag(0);
+      hbck.setFixSplitParents(true);
+      hbck.onlineHbck();
+      assertTrue(hbck.shouldRerun());
+
+      Get get = new Get(hri.getRegionName());
+      Result result = meta.get(get);
+      assertTrue(result.getColumn(HConstants.CATALOG_FAMILY,
+        HConstants.SPLITA_QUALIFIER).isEmpty());
+      assertTrue(result.getColumn(HConstants.CATALOG_FAMILY,
+        HConstants.SPLITB_QUALIFIER).isEmpty());
+      TEST_UTIL.getHBaseAdmin().flush(HConstants.META_TABLE_NAME);
+
+      // fix other issues
+      doFsck(conf, true);
+
+      // check that all are fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      deleteTable(table);
+    }
+  }
 }