You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/10/01 06:26:42 UTC

svn commit: r1177902 - in /hbase/branches/0.90: CHANGES.txt src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Author: stack
Date: Sat Oct  1 04:26:42 2011
New Revision: 1177902

URL: http://svn.apache.org/viewvc?rev=1177902&view=rev
Log:
HBASE-4509 [hbck] Improve region map output

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
    hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1177902&r1=1177901&r2=1177902&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Sat Oct  1 04:26:42 2011
@@ -90,6 +90,10 @@ Release 0.90.5 - Unreleased
    HBASE-4375  [hbck] Add region coverage visualization to hbck
                (Jonathan Hsieh)
    HBASE-4280  [replication] ReplicationSink can deadlock itself via handlers
+   HBASE-4506  [hbck] Allow HBaseFsck to be instantiated without connecting
+               (Jonathan Hsieh)
+   HBASE-4509  [hbck] Improve region map output
+               (Jonathan Hsieh)
  
 Release 0.90.4 - August 10, 2011
   BUG FIXES

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1177902&r1=1177901&r2=1177902&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Sat Oct  1 04:26:42 2011
@@ -69,6 +69,7 @@ import org.apache.zookeeper.KeeperExcept
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimap;
+import com.google.common.collect.TreeMultimap;
 
 /**
  * Check consistency among the in-memory states of the master and the
@@ -103,6 +104,7 @@ public class HBaseFsck {
   // Empty regioninfo qualifiers in .META.
   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
   private int numThreads = MAX_NUM_THREADS;
+  private HBaseAdmin admin;
 
   ThreadPoolExecutor executor; // threads to retrieve data from regionservers
 
@@ -113,18 +115,20 @@ public class HBaseFsck {
    * @throws MasterNotRunningException if the master is not running
    * @throws ZooKeeperConnectionException if unable to connect to zookeeper
    */
-  public HBaseFsck(Configuration conf)
-    throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
+  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
+      ZooKeeperConnectionException, IOException {
     this.conf = conf;
+    numThreads = conf.getInt("hbasefsck.numthreads", numThreads);
+    executor = new ThreadPoolExecutor(0, numThreads,
+        THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
+        new LinkedBlockingQueue<Runnable>());
+  }
 
-    HBaseAdmin admin = new HBaseAdmin(conf);
+  public void connect() throws MasterNotRunningException,
+      ZooKeeperConnectionException {
+    admin = new HBaseAdmin(conf);
     status = admin.getMaster().getClusterStatus();
     connection = admin.getConnection();
-
-    numThreads = conf.getInt("hbasefsck.numthreads", numThreads);
-    executor = new ThreadPoolExecutor(0, numThreads,
-          THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
-          new LinkedBlockingQueue<Runnable>());
   }
 
   /**
@@ -536,6 +540,10 @@ public class HBaseFsck {
     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
 
+    // key = start split, values = set of splits in problem group
+    final Multimap<byte[], HbckInfo> overlapGroups = 
+      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
+
     TInfo(String name) {
       this.tableName = name;
       deployedOn = new TreeSet <HServerAddress>();
@@ -582,26 +590,44 @@ public class HBaseFsck {
      * @return false if there are errors
      */
     public boolean checkRegionChain() {
-      
       int originalErrorsCount = errors.getErrorList().size();
       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
       SortedSet<byte[]> splits = sc.getSplits();
 
       byte[] prevKey = null;
-      for (byte[] key: splits) {
+      byte[] problemKey = null;
+      for (byte[] key : splits) {
         Collection<HbckInfo> ranges = regions.get(key);
         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
           for (HbckInfo rng : ranges) {
+            // TODO offline fix region hole.
+
             errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
-                "First region should start with an empty key.",
+                "First region should start with an empty key. When HBase is "
+                + "online, create a new regio to plug the hole using hbck -fix",
                 this, rng);
           }
         }
-        
-        // Check if the startkeys are different
-        if (ranges.size() > 1) {
+
+        if (ranges.size() == 1) {
+          // this split key is ok -- no overlap, not a hole.
+          if (problemKey != null) {
+            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
+          }
+          problemKey = null; // fell through, no more problem.
+        } else if (ranges.size() > 1) {
+          // set the new problem group key; if we already have a problem key,
+          // just keep using it.
+          if (problemKey == null) {
+            // only for overlap regions.
+            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
+            problemKey = key;
+          }
+          overlapGroups.putAll(problemKey, ranges);
+
+          // record errors
           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
-          // this dumb and n^2 but this shouldn't happen often
+          // this is dumb and n^2, but this shouldn't happen often
           for (HbckInfo r1 : ranges) {
             subRange.remove(r1);
             for (HbckInfo r2 : subRange) {
@@ -614,30 +640,42 @@ public class HBaseFsck {
                 "Multiple regions have the same startkey: "
                     + Bytes.toStringBinary(key), this, r2);
               } else {
-            // overlap
-            errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
-                "There is an overlap in the region chain.",
-                this, r1);
+                // overlap
+                errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+                    "There is an overlap in the region chain.",
+                    this, r1);
               }
             }
           }
-        }
-        
-        if (ranges.size() == 0) {
+
+        } else if (ranges.size() == 0) {
+          if (problemKey != null) {
+            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
+          }
+          problemKey = null;
+
           byte[] holeStopKey = sc.getSplits().higher(key);
           // if higher key is null we reached the top.
           if (holeStopKey != null) {
             // hole
             errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN,
-                "There is a hole in the region chain between "
-                + Bytes.toString(key) + " and " + Bytes.toString(holeStopKey));
+                "There is a hole in the region chain between " 
+                + Bytes.toStringBinary(key) + " and "
+                + Bytes.toStringBinary(holeStopKey)
+                + ".  When HBase is online, create a new regioninfo and region " 
+                + "dir to plug the hole.");
           }
-        }
+        } 
         prevKey = key;
       }
+
       if (details) {
         // do full region split map dump
-        dump(sc.getSplits(), regions);
+        dump(splits, regions);
+        dumpOverlapProblems(overlapGroups);
+        System.out.println("There are " + overlapGroups.keySet().size()
+            + " problem groups with " + overlapGroups.size()
+            + " problem regions");
       }
       return errors.getErrorList().size() == originalErrorsCount;
     }
@@ -648,7 +686,7 @@ public class HBaseFsck {
      * @param splits
      * @param regions
      */
-    void dump(TreeSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
+    void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
       // we display this way because the last end key should be displayed as well.
       for (byte[] k : splits) {
         System.out.print(Bytes.toString(k) + ":\t");
@@ -659,7 +697,23 @@ public class HBaseFsck {
         System.out.println();
       }
     }
-    
+  }
+
+  public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
+    // we display this way because the last end key should be displayed as
+    // well.
+    for (byte[] k : regions.keySet()) {
+      System.out.print(Bytes.toStringBinary(k) + ":\n");
+      for (HbckInfo r : regions.get(k)) {
+        System.out.print("[ " + r.toString() + ", "
+            + Bytes.toStringBinary(r.getEndKey()) + "]\n");
+      }
+      System.out.println("----");
+    }
+  }
+
+  public Multimap<byte[], HbckInfo> getOverlapGroups(String table) {
+    return tablesInfo.get(table).overlapGroups;
   }
 
   /**
@@ -857,7 +911,7 @@ public class HBaseFsck {
   /**
    * Maintain information about a particular region.
    */
-  static class HbckInfo implements KeyRange {
+  public static class HbckInfo implements KeyRange {
     boolean onlyEdits = false;
     MetaEntry metaEntry = null;
     FileStatus foundRegionDir = null;
@@ -1299,11 +1353,11 @@ public class HBaseFsck {
    * @param args
    * @throws Exception
    */
-  public static void main(String [] args) throws Exception {
+  public static void main(String[] args) throws Exception {
 
     // create a fsck object
     Configuration conf = HBaseConfiguration.create();
-    conf.set("fs.defaultFS", conf.get("hbase.rootdir"));
+    conf.set("fs.defaultFS", conf.get(HConstants.HBASE_DIR));
     HBaseFsck fsck = new HBaseFsck(conf);
     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
 
@@ -1351,6 +1405,7 @@ public class HBaseFsck {
       }
     }
     // do the real work of fsck
+    fsck.connect();
     int code = fsck.doWork();
     // If we have changed the HBase state it is better to run fsck again
     // to see if we haven't broken something else in the process.

Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1177902&r1=1177901&r2=1177902&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Sat Oct  1 04:26:42 2011
@@ -70,20 +70,23 @@ public class TestHBaseFsck {
     TEST_UTIL.shutdownMiniCluster();
   }
 
-  private List<ERROR_CODE> doFsck(boolean fix) throws Exception {
+  private HBaseFsck doFsck(boolean fix) throws Exception {
     HBaseFsck fsck = new HBaseFsck(conf);
+    fsck.connect();
     fsck.displayFullReport(); // i.e. -details
     fsck.setTimeLag(0);
     fsck.setFixErrors(fix);
     fsck.doWork();
-    return fsck.getErrors().getErrorList();
+    return fsck;
   }
 
-  private void assertNoErrors(List<ERROR_CODE> errs) throws Exception {
+  private void assertNoErrors(HBaseFsck fsck) throws Exception {
+    List<ERROR_CODE> errs = fsck.getErrors().getErrorList();
     assertEquals(0, errs.size());
   }
 
-  private void assertErrors(List<ERROR_CODE> errs, ERROR_CODE[] expectedErrors) {
+  private void assertErrors(HBaseFsck fsck, ERROR_CODE[] expectedErrors) {
+    List<ERROR_CODE> errs = fsck.getErrors().getErrorList();
     assertEquals(Arrays.asList(expectedErrors), errs);
   }
 
@@ -182,10 +185,15 @@ public class TestHBaseFsck {
     assertNoErrors(doFsck(false));
     String table = "tableClean";
     try {
-      setupTable(table);
+      HBaseFsck hbck = doFsck(false);
+      assertNoErrors(hbck);
 
+      setupTable(table);
+      
       // We created 1 table, should be fine
-      assertNoErrors(doFsck(false));
+      hbck = doFsck( false);
+      assertNoErrors(hbck);
+      assertEquals(0, hbck.getOverlapGroups(table).size());
     } finally {
       deleteTable(table);
     }
@@ -209,9 +217,10 @@ public class TestHBaseFsck {
       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
           .waitForAssignment(hriDupe);
 
-      assertErrors(doFsck(false),
-          new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
+      HBaseFsck hbck = doFsck(false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
             ERROR_CODE.DUPE_STARTKEYS});
+      assertEquals(2, hbck.getOverlapGroups(table).size());
     } finally {
       deleteTable(table);
     }
@@ -232,9 +241,12 @@ public class TestHBaseFsck {
       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
       TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
           .waitForAssignment(hriOverlap);
-      assertErrors(doFsck(false), new ERROR_CODE[] {
+
+      HBaseFsck hbck = doFsck(false);
+      assertErrors(hbck, new ERROR_CODE[] {
           ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
           ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
+      assertEquals(3, hbck.getOverlapGroups(table).size());
     } finally {
       deleteTable(table);
     }
@@ -259,8 +271,11 @@ public class TestHBaseFsck {
       TEST_UTIL.getHBaseAdmin().disableTable(table);
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""));
       TEST_UTIL.getHBaseAdmin().enableTable(table);
-      assertErrors(doFsck(false),
-          new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
+
+      HBaseFsck hbck = doFsck(false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
     } finally {
       deleteTable(table);
     }