You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by an...@apache.org on 2013/08/06 21:18:51 UTC

svn commit: r1511081 - in /hbase/trunk/hbase-server/src: main/java/org/apache/hadoop/hbase/util/HBaseFsck.java test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Author: anoopsamjohn
Date: Tue Aug  6 19:18:51 2013
New Revision: 1511081

URL: http://svn.apache.org/r1511081
Log:
HBASE-8627 HBCK can not fix meta not assigned issue

Modified:
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1511081&r1=1511080&r2=1511081&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Tue Aug  6 19:18:51 2013
@@ -390,16 +390,29 @@ public class HBaseFsck extends Configure
     InterruptedException {
     clearState();
 
-    LOG.info("Loading regionsinfo from the .META. table");
-    boolean success = loadMetaEntries();
-    if (!success) return -1;
-
+    // get regions according to what is online on each RegionServer
+    loadDeployedRegions();
+    // check whether .META. is deployed and online
+    if (!recordMetaRegion()) {
+      // Will remove later if we can fix it
+      errors.reportError("Fatal error: unable to get .META. region location. Exiting...");
+      return -2;
+    }
     // Check if .META. is found only once and in the right place
     if (!checkMetaRegion()) {
-      // Will remove later if we can fix it
-      errors.reportError("Encountered fatal error. Exiting...");
+      String errorMsg = ".META. table is not consistent. ";
+      if (shouldFixAssignments()) {
+        errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state.";
+      } else {
+        errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency.";
+      }
+      errors.reportError(errorMsg + " Exiting...");
       return -2;
     }
+    // Do not proceed with table consistency checks when .META. itself is not consistent.
+    LOG.info("Loading regionsinfo from the .META. table");
+    boolean success = loadMetaEntries();
+    if (!success) return -1;
 
     // Empty cells in .META.?
     reportEmptyMetaCells();
@@ -414,9 +427,6 @@ public class HBaseFsck extends Configure
       reportTablesInFlux();
     }
 
-    // get regions according to what is online on each RegionServer
-    loadDeployedRegions();
-
     // load regiondirs and regioninfos from HDFS
     if (shouldCheckHdfs()) {
       loadHdfsRegionDirs();
@@ -1334,10 +1344,13 @@ public class HBaseFsck extends Configure
     } catch (KeeperException e) {
       throw new IOException(e);
     }
-    MetaEntry m =
-      new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
-    HbckInfo hbInfo = new HbckInfo(m);
-    regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo);
+    MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
+    HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
+    if (hbckInfo == null) {
+      regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
+    } else {
+      hbckInfo.metaEntry = m;
+    }
     return true;
   }
 
@@ -2492,45 +2505,36 @@ public class HBaseFsck extends Configure
    * @throws KeeperException
    * @throws InterruptedException
     */
-  boolean checkMetaRegion()
-    throws IOException, KeeperException, InterruptedException {
-    List <HbckInfo> metaRegions = Lists.newArrayList();
+  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
+    List<HbckInfo> metaRegions = Lists.newArrayList();
     for (HbckInfo value : regionInfoMap.values()) {
-      if (value.metaEntry.isMetaRegion()) {
+      if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
         metaRegions.add(value);
       }
     }
 
-    // If something is wrong
-    if (metaRegions.size() != 1) {
-      HRegionLocation rootLocation = connection.locateRegion(
-        HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
-      HbckInfo root =
-          regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
-
-      // If there is no region holding .META.
-      if (metaRegions.size() == 0) {
+    // There will always be one entry in regionInfoMap corresponding to .META.
+    // Check the deployed servers. It should be exactly one server.
+    HbckInfo metaHbckInfo = metaRegions.get(0);
+    List<ServerName> servers = metaHbckInfo.deployedOn;
+    if (servers.size() != 1) {
+      if (servers.size() == 0) {
         errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
         if (shouldFixAssignments()) {
           errors.print("Trying to fix a problem with .META...");
           setShouldRerun();
           // try to fix it (treat it as unassigned region)
-          HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
-          HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
+          HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
+          HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
         }
-      }
-      // If there are more than one regions pretending to hold the .META.
-      else if (metaRegions.size() > 1) {
-        errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
+      } else if (servers.size() > 1) {
+        errors
+            .reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
         if (shouldFixAssignments()) {
           errors.print("Trying to fix a problem with .META...");
           setShouldRerun();
           // try fix it (treat is a dupe assignment)
-          List <ServerName> deployedOn = Lists.newArrayList();
-          for (HbckInfo mRegion : metaRegions) {
-            deployedOn.add(mRegion.metaEntry.regionServer);
-          }
-          HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
+          HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
         }
       }
       // rerun hbck with hopefully fixed META
@@ -2545,15 +2549,6 @@ public class HBaseFsck extends Configure
    * @throws IOException if an error is encountered
    */
   boolean loadMetaEntries() throws IOException {
-
-    // get a list of all regions from the master. This involves
-    // scanning the META table
-    if (!recordMetaRegion()) {
-      // Will remove later if we can fix it
-      errors.reportError("Fatal error: unable to get root region location. Exiting...");
-      return false;
-    }
-
     MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
       int countRecord = 1;
 
@@ -2587,9 +2582,12 @@ public class HBaseFsck extends Configure
           }
           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
           MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
-          HbckInfo hbInfo = new HbckInfo(m);
-          HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
-          if (previous != null) {
+          HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
+          if (previous == null) {
+            regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
+          } else if (previous.metaEntry == null) {
+            previous.metaEntry = m;
+          } else {
             throw new IOException("Two entries in META are same " + previous);
           }
 

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1511081&r1=1511080&r2=1511081&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Tue Aug  6 19:18:51 2013
@@ -60,15 +60,16 @@ import org.apache.hadoop.hbase.MiniHBase
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HConnection;
+import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Durability;
 import org.apache.hadoop.hbase.io.hfile.TestHFile;
 import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.HMaster;
@@ -231,7 +232,9 @@ public class TestHBaseFsck {
       HRegionInfo hri) throws IOException, InterruptedException {
     try {
       HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
-      admin.offline(hri.getRegionName());
+      if (!hri.isMetaTable()) {
+        admin.offline(hri.getRegionName());
+      }
     } catch (IOException ioe) {
       LOG.warn("Got exception when attempting to offline region "
           + Bytes.toString(hri.getRegionName()), ioe);
@@ -2000,6 +2003,57 @@ public class TestHBaseFsck {
     writeLock.release(); // release for clean state
   }
 
+  @Test
+  public void testMetaOffline() throws Exception {
+    // check no errors
+    HBaseFsck hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+    deleteMetaRegion(conf, true, false, false);
+    hbck = doFsck(conf, false);
+    // ERROR_CODE.UNKNOWN is also reported because reportError is called with only a message
+    // (no error code) describing the .META. inconsistency and whether it will be fixed.
+    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
+    hbck = doFsck(conf, true);
+    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
+    hbck = doFsck(conf, false);
+    assertNoErrors(hbck);
+  }
+  
+  private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
+      boolean regionInfoOnly) throws IOException, InterruptedException {
+    HConnection connection = HConnectionManager.getConnection(conf);
+    HRegionLocation metaLocation = connection.locateRegion(HConstants.META_TABLE_NAME,
+        HConstants.EMPTY_START_ROW);
+    ServerName hsa = new ServerName(metaLocation.getHostnamePort(), 0L);
+    HRegionInfo hri = metaLocation.getRegionInfo();
+    if (unassign) {
+      LOG.info("Undeploying meta region " + hri + " from server " + hsa);
+      undeployRegion(new HBaseAdmin(conf), hsa, hri);
+    }
+
+    if (regionInfoOnly) {
+      LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
+      Path rootDir = FSUtils.getRootDir(conf);
+      FileSystem fs = rootDir.getFileSystem(conf);
+      Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
+          hri.getEncodedName());
+      Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
+      fs.delete(hriPath, true);
+    }
+
+    if (hdfs) {
+      LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
+      Path rootDir = FSUtils.getRootDir(conf);
+      FileSystem fs = rootDir.getFileSystem(conf);
+      Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
+          hri.getEncodedName());
+      HBaseFsck.debugLsr(conf, p);
+      boolean success = fs.delete(p, true);
+      LOG.info("Deleted " + p + " sucessfully? " + success);
+      HBaseFsck.debugLsr(conf, p);
+    }
+  }
+  
   @org.junit.Rule
   public TestName name = new TestName();
 }