You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by an...@apache.org on 2013/08/06 21:18:51 UTC
svn commit: r1511081 - in /hbase/trunk/hbase-server/src:
main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Author: anoopsamjohn
Date: Tue Aug 6 19:18:51 2013
New Revision: 1511081
URL: http://svn.apache.org/r1511081
Log:
HBASE-8627 HBCK can not fix meta not assigned issue
Modified:
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1511081&r1=1511080&r2=1511081&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Tue Aug 6 19:18:51 2013
@@ -390,16 +390,29 @@ public class HBaseFsck extends Configure
InterruptedException {
clearState();
- LOG.info("Loading regionsinfo from the .META. table");
- boolean success = loadMetaEntries();
- if (!success) return -1;
-
+ // get regions according to what is online on each RegionServer
+ loadDeployedRegions();
+ // check whether .META. is deployed and online
+ if (!recordMetaRegion()) {
+ // Will remove later if we can fix it
+ errors.reportError("Fatal error: unable to get .META. region location. Exiting...");
+ return -2;
+ }
// Check if .META. is found only once and in the right place
if (!checkMetaRegion()) {
- // Will remove later if we can fix it
- errors.reportError("Encountered fatal error. Exiting...");
+ String errorMsg = ".META. table is not consistent. ";
+ if (shouldFixAssignments()) {
+ errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state.";
+ } else {
+ errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency.";
+ }
+ errors.reportError(errorMsg + " Exiting...");
return -2;
}
+ // Not going with further consistency check for tables when META itself is not consistent.
+ LOG.info("Loading regionsinfo from the .META. table");
+ boolean success = loadMetaEntries();
+ if (!success) return -1;
// Empty cells in .META.?
reportEmptyMetaCells();
@@ -414,9 +427,6 @@ public class HBaseFsck extends Configure
reportTablesInFlux();
}
- // get regions according to what is online on each RegionServer
- loadDeployedRegions();
-
// load regiondirs and regioninfos from HDFS
if (shouldCheckHdfs()) {
loadHdfsRegionDirs();
@@ -1334,10 +1344,13 @@ public class HBaseFsck extends Configure
} catch (KeeperException e) {
throw new IOException(e);
}
- MetaEntry m =
- new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
- HbckInfo hbInfo = new HbckInfo(m);
- regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo);
+ MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis());
+ HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
+ if (hbckInfo == null) {
+ regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
+ } else {
+ hbckInfo.metaEntry = m;
+ }
return true;
}
@@ -2492,45 +2505,36 @@ public class HBaseFsck extends Configure
* @throws KeeperException
* @throws InterruptedException
*/
- boolean checkMetaRegion()
- throws IOException, KeeperException, InterruptedException {
- List <HbckInfo> metaRegions = Lists.newArrayList();
+ boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
+ List<HbckInfo> metaRegions = Lists.newArrayList();
for (HbckInfo value : regionInfoMap.values()) {
- if (value.metaEntry.isMetaRegion()) {
+ if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
metaRegions.add(value);
}
}
- // If something is wrong
- if (metaRegions.size() != 1) {
- HRegionLocation rootLocation = connection.locateRegion(
- HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
- HbckInfo root =
- regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName());
-
- // If there is no region holding .META.
- if (metaRegions.size() == 0) {
+ // There will be always one entry in regionInfoMap corresponding to .META.
+ // Check the deployed servers. It should be exactly one server.
+ HbckInfo metaHbckInfo = metaRegions.get(0);
+ List<ServerName> servers = metaHbckInfo.deployedOn;
+ if (servers.size() != 1) {
+ if (servers.size() == 0) {
errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region.");
if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
// try to fix it (treat it as unassigned region)
- HBaseFsckRepair.fixUnassigned(admin, root.metaEntry);
- HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI());
+ HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry);
+ HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry);
}
- }
- // If there are more than one regions pretending to hold the .META.
- else if (metaRegions.size() > 1) {
- errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
+ } else if (servers.size() > 1) {
+ errors
+ .reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region.");
if (shouldFixAssignments()) {
errors.print("Trying to fix a problem with .META...");
setShouldRerun();
// try fix it (treat is a dupe assignment)
- List <ServerName> deployedOn = Lists.newArrayList();
- for (HbckInfo mRegion : metaRegions) {
- deployedOn.add(mRegion.metaEntry.regionServer);
- }
- HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn);
+ HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers);
}
}
// rerun hbck with hopefully fixed META
@@ -2545,15 +2549,6 @@ public class HBaseFsck extends Configure
* @throws IOException if an error is encountered
*/
boolean loadMetaEntries() throws IOException {
-
- // get a list of all regions from the master. This involves
- // scanning the META table
- if (!recordMetaRegion()) {
- // Will remove later if we can fix it
- errors.reportError("Fatal error: unable to get root region location. Exiting...");
- return false;
- }
-
MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
int countRecord = 1;
@@ -2587,9 +2582,12 @@ public class HBaseFsck extends Configure
}
PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
- HbckInfo hbInfo = new HbckInfo(m);
- HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo);
- if (previous != null) {
+ HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
+ if (previous == null) {
+ regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
+ } else if (previous.metaEntry == null) {
+ previous.metaEntry = m;
+ } else {
throw new IOException("Two entries in META are same " + previous);
}
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1511081&r1=1511080&r2=1511081&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Tue Aug 6 19:18:51 2013
@@ -60,15 +60,16 @@ import org.apache.hadoop.hbase.MiniHBase
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
+import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.io.hfile.TestHFile;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
@@ -231,7 +232,9 @@ public class TestHBaseFsck {
HRegionInfo hri) throws IOException, InterruptedException {
try {
HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri);
- admin.offline(hri.getRegionName());
+ if (!hri.isMetaTable()) {
+ admin.offline(hri.getRegionName());
+ }
} catch (IOException ioe) {
LOG.warn("Got exception when attempting to offline region "
+ Bytes.toString(hri.getRegionName()), ioe);
@@ -2000,6 +2003,57 @@ public class TestHBaseFsck {
writeLock.release(); // release for clean state
}
+ @Test
+ public void testMetaOffline() throws Exception {
+ // check no errors
+ HBaseFsck hbck = doFsck(conf, false);
+ assertNoErrors(hbck);
+ deleteMetaRegion(conf, true, false, false);
+ hbck = doFsck(conf, false);
+ // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the .META.
+ // inconsistency and whether we will be fixing it or not.
+ assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
+ hbck = doFsck(conf, true);
+ assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
+ hbck = doFsck(conf, false);
+ assertNoErrors(hbck);
+ }
+
+ private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
+ boolean regionInfoOnly) throws IOException, InterruptedException {
+ HConnection connection = HConnectionManager.getConnection(conf);
+ HRegionLocation metaLocation = connection.locateRegion(HConstants.META_TABLE_NAME,
+ HConstants.EMPTY_START_ROW);
+ ServerName hsa = new ServerName(metaLocation.getHostnamePort(), 0L);
+ HRegionInfo hri = metaLocation.getRegionInfo();
+ if (unassign) {
+ LOG.info("Undeploying meta region " + hri + " from server " + hsa);
+ undeployRegion(new HBaseAdmin(conf), hsa, hri);
+ }
+
+ if (regionInfoOnly) {
+ LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+ Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
+ hri.getEncodedName());
+ Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
+ fs.delete(hriPath, true);
+ }
+
+ if (hdfs) {
+ LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
+ Path rootDir = FSUtils.getRootDir(conf);
+ FileSystem fs = rootDir.getFileSystem(conf);
+ Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(),
+ hri.getEncodedName());
+ HBaseFsck.debugLsr(conf, p);
+ boolean success = fs.delete(p, true);
+ LOG.info("Deleted " + p + " sucessfully? " + success);
+ HBaseFsck.debugLsr(conf, p);
+ }
+ }
+
@org.junit.Rule
public TestName name = new TestName();
}