You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jm...@apache.org on 2012/09/09 18:21:15 UTC
svn commit: r1382529 - in /hbase/trunk/hbase-server/src:
main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
Author: jmhsieh
Date: Sun Sep 9 16:21:15 2012
New Revision: 1382529
URL: http://svn.apache.org/viewvc?rev=1382529&view=rev
Log:
HBASE-5631 hbck should handle case where .tableinfo file is missing (Jie Huang)
Modified:
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1382529&r1=1382528&r2=1382529&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Sun Sep 9 16:21:15 2012
@@ -26,6 +26,7 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -54,6 +55,7 @@ import org.apache.hadoop.fs.permission.F
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
@@ -182,6 +184,7 @@ public class HBaseFsck {
private boolean fixHdfsHoles = false; // fix fs holes?
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
+ private boolean fixTableOrphans = false; // fix table dirs with no .tableinfo file
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
private boolean fixSplitParents = false; // fix lingering split parents
@@ -231,6 +234,8 @@ public class HBaseFsck {
* When initially looking at HDFS, we attempt to find any orphaned data.
*/
private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
+ // Table name -> column family names, recorded for tables whose .tableinfo could not be read.
+ private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
/**
* Constructor
@@ -333,7 +338,8 @@ public class HBaseFsck {
*/
public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
// Initial pass to fix orphans.
- if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) {
+ if (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
+ || shouldFixHdfsOverlaps() || shouldFixTableOrphans()) {
LOG.info("Loading regioninfos HDFS");
// if nothing is happening this should always complete in two iterations.
int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
@@ -385,7 +391,7 @@ public class HBaseFsck {
if (!checkMetaOnly) {
reportTablesInFlux();
}
-
+
// get regions according to what is online on each RegionServer
loadDeployedRegions();
@@ -399,6 +405,9 @@ public class HBaseFsck {
// Get disabled tables from ZooKeeper
loadDisabledTables();
+ // fix the orphan tables
+ fixOrphanTables();
+
// Check and fix consistency
checkAndFixConsistency();
@@ -704,7 +713,7 @@ public class HBaseFsck {
if (modTInfo == null) {
// only executed once per table.
modTInfo = new TableInfo(tableName);
- Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));
+ Path hbaseRoot = FSUtils.getRootDir(conf);
tablesInfo.put(tableName, modTInfo);
try {
HTableDescriptor htd =
@@ -712,9 +721,14 @@ public class HBaseFsck {
hbaseRoot, tableName);
modTInfo.htds.add(htd);
} catch (IOException ioe) {
- LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
- errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
- "Unable to read .tableinfo from " + hbaseRoot);
+ if (!orphanTableDirs.containsKey(tableName)) {
+ LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
+ //should only report once for each table
+ errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
+ "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
+ Set<String> columns = new HashSet<String>();
+ orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
+ }
}
}
modTInfo.addRegionInfo(hbi);
@@ -722,6 +736,103 @@ public class HBaseFsck {
return tablesInfo;
}
+
+  /**
+   * Collects column family names for a region by listing the entries of its HDFS region
+   * directory that are accepted by {@link FSUtils.FamilyDirFilter}.
+   *
+   * @param columns set that receives the names; it is modified in place
+   * @param hbi region whose HDFS region directory is listed
+   * @return the same {@code columns} instance, after the names have been added
+   * @throws IOException propagated from obtaining or listing the file system
+   */
+  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
+    Path regionPath = hbi.getHdfsRegionDir();
+    FileSystem regionFs = regionPath.getFileSystem(conf);
+    FSUtils.FamilyDirFilter familyFilter = new FSUtils.FamilyDirFilter(regionFs);
+    FileStatus[] familyDirs = regionFs.listStatus(regionPath, familyFilter);
+    for (int i = 0; i < familyDirs.length; i++) {
+      // the last path component of each accepted entry is used as the family name
+      columns.add(familyDirs[i].getPath().getName());
+    }
+    return columns;
+  }
+
+  /**
+   * Fabricates a .tableinfo file for a table from nothing but its name and the names of its
+   * column families. The written descriptor carries<br>
+   * 1. the given table name <br>
+   * 2. one family per entry of {@code columns}<br>
+   * 3. the default properties of {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
+   *
+   * @param tableName name of the table the descriptor is created for
+   * @param columns column family names to put into the descriptor
+   * @return {@code false} without writing anything when {@code columns} is null or empty,
+   *         {@code true} once the descriptor creation call has returned
+   * @throws IOException propagated from creating the table descriptor
+   */
+  private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
+    boolean hasFamilies = columns != null && !columns.isEmpty();
+    if (!hasFamilies) {
+      return false;
+    }
+    HTableDescriptor descriptor = new HTableDescriptor(tableName);
+    for (String familyName : columns) {
+      HColumnDescriptor family = new HColumnDescriptor(familyName);
+      descriptor.addFamily(family);
+    }
+    FSTableDescriptors.createTableDescriptor(descriptor, conf, true);
+    return true;
+  }
+
+  /**
+   * Fixes orphan tables by creating a .tableinfo file under the table dir of every table
+   * recorded in {@code orphanTableDirs}. <br>
+   * 1. if {@code getHTableDescriptors} returns a descriptor for the table, that descriptor is
+   * written back <br>
+   * 2. else a default .tableinfo file is created with the following items<br>
+   * 2.1 the correct tablename <br>
+   * 2.2 the correct colfamily list<br>
+   * 2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
+   * Nothing is repaired unless {@code shouldFixTableOrphans()} is true; the orphan table list is
+   * cleared in every case.
+   * @throws IOException propagated from fetching or writing the table descriptors
+   */
+  public void fixOrphanTables() throws IOException {
+    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
+
+      Path hbaseRoot = FSUtils.getRootDir(conf);
+      FileSystem fs = hbaseRoot.getFileSystem(conf);
+      HTableDescriptor[] htds =
+          getHTableDescriptors(new ArrayList<String>(orphanTableDirs.keySet()));
+
+      // Index the returned descriptors by table name. They must not be matched by position:
+      // the array may be shorter than, or ordered differently from, the orphan table map.
+      Map<String, HTableDescriptor> cachedHtds = new HashMap<String, HTableDescriptor>();
+      if (htds != null) {
+        for (HTableDescriptor htd : htds) {
+          cachedHtds.put(Bytes.toString(htd.getName()), htd);
+        }
+      }
+
+      int numFailedCase = 0;
+      Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
+      while (iter.hasNext()) {
+        Entry<String, Set<String>> entry = iter.next();
+        String tableName = entry.getKey();
+        LOG.info("Trying to fix orphan table error: " + tableName);
+        HTableDescriptor htd = cachedHtds.get(tableName);
+        if (htd != null) {
+          LOG.info("fixing orphan table: " + tableName + " from cache");
+          FSTableDescriptors.createTableDescriptor(fs, hbaseRoot, htd, true);
+          iter.remove();
+          fixes++;
+        } else if (fabricateTableInfo(tableName, entry.getValue())) {
+          LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
+          LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
+          iter.remove();
+          fixes++;
+        } else {
+          // not counted in fixes: nothing was written for this table
+          LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
+          numFailedCase++;
+        }
+      }
+
+      if (orphanTableDirs.isEmpty()) {
+        // all orphanTableDirs are luckily recovered
+        // re-run doFsck after recovering the .tableinfo file
+        setShouldRerun();
+        LOG.warn("Strongly recommend to re-run manually hbck after all orphanTableDirs being fixed");
+      } else if (numFailedCase > 0) {
+        LOG.error("Failed to fix " + numFailedCase
+            + " OrphanTables with default .tableinfo files");
+      }
+
+    }
+    //cleanup the list
+    orphanTableDirs.clear();
+
+  }
/**
* This borrows code from MasterFileSystem.bootstrap()
@@ -3017,7 +3128,15 @@ public class HBaseFsck {
boolean shouldFixHdfsHoles() {
return fixHdfsHoles;
}
-
+
+  /**
+   * Turns the repair of table dirs that lack a .tableinfo file on or off.
+   * @param shouldFix new value of the fixTableOrphans flag
+   */
+  public void setFixTableOrphans(boolean shouldFix) {
+    this.fixTableOrphans = shouldFix;
+  }
+
+  /**
+   * @return the current value of the fixTableOrphans flag
+   */
+  boolean shouldFixTableOrphans() {
+    return this.fixTableOrphans;
+  }
+
public void setFixHdfsOverlaps(boolean shouldFix) {
fixHdfsOverlaps = shouldFix;
}
@@ -3159,6 +3278,7 @@ public class HBaseFsck {
System.err.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
System.err.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
System.err.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
+ System.err.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
System.err.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
System.err.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
@@ -3263,6 +3383,8 @@ public class HBaseFsck {
setFixHdfsHoles(true);
} else if (cmd.equals("-fixHdfsOrphans")) {
setFixHdfsOrphans(true);
+ } else if (cmd.equals("-fixTableOrphans")) {
+ setFixTableOrphans(true);
} else if (cmd.equals("-fixHdfsOverlaps")) {
setFixHdfsOverlaps(true);
} else if (cmd.equals("-fixVersionFile")) {
@@ -3389,6 +3511,7 @@ public class HBaseFsck {
setFixHdfsHoles(false);
setFixHdfsOverlaps(false);
setFixVersionFile(false);
+ setFixTableOrphans(false);
errors.resetErrors();
code = onlineHbck();
setRetCode(code);
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1382529&r1=1382528&r2=1382529&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Sun Sep 9 16:21:15 2012
@@ -412,28 +412,55 @@ public class TestHBaseFsck {
deleteTable(table);
}
}
-
+
@Test
- public void testHbckMissingTableinfo() throws Exception {
+ public void testHbckFixOrphanTable() throws Exception {
String table = "tableInfo";
FileSystem fs = null;
Path tableinfo = null;
try {
setupTable(table);
+ HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+
Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table );
fs = hbaseTableDir.getFileSystem(conf);
FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
tableinfo = status.getPath();
fs.rename(tableinfo, new Path("/.tableinfo"));
+ // without fixing, hbck must report the missing .tableinfo
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
+
+ // fix OrphanTable with default .tableinfo
+ hbck = doFsck(conf, true);
+ assertNoErrors(hbck);
+ status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+ assertNotNull(status);
+
+ // mark the descriptor with a non-default value, then delete the .tableinfo again
+ HTableDescriptor htd = admin.getTableDescriptor(Bytes.toBytes(table));
+ htd.setValue("NOT_DEFAULT", "true");
+ admin.disableTable(table);
+ admin.modifyTable(Bytes.toBytes(table), htd);
+ admin.enableTable(table);
+ fs.delete(status.getPath(), true);
+
+ // fix OrphanTable with cache
+ htd = admin.getTableDescriptor(Bytes.toBytes(table));
+ hbck = doFsck(conf, true);
+ assertNoErrors(hbck);
+ status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+ assertNotNull(status);
+ htd = admin.getTableDescriptor(Bytes.toBytes(table));
+ // expected value first, so a failure message reads correctly
+ assertEquals("true", htd.getValue("NOT_DEFAULT"));
} finally {
fs.rename(new Path("/.tableinfo"), tableinfo);
deleteTable(table);
}
}
-
+
/**
* This create and fixes a bad table with regions that have a duplicate
* start key
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java?rev=1382529&r1=1382528&r2=1382529&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java Sun Sep 9 16:21:15 2012
@@ -37,12 +37,12 @@ public class HbckTestingUtil {
public static HBaseFsck doFsck(
Configuration conf, boolean fix, String table) throws Exception {
- return doFsck(conf, fix, fix, fix, fix,fix, fix, table);
+ return doFsck(conf, fix, fix, fix, fix,fix, fix, fix, table); // same flag for each of the seven fix options
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
- boolean fixHdfsOrphans, boolean fixVersionFile,
+ boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
String table) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect();
@@ -53,6 +53,7 @@ public class HbckTestingUtil {
fsck.setFixHdfsHoles(fixHdfsHoles);
fsck.setFixHdfsOverlaps(fixHdfsOverlaps);
fsck.setFixHdfsOrphans(fixHdfsOrphans);
+ fsck.setFixTableOrphans(fixTableOrphans);
fsck.setFixVersionFile(fixVersionFile);
if (table != null) {
fsck.includeTable(table);