You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jm...@apache.org on 2012/04/27 00:27:24 UTC
svn commit: r1331129 - in /hbase/branches/0.90: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Author: jmhsieh
Date: Thu Apr 26 22:27:23 2012
New Revision: 1331129
URL: http://svn.apache.org/viewvc?rev=1331129&view=rev
Log:
HBASE-5801 [hbck] Hbck should handle case where some regions have different HTD settings in .regioninfo files (0.90 specific) (Jimmy Xiang)
Modified:
hbase/branches/0.90/CHANGES.txt
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1331129&r1=1331128&r2=1331129&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Thu Apr 26 22:27:23 2012
@@ -29,6 +29,7 @@ Release 0.90.7 - Unreleased
HBASE-5588 Deprecate/remove AssignmentManager#clearRegionFromTransition
HBASE-5589 Add of the offline call to the Master Interface
HBASE-5734 Change hbck sideline root (Jimmy Xiang)
+ HBASE-5801 [hbck] Hbck should handle case where some regions have different HTD settings in .regioninfo files (0.90 specific) (Jimmy Xiang)
NEW FEATURE
HBASE-5128 [uber hbck] Online automated repair of table integrity and region consistency problems
Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1331129&r1=1331128&r2=1331129&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Thu Apr 26 22:27:23 2012
@@ -165,6 +165,7 @@ public class HBaseFsck {
private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
+ private boolean fixTableDesc = false; // fix table descriptor inconsistency?
private boolean rerun = false; // if we tried to fix something, rerun hbck
private static boolean summary = false; // if we want to print less output
@@ -174,6 +175,7 @@ public class HBaseFsck {
* State
*********/
private ErrorReporter errors = new PrintingErrorReporter();
+ private boolean multiTableDescFound = false; // to record if multi table descriptors found
int fixes = 0;
/**
@@ -263,6 +265,7 @@ public class HBaseFsck {
errors.clear();
tablesInfo.clear();
orphanHdfsDirs.clear();
+ multiTableDescFound = false;
}
/**
@@ -1075,6 +1078,12 @@ public class HBaseFsck {
for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
checkRegionConsistency(e.getKey(), e.getValue());
}
+ if (shouldFixTableDesc() && isMultiTableDescFound()) {
+ setShouldRerun(); // should re-run to verify it is fixed
+ for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
+ fixTableDescConsistency(e.getKey(), e.getValue());
+ }
+ }
}
/**
@@ -1208,6 +1217,32 @@ public class HBaseFsck {
}
/**
+ * Fix a single region's .regioninfo if its table descriptor differs from the one expected for its table.
+ */
+ private void fixTableDescConsistency(final String key, final HbckInfo hbi)
+ throws IOException, KeeperException, InterruptedException {
+ String tableName = Bytes.toString(hbi.getTableName());
+ TableInfo tableInfo = tablesInfo.get(tableName);
+ // checkNotNull takes the reference first and the error message second; with the
+ // arguments swapped only the (never null) message string would be checked.
+ Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
+ // Only tables whose regions do not agree on exactly one descriptor need fixing.
+ if (tableInfo.htds.size() != 1) {
+ HTableDescriptor htd = tableInfo.getHTD();
+ Path sidelineTableDir = new Path(getSidelineDir(), tableName);
+ // Leave regions alone whose .regioninfo already carries the expected descriptor.
+ if (!htd.equals(hbi.getHdfsHRI().getTableDesc())) {
+ if (hbi.deployedOn.size() > 1) {
+ LOG.warn("Region " + hbi.toString() + " is deployed on multiple region servers."
+ + " Please fix the multiple assignments before fixing multiple table desc.");
+ } else {
+ // hsa stays null when the region is not deployed anywhere.
+ HServerAddress hsa = null;
+ if (hbi.deployedOn.size() == 1) {
+ hsa = hbi.deployedOn.get(0);
+ }
+ HBaseFsckRepair.fixTableDesc(admin, hsa, hbi, htd, sidelineTableDir);
+ }
+ }
+ }
+ }
+
+ /**
* Check a single region for consistency and correct deployment.
*/
private void checkRegionConsistency(final String key, final HbckInfo hbi)
@@ -1492,6 +1527,7 @@ public class HBaseFsck {
public class TableInfo {
String tableName;
TreeSet <HServerAddress> deployedOn;
+ HTableDescriptor htdFromAdmin;
// backwards regions
final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
@@ -1529,6 +1565,7 @@ public class HBaseFsck {
if (htds.size() > 1) {
LOG.warn("Multiple table descriptors found for table '"
+ Bytes.toString(hir.getTableName()) + "' regions: " + htds);
+ setMultiTableDescFound(true);
} else {
LOG.info("Added a table descriptor found in table '"
+ Bytes.toString(hir.getTableName()) + "' regions: " + htd);
@@ -1539,14 +1576,17 @@ public class HBaseFsck {
/**
* @return descriptor common to all regions. null if are none or multiple!
*/
- private HTableDescriptor getHTD() {
- if (htds.size() == 1) {
- return (HTableDescriptor)htds.toArray()[0];
- } else {
- LOG.error("None/Multiple table descriptors found for table '"
- + tableName + "' regions: " + htds);
+ private HTableDescriptor getHTD() throws IOException {
+ // NOTE: the javadoc above predates this version. When the regions do not agree on
+ // exactly one descriptor this no longer returns null; it returns the descriptor
+ // obtained from admin.getTableDescriptor, fetched once and cached in htdFromAdmin.
+ if (htds.size() != 1) {
+ if (htdFromAdmin == null) {
+ LOG.warn("None/Multiple table descriptors found for table '"
+ + tableName + "' regions: " + htds);
+ htdFromAdmin = admin.getTableDescriptor(Bytes.toBytes(tableName));
+ // Separator added so the descriptor is not glued onto the message text.
+ LOG.warn("Use this one from meta instead: " + htdFromAdmin);
+ }
+ return htdFromAdmin;
}
- return null;
+ return (HTableDescriptor)htds.toArray()[0];
}
public void addRegionInfo(HbckInfo hir) {
@@ -2425,6 +2465,12 @@ public class HBaseFsck {
*/
private void printTableSummary(TreeMap<String, TableInfo> tablesInfo) {
System.out.println("Summary:");
+ if (isMultiTableDescFound()) {
+ System.out.println(" Multiple table descriptors were found.\n"
+ + " You can ignore it if your cluster is working fine.\n"
+ + " To fix it, please re-run hbck with option -fixTableDesc\n");
+ }
+
for (TableInfo tInfo : tablesInfo.values()) {
if (errors.tableHasErrors(tInfo)) {
System.out.println("Table " + tInfo.getName() + " is inconsistent.");
@@ -2820,6 +2866,22 @@ public class HBaseFsck {
return sidelineBigOverlaps;
}
+ /**
+ * Turns fixing of table descriptor inconsistencies on or off.
+ * @param ftd value to store in the fixTableDesc flag
+ */
+ public void setFixTableDesc(boolean ftd) {
+ this.fixTableDesc = ftd;
+ }
+
+ /**
+ * @return the current value of the fixTableDesc flag
+ */
+ public boolean shouldFixTableDesc() {
+ return fixTableDesc;
+ }
+
+ /**
+ * Records whether multiple table descriptors were found for a table.
+ * @param multiTableDesc value to store in the multiTableDescFound flag
+ */
+ public void setMultiTableDescFound(boolean multiTableDesc) {
+ multiTableDescFound = multiTableDesc;
+ }
+
+ /**
+ * @return the current value of the multiTableDescFound flag
+ */
+ public boolean isMultiTableDescFound() {
+ return multiTableDescFound;
+ }
+
/**
* @param mm maximum number of regions to merge into a single region.
*/
@@ -2893,6 +2955,7 @@ public class HBaseFsck {
System.err.println("");
System.err.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps");
System.err.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles -fixHdfsOrphans");
+ System.err.println(" -fixTableDesc Try to fix table descriptor inconsistency");
Runtime.getRuntime().exit(-2);
}
@@ -2958,6 +3021,8 @@ public class HBaseFsck {
fsck.setFixVersionFile(true);
} else if (cmd.equals("-sidelineBigOverlaps")) {
fsck.setSidelineBigOverlaps(true);
+ } else if (cmd.equals("-fixTableDesc")) {
+ fsck.setFixTableDesc(true);
} else if (cmd.equals("-repair")) {
// this attempts to merge overlapping hdfs regions, needs testing
// under load
Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java?rev=1331129&r1=1331128&r2=1331129&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java Thu Apr 26 22:27:23 2012
@@ -26,10 +26,13 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
@@ -190,6 +193,57 @@ public class HBaseFsckRepair {
}
/**
+ * Replace the .regioninfo with a new one with the expected table desc,
+ * then re-assign the region.
+ */
+ public static void fixTableDesc(final HBaseAdmin admin, final HServerAddress hsa,
+ final HBaseFsck.HbckInfo hbi, final HTableDescriptor htd, final Path sidelineTableDir)
+ throws IOException, KeeperException, InterruptedException {
+ // at first, sideline the current .regioninfo
+ Path regionDir = hbi.getHdfsRegionDir();
+ Path regioninfoPath = new Path(regionDir, HRegion.REGIONINFO_FILE);
+ Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
+ Path regioninfoSidelinePath = new Path(sidelineRegionDir, HRegion.REGIONINFO_FILE);
+ FileSystem fs = FileSystem.get(admin.getConfiguration());
+ fs.mkdirs(sidelineRegionDir);
+ boolean success = fs.rename(regioninfoPath, regioninfoSidelinePath);
+ if (!success) {
+ String msg = "Unable to rename file " + regioninfoPath + " to " + regioninfoSidelinePath;
+ LOG.error(msg);
+ throw new IOException(msg);
+ }
+
+ // then fix the table desc: create a new .regioninfo,
+ // offline the region and wait till it's assigned again.
+ HRegionInfo hri = hbi.getHdfsHRI();
+ hri.setTableDesc(htd);
+ Path tmpDir = new Path(sidelineRegionDir, ".tmp");
+ Path tmpPath = new Path(tmpDir, HRegion.REGIONINFO_FILE);
+
+ try {
+ // The new file is written to a temporary path first and only then renamed into place.
+ FSDataOutputStream out = fs.create(tmpPath, true);
+ try {
+ hri.write(out);
+ out.write('\n');
+ out.write('\n');
+ out.write(Bytes.toBytes(hri.toString()));
+ } finally {
+ out.close();
+ }
+ if (!fs.rename(tmpPath, regioninfoPath)) {
+ throw new IOException("Unable to rename " + tmpPath + " to " +
+ regioninfoPath);
+ }
+ } catch (IOException ioe) {
+ // The original .regioninfo was already moved away, so at this point the region
+ // dir has none. Try to move the sidelined file back before reporting the failure.
+ try {
+ if (!fs.rename(regioninfoSidelinePath, regioninfoPath)) {
+ LOG.error("Unable to restore " + regioninfoSidelinePath + " to " + regioninfoPath);
+ }
+ } catch (IOException restoreFailure) {
+ LOG.error("Unable to restore " + regioninfoSidelinePath + " to " + regioninfoPath,
+ restoreFailure);
+ }
+ // Always surface the original failure to the caller.
+ throw ioe;
+ }
+
+ // hsa is null when the caller found the region deployed nowhere; nothing to close then.
+ if (hsa != null) {
+ closeRegionSilentlyAndWait(admin, hsa, hri);
+ }
+
+ // Force ZK node to OFFLINE so master assigns
+ forceOfflineInZK(admin, hri);
+ waitUntilAssigned(admin, hri);
+ }
+
+ /**
* Creates, flushes, and closes a new hdfs region dir
*/
public static HRegion createHDFSRegionDir(Configuration conf,
Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1331129&r1=1331128&r2=1331129&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Thu Apr 26 22:27:23 2012
@@ -22,10 +22,7 @@ package org.apache.hadoop.hbase.util;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import static org.junit.Assert.*;
import java.io.IOException;
import java.util.ArrayList;
@@ -38,6 +35,8 @@ import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
@@ -882,4 +881,89 @@ public class TestHBaseFsck {
deleteTable(table);
}
}
+
+ /**
+ * This creates regions with inconsistent region info.
+ * Test it can be fixed properly.
+ */
+ @Test(timeout=300000)
+ public void testMultipleTableDesc() throws Exception {
+ String table = "MultipleTableDesc";
+ try {
+ setupTable(table);
+
+ HTableDescriptor htd = tbl.getTableDescriptor();
+ Map<HRegionInfo, HServerAddress> hris = tbl.getRegionsInfo();
+ // more than one region is needed so that descriptors can disagree between regions
+ assertTrue(hris.size() > 1);
+
+ // verify everything is fine before messing it up
+ Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
+ Path tableDir = new Path(rootDir + "/" + htd.getNameAsString());
+ FileSystem fs = rootDir.getFileSystem(conf);
+ int currentBlockSize = htd.getFamily(FAM).getBlocksize();
+ // i counts visited regions; only the first one gets its .regioninfo rewritten
+ int i = 0;
+ for (HRegionInfo hri: hris.keySet()) {
+ assertEquals(htd, hri.getTableDesc());
+ Path regionDir = new Path(tableDir, hri.getEncodedName());
+ Path hriPath = new Path(regionDir, HRegion.REGIONINFO_FILE);
+ HRegionInfo hriOnFs = new HRegionInfo();
+ FSDataInputStream in = fs.open(hriPath);
+ try {
+ hriOnFs.readFields(in);
+ } finally {
+ in.close();
+ }
+ assertEquals(htd, hriOnFs.getTableDesc());
+
+ // now mess up one .regioninfo file
+ if (++i == 1) {
+ // a copy of the descriptor with a different block size, so it no longer equals htd
+ HTableDescriptor newHtd = new HTableDescriptor(htd);
+ int blockSize = currentBlockSize - 512;
+ newHtd.getFamily(FAM).setBlocksize(blockSize);
+ FSDataOutputStream out = fs.create(hriPath, true);
+ try {
+ hri.setTableDesc(newHtd);
+ hri.write(out);
+ out.write('\n');
+ out.write('\n');
+ out.write(Bytes.toBytes(hri.toString()));
+ } finally {
+ out.close();
+ }
+ }
+ }
+
+ // presumably a report-only run (second argument false) -- confirm against HbckTestingUtil.doFsck
+ HBaseFsck hbck = doFsck(conf, false);
+ assertTrue(hbck.isMultiTableDescFound());
+ // there should be no other issues
+ assertNoErrors(hbck);
+
+ // now fix it.
+ // a fresh HBaseFsck is built by hand so that only the fixTableDesc option is switched on
+ hbck = new HBaseFsck(conf);
+ hbck.setFixTableDesc(true);
+ hbck.connect();
+ hbck.onlineHbck();
+
+ // verify it is fixed
+ hbck = doFsck(conf, false);
+ assertFalse(hbck.isMultiTableDescFound());
+ assertNoErrors(hbck);
+
+ // verify all .regioninfo files on FS
+ for (HRegionInfo hri: hris.keySet()) {
+ Path regionDir = new Path(tableDir, hri.getEncodedName());
+ Path hriPath = new Path(regionDir, HRegion.REGIONINFO_FILE);
+ FSDataInputStream in = fs.open(hriPath);
+ HRegionInfo hriOnFs = new HRegionInfo();
+ try {
+ hriOnFs.readFields(in);
+ } finally {
+ in.close();
+ }
+ // every region, including the one that was tampered with, must carry the original descriptor again
+ assertEquals(htd, hriOnFs.getTableDesc());
+ }
+ } finally {
+ deleteTable(table);
+ }
+ }
}