You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by an...@apache.org on 2015/06/08 07:48:16 UTC
hbase git commit: HBASE-13806 Check the mob files when there are
mob-enabled columns in HFileCorruptionChecker. (Jingcheng)
Repository: hbase
Updated Branches:
refs/heads/hbase-11339 efbef296d -> 13fe542bc
HBASE-13806 Check the mob files when there are mob-enabled columns in HFileCorruptionChecker. (Jingcheng)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/13fe542b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/13fe542b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/13fe542b
Branch: refs/heads/hbase-11339
Commit: 13fe542bccea2ac70f052017ac71b99b10dbcda1
Parents: efbef29
Author: anoopsjohn <an...@gmail.com>
Authored: Mon Jun 8 11:17:43 2015 +0530
Committer: anoopsjohn <an...@gmail.com>
Committed: Mon Jun 8 11:17:43 2015 +0530
----------------------------------------------------------------------
.../hbase/util/hbck/HFileCorruptionChecker.java | 197 +++++++++++++++++++
.../apache/hadoop/hbase/util/TestHBaseFsck.java | 110 +++++++++++
2 files changed, 307 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/13fe542b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
index 23dc570..a1b7c2c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
@@ -39,9 +39,11 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CorruptHFileException;
import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.FSUtils.FamilyDirFilter;
import org.apache.hadoop.hbase.util.FSUtils.HFileFilter;
@@ -68,8 +70,13 @@ public class HFileCorruptionChecker {
final Set<Path> failures = new ConcurrentSkipListSet<Path>();
final Set<Path> quarantined = new ConcurrentSkipListSet<Path>();
final Set<Path> missing = new ConcurrentSkipListSet<Path>();
+ final Set<Path> corruptedMobFiles = new ConcurrentSkipListSet<Path>();
+ final Set<Path> failureMobFiles = new ConcurrentSkipListSet<Path>();
+ final Set<Path> missedMobFiles = new ConcurrentSkipListSet<Path>();
+ final Set<Path> quarantinedMobFiles = new ConcurrentSkipListSet<Path>();
final boolean inQuarantineMode;
final AtomicInteger hfilesChecked = new AtomicInteger();
+ final AtomicInteger mobFilesChecked = new AtomicInteger();
public HFileCorruptionChecker(Configuration conf, ExecutorService executor,
boolean quarantine) throws IOException {
@@ -177,6 +184,109 @@ public class HFileCorruptionChecker {
}
/**
+ * Check all files in a mob column family dir.
+ *
+ * @param cfDir
+ * mob column family directory
+ * @throws IOException
+ */
+ protected void checkMobColFamDir(Path cfDir) throws IOException {
+ FileStatus[] hfs = null;
+ try {
+ hfs = fs.listStatus(cfDir, new HFileFilter(fs)); // use same filter as scanner.
+ } catch (FileNotFoundException fnfe) {
+ // Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist.
+ LOG.warn("Mob colfam Directory " + cfDir +
+ " does not exist. Likely the table is deleted. Skipping.");
+ missedMobFiles.add(cfDir);
+ return;
+ }
+
+ // Hadoop 1.0 listStatus does not throw an exception if the path does not exist.
+ if (hfs.length == 0 && !fs.exists(cfDir)) {
+ LOG.warn("Mob colfam Directory " + cfDir +
+ " does not exist. Likely the table is deleted. Skipping.");
+ missedMobFiles.add(cfDir);
+ return;
+ }
+ for (FileStatus hfFs : hfs) {
+ Path hf = hfFs.getPath();
+ checkMobFile(hf);
+ }
+ }
+
+ /**
+ * Checks a path to see if it is a valid mob file.
+ *
+ * @param p
+ * full Path to a mob file.
+ * @throws IOException
+ * This is a connectivity related exception
+ */
+ protected void checkMobFile(Path p) throws IOException {
+ HFile.Reader r = null;
+ try {
+ r = HFile.createReader(fs, p, cacheConf, conf);
+ } catch (CorruptHFileException che) {
+ LOG.warn("Found corrupt mob file " + p, che);
+ corruptedMobFiles.add(p);
+ if (inQuarantineMode) {
+ Path dest = createQuarantinePath(p);
+ LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
+ boolean success = fs.mkdirs(dest.getParent());
+ success = success ? fs.rename(p, dest): false;
+ if (!success) {
+ failureMobFiles.add(p);
+ } else {
+ quarantinedMobFiles.add(dest);
+ }
+ }
+ return;
+ } catch (FileNotFoundException fnfe) {
+ LOG.warn("Mob file " + p + " was missing. Likely removed due to compaction?");
+ missedMobFiles.add(p);
+ } finally {
+ mobFilesChecked.addAndGet(1);
+ if (r != null) {
+ r.close(true);
+ }
+ }
+ }
+
+ /**
+ * Checks all the mob files of a table.
+ * @param regionDir The mob region directory
+ * @throws IOException
+ */
+ private void checkMobRegionDir(Path regionDir) throws IOException {
+ if (!fs.exists(regionDir)) {
+ return;
+ }
+ FileStatus[] hfs = null;
+ try {
+ hfs = fs.listStatus(regionDir, new FamilyDirFilter(fs));
+ } catch (FileNotFoundException fnfe) {
+ // Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist.
+ LOG.warn("Mob directory " + regionDir
+ + " does not exist. Likely the table is deleted. Skipping.");
+ missedMobFiles.add(regionDir);
+ return;
+ }
+
+ // Hadoop 1.0 listStatus does not throw an exception if the path does not exist.
+ if (hfs.length == 0 && !fs.exists(regionDir)) {
+ LOG.warn("Mob directory " + regionDir
+ + " does not exist. Likely the table is deleted. Skipping.");
+ missedMobFiles.add(regionDir);
+ return;
+ }
+ for (FileStatus hfFs : hfs) {
+ Path hf = hfFs.getPath();
+ checkMobColFamDir(hf);
+ }
+ }
+
+ /**
* Check all column families in a region dir.
*
* @param regionDir
@@ -236,6 +346,8 @@ public class HFileCorruptionChecker {
rdcs.add(work);
}
+ // add mob region
+ rdcs.add(createMobRegionDirChecker(tableDir));
// Submit and wait for completion
try {
rdFutures = executor.invokeAll(rdcs);
@@ -293,6 +405,34 @@ public class HFileCorruptionChecker {
}
/**
+ * An individual work item for parallelized mob dir processing. This is
+ * intentionally an inner class so it can use the shared error sets and fs.
+ */
+ private class MobRegionDirChecker extends RegionDirChecker {
+
+ MobRegionDirChecker(Path regionDir) {
+ super(regionDir);
+ }
+
+ @Override
+ public Void call() throws IOException {
+ checkMobRegionDir(regionDir);
+ return null;
+ }
+ }
+
+ /**
+ * Creates an instance of MobRegionDirChecker.
+ * @param tableDir The current table directory.
+ * @return An instance of MobRegionDirChecker.
+ */
+ private MobRegionDirChecker createMobRegionDirChecker(Path tableDir) {
+ TableName tableName = FSUtils.getTableName(tableDir);
+ Path mobDir = MobUtils.getMobRegionPath(conf, tableName);
+ return new MobRegionDirChecker(mobDir);
+ }
+
+ /**
* Check the specified table dirs for bad hfiles.
*/
public void checkTables(Collection<Path> tables) throws IOException {
@@ -338,6 +478,42 @@ public class HFileCorruptionChecker {
}
/**
+ * @return the set of corrupt mob file paths that failed to be quarantined after checkTables is called.
+ */
+ public Collection<Path> getFailureMobFiles() {
+ return new HashSet<Path>(failureMobFiles);
+ }
+
+ /**
+ * @return the set of corrupted mob file paths after checkTables is called.
+ */
+ public Collection<Path> getCorruptedMobFiles() {
+ return new HashSet<Path>(corruptedMobFiles);
+ }
+
+ /**
+ * @return number of mob files checked in the last HFileCorruptionChecker run
+ */
+ public int getMobFilesChecked() {
+ return mobFilesChecked.get();
+ }
+
+ /**
+ * @return the set of successfully quarantined mob file paths after checkTables is called.
+ */
+ public Collection<Path> getQuarantinedMobFiles() {
+ return new HashSet<Path>(quarantinedMobFiles);
+ }
+
+ /**
+ * @return the set of paths that were missing. Likely due to table deletion or
+ * deletion/moves from compaction.
+ */
+ public Collection<Path> getMissedMobFiles() {
+ return new HashSet<Path>(missedMobFiles);
+ }
+
+ /**
* Print a human readable summary of hfile quarantining operations.
* @param out
*/
@@ -363,10 +539,31 @@ public class HFileCorruptionChecker {
String fixedState = (corrupted.size() == quarantined.size()) ? "OK"
: "CORRUPTED";
+ // print mob-related report
+ if (inQuarantineMode) {
+ out.print(" Mob files successfully quarantined: " + quarantinedMobFiles.size());
+ for (Path sq : quarantinedMobFiles) {
+ out.print(" " + sq);
+ }
+ out.print(" Mob files failed quarantine: " + failureMobFiles.size());
+ for (Path fq : failureMobFiles) {
+ out.print(" " + fq);
+ }
+ }
+ out.print(" Mob files moved while checking: " + missedMobFiles.size());
+ for (Path mq : missedMobFiles) {
+ out.print(" " + mq);
+ }
+ String initialMobState = (corruptedMobFiles.size() == 0) ? "OK" : "CORRUPTED";
+ String fixedMobState = (corruptedMobFiles.size() == quarantinedMobFiles.size()) ? "OK"
+ : "CORRUPTED";
+
if (inQuarantineMode) {
out.print("Summary: " + initialState + " => " + fixedState);
+ out.print("Mob summary: " + initialMobState + " => " + fixedMobState);
} else {
out.print("Summary: " + initialState);
+ out.print("Mob summary: " + initialMobState);
}
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/13fe542b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index 28b80ff..9938df7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -39,6 +39,7 @@ import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
+import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
@@ -95,6 +96,8 @@ import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.TableLockManager;
import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
+import org.apache.hadoop.hbase.mob.MobFileName;
+import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -438,6 +441,31 @@ public class TestHBaseFsck {
}
/**
+ * Sets up a clean table with a mob-enabled column.
+ *
+ * @param tablename The name of the table to be created.
+ * @throws Exception
+ */
+ void setupMobTable(TableName tablename) throws Exception {
+ HTableDescriptor desc = new HTableDescriptor(tablename);
+ HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
+ hcd.setMobEnabled(true);
+ hcd.setMobThreshold(0);
+ desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
+ createTable(TEST_UTIL, desc, SPLITS);
+
+ tbl = (HTable) connection.getTable(tablename, tableExecutorService);
+ List<Put> puts = new ArrayList<Put>();
+ for (byte[] row : ROWKEYS) {
+ Put p = new Put(row);
+ p.add(FAM, Bytes.toBytes("val"), row);
+ puts.add(p);
+ }
+ tbl.put(puts);
+ tbl.flushCommits();
+ }
+
+ /**
* Counts the number of row to verify data loss or non-dataloss.
*/
int countRows() throws IOException {
@@ -2121,6 +2149,44 @@ public class TestHBaseFsck {
}
/**
+ * Gets a flushed mob file, polling the mob family directory until one appears.
+ * @param fs The current file system.
+ * @param table The current table name.
+ * @return Path of a flushed mob file.
+ * @throws IOException
+ */
+ Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException {
+ Path regionDir = MobUtils.getMobRegionPath(conf, table);
+ Path famDir = new Path(regionDir, FAM_STR);
+
+ // keep doing this until we get a legit hfile
+ while (true) {
+ FileStatus[] hfFss = fs.listStatus(famDir);
+ if (hfFss.length == 0) {
+ continue;
+ }
+ for (FileStatus hfs : hfFss) {
+ if (!hfs.isDirectory()) {
+ return hfs.getPath();
+ }
+ }
+ }
+ }
+
+ /**
+ * Creates a new mob file name by the old one.
+ * @param oldFileName The old mob file name.
+ * @return The new mob file name.
+ */
+ String createMobFileName(String oldFileName) {
+ MobFileName mobFileName = MobFileName.create(oldFileName);
+ String startKey = mobFileName.getStartKey();
+ String date = mobFileName.getDate();
+ return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", ""))
+ .getFileName();
+ }
+
+ /**
* This creates a table and then corrupts an hfile. Hbck should quarantine the file.
*/
@Test(timeout=180000)
@@ -2161,6 +2227,50 @@ public class TestHBaseFsck {
}
/**
+ * This creates a table and then corrupts a mob file. Hbck should quarantine the file.
+ */
+ @Test(timeout=180000)
+ public void testQuarantineCorruptMobFile() throws Exception {
+ TableName table = TableName.valueOf(name.getMethodName());
+ try {
+ setupMobTable(table);
+ assertEquals(ROWKEYS.length, countRows());
+ admin.flush(table);
+
+ FileSystem fs = FileSystem.get(conf);
+ Path mobFile = getFlushedMobFile(fs, table);
+ admin.disableTable(table);
+ // create new corrupt mob file.
+ String corruptMobFile = createMobFileName(mobFile.getName());
+ Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
+ TestHFile.truncateFile(fs, mobFile, corrupt);
+ LOG.info("Created corrupted mob file " + corrupt);
+ HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
+ HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
+
+ // A corrupt mob file doesn't abort the start of regions, so we can enable the table.
+ admin.enableTable(table);
+ HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
+ assertEquals(res.getRetCode(), 0);
+ HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
+ assertEquals(hfcc.getHFilesChecked(), 4);
+ assertEquals(hfcc.getCorrupted().size(), 0);
+ assertEquals(hfcc.getFailures().size(), 0);
+ assertEquals(hfcc.getQuarantined().size(), 0);
+ assertEquals(hfcc.getMissing().size(), 0);
+ assertEquals(hfcc.getMobFilesChecked(), 5);
+ assertEquals(hfcc.getCorruptedMobFiles().size(), 1);
+ assertEquals(hfcc.getFailureMobFiles().size(), 0);
+ assertEquals(hfcc.getQuarantinedMobFiles().size(), 1);
+ assertEquals(hfcc.getMissedMobFiles().size(), 0);
+ String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
+ assertEquals(corruptMobFile, quarantinedMobFile);
+ } finally {
+ cleanupTable(table);
+ }
+ }
+
+ /**
* Test that use this should have a timeout, because this method could potentially wait forever.
*/
private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,