You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by an...@apache.org on 2015/06/08 07:48:16 UTC

hbase git commit: HBASE-13806 Check the mob files when there are mob-enabled columns in HFileCorruptionChecker. (Jingcheng)

Repository: hbase
Updated Branches:
  refs/heads/hbase-11339 efbef296d -> 13fe542bc


HBASE-13806 Check the mob files when there are mob-enabled columns in HFileCorruptionChecker. (Jingcheng)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/13fe542b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/13fe542b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/13fe542b

Branch: refs/heads/hbase-11339
Commit: 13fe542bccea2ac70f052017ac71b99b10dbcda1
Parents: efbef29
Author: anoopsjohn <an...@gmail.com>
Authored: Mon Jun 8 11:17:43 2015 +0530
Committer: anoopsjohn <an...@gmail.com>
Committed: Mon Jun 8 11:17:43 2015 +0530

----------------------------------------------------------------------
 .../hbase/util/hbck/HFileCorruptionChecker.java | 197 +++++++++++++++++++
 .../apache/hadoop/hbase/util/TestHBaseFsck.java | 110 +++++++++++
 2 files changed, 307 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/13fe542b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
index 23dc570..a1b7c2c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/hbck/HFileCorruptionChecker.java
@@ -39,9 +39,11 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.CorruptHFileException;
 import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.FSUtils.FamilyDirFilter;
 import org.apache.hadoop.hbase.util.FSUtils.HFileFilter;
@@ -68,8 +70,13 @@ public class HFileCorruptionChecker {
   final Set<Path> failures = new ConcurrentSkipListSet<Path>();
   final Set<Path> quarantined = new ConcurrentSkipListSet<Path>();
   final Set<Path> missing = new ConcurrentSkipListSet<Path>();
+  final Set<Path> corruptedMobFiles = new ConcurrentSkipListSet<Path>();
+  final Set<Path> failureMobFiles = new ConcurrentSkipListSet<Path>();
+  final Set<Path> missedMobFiles = new ConcurrentSkipListSet<Path>();
+  final Set<Path> quarantinedMobFiles = new ConcurrentSkipListSet<Path>();
   final boolean inQuarantineMode;
   final AtomicInteger hfilesChecked = new AtomicInteger();
+  final AtomicInteger mobFilesChecked = new AtomicInteger();
 
   public HFileCorruptionChecker(Configuration conf, ExecutorService executor,
       boolean quarantine) throws IOException {
@@ -177,6 +184,109 @@ public class HFileCorruptionChecker {
   }
 
   /**
+   * Check all files in a mob column family dir.
+   *
+   * @param cfDir
+   *          mob column family directory
+   * @throws IOException
+   */
+  protected void checkMobColFamDir(Path cfDir) throws IOException {
+    FileStatus[] hfs = null;
+    try {
+      hfs = fs.listStatus(cfDir, new HFileFilter(fs)); // use same filter as scanner.
+    } catch (FileNotFoundException fnfe) {
+      // Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist.
+      LOG.warn("Mob colfam Directory " + cfDir +
+          " does not exist.  Likely the table is deleted. Skipping.");
+      missedMobFiles.add(cfDir);
+      return;
+    }
+
+    // Hadoop 1.0 listStatus does not throw an exception if the path does not exist.
+    if (hfs.length == 0 && !fs.exists(cfDir)) {
+      LOG.warn("Mob colfam Directory " + cfDir +
+          " does not exist.  Likely the table is deleted. Skipping.");
+      missedMobFiles.add(cfDir);
+      return;
+    }
+    for (FileStatus hfFs : hfs) {
+      Path hf = hfFs.getPath();
+      checkMobFile(hf);
+    }
+  }
+
+  /**
+   * Checks a path to see if it is a valid mob file.
+   *
+   * @param p
+   *          full Path to a mob file.
+   * @throws IOException
+   *           This is a connectivity related exception
+   */
+  protected void checkMobFile(Path p) throws IOException {
+    HFile.Reader r = null;
+    try {
+      r = HFile.createReader(fs, p, cacheConf, conf);
+    } catch (CorruptHFileException che) {
+      LOG.warn("Found corrupt mob file " + p, che);
+      corruptedMobFiles.add(p);
+      if (inQuarantineMode) {
+        Path dest = createQuarantinePath(p);
+        LOG.warn("Quarantining corrupt mob file " + p + " into " + dest);
+        boolean success = fs.mkdirs(dest.getParent());
+        success = success ? fs.rename(p, dest): false;
+        if (!success) {
+          failureMobFiles.add(p);
+        } else {
+          quarantinedMobFiles.add(dest);
+        }
+      }
+      return;
+    } catch (FileNotFoundException fnfe) {
+      LOG.warn("Mob file " + p + " was missing.  Likely removed due to compaction?");
+      missedMobFiles.add(p);
+    } finally {
+      mobFilesChecked.addAndGet(1);
+      if (r != null) {
+        r.close(true);
+      }
+    }
+  }
+
+  /**
+   * Checks all the mob files of a table.
+   * @param regionDir The mob region directory
+   * @throws IOException
+   */
+  private void checkMobRegionDir(Path regionDir) throws IOException {
+    if (!fs.exists(regionDir)) {
+      return;
+    }
+    FileStatus[] hfs = null;
+    try {
+      hfs = fs.listStatus(regionDir, new FamilyDirFilter(fs));
+    } catch (FileNotFoundException fnfe) {
+      // Hadoop 0.23+ listStatus semantics throws an exception if the path does not exist.
+      LOG.warn("Mob directory " + regionDir
+        + " does not exist.  Likely the table is deleted. Skipping.");
+      missedMobFiles.add(regionDir);
+      return;
+    }
+
+    // Hadoop 1.0 listStatus does not throw an exception if the path does not exist.
+    if (hfs.length == 0 && !fs.exists(regionDir)) {
+      LOG.warn("Mob directory " + regionDir
+        + " does not exist.  Likely the table is deleted. Skipping.");
+      missedMobFiles.add(regionDir);
+      return;
+    }
+    for (FileStatus hfFs : hfs) {
+      Path hf = hfFs.getPath();
+      checkMobColFamDir(hf);
+    }
+  }
+
+  /**
    * Check all column families in a region dir.
    *
    * @param regionDir
@@ -236,6 +346,8 @@ public class HFileCorruptionChecker {
       rdcs.add(work);
     }
 
+    // add mob region
+    rdcs.add(createMobRegionDirChecker(tableDir));
     // Submit and wait for completion
     try {
       rdFutures = executor.invokeAll(rdcs);
@@ -293,6 +405,34 @@ public class HFileCorruptionChecker {
   }
 
   /**
+   * An individual work item for parallelized mob dir processing. This is
+   * intentionally an inner class so it can use the shared error sets and fs.
+   */
+  private class MobRegionDirChecker extends RegionDirChecker {
+
+    MobRegionDirChecker(Path regionDir) {
+      super(regionDir);
+    }
+
+    @Override
+    public Void call() throws IOException {
+      checkMobRegionDir(regionDir);
+      return null;
+    }
+  }
+
+  /**
+   * Creates an instance of MobRegionDirChecker.
+   * @param tableDir The current table directory.
+   * @return An instance of MobRegionDirChecker.
+   */
+  private MobRegionDirChecker createMobRegionDirChecker(Path tableDir) {
+    TableName tableName = FSUtils.getTableName(tableDir);
+    Path mobDir = MobUtils.getMobRegionPath(conf, tableName);
+    return new MobRegionDirChecker(mobDir);
+  }
+
+  /**
    * Check the specified table dirs for bad hfiles.
    */
   public void checkTables(Collection<Path> tables) throws IOException {
@@ -338,6 +478,42 @@ public class HFileCorruptionChecker {
   }
 
   /**
+   * @return the set of check failure mob file paths after checkTables is called.
+   */
+  public Collection<Path> getFailureMobFiles() {
+    return new HashSet<Path>(failureMobFiles);
+  }
+
+  /**
+   * @return the set of corrupted mob file paths after checkTables is called.
+   */
+  public Collection<Path> getCorruptedMobFiles() {
+    return new HashSet<Path>(corruptedMobFiles);
+  }
+
+  /**
+   * @return number of mob files checked in the last HfileCorruptionChecker run
+   */
+  public int getMobFilesChecked() {
+    return mobFilesChecked.get();
+  }
+
+  /**
+   * @return the set of successfully quarantined paths after checkTables is called.
+   */
+  public Collection<Path> getQuarantinedMobFiles() {
+    return new HashSet<Path>(quarantinedMobFiles);
+  }
+
+  /**
+   * @return the set of paths that were missing.  Likely due to table deletion or
+   *  deletion/moves from compaction.
+   */
+  public Collection<Path> getMissedMobFiles() {
+    return new HashSet<Path>(missedMobFiles);
+  }
+
+  /**
    * Print a human readable summary of hfile quarantining operations.
    * @param out
    */
@@ -363,10 +539,31 @@ public class HFileCorruptionChecker {
     String fixedState = (corrupted.size() == quarantined.size()) ? "OK"
         : "CORRUPTED";
 
+    // print mob-related report
+    if (inQuarantineMode) {
+      out.print("    Mob files successfully quarantined: " + quarantinedMobFiles.size());
+      for (Path sq : quarantinedMobFiles) {
+        out.print("      " + sq);
+      }
+      out.print("    Mob files failed quarantine:        " + failureMobFiles.size());
+      for (Path fq : failureMobFiles) {
+        out.print("      " + fq);
+      }
+    }
+    out.print("    Mob files moved while checking:     " + missedMobFiles.size());
+    for (Path mq : missedMobFiles) {
+      out.print("      " + mq);
+    }
+    String initialMobState = (corruptedMobFiles.size() == 0) ? "OK" : "CORRUPTED";
+    String fixedMobState = (corruptedMobFiles.size() == quarantinedMobFiles.size()) ? "OK"
+        : "CORRUPTED";
+
     if (inQuarantineMode) {
       out.print("Summary: " + initialState + " => " + fixedState);
+      out.print("Mob summary: " + initialMobState + " => " + fixedMobState);
     } else {
       out.print("Summary: " + initialState);
+      out.print("Mob summary: " + initialMobState);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/13fe542b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index 28b80ff..9938df7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -39,6 +39,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
 import java.util.Set;
+import java.util.UUID;
 import java.util.concurrent.Callable;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
@@ -95,6 +96,8 @@ import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.master.TableLockManager;
 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
+import org.apache.hadoop.hbase.mob.MobFileName;
+import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
 import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -438,6 +441,31 @@ public class TestHBaseFsck {
   }
 
   /**
+   * Setup a clean table with a mob-enabled column.
+   *
+   * @param tableName The name of a table to be created.
+   * @throws Exception
+   */
+  void setupMobTable(TableName tablename) throws Exception {
+    HTableDescriptor desc = new HTableDescriptor(tablename);
+    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
+    hcd.setMobEnabled(true);
+    hcd.setMobThreshold(0);
+    desc.addFamily(hcd); // If a table has no CF's it doesn't get checked
+    createTable(TEST_UTIL, desc, SPLITS);
+
+    tbl = (HTable) connection.getTable(tablename, tableExecutorService);
+    List<Put> puts = new ArrayList<Put>();
+    for (byte[] row : ROWKEYS) {
+      Put p = new Put(row);
+      p.add(FAM, Bytes.toBytes("val"), row);
+      puts.add(p);
+    }
+    tbl.put(puts);
+    tbl.flushCommits();
+  }
+
+  /**
    * Counts the number of row to verify data loss or non-dataloss.
    */
   int countRows() throws IOException {
@@ -2121,6 +2149,44 @@ public class TestHBaseFsck {
   }
 
   /**
+   * Gets flushed mob files.
+   * @param fs The current file system.
+   * @param table The current table name.
+   * @return Path of a flushed hfile.
+   * @throws IOException
+   */
+  Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException {
+    Path regionDir = MobUtils.getMobRegionPath(conf, table);
+    Path famDir = new Path(regionDir, FAM_STR);
+
+    // keep doing this until we get a legit hfile
+    while (true) {
+      FileStatus[] hfFss = fs.listStatus(famDir);
+      if (hfFss.length == 0) {
+        continue;
+      }
+      for (FileStatus hfs : hfFss) {
+        if (!hfs.isDirectory()) {
+          return hfs.getPath();
+        }
+      }
+    }
+  }
+
+  /**
+   * Creates a new mob file name by the old one.
+   * @param oldFileName The old mob file name.
+   * @return The new mob file name.
+   */
+  String createMobFileName(String oldFileName) {
+    MobFileName mobFileName = MobFileName.create(oldFileName);
+    String startKey = mobFileName.getStartKey();
+    String date = mobFileName.getDate();
+    return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", ""))
+      .getFileName();
+  }
+
+  /**
    * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
    */
   @Test(timeout=180000)
@@ -2161,6 +2227,50 @@ public class TestHBaseFsck {
   }
 
   /**
+   * This creates a table and then corrupts a mob file.  Hbck should quarantine the file.
+   */
+  @Test(timeout=180000)
+  public void testQuarantineCorruptMobFile() throws Exception {
+    TableName table = TableName.valueOf(name.getMethodName());
+    try {
+      setupMobTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+      admin.flush(table);
+
+      FileSystem fs = FileSystem.get(conf);
+      Path mobFile = getFlushedMobFile(fs, table);
+      admin.disableTable(table);
+      // create new corrupt mob file.
+      String corruptMobFile = createMobFileName(mobFile.getName());
+      Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
+      TestHFile.truncateFile(fs, mobFile, corrupt);
+      LOG.info("Created corrupted mob file " + corrupt);
+      HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
+      HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
+
+      // A corrupt mob file doesn't abort the start of regions, so we can enable the table.
+      admin.enableTable(table);
+      HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
+      assertEquals(res.getRetCode(), 0);
+      HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
+      assertEquals(hfcc.getHFilesChecked(), 4);
+      assertEquals(hfcc.getCorrupted().size(), 0);
+      assertEquals(hfcc.getFailures().size(), 0);
+      assertEquals(hfcc.getQuarantined().size(), 0);
+      assertEquals(hfcc.getMissing().size(), 0);
+      assertEquals(hfcc.getMobFilesChecked(), 5);
+      assertEquals(hfcc.getCorruptedMobFiles().size(), 1);
+      assertEquals(hfcc.getFailureMobFiles().size(), 0);
+      assertEquals(hfcc.getQuarantinedMobFiles().size(), 1);
+      assertEquals(hfcc.getMissedMobFiles().size(), 0);
+      String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
+      assertEquals(corruptMobFile, quarantinedMobFile);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
    * Test that use this should have a timeout, because this method could potentially wait forever.
   */
   private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,