You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ra...@apache.org on 2015/06/24 05:53:21 UTC

hbase git commit: HBASE-13932 - Add mob integrity check in HFilePrettyPrinter (Jingcheng du)

Repository: hbase
Updated Branches:
  refs/heads/hbase-11339 c4437e251 -> ba4ba32b0


HBASE-13932 - Add mob integrity check in HFilePrettyPrinter (Jingcheng du)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ba4ba32b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ba4ba32b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ba4ba32b

Branch: refs/heads/hbase-11339
Commit: ba4ba32b0dd5166b1cc2862e55e5c1c6eacfdf43
Parents: c4437e2
Author: ramkrishna <ra...@gmail.com>
Authored: Wed Jun 24 09:22:44 2015 +0530
Committer: ramkrishna <ra...@gmail.com>
Committed: Wed Jun 24 09:22:44 2015 +0530

----------------------------------------------------------------------
 .../hbase/io/hfile/HFilePrettyPrinter.java      | 100 ++++++++++++++++++-
 1 file changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/ba4ba32b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
index 7cc31d0..dc12762 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
@@ -24,9 +24,13 @@ import java.io.DataInput;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
 import java.util.SortedMap;
 
 import org.apache.commons.cli.CommandLine;
@@ -57,12 +61,14 @@ import org.apache.hadoop.hbase.KeyValueUtil;
 import org.apache.hadoop.hbase.Tag;
 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
+import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.BloomFilter;
 import org.apache.hadoop.hbase.util.BloomFilterUtil;
 import org.apache.hadoop.hbase.util.BloomFilterFactory;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
@@ -95,6 +101,10 @@ public class HFilePrettyPrinter extends Configured implements Tool {
   private boolean checkRow;
   private boolean checkFamily;
   private boolean isSeekToRow = false;
+  private boolean checkMobIntegrity = false;
+  private Map<String, List<Path>> mobFileLocations;
+  private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50;
+  private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20;
 
   /**
    * The row which the user wants to specify and print all the KeyValues for.
@@ -130,6 +140,8 @@ public class HFilePrettyPrinter extends Configured implements Tool {
     options.addOption("w", "seekToRow", true,
       "Seek to this row and print all the kvs for this row only");
     options.addOption("s", "stats", false, "Print statistics");
+    options.addOption("i", "checkMobIntegrity", false,
+      "Print all cells whose mob files are missing");
 
     OptionGroup files = new OptionGroup();
     files.addOption(new Option("f", "file", true,
@@ -158,6 +170,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
     printStats = cmd.hasOption("s");
     checkRow = cmd.hasOption("k");
     checkFamily = cmd.hasOption("a");
+    checkMobIntegrity = cmd.hasOption("i");
 
     if (cmd.hasOption("f")) {
       files.add(new Path(cmd.getOptionValue("f")));
@@ -199,6 +212,12 @@ public class HFilePrettyPrinter extends Configured implements Tool {
       files.addAll(regionFiles);
     }
 
+    if(checkMobIntegrity) {
+      if (verbose) {
+        System.out.println("checkMobIntegrity is enabled");
+      }
+      mobFileLocations = new HashMap<String, List<Path>>();
+    }
     return true;
   }
 
@@ -255,7 +274,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
 
     KeyValueStatsCollector fileStats = null;
 
-    if (verbose || printKey || checkRow || checkFamily || printStats) {
+    if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) {
       // scan over file and read key/value's and check if requested
       HFileScanner scanner = reader.getScanner(false, false, false);
       fileStats = new KeyValueStatsCollector();
@@ -313,6 +332,9 @@ public class HFilePrettyPrinter extends Configured implements Tool {
   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
       HFileScanner scanner,  byte[] row) throws IOException {
     Cell pCell = null;
+    FileSystem fs = FileSystem.get(getConf());
+    Set<String> foundMobFiles = new LinkedHashSet<String>(FOUND_MOB_FILES_CACHE_CAPACITY);
+    Set<String> missingMobFiles = new LinkedHashSet<String>(MISSING_MOB_FILES_CACHE_CAPACITY);
     do {
       Cell cell = scanner.getKeyValue();
       if (row != null && row.length != 0) {
@@ -369,12 +391,88 @@ public class HFilePrettyPrinter extends Configured implements Tool {
               + "\n\tcurrent  -> " + CellUtil.getCellKeyAsString(cell));
         }
       }
+      // check if mob files are missing.
+      if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) {
+        Tag tnTag = MobUtils.getTableNameTag(cell);
+        if (tnTag == null) {
+          System.err.println("ERROR, wrong tag format in mob reference cell "
+            + CellUtil.getCellKeyAsString(cell));
+        } else if (!MobUtils.hasValidMobRefCellValue(cell)) {
+          System.err.println("ERROR, wrong value format in mob reference cell "
+            + CellUtil.getCellKeyAsString(cell));
+        } else {
+          TableName tn = TableName.valueOf(tnTag.getValue());
+          String mobFileName = MobUtils.getMobFileName(cell);
+          boolean exist = mobFileExists(fs, tn, mobFileName,
+            Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles);
+          if (!exist) {
+            // report error
+            System.err.println("ERROR, the mob file [" + mobFileName
+              + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell));
+          }
+        }
+      }
       pCell = cell;
       ++count;
     } while (scanner.next());
   }
 
   /**
+   * Checks whether the referenced mob file exists.
+   * <p>
+   * The two name caches are consulted first. On a cache miss the file is looked up on the
+   * file system in the mob family directory and in the store archive directory of the mob
+   * region, and the outcome is recorded in the matching cache (after trimming it if it has
+   * reached its capacity).
+   *
+   * @param fs the file system to probe
+   * @param tn the table named by the mob reference cell
+   * @param mobFileName the name of the mob file the cell refers to
+   * @param family the column family of the cell
+   * @param foundMobFiles cache of mob file names already known to exist
+   * @param missingMobFiles cache of mob file names already known to be missing
+   * @return true if the mob file was found in one of the candidate directories
+   * @throws IOException if the file system lookup fails
+   */
+  private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family,
+    Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException {
+    if (foundMobFiles.contains(mobFileName)) {
+      return true;
+    }
+    if (missingMobFiles.contains(mobFileName)) {
+      return false;
+    }
+    // The candidate directories are derived from both the table and the family, so the
+    // cache key must include both. Keying on the table alone would hand the directories of
+    // the first family seen to every other family of that table and report false misses.
+    // NOTE(review): assumes '/' cannot appear in a table name; confirm against TableName.
+    String locationKey = tn.getNameAsString() + "/" + family;
+    List<Path> locations = mobFileLocations.get(locationKey);
+    if (locations == null) {
+      locations = new ArrayList<Path>(2);
+      locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family));
+      locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn,
+        MobUtils.getMobRegionInfo(tn).getEncodedName(), family));
+      mobFileLocations.put(locationKey, locations);
+    }
+    boolean exist = false;
+    for (Path location : locations) {
+      Path mobFilePath = new Path(location, mobFileName);
+      if (fs.exists(mobFilePath)) {
+        exist = true;
+        break;
+      }
+    }
+    // Remember the outcome so later cells pointing at the same mob file skip the lookup.
+    if (exist) {
+      evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY);
+      foundMobFiles.add(mobFileName);
+    } else {
+      evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY);
+      missingMobFiles.add(mobFileName);
+    }
+    return exist;
+  }
+
+  /**
+   * Trims a cache of mob file names once it has reached its capacity.
+   * <p>
+   * Nothing happens while the set holds fewer than {@code limit} names. Otherwise up to
+   * {@code limit / 2} names are removed, taken from the front of the set's iteration order
+   * (insertion order for the LinkedHashSet caches created in scanKeysValues).
+   *
+   * @param mobFileNames the cache to trim in place
+   * @param limit the size at which trimming starts
+   */
+  private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) {
+    if (mobFileNames.size() >= limit) {
+      Iterator<String> cursor = mobFileNames.iterator();
+      for (int remaining = limit / 2; remaining > 0 && cursor.hasNext(); remaining--) {
+        cursor.next();
+        cursor.remove();
+      }
+    }
+  }
+
+  /**
    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
    * with a four-space indentation.
    */