You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2015/08/13 04:04:12 UTC

[4/4] hbase git commit: HBASE-13985 Add configuration to skip validating HFile format when bulk loading (Victor Xu)

HBASE-13985 Add configuration to skip validating HFile format when bulk loading (Victor Xu)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/82866bd0
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/82866bd0
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/82866bd0

Branch: refs/heads/0.98
Commit: 82866bd09c3d75d7ff556827920a93d360c65231
Parents: a705a74
Author: Andrew Purtell <ap...@apache.org>
Authored: Wed Aug 12 18:36:23 2015 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Wed Aug 12 18:37:14 2015 -0700

----------------------------------------------------------------------
 .../hbase/mapreduce/LoadIncrementalHFiles.java  | 48 +++++++++++++++-----
 1 file changed, 36 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/82866bd0/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
index c07d5d4..096b5d9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
@@ -166,6 +166,17 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
    */
   private static <TFamily> void visitBulkHFiles(final FileSystem fs, final Path bulkDir,
     final BulkHFileVisitor<TFamily> visitor) throws IOException {
+    visitBulkHFiles(fs, bulkDir, visitor, true);
+  }
+
+  /**
+   * Iterate over the hfiles in bulkDir.
+   * Skip references, HFileLinks, and files starting with "_".
+   * By default, validate each file's format and skip invalid hfiles; the check is disabled
+   * when validateHFile is false (see 'hbase.loadincremental.validate.hfile').
+   */
+  private static <TFamily> void visitBulkHFiles(final FileSystem fs, final Path bulkDir,
+    final BulkHFileVisitor<TFamily> visitor, final boolean validateHFile) throws IOException {
     if (!fs.exists(bulkDir)) {
       throw new FileNotFoundException("Bulkload dir " + bulkDir + " not found");
     }
@@ -206,16 +217,18 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
           continue;
         }
 
-        // Validate HFile Format
-        try {
-          if (!HFile.isHFileFormat(fs, hfile)) {
-            LOG.warn("the file " + hfile + " doesn't seems to be an hfile. skipping");
+        // Validate HFile Format if needed
+        if (validateHFile) {
+          try {
+            if (!HFile.isHFileFormat(fs, hfile)) {
+              LOG.warn("the file " + hfile + " doesn't seems to be an hfile. skipping");
+              continue;
+            }
+          } catch (FileNotFoundException e) {
+            LOG.warn("the file " + hfile + " was removed");
             continue;
           }
-        } catch (FileNotFoundException e) {
-          LOG.warn("the file " + hfile + " was removed");
-          continue;
-        }
+	}
 
         visitor.bulkHFile(family, hfileStatus);
       }
@@ -248,8 +261,8 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
    * Walk the given directory for all HFiles, and return a Queue
    * containing all such files.
    */
-  private void discoverLoadQueue(final Deque<LoadQueueItem> ret, final Path hfofDir)
-  throws IOException {
+  private void discoverLoadQueue(final Deque<LoadQueueItem> ret, final Path hfofDir,
+    final boolean validateHFile) throws IOException {
     fs = hfofDir.getFileSystem(getConf());
     visitBulkHFiles(fs, hfofDir, new BulkHFileVisitor<byte[]>() {
       @Override
@@ -266,7 +279,7 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
         }
         ret.add(new LoadQueueItem(family, hfile.getPath()));
       }
-    });
+    }, validateHFile);
   }
 
   /**
@@ -305,7 +318,18 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
     // happen in this thread
     Deque<LoadQueueItem> queue = new LinkedList<LoadQueueItem>();
     try {
-      discoverLoadQueue(queue, hfofDir);
+      /*
+       * Checking the hfile format is time-consuming, so this step can be skipped via
+       * configuration when bulk loading millions of HFiles. See HBASE-13985.
+       */
+      boolean validateHFile = getConf().getBoolean("hbase.loadincremental.validate.hfile", true);
+      if(!validateHFile) {
+	LOG.warn("You are skipping HFiles validation, it might cause some data loss if files " +
+	    "are not correct. If you fail to read data from your table after using this " +
+	    "option, consider removing the files and bulkload again without this option. " +
+	    "See HBASE-13985");
+      }
+      discoverLoadQueue(queue, hfofDir, validateHFile);
       // check whether there is invalid family name in HFiles to be bulkloaded
       Collection<HColumnDescriptor> families = table.getTableDescriptor().getFamilies();
       ArrayList<String> familyNames = new ArrayList<String>(families.size());