You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2016/02/10 05:31:50 UTC

hbase git commit: HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of the data

Repository: hbase
Updated Branches:
  refs/heads/master 7cab24729 -> b6328eb80


HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of the data

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
      Cleanup trace message and include offset; makes debug the easier.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
      Fix incorrect data member javadoc.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
      Pass along the offset we are checksumming at.

    M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpljava
      Add trace logging for debugging and set the end of the prefetch to be
      last datablock, not size minus trailersize (there is the root indices
      and file info to be skipped)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b6328eb8
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b6328eb8
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b6328eb8

Branch: refs/heads/master
Commit: b6328eb80336e4a30ddec1e03751572f9bec77cf
Parents: 7cab247
Author: stack <st...@apache.org>
Authored: Tue Feb 9 11:35:33 2016 -0800
Committer: stack <st...@apache.org>
Committed: Tue Feb 9 20:31:44 2016 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/io/hfile/ChecksumUtil.java     | 29 ++++++++++----------
 .../hadoop/hbase/io/hfile/FixedFileTrailer.java |  9 +++---
 .../hadoop/hbase/io/hfile/HFileBlock.java       |  9 +++---
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java  | 18 ++++++++----
 .../hadoop/hbase/io/hfile/TestChecksum.java     |  2 +-
 5 files changed, 37 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/b6328eb8/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 402caa8..69f4330 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -38,11 +38,11 @@ public class ChecksumUtil {
   /** This is used to reserve space in a byte buffer */
   private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
 
-  /** 
-   * This is used by unit tests to make checksum failures throw an 
-   * exception instead of returning null. Returning a null value from 
-   * checksum validation will cause the higher layer to retry that 
-   * read with hdfs-level checksums. Instead, we would like checksum 
+  /**
+   * This is used by unit tests to make checksum failures throw an
+   * exception instead of returning null. Returning a null value from
+   * checksum validation will cause the higher layer to retry that
+   * read with hdfs-level checksums. Instead, we would like checksum
    * failures to cause the entire unit test to fail.
    */
   private static boolean generateExceptions = false;
@@ -86,7 +86,7 @@ public class ChecksumUtil {
    * The header is extracted from the specified HFileBlock while the
    * data-to-be-verified is extracted from 'data'.
    */
-  static boolean validateBlockChecksum(String pathName, HFileBlock block,
+  static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
     byte[] data, int hdrSize) throws IOException {
 
     // If this is an older version of the block that does not have
@@ -100,7 +100,7 @@ public class ChecksumUtil {
     }
 
     // Get a checksum object based on the type of checksum that is
-    // set in the HFileBlock header. A ChecksumType.NULL indicates that 
+    // set in the HFileBlock header. A ChecksumType.NULL indicates that
     // the caller is not interested in validating checksums, so we
     // always return true.
     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
@@ -116,12 +116,13 @@ public class ChecksumUtil {
     assert dataChecksum != null;
     int sizeWithHeader =  block.getOnDiskDataSizeWithHeader();
     if (LOG.isTraceEnabled()) {
-      LOG.info("length of data = " + data.length
-          + " OnDiskDataSizeWithHeader = " + sizeWithHeader
-          + " checksum type = " + cktype.getName()
-          + " file =" + pathName
-          + " header size = " + hdrSize
-          + " bytesPerChecksum = " + bytesPerChecksum);
+      LOG.info("dataLength=" + data.length
+          + ", sizeWithHeader=" + sizeWithHeader
+          + ", checksumType=" + cktype.getName()
+          + ", file=" + pathName
+          + ", offset=" + offset
+          + ", headerSize=" + hdrSize
+          + ", bytesPerChecksum=" + bytesPerChecksum);
     }
     try {
       dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
@@ -140,7 +141,7 @@ public class ChecksumUtil {
    * @return The number of bytes needed to store the checksum values
    */
   static long numBytes(long datasize, int bytesPerChecksum) {
-    return numChunks(datasize, bytesPerChecksum) * 
+    return numChunks(datasize, bytesPerChecksum) *
                      HFileBlock.CHECKSUM_SIZE;
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/b6328eb8/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
index 72f550a..ef6370e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
@@ -43,8 +43,7 @@ import org.apache.hadoop.hbase.util.Bytes;
  * trailer size is fixed within a given {@link HFile} format version only, but
  * we always store the version number as the last four-byte integer of the file.
  * The version number itself is split into two portions, a major 
- * version and a minor version. 
- * The last three bytes of a file is the major
+ * version and a minor version. The last three bytes of a file are the major
  * version and a single preceding byte is the minor number. The major version
  * determines which readers/writers to use to read/write a hfile while a minor
  * version determines smaller changes in hfile format that do not need a new
@@ -52,7 +51,6 @@ import org.apache.hadoop.hbase.util.Bytes;
  */
 @InterfaceAudience.Private
 public class FixedFileTrailer {
-
   /**
    * We store the comparator class name as a fixed-length field in the trailer.
    */
@@ -67,8 +65,9 @@ public class FixedFileTrailer {
   /**
    * In version 1, the offset to the data block index. Starting from version 2,
    * the meaning of this field is the offset to the section of the file that
-   * should be loaded at the time the file is being opened, and as of the time
-   * of writing, this happens to be the offset of the file info section.
+   * should be loaded at the time the file is being opened: i.e. on open we load
+   * the root index, file info, etc. See http://hbase.apache.org/book.html#_hfile_format_2
+   * in the reference guide.
    */
   private long loadOnOpenDataOffset;
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/b6328eb8/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index e0719aa..e7a1e5e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -1694,7 +1694,7 @@ public class HFileBlock implements Cacheable {
         b.assumeUncompressed();
       }
 
-      if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
+      if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, hdrSize)) {
         return null;             // checksum mismatch
       }
 
@@ -1743,9 +1743,10 @@ public class HFileBlock implements Cacheable {
      * If there is a checksum mismatch, then return false. Otherwise
      * return true.
      */
-    protected boolean validateBlockChecksum(HFileBlock block,  byte[] data, int hdrSize)
-        throws IOException {
-      return ChecksumUtil.validateBlockChecksum(pathName, block, data, hdrSize);
+    protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
+        int hdrSize)
+    throws IOException {
+      return ChecksumUtil.validateBlockChecksum(pathName, offset, block, data, hdrSize);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/b6328eb8/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index a873280..b2f5ded 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -248,10 +248,14 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     if (cacheConf.shouldPrefetchOnOpen()) {
       PrefetchExecutor.request(path, new Runnable() {
         public void run() {
+          long offset = 0;
+          long end = 0;
           try {
-            long offset = 0;
-            long end = fileSize - getTrailer().getTrailerSize();
+            end = getTrailer().getLoadOnOpenDataOffset();
             HFileBlock prevBlock = null;
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end);
+            }
             while (offset < end) {
               if (Thread.interrupted()) {
                 break;
@@ -273,11 +277,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
           } catch (IOException e) {
             // IOExceptions are probably due to region closes (relocation, etc.)
             if (LOG.isTraceEnabled()) {
-              LOG.trace("Exception encountered while prefetching " + path + ":", e);
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
             }
           } catch (Exception e) {
             // Other exceptions are interesting
-            LOG.warn("Exception encountered while prefetching " + path + ":", e);
+            LOG.warn("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
           } finally {
             PrefetchExecutor.complete(path);
           }
@@ -1457,9 +1461,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     if (dataBlockIndexReader == null) {
       throw new IOException("Block index not loaded");
     }
-    if (dataBlockOffset < 0 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
+    long trailerOffset = trailer.getLoadOnOpenDataOffset();
+    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
       throw new IOException("Requested block is out of range: " + dataBlockOffset +
-        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset());
+        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
+        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
     }
     // For any given block from any given file, synchronize reads for said
     // block.

http://git-wip-us.apache.org/repos/asf/hbase/blob/b6328eb8/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index 1767fb2..e8a2882 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -349,7 +349,7 @@ public class TestChecksum {
     }
 
     @Override
-    protected boolean validateBlockChecksum(HFileBlock block, 
+    protected boolean validateBlockChecksum(HFileBlock block, long offset,
       byte[] data, int hdrSize) throws IOException {
       return false;  // checksum validation failure
     }