You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2016/02/10 05:44:22 UTC

hbase git commit: HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of the data

Repository: hbase
Updated Branches:
  refs/heads/branch-1.2 4cb21cf6a -> 6f6cd661d


    HBASE-15238 HFileReaderV2 prefetch overreaches; runs off the end of the data

        M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
          Cleanup trace message and include offset; makes debug the easier.

        M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
          Fix incorrect data member javadoc.

        M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
          Pass along the offset we are checksumming at.

        M hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpljava
          Add trace logging for debugging and set the end of the prefetch to be
          last datablock, not size minus trailersize (there is the root indices
          and file info to be skipped)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6f6cd661
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6f6cd661
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6f6cd661

Branch: refs/heads/branch-1.2
Commit: 6f6cd661d5df7f04b3d933443abdcd2065fcd7bb
Parents: 4cb21cf
Author: stack <st...@apache.org>
Authored: Tue Feb 9 20:43:16 2016 -0800
Committer: stack <st...@apache.org>
Committed: Tue Feb 9 20:44:05 2016 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/io/hfile/ChecksumUtil.java     | 27 ++++++++++----------
 .../hadoop/hbase/io/hfile/FixedFileTrailer.java |  9 +++----
 .../hadoop/hbase/io/hfile/HFileBlock.java       | 14 +++++-----
 .../hadoop/hbase/io/hfile/HFileReaderV2.java    | 18 ++++++++-----
 .../hadoop/hbase/io/hfile/TestChecksum.java     |  2 +-
 5 files changed, 38 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/6f6cd661/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 61862eb..0f7ede5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -39,11 +39,11 @@ public class ChecksumUtil {
   /** This is used to reserve space in a byte buffer */
   private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
 
-  /** 
-   * This is used by unit tests to make checksum failures throw an 
-   * exception instead of returning null. Returning a null value from 
-   * checksum validation will cause the higher layer to retry that 
-   * read with hdfs-level checksums. Instead, we would like checksum 
+  /**
+   * This is used by unit tests to make checksum failures throw an
+   * exception instead of returning null. Returning a null value from
+   * checksum validation will cause the higher layer to retry that
+   * read with hdfs-level checksums. Instead, we would like checksum
    * failures to cause the entire unit test to fail.
    */
   private static boolean generateExceptions = false;
@@ -101,7 +101,7 @@ public class ChecksumUtil {
     }
 
     // Get a checksum object based on the type of checksum that is
-    // set in the HFileBlock header. A ChecksumType.NULL indicates that 
+    // set in the HFileBlock header. A ChecksumType.NULL indicates that
     // the caller is not interested in validating checksums, so we
     // always return true.
     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
@@ -117,12 +117,13 @@ public class ChecksumUtil {
     assert dataChecksum != null;
     int sizeWithHeader =  block.getOnDiskDataSizeWithHeader();
     if (LOG.isTraceEnabled()) {
-      LOG.info("length of data = " + data.length
-          + " OnDiskDataSizeWithHeader = " + sizeWithHeader
-          + " checksum type = " + cktype.getName()
-          + " file =" + path.toString()
-          + " header size = " + hdrSize
-          + " bytesPerChecksum = " + bytesPerChecksum);
+      LOG.info("dataLength=" + data.length
+          + ", sizeWithHeader=" + sizeWithHeader
+          + ", checksumType=" + cktype.getName()
+          + ", file=" + path.toString()
+          + ", offset=" + offset
+          + ", headerSize=" + hdrSize
+          + ", bytesPerChecksum=" + bytesPerChecksum);
     }
     try {
       dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
@@ -142,7 +143,7 @@ public class ChecksumUtil {
    * @return The number of bytes needed to store the checksum values
    */
   static long numBytes(long datasize, int bytesPerChecksum) {
-    return numChunks(datasize, bytesPerChecksum) * 
+    return numChunks(datasize, bytesPerChecksum) *
                      HFileBlock.CHECKSUM_SIZE;
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/6f6cd661/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
index 56510f0..6735036 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java
@@ -41,8 +41,7 @@ import org.apache.hadoop.hbase.util.Bytes;
  * trailer size is fixed within a given {@link HFile} format version only, but
  * we always store the version number as the last four-byte integer of the file.
  * The version number itself is split into two portions, a major 
- * version and a minor version. 
- * The last three bytes of a file is the major
+ * version and a minor version. The last three bytes of a file are the major
  * version and a single preceding byte is the minor number. The major version
  * determines which readers/writers to use to read/write a hfile while a minor
  * version determines smaller changes in hfile format that do not need a new
@@ -50,7 +49,6 @@ import org.apache.hadoop.hbase.util.Bytes;
  */
 @InterfaceAudience.Private
 public class FixedFileTrailer {
-
   /**
    * We store the comparator class name as a fixed-length field in the trailer.
    */
@@ -65,8 +63,9 @@ public class FixedFileTrailer {
   /**
    * In version 1, the offset to the data block index. Starting from version 2,
    * the meaning of this field is the offset to the section of the file that
-   * should be loaded at the time the file is being opened, and as of the time
-   * of writing, this happens to be the offset of the file info section.
+   * should be loaded at the time the file is being opened: i.e. on open we load
+   * the root index, file info, etc. See http://hbase.apache.org/book.html#_hfile_format_2
+   * in the reference guide.
    */
   private long loadOnOpenDataOffset;
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/6f6cd661/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 8f80c3e..20d1d3d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -347,9 +347,9 @@ public class HFileBlock implements Cacheable {
   /**
    * Returns the buffer this block stores internally. The clients must not
    * modify the buffer object. This method has to be public because it is
-   * used in {@link org.apache.hadoop.hbase.util.CompoundBloomFilter} 
-   * to avoid object creation on every Bloom filter lookup, but has to 
-   * be used with caution. Checksum data is not included in the returned 
+   * used in {@link org.apache.hadoop.hbase.util.CompoundBloomFilter}
+   * to avoid object creation on every Bloom filter lookup, but has to
+   * be used with caution. Checksum data is not included in the returned
    * buffer but header data is.
    *
    * @return the buffer of this block for read-only operations
@@ -1694,7 +1694,7 @@ public class HFileBlock implements Cacheable {
         b.assumeUncompressed();
       }
 
-      if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) {
+      if (verifyChecksum && !validateBlockChecksum(b, offset, onDiskBlock, hdrSize)) {
         return null;             // checksum mismatch
       }
 
@@ -1743,9 +1743,9 @@ public class HFileBlock implements Cacheable {
      * If there is a checksum mismatch, then return false. Otherwise
      * return true.
      */
-    protected boolean validateBlockChecksum(HFileBlock block,  byte[] data, int hdrSize)
-        throws IOException {
-      return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize);
+    protected boolean validateBlockChecksum(HFileBlock block, long offset, byte[] data,
+        int hdrSize)
+      return ChecksumUtil.validateBlockChecksum(path, offset, block, data, hdrSize);
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/6f6cd661/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
index 12c46e8..a1b4c34 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
@@ -189,10 +189,14 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (cacheConf.shouldPrefetchOnOpen()) {
       PrefetchExecutor.request(path, new Runnable() {
         public void run() {
+          long offset = 0;
+          long end = 0;
           try {
-            long offset = 0;
-            long end = fileSize - getTrailer().getTrailerSize();
+            end = getTrailer().getLoadOnOpenDataOffset();
             HFileBlock prevBlock = null;
+            if (LOG.isTraceEnabled()) {
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end);
+            }
             while (offset < end) {
               if (Thread.interrupted()) {
                 break;
@@ -209,11 +213,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
           } catch (IOException e) {
             // IOExceptions are probably due to region closes (relocation, etc.)
             if (LOG.isTraceEnabled()) {
-              LOG.trace("Exception encountered while prefetching " + path + ":", e);
+              LOG.trace("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
             }
           } catch (Exception e) {
             // Other exceptions are interesting
-            LOG.warn("Exception encountered while prefetching " + path + ":", e);
+            LOG.warn("File=" + path.toString() + ", offset=" + offset + ", end=" + end, e);
           } finally {
             PrefetchExecutor.complete(path);
           }
@@ -380,9 +384,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
     if (dataBlockIndexReader == null) {
       throw new IOException("Block index not loaded");
     }
-    if (dataBlockOffset < 0 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
+    long trailerOffset = trailer.getLoadOnOpenDataOffset();
+    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
       throw new IOException("Requested block is out of range: " + dataBlockOffset +
-        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset());
+        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
+        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
     }
 
     // For any given block from any given file, synchronize reads for said block.

http://git-wip-us.apache.org/repos/asf/hbase/blob/6f6cd661/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index 9f19251..4f29ff5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -348,7 +348,7 @@ public class TestChecksum {
     }
 
     @Override
-    protected boolean validateBlockChecksum(HFileBlock block, 
+    protected boolean validateBlockChecksum(HFileBlock block, long offset,
       byte[] data, int hdrSize) throws IOException {
       return false;  // checksum validation failure
     }