Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 19:45:24 UTC

svn commit: r1181979 - in /hbase/branches/0.89/src: main/java/org/apache/hadoop/hbase/io/hfile/ test/java/org/apache/hadoop/hbase/io/hfile/

Author: nspiegelberg
Date: Tue Oct 11 17:45:24 2011
New Revision: 1181979

URL: http://svn.apache.org/viewvc?rev=1181979&view=rev
Log:
Caching blocks in seekBefore and refactoring HFile reader access in HFileBlockIndex

Summary:
This fixes several long-standing code issues in HFile v2:
- Making seekBefore cache the previous block it has to read when the scanner
happens to be positioned at the first key of a block (a performance
regression introduced in HFile v2); see the sketch after this summary.
- Fixing the accounting of the number of blocks read for the one-level index
case in HFileBlockIndex.seekToDataBlock when the current block is the same as
the requested block.
- Getting rid of HFileBlock.BasicReader, which was implemented by both
FSReaderV2 and HFileReaderV2 even though only the latter cached blocks (a
source of confusion).
- Adding a new interface HFile.CachingBlockReader instead, which is implemented
by HFile readers and passed to HFileBlockIndex.
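
For reference, here is a minimal sketch of the resulting read path. It is
illustrative only: OneBlockCacheSketch, Block, and nextBlock are simplified
stand-ins invented for this sketch, not classes or methods from this patch;
only readBlock mirrors the new HFile.CachingBlockReader API.

    import java.io.IOException;

    /**
     * Sketch of the "one-block cache" inside seekToDataBlock: reuse the
     * scanner's current block when it is the one being requested;
     * otherwise go through the caching reader, always caching index
     * blocks above the data level.
     */
    final class OneBlockCacheSketch {

      /** Mirrors the new HFile.CachingBlockReader interface. */
      interface CachingBlockReader {
        Block readBlock(long offset, long onDiskSize, boolean cacheBlock,
            boolean pread, boolean isCompaction) throws IOException;
      }

      /** Simplified stand-in for HFileBlock. */
      static final class Block {
        private final long offset;
        Block(long offset) { this.offset = offset; }
        long getOffset() { return offset; }
      }

      static Block nextBlock(CachingBlockReader reader, Block currentBlock,
          long offset, long onDiskSize, boolean cacheBlocks, boolean pread,
          boolean isCompaction, int lookupLevel, int searchTreeLevel)
          throws IOException {
        if (currentBlock != null && currentBlock.getOffset() == offset) {
          // Reuse the block we are already on, even with caching turned
          // off; crucial for compaction-type workloads.
          return currentBlock;
        }
        // Index blocks above the data level are always cached, otherwise
        // performance suffers; data blocks honor the caller's flag.
        boolean shouldCache = cacheBlocks || lookupLevel < searchTreeLevel;
        return reader.readBlock(offset, onDiskSize, shouldCache, pread,
            isCompaction);
      }
    }

The actual loop is in the HFileBlockIndex.java hunk below.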

Test Plan: Unit tests, dev cluster, dark launch

Reviewers: kannan, liyintang

Reviewed By: kannan

CC: hbase-eng@lists, kannan

Differential Revision: 328625

Revert Plan: OK

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
    hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Tue Oct 11 17:45:24 2011
@@ -295,9 +295,15 @@ public class HFile {
   public static final String CACHE_BLOCKS_ON_WRITE_KEY =
       "hbase.rs.cacheblocksonwrite";
 
-  /** An interface used by clients to open and iterate an {@link HFile}. */
-  public interface Reader extends Closeable {
+  /** An abstraction used by the block index */
+  public interface CachingBlockReader {
+    HFileBlock readBlock(long offset, long onDiskBlockSize,
+        boolean cacheBlock, final boolean pread, final boolean isCompaction)
+        throws IOException;
+  }
 
+  /** An interface used by clients to open and iterate an {@link HFile}. */
+  public interface Reader extends Closeable, CachingBlockReader {
     /**
      * Returns this reader's "name". Usually the last component of the path.
      * Needs to be constant as the file is being moved to support caching on
@@ -315,10 +321,6 @@ public class HFile {
     ByteBuffer getMetaBlock(String metaBlockName,
        boolean cacheBlock) throws IOException;
 
-    HFileBlock readBlock(long offset, int onDiskBlockSize,
-        boolean cacheBlock, final boolean pread, final boolean isCompaction)
-        throws IOException;
-
     Map<byte[], byte[]> loadFileInfo() throws IOException;
 
     byte[] getLastKey();

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java Tue Oct 11 17:45:24 2011
@@ -832,11 +832,9 @@ public class HFileBlock implements HeapS
     DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
   }
 
-  /**
-   * Just the basic ability to read blocks, providing optional hints of
-   * on-disk-size and/or uncompressed size.
-   */
-  public interface BasicReader {
+  /** A full-fledged reader with an iteration ability. */
+  public interface FSReader {
+
     /**
      * Reads the block at the given offset in the file with the given on-disk
      * size and uncompressed size.
@@ -850,10 +848,6 @@ public class HFileBlock implements HeapS
      */
     HFileBlock readBlockData(long offset, long onDiskSize,
         int uncompressedSize, boolean pread) throws IOException;
-  }
-
-  /** A full-fledged reader with an iteration ability. */
-  public interface FSReader extends BasicReader {
 
     /**
      * Creates a block iterator over the given portion of the {@link HFile}.

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java Tue Oct 11 17:45:24 2011
@@ -38,6 +38,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.hbase.util.CompoundBloomFilterWriter;
@@ -132,12 +133,12 @@ public class HFileBlockIndex {
     private int searchTreeLevel;
 
     /** A way to read {@link HFile} blocks at a given offset */
-    private HFileBlock.BasicReader blockReader;
+    private CachingBlockReader cachingBlockReader;
 
     public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel,
-        final HFileBlock.BasicReader blockReader) {
+        final CachingBlockReader cachingBlockReader) {
       this(c, treeLevel);
-      this.blockReader = blockReader;
+      this.cachingBlockReader = cachingBlockReader;
     }
 
     public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel)
@@ -176,7 +177,8 @@ public class HFileBlockIndex {
      * @throws IOException
      */
     public HFileBlock seekToDataBlock(final byte[] key, int keyOffset,
-        int keyLength, HFileBlock currentBlock)
+        int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
+        boolean pread, boolean isCompaction)
         throws IOException {
       int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength);
       if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
@@ -188,23 +190,44 @@ public class HFileBlockIndex {
       int currentOnDiskSize = blockDataSizes[rootLevelIndex];
 
       int lookupLevel = 1; // How many levels deep we are in our lookup.
-      HFileBlock block = blockReader.readBlockData(currentOffset,
-          currentOnDiskSize, -1, true);
-      if (block == null) {
-        throw new IOException("Failed to read block at offset " +
-            currentOffset + ", onDiskSize=" + currentOnDiskSize);
-      }
-      while (!block.getBlockType().equals(BlockType.DATA)) {
-        // Read the block. It may be intermediate level block, leaf level block
-        // or data block. In any case, we expect non-root index block format.
 
-        // We don't allow more than searchTreeLevel iterations of this loop.
+      HFileBlock block;
+      while (true) {
+
+        if (currentBlock != null && currentBlock.getOffset() == currentOffset)
+        {
+          // Avoid reading the same block again, even with caching turned off.
+          // This is crucial for compaction-type workload which might have
+          // caching turned off. This is like a one-block cache inside the
+          // scanner.
+          block = currentBlock;
+        } else {
+          // Call HFile's caching block reader API. We always cache index
+          // blocks, otherwise we might get terrible performance.
+          boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
+          block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize,
+              shouldCache, pread, isCompaction);
+        }
+
+        if (block == null) {
+          throw new IOException("Failed to read block at offset " +
+              currentOffset + ", onDiskSize=" + currentOnDiskSize);
+        }
+
+        // Found a data block, break the loop and check our level in the tree.
+        if (block.getBlockType().equals(BlockType.DATA)) {
+          break;
+        }
+
+        // Not a data block. This must be a leaf-level or intermediate-level
+        // index block. We don't allow going deeper than searchTreeLevel.
         if (++lookupLevel > searchTreeLevel) {
-          throw new IOException("Search Tree Level overflow: lookupLevel: "+
-              lookupLevel + " searchTreeLevel: " + searchTreeLevel);
+          throw new IOException("Search Tree Level overflow: lookupLevel="+
+              lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
         }
 
-        // read to the byte buffer
+        // Locate the entry corresponding to the given key in the non-root
+        // (leaf or intermediate-level) index block.
         ByteBuffer buffer = block.getBufferWithoutHeader();
         if (!locateNonRootIndexEntry(buffer, key, keyOffset, keyLength,
             comparator)) {
@@ -216,16 +239,6 @@ public class HFileBlockIndex {
 
         currentOffset = buffer.getLong();
         currentOnDiskSize = buffer.getInt();
-
-        // Located a deeper-level block, now read it.
-        if (currentBlock != null && currentBlock.getOffset() == currentOffset)
-        {
-          // Avoid reading the same block.
-          block = currentBlock;
-        } else {
-          block = blockReader.readBlockData(currentOffset, currentOnDiskSize,
-              -1, true);
-        }
       }
 
       if (lookupLevel != searchTreeLevel) {
@@ -252,12 +265,15 @@ public class HFileBlockIndex {
         return midKey;
 
       if (midLeafBlockOffset >= 0) {
-        if (blockReader == null) {
+        if (cachingBlockReader == null) {
           throw new IOException("Have to read the middle leaf block but " +
               "no block reader available");
         }
-        HFileBlock midLeafBlock = blockReader.readBlockData(
-            midLeafBlockOffset, midLeafBlockOnDiskSize, -1, true);
+
+        // Caching, using pread, assuming this is not a compaction.
+        HFileBlock midLeafBlock = cachingBlockReader.readBlock(
+            midLeafBlockOffset, midLeafBlockOnDiskSize, true, true, false);
+
         ByteBuffer b = midLeafBlock.getBufferWithoutHeader();
         int numDataBlocks = b.getInt();
         int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 1));

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java Tue Oct 11 17:45:24 2011
@@ -655,7 +655,7 @@ public class HFileReaderV1 extends Abstr
   }
 
   @Override
-  public HFileBlock readBlock(long offset, int onDiskBlockSize,
+  public HFileBlock readBlock(long offset, long onDiskBlockSize,
       boolean cacheBlock, boolean pread, boolean isCompaction) {
     throw new UnsupportedOperationException();
   }

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Tue Oct 11 17:45:24 2011
@@ -38,8 +38,7 @@ import org.apache.hadoop.hbase.util.IdLo
 /**
  * {@link HFile} reader for version 2.
  */
-public class HFileReaderV2 extends AbstractHFileReader implements
-    HFileBlock.BasicReader {
+public class HFileReaderV2 extends AbstractHFileReader {
 
   private static final Log LOG = LogFactory.getLog(HFileReaderV2.class);
 
@@ -206,23 +205,6 @@ public class HFileReaderV2 extends Abstr
   }
 
   /**
-   * Implements the "basic block reader" API, used mainly by
-   * {@link HFileBlockIndex.BlockIndexReader} in
-   * {@link HFileBlockIndex.BlockIndexReader#seekToDataBlock(byte[], int, int,
-   * HFileBlock)} in a random-read access pattern.
-   */
-  @Override
-  public HFileBlock readBlockData(long offset, long onDiskSize,
-      int uncompressedSize, boolean pread) throws IOException {
-    if (onDiskSize >= Integer.MAX_VALUE) {
-      throw new IOException("Invalid on-disk size: " + onDiskSize);
-    }
-
-    // Assuming we are not doing a compaction.
-    return readBlock(offset, (int) onDiskSize, true, pread, false);
-  }
-
-  /**
    * Read in a file block.
    *
    * @param dataBlockOffset offset to read.
@@ -233,7 +215,7 @@ public class HFileReaderV2 extends Abstr
    * @return Block wrapped in a ByteBuffer.
    * @throws IOException
    */
-  public HFileBlock readBlock(long dataBlockOffset, int onDiskBlockSize,
+  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
       boolean cacheBlock, boolean pread, final boolean isCompaction)
       throws IOException {
     if (dataBlockIndexReader == null) {
@@ -506,9 +488,11 @@ public class HFileReaderV2 extends Abstr
      */
     private int seekTo(byte[] key, int offset, int length, boolean rewind)
         throws IOException {
-      HFileBlock seekToBlock =
-        ((HFileReaderV2) reader).getDataBlockIndexReader().seekToDataBlock(
-            key, offset, length, block);
+      HFileBlockIndex.BlockIndexReader indexReader =
+          reader.getDataBlockIndexReader();
+      HFileBlock seekToBlock = indexReader.seekToDataBlock(key, offset, length,
+          block, cacheBlocks, pread, isCompaction);
+
       if (seekToBlock == null) {
         // This happens if the key e.g. falls before the beginning of the file.
         return -1;
@@ -673,10 +657,9 @@ public class HFileReaderV2 extends Abstr
     @Override
     public boolean seekBefore(byte[] key, int offset, int length)
         throws IOException {
-      HFileReaderV2 reader2 = (HFileReaderV2) reader;
       HFileBlock seekToBlock =
-          reader2.getDataBlockIndexReader().seekToDataBlock(
-              key, offset, length, block);
+          reader.getDataBlockIndexReader().seekToDataBlock(key, offset,
+              length, block, cacheBlocks, pread, isCompaction);
       if (seekToBlock == null) {
         return false;
       }
@@ -694,8 +677,9 @@ public class HFileReaderV2 extends Abstr
         // It is important that we compute and pass onDiskSize to the block
         // reader so that it does not have to read the header separately to
         // figure out the size.
-        seekToBlock = reader2.fsBlockReader.readBlockData(previousBlockOffset,
-            seekToBlock.getOffset() - previousBlockOffset, -1, pread);
+        seekToBlock = reader.readBlock(previousBlockOffset,
+            seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
+            pread, isCompaction);
 
         // TODO shortcut: seek forward in this block to the last key of the
         // block.
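
The seekBefore change above is the core of the caching fix. Below is a
minimal sketch of the technique, assuming simplified stand-in types
(SeekBeforeSketch, Reader, Block, and stepToPreviousBlock are illustrative
names invented for this sketch, not HBase classes):

    import java.io.IOException;

    /** Sketch of stepping back one block in seekBefore. */
    final class SeekBeforeSketch {

      /** Mirrors HFile.CachingBlockReader. */
      interface Reader {
        Block readBlock(long offset, long onDiskSize, boolean cacheBlock,
            boolean pread, boolean isCompaction) throws IOException;
      }

      /** Simplified stand-in for HFileBlock. */
      interface Block {
        long getOffset();
        long getPrevBlockOffset();
      }

      /**
       * Called when the sought key is the first key of seekToBlock, so
       * the scanner must land in the previous block instead.
       */
      static Block stepToPreviousBlock(Reader reader, Block seekToBlock,
          boolean cacheBlocks, boolean pread, boolean isCompaction)
          throws IOException {
        long prevOffset = seekToBlock.getPrevBlockOffset();
        // The previous block's on-disk size is the offset delta, so the
        // reader does not have to read the header to learn the size.
        long prevOnDiskSize = seekToBlock.getOffset() - prevOffset;
        // Routing the read through the caching readBlock API, rather than
        // the raw FS reader, is what makes this block cacheable again.
        return reader.readBlock(prevOffset, prevOnDiskSize, cacheBlocks,
            pread, isCompaction);
      }
    }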

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java Tue Oct 11 17:45:24 2011
@@ -160,8 +160,13 @@ public class TestCacheOnWrite {
         new EnumMap<BlockType, Integer>(BlockType.class);
 
     while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
-      HFileBlock block = reader.readBlockData(offset, prevBlock == null ? -1
-          : prevBlock.getNextBlockOnDiskSizeWithHeader(), -1, false);
+      long onDiskSize = -1;
+      if (prevBlock != null) {
+         onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
+      }
+      // Flags: cache the block, use pread, this is not a compaction.
+      HFileBlock block = reader.readBlock(offset, onDiskSize, true, true,
+          false);
       String blockCacheKey = HFile.getBlockCacheKey(reader.getName(), offset);
       boolean isCached = blockCache.getBlock(blockCacheKey, true) != null;
       boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());

Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Tue Oct 11 17:45:24 2011
@@ -41,8 +41,11 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
+import org.apache.hadoop.hbase.io.hfile.ColumnFamilyMetrics.BlockMetricType;
 import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
 import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
+import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
@@ -121,37 +124,36 @@ public class TestHFileBlockIndex {
    * A wrapper around a block reader which only caches the results of the last
    * operation. Not thread-safe.
    */
-  private static class BlockReaderWrapper implements HFileBlock.BasicReader {
+  private static class BlockReaderWrapper implements HFile.CachingBlockReader {
 
-    private HFileBlock.BasicReader realReader;
+    private HFileBlock.FSReader realReader;
     private long prevOffset;
     private long prevOnDiskSize;
-    private long prevUncompressedSize;
     private boolean prevPread;
     private HFileBlock prevBlock;
 
     public int hitCount = 0;
     public int missCount = 0;
 
-    public BlockReaderWrapper(HFileBlock.BasicReader realReader) {
+    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
       this.realReader = realReader;
     }
 
     @Override
-    public HFileBlock readBlockData(long offset, long onDiskSize,
-        int uncompressedSize, boolean pread) throws IOException {
+    public HFileBlock readBlock(long offset, long onDiskSize,
+        boolean cacheBlock, boolean pread, boolean isCompaction)
+        throws IOException {
       if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
-          uncompressedSize == prevUncompressedSize && pread == prevPread) {
+          pread == prevPread) {
         hitCount += 1;
         return prevBlock;
       }
 
       missCount += 1;
       prevBlock = realReader.readBlockData(offset, onDiskSize,
-          uncompressedSize, pread);
+          -1, pread);
       prevOffset = offset;
       prevOnDiskSize = onDiskSize;
-      prevUncompressedSize = uncompressedSize;
       prevPread = pread;
 
       return prevBlock;
@@ -182,7 +184,8 @@ public class TestHFileBlockIndex {
     for (byte[] key : keys) {
       assertTrue(key != null);
       assertTrue(indexReader != null);
-      HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null);
+      HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null,
+          true, true, false);
       if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
         assertTrue(b == null);
         ++i;