You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 19:45:24 UTC
svn commit: r1181979 - in /hbase/branches/0.89/src:
main/java/org/apache/hadoop/hbase/io/hfile/
test/java/org/apache/hadoop/hbase/io/hfile/
Author: nspiegelberg
Date: Tue Oct 11 17:45:24 2011
New Revision: 1181979
URL: http://svn.apache.org/viewvc?rev=1181979&view=rev
Log:
Caching blocks in seekBefore and refactoring HFile reader access in HFileBlockIndex
Summary:
This fixes a couple of long-existing code issues in HFile v2:
- Making seekBefore cache the previous block it has to read when the scanner
happens to be at the first key of a block (this was a performance regression
introduced in HFile v2).
- Fixing the accounting of the number of blocks read for the one-level index
case in HFileBlockIndex.seekToDataBlock if the current block is the same as the
requested block.
- Getting rid of HFileBlock.BasicReader, which was used both by FSReaderV2 and
HFileReaderV2, but the former did not cache blocks (a source of confusion).
- Adding a new interface HFile.CachingBlockReader instead, which is implemented
by HFile readers and passed to HFileBlockIndex.
Test Plan: Unit tests, dev cluster, dark launch
Reviewers: kannan, liyintang
Reviewed By: kannan
CC: hbase-eng@lists, kannan
Differential Revision: 328625
Revert Plan: OK
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Tue Oct 11 17:45:24 2011
@@ -295,9 +295,15 @@ public class HFile {
public static final String CACHE_BLOCKS_ON_WRITE_KEY =
"hbase.rs.cacheblocksonwrite";
- /** An interface used by clients to open and iterate an {@link HFile}. */
- public interface Reader extends Closeable {
+ /** An abstraction used by the block index */
+ public interface CachingBlockReader {
+ HFileBlock readBlock(long offset, long onDiskBlockSize,
+ boolean cacheBlock, final boolean pread, final boolean isCompaction)
+ throws IOException;
+ }
+ /** An interface used by clients to open and iterate an {@link HFile}. */
+ public interface Reader extends Closeable, CachingBlockReader {
/**
* Returns this reader's "name". Usually the last component of the path.
* Needs to be constant as the file is being moved to support caching on
@@ -315,10 +321,6 @@ public class HFile {
ByteBuffer getMetaBlock(String metaBlockName,
boolean cacheBlock) throws IOException;
- HFileBlock readBlock(long offset, int onDiskBlockSize,
- boolean cacheBlock, final boolean pread, final boolean isCompaction)
- throws IOException;
-
Map<byte[], byte[]> loadFileInfo() throws IOException;
byte[] getLastKey();
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java Tue Oct 11 17:45:24 2011
@@ -832,11 +832,9 @@ public class HFileBlock implements HeapS
DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
}
- /**
- * Just the basic ability to read blocks, providing optional hints of
- * on-disk-size and/or uncompressed size.
- */
- public interface BasicReader {
+ /** A full-fledged reader with an iteration ability. */
+ public interface FSReader {
+
/**
* Reads the block at the given offset in the file with the given on-disk
* size and uncompressed size.
@@ -850,10 +848,6 @@ public class HFileBlock implements HeapS
*/
HFileBlock readBlockData(long offset, long onDiskSize,
int uncompressedSize, boolean pread) throws IOException;
- }
-
- /** A full-fledged reader with an iteration ability. */
- public interface FSReader extends BasicReader {
/**
* Creates a block iterator over the given portion of the {@link HFile}.
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java Tue Oct 11 17:45:24 2011
@@ -38,6 +38,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.CompoundBloomFilterWriter;
@@ -132,12 +133,12 @@ public class HFileBlockIndex {
private int searchTreeLevel;
/** A way to read {@link HFile} blocks at a given offset */
- private HFileBlock.BasicReader blockReader;
+ private CachingBlockReader cachingBlockReader;
public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel,
- final HFileBlock.BasicReader blockReader) {
+ final CachingBlockReader cachingBlockReader) {
this(c, treeLevel);
- this.blockReader = blockReader;
+ this.cachingBlockReader = cachingBlockReader;
}
public BlockIndexReader(final RawComparator<byte[]> c, final int treeLevel)
@@ -176,7 +177,8 @@ public class HFileBlockIndex {
* @throws IOException
*/
public HFileBlock seekToDataBlock(final byte[] key, int keyOffset,
- int keyLength, HFileBlock currentBlock)
+ int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
+ boolean pread, boolean isCompaction)
throws IOException {
int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength);
if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
@@ -188,23 +190,44 @@ public class HFileBlockIndex {
int currentOnDiskSize = blockDataSizes[rootLevelIndex];
int lookupLevel = 1; // How many levels deep we are in our lookup.
- HFileBlock block = blockReader.readBlockData(currentOffset,
- currentOnDiskSize, -1, true);
- if (block == null) {
- throw new IOException("Failed to read block at offset " +
- currentOffset + ", onDiskSize=" + currentOnDiskSize);
- }
- while (!block.getBlockType().equals(BlockType.DATA)) {
- // Read the block. It may be intermediate level block, leaf level block
- // or data block. In any case, we expect non-root index block format.
- // We don't allow more than searchTreeLevel iterations of this loop.
+ HFileBlock block;
+ while (true) {
+
+ if (currentBlock != null && currentBlock.getOffset() == currentOffset)
+ {
+ // Avoid reading the same block again, even with caching turned off.
+ // This is crucial for compaction-type workload which might have
+ // caching turned off. This is like a one-block cache inside the
+ // scanner.
+ block = currentBlock;
+ } else {
+ // Call HFile's caching block reader API. We always cache index
+ // blocks, otherwise we might get terrible performance.
+ boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
+ block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize,
+ shouldCache, pread, isCompaction);
+ }
+
+ if (block == null) {
+ throw new IOException("Failed to read block at offset " +
+ currentOffset + ", onDiskSize=" + currentOnDiskSize);
+ }
+
+ // Found a data block, break the loop and check our level in the tree.
+ if (block.getBlockType().equals(BlockType.DATA)) {
+ break;
+ }
+
+ // Not a data block. This must be a leaf-level or intermediate-level
+ // index block. We don't allow going deeper than searchTreeLevel.
if (++lookupLevel > searchTreeLevel) {
- throw new IOException("Search Tree Level overflow: lookupLevel: "+
- lookupLevel + " searchTreeLevel: " + searchTreeLevel);
+ throw new IOException("Search Tree Level overflow: lookupLevel="+
+ lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
}
- // read to the byte buffer
+ // Locate the entry corresponding to the given key in the non-root
+ // (leaf or intermediate-level) index block.
ByteBuffer buffer = block.getBufferWithoutHeader();
if (!locateNonRootIndexEntry(buffer, key, keyOffset, keyLength,
comparator)) {
@@ -216,16 +239,6 @@ public class HFileBlockIndex {
currentOffset = buffer.getLong();
currentOnDiskSize = buffer.getInt();
-
- // Located a deeper-level block, now read it.
- if (currentBlock != null && currentBlock.getOffset() == currentOffset)
- {
- // Avoid reading the same block.
- block = currentBlock;
- } else {
- block = blockReader.readBlockData(currentOffset, currentOnDiskSize,
- -1, true);
- }
}
if (lookupLevel != searchTreeLevel) {
@@ -252,12 +265,15 @@ public class HFileBlockIndex {
return midKey;
if (midLeafBlockOffset >= 0) {
- if (blockReader == null) {
+ if (cachingBlockReader == null) {
throw new IOException("Have to read the middle leaf block but " +
"no block reader available");
}
- HFileBlock midLeafBlock = blockReader.readBlockData(
- midLeafBlockOffset, midLeafBlockOnDiskSize, -1, true);
+
+ // Caching, using pread, assuming this is not a compaction.
+ HFileBlock midLeafBlock = cachingBlockReader.readBlock(
+ midLeafBlockOffset, midLeafBlockOnDiskSize, true, true, false);
+
ByteBuffer b = midLeafBlock.getBufferWithoutHeader();
int numDataBlocks = b.getInt();
int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (midKeyEntry + 1));
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java Tue Oct 11 17:45:24 2011
@@ -655,7 +655,7 @@ public class HFileReaderV1 extends Abstr
}
@Override
- public HFileBlock readBlock(long offset, int onDiskBlockSize,
+ public HFileBlock readBlock(long offset, long onDiskBlockSize,
boolean cacheBlock, boolean pread, boolean isCompaction) {
throw new UnsupportedOperationException();
}
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Tue Oct 11 17:45:24 2011
@@ -38,8 +38,7 @@ import org.apache.hadoop.hbase.util.IdLo
/**
* {@link HFile} reader for version 2.
*/
-public class HFileReaderV2 extends AbstractHFileReader implements
- HFileBlock.BasicReader {
+public class HFileReaderV2 extends AbstractHFileReader {
private static final Log LOG = LogFactory.getLog(HFileReaderV2.class);
@@ -206,23 +205,6 @@ public class HFileReaderV2 extends Abstr
}
/**
- * Implements the "basic block reader" API, used mainly by
- * {@link HFileBlockIndex.BlockIndexReader} in
- * {@link HFileBlockIndex.BlockIndexReader#seekToDataBlock(byte[], int, int,
- * HFileBlock)} in a random-read access pattern.
- */
- @Override
- public HFileBlock readBlockData(long offset, long onDiskSize,
- int uncompressedSize, boolean pread) throws IOException {
- if (onDiskSize >= Integer.MAX_VALUE) {
- throw new IOException("Invalid on-disk size: " + onDiskSize);
- }
-
- // Assuming we are not doing a compaction.
- return readBlock(offset, (int) onDiskSize, true, pread, false);
- }
-
- /**
* Read in a file block.
*
* @param dataBlockOffset offset to read.
@@ -233,7 +215,7 @@ public class HFileReaderV2 extends Abstr
* @return Block wrapped in a ByteBuffer.
* @throws IOException
*/
- public HFileBlock readBlock(long dataBlockOffset, int onDiskBlockSize,
+ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
boolean cacheBlock, boolean pread, final boolean isCompaction)
throws IOException {
if (dataBlockIndexReader == null) {
@@ -506,9 +488,11 @@ public class HFileReaderV2 extends Abstr
*/
private int seekTo(byte[] key, int offset, int length, boolean rewind)
throws IOException {
- HFileBlock seekToBlock =
- ((HFileReaderV2) reader).getDataBlockIndexReader().seekToDataBlock(
- key, offset, length, block);
+ HFileBlockIndex.BlockIndexReader indexReader =
+ reader.getDataBlockIndexReader();
+ HFileBlock seekToBlock = indexReader.seekToDataBlock(key, offset, length,
+ block, cacheBlocks, pread, isCompaction);
+
if (seekToBlock == null) {
// This happens if the key e.g. falls before the beginning of the file.
return -1;
@@ -673,10 +657,9 @@ public class HFileReaderV2 extends Abstr
@Override
public boolean seekBefore(byte[] key, int offset, int length)
throws IOException {
- HFileReaderV2 reader2 = (HFileReaderV2) reader;
HFileBlock seekToBlock =
- reader2.getDataBlockIndexReader().seekToDataBlock(
- key, offset, length, block);
+ reader.getDataBlockIndexReader().seekToDataBlock(key, offset,
+ length, block, cacheBlocks, pread, isCompaction);
if (seekToBlock == null) {
return false;
}
@@ -694,8 +677,9 @@ public class HFileReaderV2 extends Abstr
// It is important that we compute and pass onDiskSize to the block
// reader so that it does not have to read the header separately to
// figure out the size.
- seekToBlock = reader2.fsBlockReader.readBlockData(previousBlockOffset,
- seekToBlock.getOffset() - previousBlockOffset, -1, pread);
+ seekToBlock = reader.readBlock(previousBlockOffset,
+ seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
+ pread, isCompaction);
// TODO shortcut: seek forward in this block to the last key of the
// block.
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java Tue Oct 11 17:45:24 2011
@@ -160,8 +160,13 @@ public class TestCacheOnWrite {
new EnumMap<BlockType, Integer>(BlockType.class);
while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
- HFileBlock block = reader.readBlockData(offset, prevBlock == null ? -1
- : prevBlock.getNextBlockOnDiskSizeWithHeader(), -1, false);
+ long onDiskSize = -1;
+ if (prevBlock != null) {
+ onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
+ }
+ // Flags: cache the block, use pread, this is not a compaction.
+ HFileBlock block = reader.readBlock(offset, onDiskSize, true, true,
+ false);
String blockCacheKey = HFile.getBlockCacheKey(reader.getName(), offset);
boolean isCached = blockCache.getBlock(blockCacheKey, true) != null;
boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1181979&r1=1181978&r2=1181979&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Tue Oct 11 17:45:24 2011
@@ -41,8 +41,11 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
+import org.apache.hadoop.hbase.io.hfile.ColumnFamilyMetrics.BlockMetricType;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
+import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
@@ -121,37 +124,36 @@ public class TestHFileBlockIndex {
* A wrapper around a block reader which only caches the results of the last
* operation. Not thread-safe.
*/
- private static class BlockReaderWrapper implements HFileBlock.BasicReader {
+ private static class BlockReaderWrapper implements HFile.CachingBlockReader {
- private HFileBlock.BasicReader realReader;
+ private HFileBlock.FSReader realReader;
private long prevOffset;
private long prevOnDiskSize;
- private long prevUncompressedSize;
private boolean prevPread;
private HFileBlock prevBlock;
public int hitCount = 0;
public int missCount = 0;
- public BlockReaderWrapper(HFileBlock.BasicReader realReader) {
+ public BlockReaderWrapper(HFileBlock.FSReader realReader) {
this.realReader = realReader;
}
@Override
- public HFileBlock readBlockData(long offset, long onDiskSize,
- int uncompressedSize, boolean pread) throws IOException {
+ public HFileBlock readBlock(long offset, long onDiskSize,
+ boolean cacheBlock, boolean pread, boolean isCompaction)
+ throws IOException {
if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
- uncompressedSize == prevUncompressedSize && pread == prevPread) {
+ pread == prevPread) {
hitCount += 1;
return prevBlock;
}
missCount += 1;
prevBlock = realReader.readBlockData(offset, onDiskSize,
- uncompressedSize, pread);
+ -1, pread);
prevOffset = offset;
prevOnDiskSize = onDiskSize;
- prevUncompressedSize = uncompressedSize;
prevPread = pread;
return prevBlock;
@@ -182,7 +184,8 @@ public class TestHFileBlockIndex {
for (byte[] key : keys) {
assertTrue(key != null);
assertTrue(indexReader != null);
- HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null);
+ HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null,
+ true, true, false);
if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
assertTrue(b == null);
++i;