You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by la...@apache.org on 2012/04/26 22:08:23 UTC
svn commit: r1331058 - in /hbase/trunk/src:
main/java/org/apache/hadoop/hbase/io/hfile/
test/java/org/apache/hadoop/hbase/io/hfile/
Author: larsh
Date: Thu Apr 26 20:08:23 2012
New Revision: 1331058
URL: http://svn.apache.org/viewvc?rev=1331058&view=rev
Log:
HBASE-5864 Error while reading from hfile in 0.94 (Ram)
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java Thu Apr 26 20:08:23 2012
@@ -1099,10 +1099,9 @@ public class HFileBlock extends SchemaCo
/**
* Similar to {@link #nextBlock()} but checks block type, throws an
- * exception if incorrect, and returns the data portion of the block as
- * an input stream.
+ * exception if incorrect, and returns the HFile block.
*/
- DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
+ HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
}
/** A full-fledged reader with iteration ability. */
@@ -1200,14 +1199,14 @@ public class HFileBlock extends SchemaCo
}
@Override
- public DataInputStream nextBlockAsStream(BlockType blockType)
+ public HFileBlock nextBlockWithBlockType(BlockType blockType)
throws IOException {
HFileBlock blk = nextBlock();
if (blk.getBlockType() != blockType) {
throw new IOException("Expected block of type " + blockType
+ " but found " + blk.getBlockType());
}
- return blk.getByteStream();
+ return blk;
}
};
}
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java Thu Apr 26 20:08:23 2012
@@ -533,24 +533,43 @@ public class HFileBlockIndex {
}
}
}
+
+ /**
+ * Read in the root-level index from the byte stream of the given HFile
+ * block. Must match what was written into the root level by
+ * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
+ * offset that function returned.
+ *
+ * @param blk the HFile block
+ * @param numEntries the number of root-level index entries
+ * @return the block's byte stream, after the root-level index has been read from it
+ * @throws IOException
+ */
+ public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
+ DataInputStream in = blk.getByteStream();
+ readRootIndex(in, numEntries);
+ return in;
+ }
/**
* Read the root-level metadata of a multi-level block index. Based on
* {@link #readRootIndex(DataInput, int)}, but also reads metadata
* necessary to compute the mid-key in a multi-level index.
*
- * @param in the buffered or byte input stream to read from
+ * @param blk the HFile block
* @param numEntries the number of root-level index entries
* @throws IOException
*/
- public void readMultiLevelIndexRoot(DataInputStream in,
+ public void readMultiLevelIndexRoot(HFileBlock blk,
final int numEntries) throws IOException {
- readRootIndex(in, numEntries);
- if (in.available() < MID_KEY_METADATA_SIZE) {
+ DataInputStream in = readRootIndex(blk, numEntries);
+ // After reading the root index, in.available() still includes the block's
+ // checksum bytes; subtract them to tell whether mid-key metadata is present.
+ int checkSumBytes = blk.totalChecksumBytes();
+ if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
// No mid-key metadata available.
return;
}
-
midLeafBlockOffset = in.readLong();
midLeafBlockOnDiskSize = in.readInt();
midKeyEntry = in.readInt();
@@ -763,7 +782,7 @@ public class HFileBlockIndex {
if (LOG.isTraceEnabled()) {
LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
- + out.getPos() + ", " + rootChunk.getNumEntries()
+ + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
+ " root-level entries, " + totalNumEntries + " total entries, "
+ StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
" on-disk size, "
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Thu Apr 26 20:08:23 2012
@@ -126,17 +126,17 @@ public class HFileReaderV2 extends Abstr
// Data index. We also read statistics about the block index written after
// the root level.
dataBlockIndexReader.readMultiLevelIndexRoot(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
trailer.getDataIndexCount());
// Meta index.
metaBlockIndexReader.readRootIndex(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
trailer.getMetaIndexCount());
// File info
fileInfo = new FileInfo();
- fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+ fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
lastKey = fileInfo.get(FileInfo.LASTKEY);
avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Thu Apr 26 20:08:23 2012
@@ -176,7 +176,7 @@ public class TestHFileBlockIndex {
Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);
indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
- fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+ fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);
long prevOffset = -1;
int i = 0;
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java Thu Apr 26 20:08:23 2012
@@ -37,8 +37,7 @@ import org.apache.hadoop.fs.FSDataInputS
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.RawComparator;
@@ -72,14 +71,30 @@ public class TestHFileWriterV2 {
@Test
public void testHFileFormatV2() throws IOException {
Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
- "testHFileFormatV2");
+ "testHFileFormatV2");
+ final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
+ final int entryCount = 10000;
+ writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
+ }
+
+
+ @Test
+ public void testMidKeyInHFile() throws IOException{
+ Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+ "testMidKeyInHFile");
+ Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
+ int entryCount = 50000;
+ writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
+ }
+
+ private void writeDataAndReadFromHFile(Path hfilePath,
+ Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {
- final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
HFileWriterV2 writer = (HFileWriterV2)
new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
.withPath(fs, hfilePath)
.withBlockSize(4096)
- .withCompression(COMPRESS_ALGO)
+ .withCompression(compressAlgo)
.withComparator(KeyValue.KEY_COMPARATOR)
.create();
@@ -88,11 +103,10 @@ public class TestHFileWriterV2 {
Random rand = new Random(9713312); // Just a fixed seed.
- final int ENTRY_COUNT = 10000;
List<byte[]> keys = new ArrayList<byte[]>();
List<byte[]> values = new ArrayList<byte[]>();
- for (int i = 0; i < ENTRY_COUNT; ++i) {
+ for (int i = 0; i < entryCount; ++i) {
byte[] keyBytes = randomOrderedKey(rand, i);
// A random-length random value.
@@ -113,6 +127,7 @@ public class TestHFileWriterV2 {
writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
writer.close();
+
FSDataInputStream fsdis = fs.open(hfilePath);
@@ -124,10 +139,10 @@ public class TestHFileWriterV2 {
FixedFileTrailer.readFromStream(fsdis, fileSize);
assertEquals(2, trailer.getMajorVersion());
- assertEquals(ENTRY_COUNT, trailer.getEntryCount());
+ assertEquals(entryCount, trailer.getEntryCount());
HFileBlock.FSReader blockReader =
- new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
+ new HFileBlock.FSReaderV2(fsdis, compressAlgo, fileSize);
// Comparator class name is stored in the trailer in version 2.
RawComparator<byte []> comparator = trailer.createComparator();
HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
@@ -143,16 +158,21 @@ public class TestHFileWriterV2 {
// Data index. We also read statistics about the block index written after
// the root level.
dataBlockIndexReader.readMultiLevelIndexRoot(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
trailer.getDataIndexCount());
-
+
+ if (findMidKey) {
+ byte[] midkey = dataBlockIndexReader.midkey();
+ assertNotNull("Midkey should not be null", midkey);
+ }
+
// Meta index.
metaBlockIndexReader.readRootIndex(
- blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+ blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
trailer.getMetaIndexCount());
// File info
FileInfo fileInfo = new FileInfo();
- fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+ fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
byte [] keyValueFormatVersion = fileInfo.get(
HFileWriterV2.KEY_VALUE_VERSION);
boolean includeMemstoreTS = keyValueFormatVersion != null &&
@@ -200,7 +220,7 @@ public class TestHFileWriterV2 {
}
LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
+ blocksRead);
- assertEquals(ENTRY_COUNT, entriesRead);
+ assertEquals(entryCount, entriesRead);
// Meta blocks. We can scan until the load-on-open data offset (which is
// the root block index offset in version 2) because we are not testing
@@ -226,6 +246,7 @@ public class TestHFileWriterV2 {
fsdis.close();
}
+
// Static stuff used by various HFile v2 unit tests
private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";