Posted to commits@hbase.apache.org by la...@apache.org on 2012/04/26 22:08:23 UTC

svn commit: r1331058 - in /hbase/trunk/src: main/java/org/apache/hadoop/hbase/io/hfile/ test/java/org/apache/hadoop/hbase/io/hfile/

Author: larsh
Date: Thu Apr 26 20:08:23 2012
New Revision: 1331058

URL: http://svn.apache.org/viewvc?rev=1331058&view=rev
Log:
HBASE-5864 Error while reading from hfile in 0.94 (Ram)
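
Background: with HBase-level checksums enabled, the trailing checksum bytes of a
block are still present in the buffer behind getByteStream(), so available()
over-reports the remaining index payload. readMultiLevelIndexRoot() could then
mistake checksum bytes for mid-key metadata and read bogus offsets. The block
iterator now returns the HFileBlock itself so the index reader can subtract
totalChecksumBytes() before checking for mid-key metadata.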

Modified:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java Thu Apr 26 20:08:23 2012
@@ -1099,10 +1099,9 @@ public class HFileBlock extends SchemaCo
 
     /**
      * Similar to {@link #nextBlock()} but checks block type, throws an
-     * exception if incorrect, and returns the data portion of the block as
-     * an input stream.
+     * exception if incorrect, and returns the HFile block
      */
-    DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
+    HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
   }
 
   /** A full-fledged reader with iteration ability. */
@@ -1200,14 +1199,14 @@ public class HFileBlock extends SchemaCo
         }
 
         @Override
-        public DataInputStream nextBlockAsStream(BlockType blockType)
+        public HFileBlock nextBlockWithBlockType(BlockType blockType)
             throws IOException {
           HFileBlock blk = nextBlock();
           if (blk.getBlockType() != blockType) {
             throw new IOException("Expected block of type " + blockType
                 + " but found " + blk.getBlockType());
           }
-          return blk.getByteStream();
+          return blk;
         }
       };
     }
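
The iterator contract changes from handing out a bare data stream to handing out
the typed HFileBlock, so callers keep access to block metadata such as
totalChecksumBytes(). A minimal caller-side sketch (method names as in the
patch; the blockIter variable is illustrative):

    HFileBlock blk = blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX);
    DataInputStream in = blk.getByteStream();   // data portion, as before
    int trailing = blk.totalChecksumBytes();    // checksum bytes still sitting at
                                                // the end of the stream's buffer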

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java Thu Apr 26 20:08:23 2012
@@ -533,24 +533,43 @@ public class HFileBlockIndex {
         }
       }
     }
+    
+    /**
+     * Read in the root-level index from the given input stream. Must match
+     * what was written into the root level by
+     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
+     * offset that function returned.
+     *
+     * @param blk the HFile block
+     * @param numEntries the number of root-level index entries
+     * @return the buffered input stream or wrapped byte input stream
+     * @throws IOException
+     */
+    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
+      DataInputStream in = blk.getByteStream();
+      readRootIndex(in, numEntries);
+      return in;
+    }
 
     /**
      * Read the root-level metadata of a multi-level block index. Based on
      * {@link #readRootIndex(DataInput, int)}, but also reads metadata
      * necessary to compute the mid-key in a multi-level index.
      *
-     * @param in the buffered or byte input stream to read from
+     * @param blk the HFile block
      * @param numEntries the number of root-level index entries
      * @throws IOException
      */
-    public void readMultiLevelIndexRoot(DataInputStream in,
+    public void readMultiLevelIndexRoot(HFileBlock blk,
         final int numEntries) throws IOException {
-      readRootIndex(in, numEntries);
-      if (in.available() < MID_KEY_METADATA_SIZE) {
+      DataInputStream in = readRootIndex(blk, numEntries);
+      // after reading the root index the checksum bytes have to
+      // be subtracted to know if the mid key exists.
+      int checkSumBytes = blk.totalChecksumBytes();
+      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
         // No mid-key metadata available.
         return;
       }
-
       midLeafBlockOffset = in.readLong();
       midLeafBlockOnDiskSize = in.readInt();
       midKeyEntry = in.readInt();
@@ -763,7 +782,7 @@ public class HFileBlockIndex {
 
       if (LOG.isTraceEnabled()) {
         LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
-          + out.getPos() + ", " + rootChunk.getNumEntries()
+          + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
           + " root-level entries, " + totalNumEntries + " total entries, "
           + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
           " on-disk size, "

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Thu Apr 26 20:08:23 2012
@@ -126,17 +126,17 @@ public class HFileReaderV2 extends Abstr
     // Data index. We also read statistics about the block index written after
     // the root level.
     dataBlockIndexReader.readMultiLevelIndexRoot(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());
 
     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getMetaIndexCount());
 
     // File info
     fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     lastKey = fileInfo.get(FileInfo.LASTKEY);
     avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
     avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
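
Note the asymmetry in the call sites: the index readers now take the whole
block, since they need totalChecksumBytes() to bound their reads, while FileInfo
still consumes a plain stream via getByteStream(); its readFields() reads a
self-delimiting payload and never consults available(), so the trailing
checksum bytes are harmless there.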

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Thu Apr 26 20:08:23 2012
@@ -176,7 +176,7 @@ public class TestHFileBlockIndex {
             Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);
 
     indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
-        fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);
 
     long prevOffset = -1;
     int i = 0;

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java?rev=1331058&r1=1331057&r2=1331058&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java Thu Apr 26 20:08:23 2012
@@ -37,8 +37,7 @@ import org.apache.hadoop.fs.FSDataInputS
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.RawComparator;
@@ -72,14 +71,30 @@ public class TestHFileWriterV2 {
   @Test
   public void testHFileFormatV2() throws IOException {
     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
-        "testHFileFormatV2");
+    "testHFileFormatV2");
+    final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
+    final int entryCount = 10000;
+    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
+  }
+  
+  
+  @Test
+  public void testMidKeyInHFile() throws IOException{
+    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+    "testMidKeyInHFile");
+    Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
+    int entryCount = 50000;
+    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
+  }
+
+  private void writeDataAndReadFromHFile(Path hfilePath,
+      Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {
 
-    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
     HFileWriterV2 writer = (HFileWriterV2)
         new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
             .withPath(fs, hfilePath)
             .withBlockSize(4096)
-            .withCompression(COMPRESS_ALGO)
+            .withCompression(compressAlgo)
             .withComparator(KeyValue.KEY_COMPARATOR)
             .create();
 
@@ -88,11 +103,10 @@ public class TestHFileWriterV2 {
 
     Random rand = new Random(9713312); // Just a fixed seed.
 
-    final int ENTRY_COUNT = 10000;
     List<byte[]> keys = new ArrayList<byte[]>();
     List<byte[]> values = new ArrayList<byte[]>();
 
-    for (int i = 0; i < ENTRY_COUNT; ++i) {
+    for (int i = 0; i < entryCount; ++i) {
       byte[] keyBytes = randomOrderedKey(rand, i);
 
       // A random-length random value.
@@ -113,6 +127,7 @@ public class TestHFileWriterV2 {
     writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
 
     writer.close();
+    
 
     FSDataInputStream fsdis = fs.open(hfilePath);
 
@@ -124,10 +139,10 @@ public class TestHFileWriterV2 {
         FixedFileTrailer.readFromStream(fsdis, fileSize);
 
     assertEquals(2, trailer.getMajorVersion());
-    assertEquals(ENTRY_COUNT, trailer.getEntryCount());
+    assertEquals(entryCount, trailer.getEntryCount());
 
     HFileBlock.FSReader blockReader =
-        new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
+        new HFileBlock.FSReaderV2(fsdis, compressAlgo, fileSize);
     // Comparator class name is stored in the trailer in version 2.
     RawComparator<byte []> comparator = trailer.createComparator();
     HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
@@ -143,16 +158,21 @@ public class TestHFileWriterV2 {
     // Data index. We also read statistics about the block index written after
     // the root level.
     dataBlockIndexReader.readMultiLevelIndexRoot(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());
-
+    
+    if (findMidKey) {
+      byte[] midkey = dataBlockIndexReader.midkey();
+      assertNotNull("Midkey should not be null", midkey);
+    }
+    
     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
         trailer.getMetaIndexCount());
     // File info
     FileInfo fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     byte [] keyValueFormatVersion = fileInfo.get(
         HFileWriterV2.KEY_VALUE_VERSION);
     boolean includeMemstoreTS = keyValueFormatVersion != null &&
@@ -200,7 +220,7 @@ public class TestHFileWriterV2 {
     }
     LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
         + blocksRead);
-    assertEquals(ENTRY_COUNT, entriesRead);
+    assertEquals(entryCount, entriesRead);
 
     // Meta blocks. We can scan until the load-on-open data offset (which is
     // the root block index offset in version 2) because we are not testing
@@ -226,6 +246,7 @@ public class TestHFileWriterV2 {
     fsdis.close();
   }
 
+
   // Static stuff used by various HFile v2 unit tests
 
   private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
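
The test refactor extracts the body of testHFileFormatV2 into a shared
writeDataAndReadFromHFile(path, algo, entryCount, findMidKey) helper and adds
testMidKeyInHFile, whose 50,000-entry uncompressed file is large enough for the
data index to carry mid-key metadata, so the new assertNotNull on
dataBlockIndexReader.midkey() exercises the checksum-aware path end to end.
Further cases can reuse the helper; for example (hypothetical, not part of this
commit):

    @Test
    public void testMidKeyInCompressedHFile() throws IOException {
      Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
          "testMidKeyInCompressedHFile");
      writeDataAndReadFromHFile(hfilePath, Compression.Algorithm.GZ, 50000, true);
    }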