Posted to commits@hbase.apache.org by te...@apache.org on 2011/08/03 22:30:29 UTC

svn commit: r1153647 - in /hbase/trunk/src/test/java/org/apache/hadoop/hbase: io/hfile/ regionserver/ util/

Author: tedyu
Date: Wed Aug  3 20:30:27 2011
New Revision: 1153647

URL: http://svn.apache.org/viewvc?rev=1153647&view=rev
Log:
HBASE-3857 New test classes.

Added:
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java Wed Aug  3 20:30:27 2011
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import static org.junit.Assert.*;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+
+@RunWith(Parameterized.class)
+public class TestFixedFileTrailer {
+
+  private static final Log LOG = LogFactory.getLog(TestFixedFileTrailer.class);
+
+  /** The number of fields used by each version, indexed by version minus one. */
+  private static final int[] NUM_FIELDS_BY_VERSION = new int[] { 8, 13 };
+
+  private HBaseTestingUtility util = new HBaseTestingUtility();
+  private FileSystem fs;
+  private ByteArrayOutputStream baos = new ByteArrayOutputStream();
+  private int version;
+
+  static {
+    assert NUM_FIELDS_BY_VERSION.length == HFile.MAX_FORMAT_VERSION
+        - HFile.MIN_FORMAT_VERSION + 1;
+  }
+
+  public TestFixedFileTrailer(int version) {
+    this.version = version;
+  }
+
+  @Parameters
+  public static Collection<Object[]> getParameters() {
+    List<Object[]> versionsToTest = new ArrayList<Object[]>();
+    for (int v = HFile.MIN_FORMAT_VERSION; v <= HFile.MAX_FORMAT_VERSION; ++v)
+      versionsToTest.add(new Integer[] { v } );
+    return versionsToTest;
+  }
+
+  @Before
+  public void setUp() throws IOException {
+    fs = FileSystem.get(util.getConfiguration());
+  }
+
+  @Test
+  public void testTrailer() throws IOException {
+    FixedFileTrailer t = new FixedFileTrailer(version);
+    t.setDataIndexCount(3);
+    t.setEntryCount(((long) Integer.MAX_VALUE) + 1);
+
+    if (version == 1) {
+      t.setFileInfoOffset(876);
+    }
+
+    if (version == 2) {
+      t.setLastDataBlockOffset(291);
+      t.setNumDataIndexLevels(3);
+      t.setComparatorClass(KeyValue.KEY_COMPARATOR.getClass());
+      t.setFirstDataBlockOffset(9081723123L); // Completely unrealistic.
+      t.setUncompressedDataIndexSize(827398717L); // Something random.
+    }
+
+    t.setLoadOnOpenOffset(128);
+    t.setMetaIndexCount(7);
+
+    t.setTotalUncompressedBytes(129731987);
+
+    {
+      DataOutputStream dos = new DataOutputStream(baos); // Limited scope.
+      t.serialize(dos);
+      dos.flush();
+      assertEquals(dos.size(), FixedFileTrailer.getTrailerSize(version));
+    }
+
+    byte[] bytes = baos.toByteArray();
+    baos.reset();
+
+    assertEquals(bytes.length, FixedFileTrailer.getTrailerSize(version));
+
+    ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+
+    // Finished writing, trying to read.
+    {
+      DataInputStream dis = new DataInputStream(bais);
+      FixedFileTrailer t2 = new FixedFileTrailer(version);
+      t2.deserialize(dis);
+      assertEquals(-1, bais.read()); // Ensure we have read everything.
+      checkLoadedTrailer(version, t, t2);
+    }
+
+    // Now check what happens if the trailer is corrupted.
+    Path trailerPath = new Path(HBaseTestingUtility.getTestDir(), "trailer_"
+        + version);
+
+    {
+      for (byte invalidVersion : new byte[] { HFile.MIN_FORMAT_VERSION - 1,
+          HFile.MAX_FORMAT_VERSION + 1}) {
+        bytes[bytes.length - 1] = invalidVersion;
+        writeTrailer(trailerPath, null, bytes);
+        try {
+          readTrailer(trailerPath);
+          fail("Exception expected");
+        } catch (IOException ex) {
+          // Make it easy to debug this.
+          String msg = ex.getMessage();
+          String cleanMsg = msg.replaceAll(
+              "^(java(\\.[a-zA-Z]+)+:\\s+)?|\\s+\\(.*\\)\\s*$", "");
+          assertEquals("Actual exception message is \"" + msg + "\".\n" +
+              "Cleaned-up message", // will be followed by " expected: ..."
+              "Invalid HFile version: " + invalidVersion, cleanMsg);
+          LOG.info("Got an expected exception: " + msg);
+        }
+      }
+
+    }
+
+    // Now write the trailer into a file and auto-detect the version.
+    writeTrailer(trailerPath, t, null);
+
+    FixedFileTrailer t4 = readTrailer(trailerPath);
+
+    checkLoadedTrailer(version, t, t4);
+
+    String trailerStr = t.toString();
+    assertEquals("Invalid number of fields in the string representation "
+        + "of the trailer: " + trailerStr, NUM_FIELDS_BY_VERSION[version - 1],
+        trailerStr.split(", ").length);
+    assertEquals(trailerStr, t4.toString());
+  }
+
+  private FixedFileTrailer readTrailer(Path trailerPath) throws IOException {
+    FSDataInputStream fsdis = fs.open(trailerPath);
+    FixedFileTrailer trailerRead = FixedFileTrailer.readFromStream(fsdis,
+        fs.getFileStatus(trailerPath).getLen());
+    fsdis.close();
+    return trailerRead;
+  }
+
+  private void writeTrailer(Path trailerPath, FixedFileTrailer t,
+      byte[] useBytesInstead) throws IOException {
+    assert (t == null) != (useBytesInstead == null); // Expect one non-null.
+
+    FSDataOutputStream fsdos = fs.create(trailerPath);
+    fsdos.write(135); // to make deserializer's job less trivial
+    if (useBytesInstead != null) {
+      fsdos.write(useBytesInstead);
+    } else {
+      t.serialize(fsdos);
+    }
+    fsdos.close();
+  }
+
+  private void checkLoadedTrailer(int version, FixedFileTrailer expected,
+      FixedFileTrailer loaded) throws IOException {
+    assertEquals(version, loaded.getVersion());
+    assertEquals(expected.getDataIndexCount(), loaded.getDataIndexCount());
+
+    assertEquals(Math.min(expected.getEntryCount(),
+        version == 1 ? Integer.MAX_VALUE : Long.MAX_VALUE),
+        loaded.getEntryCount());
+
+    if (version == 1) {
+      assertEquals(expected.getFileInfoOffset(), loaded.getFileInfoOffset());
+    }
+
+    if (version == 2) {
+      assertEquals(expected.getLastDataBlockOffset(),
+          loaded.getLastDataBlockOffset());
+      assertEquals(expected.getNumDataIndexLevels(),
+          loaded.getNumDataIndexLevels());
+      assertEquals(expected.createComparator().getClass().getName(),
+          loaded.createComparator().getClass().getName());
+      assertEquals(expected.getFirstDataBlockOffset(),
+          loaded.getFirstDataBlockOffset());
+      assertTrue(
+          expected.createComparator() instanceof KeyValue.KeyComparator);
+      assertEquals(expected.getUncompressedDataIndexSize(),
+          loaded.getUncompressedDataIndexSize());
+    }
+
+    assertEquals(expected.getLoadOnOpenDataOffset(),
+        loaded.getLoadOnOpenDataOffset());
+    assertEquals(expected.getMetaIndexCount(), loaded.getMetaIndexCount());
+
+    assertEquals(expected.getTotalUncompressedBytes(),
+        loaded.getTotalUncompressedBytes());
+  }
+
+}
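
For reference, the round-trip pattern that TestFixedFileTrailer exercises (serialize into an in-memory buffer, deserialize, and verify the stream is fully drained) can be sketched with nothing but java.io. The ToyTrailer class below and its two fields are hypothetical stand-ins for FixedFileTrailer, not HBase code:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical two-field "trailer", standing in for FixedFileTrailer.
class ToyTrailer {
  long entryCount;
  int metaIndexCount;

  void serialize(DataOutputStream out) throws IOException {
    out.writeLong(entryCount);
    out.writeInt(metaIndexCount);
  }

  static ToyTrailer deserialize(DataInputStream in) throws IOException {
    ToyTrailer t = new ToyTrailer();
    t.entryCount = in.readLong();
    t.metaIndexCount = in.readInt();
    return t;
  }

  public static void main(String[] args) throws IOException {
    ToyTrailer written = new ToyTrailer();
    written.entryCount = ((long) Integer.MAX_VALUE) + 1;
    written.metaIndexCount = 7;

    // Serialize into an in-memory buffer, as the test does with baos.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    written.serialize(dos);
    dos.flush();

    // Deserialize and confirm every byte was consumed (read() == -1),
    // mirroring the assertEquals(-1, bais.read()) check in the test.
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    DataInputStream dis = new DataInputStream(bais);
    ToyTrailer read = ToyTrailer.deserialize(dis);
    if (bais.read() != -1) throw new AssertionError("trailing bytes left");
    if (read.entryCount != written.entryCount) throw new AssertionError();
    if (read.metaIndexCount != written.metaIndexCount) throw new AssertionError();
    System.out.println("Round-trip OK, " + baos.size() + " bytes");
  }
}

The trailing-bytes check plays the same role as assertEquals(-1, bais.read()) in the test above: it proves the deserializer consumed exactly the number of bytes the serializer produced.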

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java Wed Aug  3 20:30:27 2011
@@ -0,0 +1,499 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executor;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.io.compress.Compressor;
+
+import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.*;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestHFileBlock {
+
+  private static final boolean[] BOOLEAN_VALUES = new boolean[] { false, true };
+
+  private static final Log LOG = LogFactory.getLog(TestHFileBlock.class);
+
+  static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = {
+      NONE, GZ };
+
+  // In case we need to temporarily switch some test cases to just test gzip.
+  static final Compression.Algorithm[] GZIP_ONLY  = { GZ };
+
+  private static final int NUM_TEST_BLOCKS = 1000;
+
+  private static final int NUM_READER_THREADS = 26;
+
+  private static final HBaseTestingUtility TEST_UTIL =
+      new HBaseTestingUtility();
+  private FileSystem fs;
+  private int uncompressedSizeV1;
+
+  @Before
+  public void setUp() throws IOException {
+    fs = FileSystem.get(TEST_UTIL.getConfiguration());
+    TEST_UTIL.initTestDir();
+  }
+
+  public void writeTestBlockContents(DataOutputStream dos) throws IOException {
+    // This compresses really well.
+    for (int i = 0; i < 1000; ++i)
+      dos.writeInt(i / 100);
+  }
+
+  public byte[] createTestV1Block(Compression.Algorithm algo)
+      throws IOException {
+    Compressor compressor = algo.getCompressor();
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    OutputStream os = algo.createCompressionStream(baos, compressor, 0);
+    DataOutputStream dos = new DataOutputStream(os);
+    BlockType.META.write(dos); // Let's make this a meta block.
+    writeTestBlockContents(dos);
+    uncompressedSizeV1 = dos.size();
+    dos.flush();
+    algo.returnCompressor(compressor);
+    return baos.toByteArray();
+  }
+
+  private byte[] createTestV2Block(Compression.Algorithm algo)
+      throws IOException {
+    final BlockType blockType = BlockType.DATA;
+    HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
+    DataOutputStream dos = hbw.startWriting(blockType, false);
+    writeTestBlockContents(dos);
+    byte[] headerAndData = hbw.getHeaderAndData();
+    assertEquals(1000 * 4, hbw.getUncompressedSizeWithoutHeader());
+    hbw.releaseCompressor();
+    return headerAndData;
+  }
+
+  public String createTestBlockStr(Compression.Algorithm algo)
+      throws IOException {
+    byte[] testV2Block = createTestV2Block(algo);
+    int osOffset = HFileBlock.HEADER_SIZE + 9;
+    if (osOffset < testV2Block.length) {
+      // Force-set the "OS" field of the gzip header to 3 (Unix) to avoid
+      // variations across operating systems.
+      // See http://www.gzip.org/zlib/rfc-gzip.html for gzip format.
+      testV2Block[osOffset] = 3;
+    }
+    return Bytes.toStringBinary(testV2Block);
+  }
+
+  @Test
+  public void testNoCompression() throws IOException {
+    assertEquals(4000 + HFileBlock.HEADER_SIZE, createTestV2Block(NONE).length);
+  }
+
+  @Test
+  public void testGzipCompression() throws IOException {
+    assertEquals(
+        "DATABLK*\\x00\\x00\\x00:\\x00\\x00\\x0F\\xA0\\xFF\\xFF\\xFF\\xFF"
+            + "\\xFF\\xFF\\xFF\\xFF"
+            // gzip-compressed block: http://www.gzip.org/zlib/rfc-gzip.html
+            + "\\x1F\\x8B"  // gzip magic signature
+            + "\\x08"  // Compression method: 8 = "deflate"
+            + "\\x00"  // Flags
+            + "\\x00\\x00\\x00\\x00"  // mtime
+            + "\\x00"  // XFL (extra flags)
+            // OS (0 = FAT filesystems, 3 = Unix). However, this field
+            // sometimes gets set to 0 on Linux and Mac, so we reset it to 3.
+            + "\\x03"
+            + "\\xED\\xC3\\xC1\\x11\\x00 \\x08\\xC00DD\\xDD\\x7Fa"
+            + "\\xD6\\xE8\\xA3\\xB9K\\x84`\\x96Q\\xD3\\xA8\\xDB\\xA8e\\xD4c"
+            + "\\xD46\\xEA5\\xEA3\\xEA7\\xE7\\x00LI\\s\\xA0\\x0F\\x00\\x00",
+        createTestBlockStr(GZ));
+  }
+
+  @Test
+  public void testReaderV1() throws IOException {
+    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
+      for (boolean pread : new boolean[] { false, true }) {
+        byte[] block = createTestV1Block(algo);
+        Path path = new Path(HBaseTestingUtility.getTestDir(), "blocks_v1_"
+            + algo);
+        LOG.info("Creating temporary file at " + path);
+        FSDataOutputStream os = fs.create(path);
+        int totalSize = 0;
+        int numBlocks = 50;
+        for (int i = 0; i < numBlocks; ++i) {
+          os.write(block);
+          totalSize += block.length;
+        }
+        os.close();
+
+        FSDataInputStream is = fs.open(path);
+        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV1(is, algo,
+            totalSize);
+        HFileBlock b;
+        int numBlocksRead = 0;
+        long pos = 0;
+        while (pos < totalSize) {
+          b = hbr.readBlockData(pos, block.length, uncompressedSizeV1, pread);
+          b.sanityCheck();
+          pos += block.length;
+          numBlocksRead++;
+        }
+        assertEquals(numBlocks, numBlocksRead);
+        is.close();
+      }
+    }
+  }
+
+  @Test
+  public void testReaderV2() throws IOException {
+    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
+      for (boolean pread : new boolean[] { false, true }) {
+        Path path = new Path(HBaseTestingUtility.getTestDir(), "blocks_v2_"
+            + algo);
+        FSDataOutputStream os = fs.create(path);
+        HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
+        long totalSize = 0;
+        for (int blockId = 0; blockId < 2; ++blockId) {
+          DataOutputStream dos = hbw.startWriting(BlockType.DATA, false);
+          for (int i = 0; i < 1234; ++i)
+            dos.writeInt(i);
+          hbw.writeHeaderAndData(os);
+          totalSize += hbw.getOnDiskSizeWithHeader();
+        }
+        os.close();
+
+        FSDataInputStream is = fs.open(path);
+        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
+            totalSize);
+        HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
+        is.close();
+
+        b.sanityCheck();
+        assertEquals(4936, b.getUncompressedSizeWithoutHeader());
+        assertEquals(algo == GZ ? 2173 : 4936, b.getOnDiskSizeWithoutHeader());
+        String blockStr = b.toString();
+
+        if (algo == GZ) {
+          is = fs.open(path);
+          hbr = new HFileBlock.FSReaderV2(is, algo, totalSize);
+          b = hbr.readBlockData(0, 2173 + HFileBlock.HEADER_SIZE, -1, pread);
+          assertEquals(blockStr, b.toString());
+          int wrongCompressedSize = 2172;
+          try {
+            b = hbr.readBlockData(0, wrongCompressedSize
+                + HFileBlock.HEADER_SIZE, -1, pread);
+            fail("Exception expected");
+          } catch (IOException ex) {
+            String expectedPrefix = "On-disk size without header provided is "
+                + wrongCompressedSize + ", but block header contains "
+                + b.getOnDiskSizeWithoutHeader() + ".";
+            assertTrue("Invalid exception message: '" + ex.getMessage()
+                + "'.\nMessage is expected to start with: '" + expectedPrefix
+                + "'", ex.getMessage().startsWith(expectedPrefix));
+          }
+          is.close();
+        }
+      }
+    }
+  }
+
+  @Test
+  public void testPreviousOffset() throws IOException {
+    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
+      for (boolean pread : BOOLEAN_VALUES) {
+        for (boolean cacheOnWrite : BOOLEAN_VALUES) {
+          Random rand = defaultRandom();
+          LOG.info("Compression algorithm: " + algo + ", pread=" + pread);
+          Path path = new Path(HBaseTestingUtility.getTestDir(), "prev_offset");
+          List<Long> expectedOffsets = new ArrayList<Long>();
+          List<Long> expectedPrevOffsets = new ArrayList<Long>();
+          List<BlockType> expectedTypes = new ArrayList<BlockType>();
+          List<ByteBuffer> expectedContents = cacheOnWrite
+              ? new ArrayList<ByteBuffer>() : null;
+          long totalSize = writeBlocks(rand, algo, path, expectedOffsets,
+              expectedPrevOffsets, expectedTypes, expectedContents, true);
+
+          FSDataInputStream is = fs.open(path);
+          HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
+              totalSize);
+          long curOffset = 0;
+          for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
+            if (!pread) {
+              assertEquals(is.getPos(), curOffset + (i == 0 ? 0 :
+                  HFileBlock.HEADER_SIZE));
+            }
+
+            assertEquals(expectedOffsets.get(i).longValue(), curOffset);
+
+            LOG.info("Reading block #" + i + " at offset " + curOffset);
+            HFileBlock b = hbr.readBlockData(curOffset, -1, -1, pread);
+            LOG.info("Block #" + i + ": " + b);
+            assertEquals("Invalid block #" + i + "'s type:",
+                expectedTypes.get(i), b.getBlockType());
+            assertEquals("Invalid previous block offset for block " + i
+                + " of " + "type " + b.getBlockType() + ":",
+                (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
+            b.sanityCheck();
+            assertEquals(curOffset, b.getOffset());
+
+            // Now re-load this block knowing the on-disk size. This tests a
+            // different branch in the loader.
+            HFileBlock b2 = hbr.readBlockData(curOffset,
+                b.getOnDiskSizeWithHeader(), -1, pread);
+            b2.sanityCheck();
+
+            assertEquals(b.getBlockType(), b2.getBlockType());
+            assertEquals(b.getOnDiskSizeWithoutHeader(),
+                b2.getOnDiskSizeWithoutHeader());
+            assertEquals(b.getOnDiskSizeWithHeader(),
+                b2.getOnDiskSizeWithHeader());
+            assertEquals(b.getUncompressedSizeWithoutHeader(),
+                b2.getUncompressedSizeWithoutHeader());
+            assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
+            assertEquals(curOffset, b2.getOffset());
+
+            curOffset += b.getOnDiskSizeWithHeader();
+
+            if (cacheOnWrite) {
+              // In the cache-on-write mode we store uncompressed bytes so we
+              // can compare them to what was read by the block reader.
+
+              ByteBuffer bufRead = b.getBufferWithHeader();
+              ByteBuffer bufExpected = expectedContents.get(i);
+              boolean bytesAreCorrect = Bytes.compareTo(bufRead.array(),
+                  bufRead.arrayOffset(), bufRead.limit(),
+                  bufExpected.array(), bufExpected.arrayOffset(),
+                  bufExpected.limit()) == 0;
+              String wrongBytesMsg = "";
+
+              if (!bytesAreCorrect) {
+                // Optimization: only construct an error message in case we
+                // will need it.
+                wrongBytesMsg = "Expected bytes in block #" + i + " (algo="
+                    + algo + ", pread=" + pread + "):\n";
+                wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(),
+                    bufExpected.arrayOffset(), Math.min(32,
+                        bufExpected.limit()))
+                    + ", actual:\n"
+                    + Bytes.toStringBinary(bufRead.array(),
+                        bufRead.arrayOffset(), Math.min(32, bufRead.limit()));
+              }
+
+              assertTrue(wrongBytesMsg, bytesAreCorrect);
+            }
+          }
+
+          assertEquals(curOffset, fs.getFileStatus(path).getLen());
+          is.close();
+        }
+      }
+    }
+  }
+
+  private Random defaultRandom() {
+    return new Random(189237);
+  }
+
+  private class BlockReaderThread implements Callable<Boolean> {
+    private final String clientId;
+    private final HFileBlock.FSReader hbr;
+    private final List<Long> offsets;
+    private final List<BlockType> types;
+    private final long fileSize;
+
+    public BlockReaderThread(String clientId,
+        HFileBlock.FSReader hbr, List<Long> offsets, List<BlockType> types,
+        long fileSize) {
+      this.clientId = clientId;
+      this.offsets = offsets;
+      this.hbr = hbr;
+      this.types = types;
+      this.fileSize = fileSize;
+    }
+
+    @Override
+    public Boolean call() throws Exception {
+      Random rand = new Random(clientId.hashCode());
+      long endTime = System.currentTimeMillis() + 10000;
+      int numBlocksRead = 0;
+      int numPositionalRead = 0;
+      int numWithOnDiskSize = 0;
+      while (System.currentTimeMillis() < endTime) {
+        int blockId = rand.nextInt(NUM_TEST_BLOCKS);
+        long offset = offsets.get(blockId);
+        boolean pread = rand.nextBoolean();
+        boolean withOnDiskSize = rand.nextBoolean();
+        long expectedSize =
+          (blockId == NUM_TEST_BLOCKS - 1 ? fileSize
+              : offsets.get(blockId + 1)) - offset;
+
+        HFileBlock b;
+        try {
+          long onDiskSizeArg = withOnDiskSize ? expectedSize : -1;
+          b = hbr.readBlockData(offset, onDiskSizeArg, -1, pread);
+        } catch (IOException ex) {
+          LOG.error("Error in client " + clientId + " trying to read block at "
+              + offset + ", pread=" + pread + ", withOnDiskSize=" +
+              withOnDiskSize, ex);
+          return false;
+        }
+
+        assertEquals(types.get(blockId), b.getBlockType());
+        assertEquals(expectedSize, b.getOnDiskSizeWithHeader());
+        assertEquals(offset, b.getOffset());
+
+        ++numBlocksRead;
+        if (pread)
+          ++numPositionalRead;
+        if (withOnDiskSize)
+          ++numWithOnDiskSize;
+      }
+      LOG.info("Client " + clientId + " successfully read " + numBlocksRead +
+          " blocks (with pread: " + numPositionalRead + ", with onDiskSize " +
+          "specified: " + numWithOnDiskSize + ")");
+      return true;
+    }
+
+  }
+
+  @Test
+  public void testConcurrentReading() throws Exception {
+    for (Compression.Algorithm compressAlgo : COMPRESSION_ALGORITHMS) {
+      Path path =
+          new Path(HBaseTestingUtility.getTestDir(), "concurrent_reading");
+      Random rand = defaultRandom();
+      List<Long> offsets = new ArrayList<Long>();
+      List<BlockType> types = new ArrayList<BlockType>();
+      writeBlocks(rand, compressAlgo, path, offsets, null, types, null, false);
+      FSDataInputStream is = fs.open(path);
+      long fileSize = fs.getFileStatus(path).getLen();
+      HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, compressAlgo,
+          fileSize);
+
+      Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
+      ExecutorCompletionService<Boolean> ecs =
+          new ExecutorCompletionService<Boolean>(exec);
+
+      for (int i = 0; i < NUM_READER_THREADS; ++i) {
+        ecs.submit(new BlockReaderThread("reader_" + (char) ('A' + i), hbr,
+            offsets, types, fileSize));
+      }
+
+      for (int i = 0; i < NUM_READER_THREADS; ++i) {
+        Future<Boolean> result = ecs.take();
+        assertTrue(result.get());
+        LOG.info(String.valueOf(i + 1)
+            + " reader threads finished successfully (algo=" + compressAlgo
+            + ")");
+      }
+
+      is.close();
+    }
+  }
+
+  private long writeBlocks(Random rand, Compression.Algorithm compressAlgo,
+      Path path, List<Long> expectedOffsets, List<Long> expectedPrevOffsets,
+      List<BlockType> expectedTypes, List<ByteBuffer> expectedContents,
+      boolean detailedLogging) throws IOException {
+    boolean cacheOnWrite = expectedContents != null;
+    FSDataOutputStream os = fs.create(path);
+    HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo);
+    Map<BlockType, Long> prevOffsetByType = new HashMap<BlockType, Long>();
+    long totalSize = 0;
+    for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
+      int blockTypeOrdinal = rand.nextInt(BlockType.values().length);
+      BlockType bt = BlockType.values()[blockTypeOrdinal];
+      DataOutputStream dos = hbw.startWriting(bt, cacheOnWrite);
+      for (int j = 0; j < rand.nextInt(500); ++j) {
+        // This might compress well.
+        dos.writeShort(i + 1);
+        dos.writeInt(j + 1);
+      }
+
+      if (expectedOffsets != null)
+        expectedOffsets.add(os.getPos());
+
+      if (expectedPrevOffsets != null) {
+        Long prevOffset = prevOffsetByType.get(bt);
+        expectedPrevOffsets.add(prevOffset != null ? prevOffset : -1);
+        prevOffsetByType.put(bt, os.getPos());
+      }
+
+      expectedTypes.add(bt);
+
+      hbw.writeHeaderAndData(os);
+      totalSize += hbw.getOnDiskSizeWithHeader();
+
+      if (cacheOnWrite)
+        expectedContents.add(hbw.getUncompressedBufferWithHeader());
+
+      if (detailedLogging) {
+        LOG.info("Writing block #" + i + " of type " + bt
+            + ", uncompressed size " + hbw.getUncompressedSizeWithoutHeader()
+            + " at offset " + os.getPos());
+      }
+    }
+    os.close();
+    LOG.info("Created a temporary file at " + path + ", "
+        + fs.getFileStatus(path).getLen() + " byte, compression=" +
+        compressAlgo);
+    return totalSize;
+  }
+
+  @Test
+  public void testBlockHeapSize() {
+    for (int size : new int[] { 100, 256, 12345 }) {
+      byte[] byteArr = new byte[HFileBlock.HEADER_SIZE + size];
+      ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
+      HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
+          true, -1);
+      assertEquals(80, HFileBlock.BYTE_BUFFER_HEAP_SIZE);
+      long expected = ClassSize.align(ClassSize.estimateBase(HFileBlock.class,
+          true)
+          + ClassSize.estimateBase(buf.getClass(), true)
+          + HFileBlock.HEADER_SIZE + size);
+      assertEquals(expected, block.heapSize());
+    }
+  }
+
+}
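
The OS-byte normalization in createTestBlockStr and testGzipCompression above can be reproduced with plain java.util.zip. A minimal sketch (not part of this commit; the class name and payload are made up) that compresses a buffer and pins byte 9 of the gzip stream, the OS field defined by RFC 1952, to 3 (Unix):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.GZIPOutputStream;

public class GzipOsByteSketch {
  // Offset of the OS field within a gzip stream (RFC 1952):
  // bytes 0-1 magic, 2 CM, 3 FLG, 4-7 MTIME, 8 XFL, 9 OS.
  private static final int GZIP_OS_FIELD_OFFSET = 9;

  public static void main(String[] args) throws IOException {
    // A highly compressible payload, similar in spirit to the test data.
    byte[] payload = new byte[4000];

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    GZIPOutputStream gzos = new GZIPOutputStream(baos);
    gzos.write(payload);
    gzos.close();

    byte[] compressed = baos.toByteArray();
    System.out.println("OS byte as written: " + compressed[GZIP_OS_FIELD_OFFSET]);

    // Pin the OS byte to 3 (Unix) so a byte-for-byte expected string does not
    // vary across platforms. The test does the same thing at offset
    // HFileBlock.HEADER_SIZE + 9, because there the gzip stream starts only
    // after the block header.
    compressed[GZIP_OS_FIELD_OFFSET] = 3;
    System.out.println("OS byte after normalization: "
        + compressed[GZIP_OS_FIELD_OFFSET]);
  }
}

testGzipCompression compares Bytes.toStringBinary of the entire block, which is why a platform-dependent OS byte would otherwise make the expected string flaky.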

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Wed Aug  3 20:30:27 2011
@@ -0,0 +1,602 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
+import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import static org.junit.Assert.*;
+
+@RunWith(Parameterized.class)
+public class TestHFileBlockIndex {
+
+  @Parameters
+  public static Collection<Object[]> compressionAlgorithms() {
+    return HBaseTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
+  }
+
+  public TestHFileBlockIndex(Compression.Algorithm compr) {
+    this.compr = compr;
+  }
+
+  private static final Log LOG = LogFactory.getLog(TestHFileBlockIndex.class);
+
+  private static final int NUM_DATA_BLOCKS = 1000;
+  private static final HBaseTestingUtility TEST_UTIL =
+      new HBaseTestingUtility();
+
+  private static final int SMALL_BLOCK_SIZE = 4096;
+  private static final int NUM_KV = 10000;
+
+  private static FileSystem fs;
+  private Path path;
+  private Random rand;
+  private long rootIndexOffset;
+  private int numRootEntries;
+  private int numLevels;
+  private static final List<byte[]> keys = new ArrayList<byte[]>();
+  private final Compression.Algorithm compr;
+  private byte[] firstKeyInFile;
+  private Configuration conf;
+
+  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
+  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
+  private static final int[] UNCOMPRESSED_INDEX_SIZES =
+      { 19187, 21813, 23086 };
+
+  static {
+    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
+    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
+  }
+
+  @Before
+  public void setUp() throws IOException {
+    keys.clear();
+    rand = new Random(2389757);
+    firstKeyInFile = null;
+    conf = TEST_UTIL.getConfiguration();
+
+    // This test requires at least HFile format version 2.
+    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
+
+    fs = FileSystem.get(conf);
+  }
+
+  @Test
+  public void testBlockIndex() throws IOException {
+    path = new Path(HBaseTestingUtility.getTestDir(), "block_index_" + compr);
+    writeWholeIndex();
+    readIndex();
+  }
+
+  /**
+   * A wrapper around a block reader which only caches the results of the last
+   * operation. Not thread-safe.
+   */
+  private static class BlockReaderWrapper implements HFileBlock.BasicReader {
+
+    private HFileBlock.BasicReader realReader;
+    private long prevOffset;
+    private long prevOnDiskSize;
+    private long prevUncompressedSize;
+    private boolean prevPread;
+    private HFileBlock prevBlock;
+
+    public int hitCount = 0;
+    public int missCount = 0;
+
+    public BlockReaderWrapper(HFileBlock.BasicReader realReader) {
+      this.realReader = realReader;
+    }
+
+    @Override
+    public HFileBlock readBlockData(long offset, long onDiskSize,
+        int uncompressedSize, boolean pread) throws IOException {
+      if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
+          uncompressedSize == prevUncompressedSize && pread == prevPread) {
+        hitCount += 1;
+        return prevBlock;
+      }
+
+      missCount += 1;
+      prevBlock = realReader.readBlockData(offset, onDiskSize,
+          uncompressedSize, pread);
+      prevOffset = offset;
+      prevOnDiskSize = onDiskSize;
+      prevUncompressedSize = uncompressedSize;
+      prevPread = pread;
+
+      return prevBlock;
+    }
+  }
+
+  public void readIndex() throws IOException {
+    long fileSize = fs.getFileStatus(path).getLen();
+    LOG.info("Size of " + path + ": " + fileSize);
+
+    FSDataInputStream istream = fs.open(path);
+    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream,
+        compr, fs.getFileStatus(path).getLen());
+
+    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
+    HFileBlockIndex.BlockIndexReader indexReader =
+        new HFileBlockIndex.BlockIndexReader(
+            Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);
+
+    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
+        fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+
+    long prevOffset = -1;
+    int i = 0;
+    int expectedHitCount = 0;
+    int expectedMissCount = 0;
+    LOG.info("Total number of keys: " + keys.size());
+    for (byte[] key : keys) {
+      assertTrue(key != null);
+      assertTrue(indexReader != null);
+      HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null);
+      if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
+        assertTrue(b == null);
+        ++i;
+        continue;
+      }
+
+      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);
+
+      assertTrue("seekToDataBlock failed for " + keyStr, b != null);
+
+      if (prevOffset == b.getOffset()) {
+        assertEquals(++expectedHitCount, brw.hitCount);
+      } else {
+        LOG.info("First key in a new block: " + keyStr + ", block offset: "
+            + b.getOffset() + ")");
+        assertTrue(b.getOffset() > prevOffset);
+        assertEquals(++expectedMissCount, brw.missCount);
+        prevOffset = b.getOffset();
+      }
+      ++i;
+    }
+
+    istream.close();
+  }
+
+  private void writeWholeIndex() throws IOException {
+    assertEquals(0, keys.size());
+    HFileBlock.Writer hbw = new HFileBlock.Writer(compr);
+    FSDataOutputStream outputStream = fs.create(path);
+    HFileBlockIndex.BlockIndexWriter biw =
+        new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
+
+    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
+      hbw.startWriting(BlockType.DATA, false).write(
+          String.valueOf(rand.nextInt(1000)).getBytes());
+      long blockOffset = outputStream.getPos();
+      hbw.writeHeaderAndData(outputStream);
+
+      byte[] firstKey = null;
+      for (int j = 0; j < 16; ++j) {
+        byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 16 + j);
+        keys.add(k);
+        if (j == 8)
+          firstKey = k;
+      }
+      assertTrue(firstKey != null);
+      if (firstKeyInFile == null)
+        firstKeyInFile = firstKey;
+      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());
+
+      writeInlineBlocks(hbw, outputStream, biw, false);
+    }
+    writeInlineBlocks(hbw, outputStream, biw, true);
+    rootIndexOffset = biw.writeIndexBlocks(outputStream);
+    outputStream.close();
+
+    numLevels = biw.getNumLevels();
+    numRootEntries = biw.getNumRootEntries();
+
+    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
+        numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
+  }
+
+  private void writeInlineBlocks(HFileBlock.Writer hbw,
+      FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
+      boolean isClosing) throws IOException {
+    while (biw.shouldWriteBlock(isClosing)) {
+      long offset = outputStream.getPos();
+      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType(), false));
+      hbw.writeHeaderAndData(outputStream);
+      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
+          hbw.getUncompressedSizeWithoutHeader());
+      LOG.info("Wrote an inline index block at " + offset + ", size " +
+          hbw.getOnDiskSizeWithHeader());
+    }
+  }
+
+  private static final long getDummyFileOffset(int i) {
+    return i * 185 + 379;
+  }
+
+  private static final int getDummyOnDiskSize(int i) {
+    return i * i * 37 + i * 19 + 13;
+  }
+
+  @Test
+  public void testSecondaryIndexBinarySearch() throws IOException {
+    int numTotalKeys = 99;
+    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.
+
+    // We only add odd-index keys into the array that we will binary-search.
+    int numSearchedKeys = (numTotalKeys - 1) / 2;
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+
+    dos.writeInt(numSearchedKeys);
+    int curAllEntriesSize = 0;
+    int numEntriesAdded = 0;
+
+    // Only odd-index elements of this array are used to keep the secondary
+    // index entries of the corresponding keys.
+    int secondaryIndexEntries[] = new int[numTotalKeys];
+
+    for (int i = 0; i < numTotalKeys; ++i) {
+      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 2);
+      keys.add(k);
+      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
+      StringBuilder padding = new StringBuilder();
+      while (msgPrefix.length() + padding.length() < 70)
+        padding.append(' ');
+      msgPrefix += padding;
+      if (i % 2 == 1) {
+        dos.writeInt(curAllEntriesSize);
+        secondaryIndexEntries[i] = curAllEntriesSize;
+        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
+            ", offset " + curAllEntriesSize);
+        curAllEntriesSize += k.length
+            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
+        ++numEntriesAdded;
+      } else {
+        secondaryIndexEntries[i] = -1;
+        LOG.info(msgPrefix + "not in the searched array");
+      }
+    }
+
+    // Make sure the keys are increasing.
+    for (int i = 0; i < keys.size() - 1; ++i)
+      assertTrue(Bytes.BYTES_RAWCOMPARATOR.compare(keys.get(i),
+          keys.get(i + 1)) < 0);
+
+    dos.writeInt(curAllEntriesSize);
+    assertEquals(numSearchedKeys, numEntriesAdded);
+    int secondaryIndexOffset = dos.size();
+    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
+        secondaryIndexOffset);
+
+    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
+      assertEquals(dos.size(),
+          secondaryIndexOffset + secondaryIndexEntries[i]);
+      long dummyFileOffset = getDummyFileOffset(i);
+      int dummyOnDiskSize = getDummyOnDiskSize(i);
+      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
+          dummyOnDiskSize + " at offset " + dos.size());
+      dos.writeLong(dummyFileOffset);
+      dos.writeInt(dummyOnDiskSize);
+      LOG.debug("Stored key " + ((i - 1) / 2) +" at offset " + dos.size());
+      dos.write(keys.get(i));
+    }
+
+    dos.writeInt(curAllEntriesSize);
+
+    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
+    for (int i = 0; i < numTotalKeys; ++i) {
+      byte[] searchKey = keys.get(i);
+      byte[] arrayHoldingKey = new byte[searchKey.length +
+                                        searchKey.length / 2];
+
+      // To make things a bit more interesting, store the key we are looking
+      // for at a non-zero offset in a new array.
+      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
+            searchKey.length);
+
+      int searchResult = BlockIndexReader.binarySearchNonRootIndex(
+          arrayHoldingKey, searchKey.length / 2, searchKey.length, nonRootIndex,
+          Bytes.BYTES_RAWCOMPARATOR);
+      String lookupFailureMsg = "Failed to look up key #" + i + " ("
+          + Bytes.toStringBinary(searchKey) + ")";
+
+      int expectedResult;
+      int referenceItem;
+
+      if (i % 2 == 1) {
+        // This key is in the array we search as the element (i - 1) / 2. Make
+        // sure we find it.
+        expectedResult = (i - 1) / 2;
+        referenceItem = i;
+      } else {
+        // This key is not in the array but between two elements on the array,
+        // in the beginning, or in the end. The result should be the previous
+        // key in the searched array, or -1 for i = 0.
+        expectedResult = i / 2 - 1;
+        referenceItem = i - 1;
+      }
+
+      assertEquals(lookupFailureMsg, expectedResult, searchResult);
+
+      // Now test that we can get the offset and the on-disk size using a
+      // higher-level API function.
+      boolean locateBlockResult =
+        BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
+            searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR);
+
+      if (i == 0) {
+        assertFalse(locateBlockResult);
+      } else {
+        assertTrue(locateBlockResult);
+        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
+        assertEquals(errorMsg, getDummyFileOffset(referenceItem),
+            nonRootIndex.getLong());
+        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
+            nonRootIndex.getInt());
+      }
+    }
+
+  }
+
+  @Test
+  public void testBlockIndexChunk() throws IOException {
+    BlockIndexChunk c = new BlockIndexChunk();
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    int N = 1000;
+    int[] numSubEntriesAt = new int[N];
+    int numSubEntries = 0;
+    for (int i = 0; i < N; ++i) {
+      baos.reset();
+      DataOutputStream dos = new DataOutputStream(baos);
+      c.writeNonRoot(dos);
+      assertEquals(c.getNonRootSize(), dos.size());
+
+      baos.reset();
+      dos = new DataOutputStream(baos);
+      c.writeRoot(dos);
+      assertEquals(c.getRootSize(), dos.size());
+
+      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
+      numSubEntries += rand.nextInt(5) + 1;
+      // Record the cumulative count so the sub-entry lookup check below
+      // actually iterates; otherwise the array stays all zeros.
+      numSubEntriesAt[i] = numSubEntries;
+      keys.add(k);
+      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
+    }
+
+    // Test the ability to look up the entry that contains a particular
+    // deeper-level index block's entry ("sub-entry"), assuming a global
+    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
+    for (int i = 0; i < N; ++i) {
+      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
+           j < numSubEntriesAt[i];
+           ++j) {
+        assertEquals(i, c.getEntryBySubEntry(j));
+      }
+    }
+  }
+
+  /** Checks if the HeapSize calculator is within reason */
+  @Test
+  public void testHeapSizeForBlockIndex() throws IOException {
+    Class<HFileBlockIndex.BlockIndexReader> cl =
+        HFileBlockIndex.BlockIndexReader.class;
+    long expected = ClassSize.estimateBase(cl, false);
+
+    HFileBlockIndex.BlockIndexReader bi =
+        new HFileBlockIndex.BlockIndexReader(Bytes.BYTES_RAWCOMPARATOR, 1);
+    long actual = bi.heapSize();
+
+    // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
+    // int [] blockDataSizes) are all null they are not going to show up in the
+    // HeapSize calculation, so need to remove those array costs from expected.
+    expected -= ClassSize.align(3 * ClassSize.ARRAY);
+
+    if (expected != actual) {
+      ClassSize.estimateBase(cl, true);
+      assertEquals(expected, actual);
+    }
+  }
+
+  /**
+   * Tests the block index through the HFile writer/reader APIs. Exercises
+   * setting the index block size through configuration, intermediate-level
+   * index blocks, and caching index blocks on write.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testHFileWriterAndReader() throws IOException {
+    Path hfilePath = new Path(HBaseTestingUtility.getTestDir(),
+        "hfile_for_block_index");
+    BlockCache blockCache = StoreFile.getBlockCache(conf);
+
+    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
+      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
+      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
+      LOG.info("Index block size: " + indexBlockSize + ", compression: "
+          + compr);
+      // Evict all blocks that were cached-on-write by the previous invocation.
+      blockCache.evictBlocksByPrefix(hfilePath.getName()
+          + HFile.CACHE_KEY_SEPARATOR);
+
+      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
+      Set<String> keyStrSet = new HashSet<String>();
+      byte[][] keys = new byte[NUM_KV][];
+      byte[][] values = new byte[NUM_KV][];
+
+      // Write the HFile
+      {
+        HFile.Writer writer = HFile.getWriterFactory(conf).createWriter(fs,
+            hfilePath, SMALL_BLOCK_SIZE, compr, KeyValue.KEY_COMPARATOR);
+        Random rand = new Random(19231737);
+
+        for (int i = 0; i < NUM_KV; ++i) {
+          byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
+
+          // Key will be interpreted by KeyValue.KEY_COMPARATOR
+          byte[] k = KeyValue.createFirstOnRow(row, 0, row.length, row, 0, 0,
+              row, 0, 0).getKey();
+
+          byte[] v = TestHFileWriterV2.randomValue(rand);
+          writer.append(k, v);
+          keys[i] = k;
+          values[i] = v;
+          keyStrSet.add(Bytes.toStringBinary(k));
+
+          if (i > 0) {
+            assertTrue(KeyValue.KEY_COMPARATOR.compare(keys[i - 1],
+                keys[i]) < 0);
+          }
+        }
+
+        writer.close();
+      }
+
+      // Read the HFile
+      HFile.Reader reader = HFile.createReader(fs, hfilePath, blockCache,
+          false, true);
+      assertEquals(expectedNumLevels,
+          reader.getTrailer().getNumDataIndexLevels());
+
+      assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
+      assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
+      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));
+
+      for (boolean pread : new boolean[] { false, true }) {
+        HFileScanner scanner = reader.getScanner(true, pread);
+        for (int i = 0; i < NUM_KV; ++i) {
+          checkSeekTo(keys, scanner, i);
+          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
+              scanner.getValue());
+        }
+        assertTrue(scanner.seekTo());
+        for (int i = NUM_KV - 1; i >= 0; --i) {
+          checkSeekTo(keys, scanner, i);
+          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
+              scanner.getValue());
+        }
+      }
+
+      // Manually compute the mid-key and validate it.
+      HFileReaderV2 reader2 = (HFileReaderV2) reader;
+      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();
+
+      HFileBlock.BlockIterator iter = fsReader.blockRange(0,
+          reader.getTrailer().getLoadOnOpenDataOffset());
+      HFileBlock block;
+      List<byte[]> blockKeys = new ArrayList<byte[]>();
+      while ((block = iter.nextBlock()) != null) {
+        if (block.getBlockType() != BlockType.LEAF_INDEX)
+          return;
+        ByteBuffer b = block.getBufferReadOnly();
+        int n = b.getInt();
+        // One int for the number of items, and n + 1 for the secondary index.
+        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);
+
+        // Get all the keys from the leaf index block.
+        for (int i = 0; i < n; ++i) {
+          int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
+          int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
+          int keyLen = nextKeyRelOffset - keyRelOffset;
+          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
+              HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
+          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
+              + keyLen);
+          String blockKeyStr = Bytes.toString(blockKey);
+          blockKeys.add(blockKey);
+
+          // If the first key of the block is not among the keys written, we
+          // are not parsing the non-root index block format correctly.
+          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
+              keyStrSet.contains(blockKeyStr));
+        }
+      }
+
+      // Validate the mid-key.
+      assertEquals(
+          Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
+          Bytes.toStringBinary(reader.midkey()));
+
+      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
+          reader.getTrailer().getUncompressedDataIndexSize());
+
+      reader.close();
+    }
+  }
+
+  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
+      throws IOException {
+    assertEquals("Failed to seek to key #" + i + " ("
+        + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(keys[i]));
+  }
+
+  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
+      ByteBuffer buf) {
+    assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
+        + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
+        arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
+  }
+
+  /** Check a key/value pair after it was read by the reader */
+  private void checkKeyValue(String msgPrefix, byte[] expectedKey,
+      byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
+    if (!msgPrefix.isEmpty())
+      msgPrefix += ". ";
+
+    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
+    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
+        valueRead);
+  }
+
+}
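
testSecondaryIndexBinarySearch above hand-builds a non-root index block: an entry count, a secondary index of cumulative entry offsets (one per entry plus the total), and then the entries themselves, each consisting of a block offset (long), an on-disk size (int), and the key bytes. Below is a self-contained sketch of that layout and of reading a key back with the same ByteBuffer arithmetic; the class name, keys, offsets, and sizes are made up, and the 12-byte per-entry overhead matches SECONDARY_INDEX_ENTRY_OVERHEAD only by construction here:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class NonRootIndexSketch {
  // Per-entry overhead: one long (block offset) + one int (on-disk size).
  static final int ENTRY_OVERHEAD = 8 + 4;

  public static void main(String[] args) throws IOException {
    byte[][] keys = {
        "apple".getBytes(StandardCharsets.UTF_8),
        "banana".getBytes(StandardCharsets.UTF_8),
        "cherry".getBytes(StandardCharsets.UTF_8) };
    long[] offsets = { 0L, 4096L, 8192L };  // made-up block offsets
    int[] sizes = { 4096, 4096, 2048 };     // made-up on-disk sizes

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    // 1. Entry count.
    dos.writeInt(keys.length);

    // 2. Secondary index: cumulative entry offsets, plus the total size.
    int cumulative = 0;
    for (byte[] k : keys) {
      dos.writeInt(cumulative);
      cumulative += ENTRY_OVERHEAD + k.length;
    }
    dos.writeInt(cumulative);

    // 3. The entries themselves: (block offset, on-disk size, key bytes).
    for (int i = 0; i < keys.length; ++i) {
      dos.writeLong(offsets[i]);
      dos.writeInt(sizes[i]);
      dos.write(keys[i]);
    }

    // Read key #1 back using the same offset arithmetic as the test.
    ByteBuffer b = ByteBuffer.wrap(baos.toByteArray());
    int n = b.getInt(0);
    int entriesOffset = 4 * (n + 2);  // count + (n + 1) secondary entries
    int i = 1;
    int keyRelOffset = b.getInt(4 * (i + 1));
    int nextKeyRelOffset = b.getInt(4 * (i + 2));
    int keyLen = nextKeyRelOffset - keyRelOffset - ENTRY_OVERHEAD;
    int keyStart = entriesOffset + keyRelOffset + ENTRY_OVERHEAD;
    byte[] key = Arrays.copyOfRange(b.array(), keyStart, keyStart + keyLen);
    System.out.println(new String(key, StandardCharsets.UTF_8));  // banana
  }
}

The secondary index is what lets binarySearchNonRootIndex jump directly to any entry without scanning past variable-length keys.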

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java Wed Aug  3 20:30:27 2011
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class TestHFileReaderV1 {
+
+  private static final HBaseTestingUtility TEST_UTIL =
+    new HBaseTestingUtility();
+
+  private Configuration conf;
+  private FileSystem fs;
+
+  private static final int N = 1000;
+
+  @Before
+  public void setUp() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
+    fs = FileSystem.get(conf);
+  }
+
+  @Test
+  public void testReadingExistingVersion1HFile() throws IOException {
+    URL url = TestHFileReaderV1.class.getResource(
+        "8e8ab58dcf39412da19833fcd8f687ac");
+    Path existingHFilePath = new Path(url.getPath());
+    HFile.Reader reader =
+      HFile.createReader(fs, existingHFilePath, null, false, false);
+    reader.loadFileInfo();
+    FixedFileTrailer trailer = reader.getTrailer();
+
+    assertEquals(N, reader.getEntries());
+    assertEquals(N, trailer.getEntryCount());
+    assertEquals(1, trailer.getVersion());
+    assertEquals(Compression.Algorithm.GZ, trailer.getCompressionCodec());
+
+    for (boolean pread : new boolean[] { false, true }) {
+      int totalDataSize = 0;
+      int n = 0;
+
+      HFileScanner scanner = reader.getScanner(false, pread);
+      assertTrue(scanner.seekTo());
+      do {
+        totalDataSize += scanner.getKey().limit() + scanner.getValue().limit()
+            + Bytes.SIZEOF_INT * 2;
+        ++n;
+      } while (scanner.next());
+
+      // Add magic record sizes, one per data block.
+      totalDataSize += 8 * trailer.getDataIndexCount();
+
+      assertEquals(N, n);
+      assertEquals(trailer.getTotalUncompressedBytes(), totalDataSize);
+    }
+    reader.close();
+  }
+
+}
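
The size accounting in testReadingExistingVersion1HFile above amounts to: each cell contributes its key and value bytes plus two 4-byte length prefixes, and each version-1 data block adds one 8-byte magic record. A standalone sketch of that arithmetic with made-up lengths (the class and helper names are illustrative only):

public class V1SizeAccountingSketch {
  /** Uncompressed size contributed by one key/value pair in a v1 data block:
   *  4-byte key length + 4-byte value length + key bytes + value bytes. */
  static long kvSize(int keyLen, int valueLen) {
    return 4 + 4 + keyLen + valueLen;
  }

  /** Expected total uncompressed data size: all key/value pairs plus one
   *  8-byte magic record per data block. */
  static long totalUncompressedSize(int[][] kvLengths, int numDataBlocks) {
    long total = 0;
    for (int[] kv : kvLengths) {
      total += kvSize(kv[0], kv[1]);
    }
    return total + 8L * numDataBlocks;
  }

  public static void main(String[] args) {
    // Made-up key/value lengths for three cells spread over two data blocks.
    int[][] kvLengths = { { 12, 100 }, { 12, 250 }, { 15, 90 } };
    System.out.println(totalUncompressedSize(kvLengths, 2));  // prints 519
  }
}

In the test, the per-block term uses trailer.getDataIndexCount() as the number of data blocks, and the sum is compared against trailer.getTotalUncompressedBytes().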

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java Wed Aug  3 20:30:27 2011
@@ -0,0 +1,256 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests writing a version 2 {@link HFile}. This is a low-level test written
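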
+ * during the development of {@link HFileWriterV2}.
+ */
+public class TestHFileWriterV2 {
+
+  private static final Log LOG = LogFactory.getLog(TestHFileWriterV2.class);
+
+  private static final HBaseTestingUtility TEST_UTIL =
+      new HBaseTestingUtility();
+
+  private Configuration conf;
+  private FileSystem fs;
+
+  @Before
+  public void setUp() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
+    fs = FileSystem.get(conf);
+  }
+
+  @Test
+  public void testHFileFormatV2() throws IOException {
+    Path hfilePath = new Path(HBaseTestingUtility.getTestDir(),
+        "testHFileFormatV2");
+
+    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
+    HFileWriterV2 writer = new HFileWriterV2(conf, fs, hfilePath, 4096,
+        COMPRESS_ALGO, KeyValue.KEY_COMPARATOR);
+
+    long totalKeyLength = 0;
+    long totalValueLength = 0;
+
+    Random rand = new Random(9713312); // Just a fixed seed.
+
+    final int ENTRY_COUNT = 10000;
+    List<byte[]> keys = new ArrayList<byte[]>();
+    List<byte[]> values = new ArrayList<byte[]>();
+
+    for (int i = 0; i < ENTRY_COUNT; ++i) {
+      byte[] keyBytes = randomOrderedKey(rand, i);
+
+      // A random-length random value.
+      byte[] valueBytes = randomValue(rand);
+      writer.append(keyBytes, valueBytes);
+
+      totalKeyLength += keyBytes.length;
+      totalValueLength += valueBytes.length;
+
+      keys.add(keyBytes);
+      values.add(valueBytes);
+    }
+
+    // Add in an arbitrary order. They will be sorted lexicographically by
+    // the key.
+    writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
+    writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
+    writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
+
+    writer.close();
+
+    FSDataInputStream fsdis = fs.open(hfilePath);
+
+    // A "manual" version of a new-format HFile reader. This unit test was
+    // written before the V2 reader was fully implemented.
+
+    long fileSize = fs.getFileStatus(hfilePath).getLen();
+    FixedFileTrailer trailer =
+        FixedFileTrailer.readFromStream(fsdis, fileSize);
+
+    assertEquals(2, trailer.getVersion());
+    assertEquals(ENTRY_COUNT, trailer.getEntryCount());
+
+    HFileBlock.FSReader blockReader =
+        new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
+
+    // Counters for the number of key/value pairs and the number of blocks
+    int entriesRead = 0;
+    int blocksRead = 0;
+
+    // Scan blocks the way the reader would scan them
+    fsdis.seek(0);
+    long curBlockPos = 0;
+    while (curBlockPos <= trailer.getLastDataBlockOffset()) {
+      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
+      assertEquals(BlockType.DATA, block.getBlockType());
+      ByteBuffer buf = block.getBufferWithoutHeader();
+      while (buf.hasRemaining()) {
+        int keyLen = buf.getInt();
+        int valueLen = buf.getInt();
+
+        byte[] key = new byte[keyLen];
+        buf.get(key);
+
+        byte[] value = new byte[valueLen];
+        buf.get(value);
+
+        // A brute-force check to see that all keys and values are correct.
+        assertArrayEquals(keys.get(entriesRead), key);
+        assertArrayEquals(values.get(entriesRead), value);
+
+        ++entriesRead;
+      }
+      ++blocksRead;
+      curBlockPos += block.getOnDiskSizeWithHeader();
+    }
+    LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
+        + blocksRead);
+    assertEquals(ENTRY_COUNT, entriesRead);
+
+    // Meta blocks. We can scan until the load-on-open data offset (which is
+    // the root block index offset in version 2) because we are not testing
+    // intermediate-level index blocks here.
+
+    int metaCounter = 0;
+    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
+      LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
+          trailer.getLoadOnOpenDataOffset());
+      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
+      assertEquals(BlockType.META, block.getBlockType());
+      Text t = new Text();
+      block.readInto(t);
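+      // Meta blocks come back sorted by block name: CAPITAL_OF_FRANCE,
+      // CAPITAL_OF_RUSSIA, CAPITAL_OF_USA.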
+      Text expectedText =
+          (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
+              "Moscow") : new Text("Washington, D.C."));
+      assertEquals(expectedText, t);
+      LOG.info("Read meta block data: " + t);
+      ++metaCounter;
+      curBlockPos += block.getOnDiskSizeWithHeader();
+    }
+
+    fsdis.close();
+  }
+
+  // Static stuff used by various HFile v2 unit tests
+
+  private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
+  private static final int MIN_ROW_OR_QUALIFIER_LENGTH = 64;
+  private static final int MAX_ROW_OR_QUALIFIER_LENGTH = 128;
+
+  /**
+   * Generates a random key that is guaranteed to increase as the given index i
+   * increases. The result consists of a prefix, which is a deterministic
+   * increasing function of i, and a random suffix.
+   *
+   * @param rand
+   *          random number generator to use
+   * @param i
+   *          index that determines the lexicographically increasing key prefix
+   * @return a key that sorts after every key generated for a smaller index i
+   */
+  public static byte[] randomOrderedKey(Random rand, int i) {
+    StringBuilder k = new StringBuilder();
+
+    // The fixed-length lexicographically increasing part of the key.
+    for (int bitIndex = 31; bitIndex >= 0; --bitIndex) {
+      if ((i & (1 << bitIndex)) == 0)
+        k.append("a");
+      else
+        k.append("b");
+    }
+
+    // A random-length (0 to 49 characters) random suffix of the key. Draw the
+    // length once so the loop bound is not re-evaluated on every iteration.
+    int suffixLength = rand.nextInt(50);
+    for (int j = 0; j < suffixLength; ++j)
+      k.append(randomReadableChar(rand));
+
+    byte[] keyBytes = k.toString().getBytes();
+    return keyBytes;
+  }
+
+  public static byte[] randomValue(Random rand) {
+    StringBuilder v = new StringBuilder();
+    int valueLength = 1 + rand.nextInt(2000); // 1 to 2000 printable characters
+    for (int j = 0; j < valueLength; ++j) {
+      v.append((char) (32 + rand.nextInt(95)));
+    }
+
+    byte[] valueBytes = v.toString().getBytes();
+    return valueBytes;
+  }
+
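+  /** Returns a random character from the 63-character set [A-Za-z0-9_]. */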
+  public static final char randomReadableChar(Random rand) {
+    int i = rand.nextInt(26 * 2 + 10 + 1);
+    if (i < 26)
+      return (char) ('A' + i);
+    i -= 26;
+
+    if (i < 26)
+      return (char) ('a' + i);
+    i -= 26;
+
+    if (i < 10)
+      return (char) ('0' + i);
+    i -= 10;
+
+    assert i == 0;
+    return '_';
+  }
+
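+  /**
+   * Generates a random row or qualifier between MIN_ROW_OR_QUALIFIER_LENGTH
+   * and MAX_ROW_OR_QUALIFIER_LENGTH characters long.
+   */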
+  public static byte[] randomRowOrQualifier(Random rand) {
+    StringBuilder field = new StringBuilder();
+    int fieldLen = MIN_ROW_OR_QUALIFIER_LENGTH
+        + rand.nextInt(MAX_ROW_OR_QUALIFIER_LENGTH
+            - MIN_ROW_OR_QUALIFIER_LENGTH + 1);
+    for (int i = 0; i < fieldLen; ++i)
+      field.append(randomReadableChar(rand));
+    return field.toString().getBytes();
+  }
+
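+  /**
+   * Builds a {@link KeyValue} with a random row, the fixed test column
+   * family, a random qualifier, and a random value.
+   */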
+  public static KeyValue randomKeyValue(Random rand) {
+    return new KeyValue(randomRowOrQualifier(rand),
+        COLUMN_FAMILY_NAME.getBytes(), randomRowOrQualifier(rand),
+        randomValue(rand));
+  }
+
+}

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java Wed Aug  3 20:30:27 2011
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2;
+import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
+import org.apache.hadoop.hbase.util.BloomFilterFactory;
+import org.apache.hadoop.hbase.util.ByteBloomFilter;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CompoundBloomFilter;
+import org.apache.hadoop.hbase.util.CompoundBloomFilterBase;
+import org.apache.hadoop.hbase.util.CompoundBloomFilterWriter;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests writing Bloom filter blocks in the same part of the file as data
+ * blocks.
+ */
+public class TestCompoundBloomFilter {
+
+  private static final HBaseTestingUtility TEST_UTIL =
+      new HBaseTestingUtility();
+
+  private static final Log LOG = LogFactory.getLog(
+      TestCompoundBloomFilter.class);
+
+  private static final int NUM_TESTS = 9;
+  private static final BloomType BLOOM_TYPES[] = { BloomType.ROW,
+      BloomType.ROW, BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROW,
+      BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROW };
+
+  private static final int NUM_KV[];
+  static {
+    final int N = 10000; // Only used in initialization.
+    NUM_KV = new int[] { 21870, N, N, N, N, 1000, N, 7500, 7500};
+    assert NUM_KV.length == NUM_TESTS;
+  }
+
+  private static final int BLOCK_SIZES[];
+  static {
+    final int blkSize = 65536;
+    BLOCK_SIZES = new int[] { 512, 1000, blkSize, blkSize, blkSize, 128, 300,
+        blkSize, blkSize };
+    assert BLOCK_SIZES.length == NUM_TESTS;
+  }
+
+  /**
+   * Be careful not to specify too high a Bloom filter block size, otherwise
+   * there will only be one oversized chunk and the observed false positive
+   * rate will be too low.
+   */
+  private static final int BLOOM_BLOCK_SIZES[] = { 1000, 4096, 4096, 4096,
+      8192, 128, 1024, 600, 600 };
+  static { assert BLOOM_BLOCK_SIZES.length == NUM_TESTS; }
+
+  private static final double TARGET_ERROR_RATES[] = { 0.025, 0.01, 0.015,
+      0.01, 0.03, 0.01, 0.01, 0.07, 0.07 };
+  static { assert TARGET_ERROR_RATES.length == NUM_TESTS; }
+
+  /** A false positive rate that is obviously too high. */
+  private static final double TOO_HIGH_ERROR_RATE;
+  static {
+    double m = 0;
+    for (double errorRate : TARGET_ERROR_RATES)
+      m = Math.max(m, errorRate);
+    TOO_HIGH_ERROR_RATE = m + 0.03;
+  }
+
+  private static Configuration conf;
+  private FileSystem fs;
+  private BlockCache blockCache;
+
+  /** A message of the form "in test #<number>:" to include in logging. */
+  private String testIdMsg;
+
+  private static final int GENERATION_SEED = 2319;
+  private static final int EVALUATION_SEED = 135;
+
+  @Before
+  public void setUp() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
+
+    // This test requires the most recent HFile format (i.e. v2).
+    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
+
+    fs = FileSystem.get(conf);
+
+    blockCache = StoreFile.getBlockCache(conf);
+    assertNotNull(blockCache);
+  }
+
+  private List<KeyValue> createSortedKeyValues(Random rand, int n) {
+    List<KeyValue> kvList = new ArrayList<KeyValue>(n);
+    for (int i = 0; i < n; ++i)
+      kvList.add(TestHFileWriterV2.randomKeyValue(rand));
+    Collections.sort(kvList, KeyValue.COMPARATOR);
+    return kvList;
+  }
+
+  @Test
+  public void testCompoundBloomFilter() throws IOException {
+    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
+    for (int t = 0; t < NUM_TESTS; ++t) {
+      conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE,
+          (float) TARGET_ERROR_RATES[t]);
+
+      testIdMsg = "in test #" + t + ":";
+      Random generationRand = new Random(GENERATION_SEED);
+      List<KeyValue> kvs = createSortedKeyValues(generationRand, NUM_KV[t]);
+      BloomType bt = BLOOM_TYPES[t];
+      Path sfPath = writeStoreFile(t, bt, kvs);
+      readStoreFile(t, bt, kvs, sfPath);
+    }
+  }
+
+  /**
+   * Validates the false positive ratio by computing its z-value and comparing
+   * it to the provided threshold.
+   *
+   * @param falsePosRate observed (experimental) false positive rate
+   * @param nTrials the number of calls to
+   *          {@link StoreFile.Reader#shouldSeek(Scan, java.util.SortedSet)}.
+   * @param zValueBoundary z-value boundary, positive for an upper bound and
+   *          negative for a lower bound
+   * @param cbf the compound Bloom filter we are using
+   * @param additionalMsg additional message to include in log output and
+   *          assertion failures
+   */
+  private void validateFalsePosRate(double falsePosRate, int nTrials,
+      double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
+    double p = BloomFilterFactory.getErrorRate(conf);
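+    // Normal approximation to the binomial distribution:
+    // z = (observed - p) / sqrt(p * (1 - p) / n).
+    // For example, p = 0.01, n = 100000, observed = 0.011 gives z ~= 3.2.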
+    double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);
+
+    String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
+        + falsePosRate + ", nTrials=" + nTrials + ")";
+    LOG.info("z-value is " + zValue + assortedStatsStr);
+
+    boolean isUpperBound = zValueBoundary > 0;
+
+    if (isUpperBound && zValue > zValueBoundary ||
+        !isUpperBound && zValue < zValueBoundary) {
+      String errorMsg = "False positive rate z-value " + zValue + " is "
+          + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
+          + assortedStatsStr + ". Per-chunk stats:\n"
+          + cbf.formatTestingStats();
+      fail(errorMsg + additionalMsg);
+    }
+  }
+
+  private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
+      Path sfPath) throws IOException {
+    StoreFile sf = new StoreFile(fs, sfPath, true, conf, bt, false);
+    StoreFile.Reader r = sf.createReader();
+    final boolean pread = true; // does not really matter
+    StoreFileScanner scanner = r.getStoreFileScanner(true, pread);
+
+    {
+      // Test for false negatives (not allowed).
+      int numChecked = 0;
+      for (KeyValue kv : kvs) {
+        byte[] row = kv.getRow();
+        boolean present = isInBloom(scanner, row, kv.getQualifier());
+        assertTrue(testIdMsg + " Bloom filter false negative on row "
+            + Bytes.toStringBinary(row) + " after " + numChecked
+            + " successful checks", present);
+        ++numChecked;
+      }
+    }
+
+    // Test for false positives (some percentage allowed). We test in two modes:
+    // "fake lookup" which ignores the key distribution, and production mode.
+    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
+      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
+      try {
+        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
+            "enabled" : "disabled");
+        CompoundBloomFilter cbf = (CompoundBloomFilter) r.getBloomFilter();
+        cbf.enableTestingStats();
+        int numFalsePos = 0;
+        Random rand = new Random(EVALUATION_SEED);
+        int nTrials = NUM_KV[t] * 10;
+        for (int i = 0; i < nTrials; ++i) {
+          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
+          if (isInBloom(scanner, query, bt, rand)) {
+            numFalsePos += 1;
+          }
+        }
+        double falsePosRate = numFalsePos * 1.0 / nTrials;
+        LOG.debug(String.format(testIdMsg
+            + " False positives: %d out of %d (%f)",
+            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);
+
+        // Check for obvious Bloom filter crashes.
+        assertTrue("False positive is too high: " + falsePosRate + " (greater "
+            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
+            falsePosRate < TOO_HIGH_ERROR_RATE);
+
+        // Now a more precise check to see if the false positive rate is not
+        // too high. The reason we use a relaxed restriction for the real-world
+        // case as opposed to the "fake lookup" case is that our hash functions
+        // are not completely independent.
+
+        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
+        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
+            fakeLookupModeStr);
+
+        // For checking the lower bound we need to eliminate the last chunk,
+        // because it is frequently smaller and the false positive rate in it
+        // is too low. This does not help if there is only one under-sized
+        // chunk, though.
+        int nChunks = cbf.getNumChunks();
+        if (nChunks > 1) {
+          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
+          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
+          falsePosRate = numFalsePos * 1.0 / nTrials;
+          LOG.info(testIdMsg + " False positive rate without last chunk is " +
+              falsePosRate + fakeLookupModeStr);
+        }
+
+        validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
+            fakeLookupModeStr);
+      } finally {
+        ByteBloomFilter.setFakeLookupMode(false);
+      }
+    }
+
+    r.close();
+  }
+
+  private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
+      Random rand) {
+    return isInBloom(scanner, row,
+        TestHFileWriterV2.randomRowOrQualifier(rand));
+  }
+
+  private boolean isInBloom(StoreFileScanner scanner, byte[] row,
+      byte[] qualifier) {
+    Scan scan = new Scan(row, row);
+    TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+    columns.add(qualifier);
+    return scanner.shouldSeek(scan, columns);
+  }
+
+  private Path writeStoreFile(int t, BloomType bt, List<KeyValue> kvs)
+      throws IOException {
+    conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
+        BLOOM_BLOCK_SIZES[t]);
+    conf.setBoolean(HFile.CACHE_BLOCKS_ON_WRITE_KEY, true);
+
+    StoreFile.Writer w = StoreFile.createWriter(fs,
+        HBaseTestingUtility.getTestDir(), BLOCK_SIZES[t], null, null, conf,
+        bt, 0);
+
+    assertTrue(w.hasBloom());
+    assertTrue(w.getBloomWriter() instanceof CompoundBloomFilterWriter);
+    CompoundBloomFilterWriter cbbf =
+        (CompoundBloomFilterWriter) w.getBloomWriter();
+
+    int keyCount = 0;
+    KeyValue prev = null;
+    LOG.debug("Total keys/values to insert: " + kvs.size());
+    for (KeyValue kv : kvs) {
+      w.append(kv);
+
+      // Validate the key count in the Bloom filter.
+      boolean newKey = true;
+      if (prev != null) {
+        newKey = !(bt == BloomType.ROW ? KeyValue.COMPARATOR.matchingRows(kv,
+            prev) : KeyValue.COMPARATOR.matchingRowColumn(kv, prev));
+      }
+      if (newKey)
+        ++keyCount;
+      assertEquals(keyCount, cbbf.getKeyCount());
+
+      prev = kv;
+    }
+    w.close();
+
+    return w.getPath();
+  }
+
+  @Test
+  public void testCompoundBloomSizing() {
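+    // Round-trip check: the ideal key count for a Bloom chunk of a given bit
+    // size should map back to approximately the same bit size.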
+    int bloomBlockByteSize = 4096;
+    int bloomBlockBitSize = bloomBlockByteSize * 8;
+    double targetErrorRate = 0.01;
+    long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
+        targetErrorRate);
+
+    long bloomSize1 = bloomBlockByteSize * 8;
+    long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
+        targetErrorRate);
+
+    double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
+    assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
+  }
+
+  @Test
+  public void testCreateKey() {
+    CompoundBloomFilterBase cbfb = new CompoundBloomFilterBase();
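+    // A row-only Bloom key and a row+column Bloom key for the same row must
+    // agree on the row and the timestamp; the row-only key carries an empty
+    // qualifier.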
+    byte[] row = "myRow".getBytes();
+    byte[] qualifier = "myQualifier".getBytes();
+    byte[] rowKey = cbfb.createBloomKey(row, 0, row.length,
+        row, 0, 0);
+    byte[] rowColKey = cbfb.createBloomKey(row, 0, row.length,
+        qualifier, 0, qualifier.length);
+    KeyValue rowKV = KeyValue.createKeyValueFromKey(rowKey);
+    KeyValue rowColKV = KeyValue.createKeyValueFromKey(rowColKey);
+    assertEquals(rowKV.getTimestamp(), rowColKV.getTimestamp());
+    assertEquals(Bytes.toStringBinary(rowKV.getRow()),
+        Bytes.toStringBinary(rowColKV.getRow()));
+    assertEquals(0, rowKV.getQualifier().length);
+  }
+
+}

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java Wed Aug  3 20:30:27 2011
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+
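+/**
+ * Exercises {@link IdLock} with many concurrent clients locking a small pool
+ * of ids.
+ */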
+public class TestIdLock {
+
+  private static final Log LOG = LogFactory.getLog(TestIdLock.class);
+
+  private static final int NUM_IDS = 16;
+  private static final int NUM_THREADS = 128;
+  private static final int NUM_SECONDS = 20;
+
+  private IdLock idLock = new IdLock();
+
+  private Map<Long, String> idOwner = new ConcurrentHashMap<Long, String>();
+
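+  /**
+   * Repeatedly locks random ids and uses the shared ownership map to check
+   * mutual exclusion.
+   */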
+  private class IdLockTestThread implements Callable<Boolean> {
+
+    private String clientId;
+
+    public IdLockTestThread(String clientId) {
+      this.clientId = clientId;
+    }
+
+    @Override
+    public Boolean call() throws Exception {
+      Thread.currentThread().setName(clientId);
+      Random rand = new Random();
+      long endTime = System.currentTimeMillis() + NUM_SECONDS * 1000;
+      while (System.currentTimeMillis() < endTime) {
+        long id = rand.nextInt(NUM_IDS);
+
+        LOG.info(clientId + " is waiting for id " + id);
+        IdLock.Entry lockEntry = idLock.getLockEntry(id);
+        try {
+          int sleepMs = 1 + rand.nextInt(4);
+          String owner = idOwner.get(id);
+          if (owner != null) {
+            LOG.error("Id " + id + " already taken by " + owner + ", "
+                + clientId + " failed");
+            return false;
+          }
+
+          idOwner.put(id, clientId);
+          LOG.info(clientId + " took id " + id + ", sleeping for " +
+              sleepMs + "ms");
+          Thread.sleep(sleepMs);
+          LOG.info(clientId + " is releasing id " + id);
+          idOwner.remove(id);
+
+        } finally {
+          idLock.releaseLockEntry(lockEntry);
+        }
+      }
+      return true;
+    }
+
+  }
+
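+  /**
+   * Runs many concurrent clients for a fixed period, fails if any two ever
+   * hold the same id at once, and verifies the lock map is empty afterwards.
+   */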
+  @Test
+  public void testMultipleClients() throws Exception {
+    ExecutorService exec = Executors.newFixedThreadPool(NUM_THREADS);
+    try {
+      ExecutorCompletionService<Boolean> ecs =
+          new ExecutorCompletionService<Boolean>(exec);
+      for (int i = 0; i < NUM_THREADS; ++i)
+        ecs.submit(new IdLockTestThread("client_" + i));
+      for (int i = 0; i < NUM_THREADS; ++i) {
+        Future<Boolean> result = ecs.take();
+        assertTrue(result.get());
+      }
+      idLock.assertMapEmpty();
+    } finally {
+      exec.shutdown();
+    }
+  }
+
+}