Posted to commits@hbase.apache.org by te...@apache.org on 2011/08/03 22:30:29 UTC
svn commit: r1153647 - in /hbase/trunk/src/test/java/org/apache/hadoop/hbase: io/hfile/ regionserver/ util/
Author: tedyu
Date: Wed Aug 3 20:30:27 2011
New Revision: 1153647
URL: http://svn.apache.org/viewvc?rev=1153647&view=rev
Log:
HBASE-3857 New test classes.
Added:
hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java Wed Aug 3 20:30:27 2011
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import static org.junit.Assert.*;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+
+@RunWith(Parameterized.class)
+public class TestFixedFileTrailer {
+
+ private static final Log LOG = LogFactory.getLog(TestFixedFileTrailer.class);
+
+ /** The number of fields used by each version. Indexed by version minus one. */
+ private static final int[] NUM_FIELDS_BY_VERSION = new int[] { 8, 13 };
+
+ private HBaseTestingUtility util = new HBaseTestingUtility();
+ private FileSystem fs;
+ private ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ private int version;
+
+ static {
+ assert NUM_FIELDS_BY_VERSION.length == HFile.MAX_FORMAT_VERSION
+ - HFile.MIN_FORMAT_VERSION + 1;
+ }
+
+ public TestFixedFileTrailer(int version) {
+ this.version = version;
+ }
+
+ @Parameters
+ public static Collection<Object[]> getParameters() {
+ List<Object[]> versionsToTest = new ArrayList<Object[]>();
+ for (int v = HFile.MIN_FORMAT_VERSION; v <= HFile.MAX_FORMAT_VERSION; ++v)
+ versionsToTest.add(new Integer[] { v } );
+ return versionsToTest;
+ }
+
+ @Before
+ public void setUp() throws IOException {
+ fs = FileSystem.get(util.getConfiguration());
+ }
+
+ @Test
+ public void testTrailer() throws IOException {
+ FixedFileTrailer t = new FixedFileTrailer(version);
+ t.setDataIndexCount(3);
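+ // Use an entry count just above Integer.MAX_VALUE; checkLoadedTrailer
+ // below expects the version 1 trailer to clamp it to Integer.MAX_VALUE.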
+ t.setEntryCount(((long) Integer.MAX_VALUE) + 1);
+
+ if (version == 1) {
+ t.setFileInfoOffset(876);
+ }
+
+ if (version == 2) {
+ t.setLastDataBlockOffset(291);
+ t.setNumDataIndexLevels(3);
+ t.setComparatorClass(KeyValue.KEY_COMPARATOR.getClass());
+ t.setFirstDataBlockOffset(9081723123L); // Completely unrealistic.
+ t.setUncompressedDataIndexSize(827398717L); // Something random.
+ }
+
+ t.setLoadOnOpenOffset(128);
+ t.setMetaIndexCount(7);
+
+ t.setTotalUncompressedBytes(129731987);
+
+ {
+ DataOutputStream dos = new DataOutputStream(baos); // Limited scope.
+ t.serialize(dos);
+ dos.flush();
+ assertEquals(dos.size(), FixedFileTrailer.getTrailerSize(version));
+ }
+
+ byte[] bytes = baos.toByteArray();
+ baos.reset();
+
+ assertEquals(bytes.length, FixedFileTrailer.getTrailerSize(version));
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+
+ // Finished writing, trying to read.
+ {
+ DataInputStream dis = new DataInputStream(bais);
+ FixedFileTrailer t2 = new FixedFileTrailer(version);
+ t2.deserialize(dis);
+ assertEquals(-1, bais.read()); // Ensure we have read everything.
+ checkLoadedTrailer(version, t, t2);
+ }
+
+ // Now check what happens if the trailer is corrupted.
+ Path trailerPath = new Path(HBaseTestingUtility.getTestDir(), "trailer_"
+ + version);
+
+ {
+ for (byte invalidVersion : new byte[] { HFile.MIN_FORMAT_VERSION - 1,
+ HFile.MAX_FORMAT_VERSION + 1}) {
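+ // The version is stored at the end of the serialized trailer, so
+ // overwriting the last byte simulates an unsupported format version.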
+ bytes[bytes.length - 1] = invalidVersion;
+ writeTrailer(trailerPath, null, bytes);
+ try {
+ readTrailer(trailerPath);
+ fail("Exception expected");
+ } catch (IOException ex) {
+ // Make it easy to debug this.
+ String msg = ex.getMessage();
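+ // Strip a leading exception-class prefix and any trailing parenthesized
+ // details so only the core message is compared.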
+ String cleanMsg = msg.replaceAll(
+ "^(java(\\.[a-zA-Z]+)+:\\s+)?|\\s+\\(.*\\)\\s*$", "");
+ assertEquals("Actual exception message is \"" + msg + "\".\n" +
+ "Cleaned-up message", // will be followed by " expected: ..."
+ "Invalid HFile version: " + invalidVersion, cleanMsg);
+ LOG.info("Got an expected exception: " + msg);
+ }
+ }
+
+ }
+
+ // Now write the trailer into a file and auto-detect the version.
+ writeTrailer(trailerPath, t, null);
+
+ FixedFileTrailer t4 = readTrailer(trailerPath);
+
+ checkLoadedTrailer(version, t, t4);
+
+ String trailerStr = t.toString();
+ assertEquals("Invalid number of fields in the string representation "
+ + "of the trailer: " + trailerStr, NUM_FIELDS_BY_VERSION[version - 1],
+ trailerStr.split(", ").length);
+ assertEquals(trailerStr, t4.toString());
+ }
+
+ private FixedFileTrailer readTrailer(Path trailerPath) throws IOException {
+ FSDataInputStream fsdis = fs.open(trailerPath);
+ FixedFileTrailer trailerRead = FixedFileTrailer.readFromStream(fsdis,
+ fs.getFileStatus(trailerPath).getLen());
+ fsdis.close();
+ return trailerRead;
+ }
+
+ private void writeTrailer(Path trailerPath, FixedFileTrailer t,
+ byte[] useBytesInstead) throws IOException {
+ assert (t == null) != (useBytesInstead == null); // Expect one non-null.
+
+ FSDataOutputStream fsdos = fs.create(trailerPath);
+ fsdos.write(135); // to make deserializer's job less trivial
+ if (useBytesInstead != null) {
+ fsdos.write(useBytesInstead);
+ } else {
+ t.serialize(fsdos);
+ }
+ fsdos.close();
+ }
+
+ private void checkLoadedTrailer(int version, FixedFileTrailer expected,
+ FixedFileTrailer loaded) throws IOException {
+ assertEquals(version, loaded.getVersion());
+ assertEquals(expected.getDataIndexCount(), loaded.getDataIndexCount());
+
+ assertEquals(Math.min(expected.getEntryCount(),
+ version == 1 ? Integer.MAX_VALUE : Long.MAX_VALUE),
+ loaded.getEntryCount());
+
+ if (version == 1) {
+ assertEquals(expected.getFileInfoOffset(), loaded.getFileInfoOffset());
+ }
+
+ if (version == 2) {
+ assertEquals(expected.getLastDataBlockOffset(),
+ loaded.getLastDataBlockOffset());
+ assertEquals(expected.getNumDataIndexLevels(),
+ loaded.getNumDataIndexLevels());
+ assertEquals(expected.createComparator().getClass().getName(),
+ loaded.createComparator().getClass().getName());
+ assertEquals(expected.getFirstDataBlockOffset(),
+ loaded.getFirstDataBlockOffset());
+ assertTrue(
+ expected.createComparator() instanceof KeyValue.KeyComparator);
+ assertEquals(expected.getUncompressedDataIndexSize(),
+ loaded.getUncompressedDataIndexSize());
+ }
+
+ assertEquals(expected.getLoadOnOpenDataOffset(),
+ loaded.getLoadOnOpenDataOffset());
+ assertEquals(expected.getMetaIndexCount(), loaded.getMetaIndexCount());
+
+ assertEquals(expected.getTotalUncompressedBytes(),
+ loaded.getTotalUncompressedBytes());
+ }
+
+}
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java Wed Aug 3 20:30:27 2011
@@ -0,0 +1,499 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executor;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.io.compress.Compressor;
+
+import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.*;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestHFileBlock {
+
+ private static final boolean[] BOOLEAN_VALUES = new boolean[] { false, true };
+
+ private static final Log LOG = LogFactory.getLog(TestHFileBlock.class);
+
+ static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = {
+ NONE, GZ };
+
+ // In case we need to temporarily switch some test cases to just test gzip.
+ static final Compression.Algorithm[] GZIP_ONLY = { GZ };
+
+ private static final int NUM_TEST_BLOCKS = 1000;
+
+ private static final int NUM_READER_THREADS = 26;
+
+ private static final HBaseTestingUtility TEST_UTIL =
+ new HBaseTestingUtility();
+ private FileSystem fs;
+ private int uncompressedSizeV1;
+
+ @Before
+ public void setUp() throws IOException {
+ fs = FileSystem.get(TEST_UTIL.getConfiguration());
+ TEST_UTIL.initTestDir();
+ }
+
+ public void writeTestBlockContents(DataOutputStream dos) throws IOException {
+ // This compresses really well.
+ for (int i = 0; i < 1000; ++i)
+ dos.writeInt(i / 100);
+ }
+
+ public byte[] createTestV1Block(Compression.Algorithm algo)
+ throws IOException {
+ Compressor compressor = algo.getCompressor();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ OutputStream os = algo.createCompressionStream(baos, compressor, 0);
+ DataOutputStream dos = new DataOutputStream(os);
+ BlockType.META.write(dos); // Let's make this a meta block.
+ writeTestBlockContents(dos);
+ uncompressedSizeV1 = dos.size();
+ dos.flush();
+ algo.returnCompressor(compressor);
+ return baos.toByteArray();
+ }
+
+ private byte[] createTestV2Block(Compression.Algorithm algo)
+ throws IOException {
+ final BlockType blockType = BlockType.DATA;
+ HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
+ DataOutputStream dos = hbw.startWriting(blockType, false);
+ writeTestBlockContents(dos);
+ byte[] headerAndData = hbw.getHeaderAndData();
+ assertEquals(1000 * 4, hbw.getUncompressedSizeWithoutHeader());
+ hbw.releaseCompressor();
+ return headerAndData;
+ }
+
+ public String createTestBlockStr(Compression.Algorithm algo)
+ throws IOException {
+ byte[] testV2Block = createTestV2Block(algo);
+ int osOffset = HFileBlock.HEADER_SIZE + 9;
+ if (osOffset < testV2Block.length) {
+ // Force-set the "OS" field of the gzip header to 3 (Unix) to avoid
+ // variations across operating systems.
+ // See http://www.gzip.org/zlib/rfc-gzip.html for gzip format.
+ testV2Block[osOffset] = 3;
+ }
+ return Bytes.toStringBinary(testV2Block);
+ }
+
+ @Test
+ public void testNoCompression() throws IOException {
+ assertEquals(4000 + HFileBlock.HEADER_SIZE, createTestV2Block(NONE).length);
+ }
+
+ @Test
+ public void testGzipCompression() throws IOException {
+ assertEquals(
+ "DATABLK*\\x00\\x00\\x00:\\x00\\x00\\x0F\\xA0\\xFF\\xFF\\xFF\\xFF"
+ + "\\xFF\\xFF\\xFF\\xFF"
+ // gzip-compressed block: http://www.gzip.org/zlib/rfc-gzip.html
+ + "\\x1F\\x8B" // gzip magic signature
+ + "\\x08" // Compression method: 8 = "deflate"
+ + "\\x00" // Flags
+ + "\\x00\\x00\\x00\\x00" // mtime
+ + "\\x00" // XFL (extra flags)
+ // OS (0 = FAT filesystems, 3 = Unix). However, this field
+ // sometimes gets set to 0 on Linux and Mac, so we reset it to 3.
+ + "\\x03"
+ + "\\xED\\xC3\\xC1\\x11\\x00 \\x08\\xC00DD\\xDD\\x7Fa"
+ + "\\xD6\\xE8\\xA3\\xB9K\\x84`\\x96Q\\xD3\\xA8\\xDB\\xA8e\\xD4c"
+ + "\\xD46\\xEA5\\xEA3\\xEA7\\xE7\\x00LI\\s\\xA0\\x0F\\x00\\x00",
+ createTestBlockStr(GZ));
+ }
+
+ @Test
+ public void testReaderV1() throws IOException {
+ for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
+ for (boolean pread : new boolean[] { false, true }) {
+ byte[] block = createTestV1Block(algo);
+ Path path = new Path(HBaseTestingUtility.getTestDir(), "blocks_v1_"
+ + algo);
+ LOG.info("Creating temporary file at " + path);
+ FSDataOutputStream os = fs.create(path);
+ int totalSize = 0;
+ int numBlocks = 50;
+ for (int i = 0; i < numBlocks; ++i) {
+ os.write(block);
+ totalSize += block.length;
+ }
+ os.close();
+
+ FSDataInputStream is = fs.open(path);
+ HFileBlock.FSReader hbr = new HFileBlock.FSReaderV1(is, algo,
+ totalSize);
+ HFileBlock b;
+ int numBlocksRead = 0;
+ long pos = 0;
+ while (pos < totalSize) {
+ b = hbr.readBlockData(pos, block.length, uncompressedSizeV1, pread);
+ b.sanityCheck();
+ pos += block.length;
+ numBlocksRead++;
+ }
+ assertEquals(numBlocks, numBlocksRead);
+ is.close();
+ }
+ }
+ }
+
+ @Test
+ public void testReaderV2() throws IOException {
+ for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
+ for (boolean pread : new boolean[] { false, true }) {
+ Path path = new Path(HBaseTestingUtility.getTestDir(), "blocks_v2_"
+ + algo);
+ FSDataOutputStream os = fs.create(path);
+ HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
+ long totalSize = 0;
+ for (int blockId = 0; blockId < 2; ++blockId) {
+ DataOutputStream dos = hbw.startWriting(BlockType.DATA, false);
+ for (int i = 0; i < 1234; ++i)
+ dos.writeInt(i);
+ hbw.writeHeaderAndData(os);
+ totalSize += hbw.getOnDiskSizeWithHeader();
+ }
+ os.close();
+
+ FSDataInputStream is = fs.open(path);
+ HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
+ totalSize);
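+ // Read the first block without supplying the on-disk or uncompressed
+ // sizes (-1 = unknown), so the reader has to discover them itself.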
+ HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
+ is.close();
+
+ b.sanityCheck();
+ assertEquals(4936, b.getUncompressedSizeWithoutHeader());
+ assertEquals(algo == GZ ? 2173 : 4936, b.getOnDiskSizeWithoutHeader());
+ String blockStr = b.toString();
+
+ if (algo == GZ) {
+ is = fs.open(path);
+ hbr = new HFileBlock.FSReaderV2(is, algo, totalSize);
+ b = hbr.readBlockData(0, 2173 + HFileBlock.HEADER_SIZE, -1, pread);
+ assertEquals(blockStr, b.toString());
+ int wrongCompressedSize = 2172;
+ try {
+ b = hbr.readBlockData(0, wrongCompressedSize
+ + HFileBlock.HEADER_SIZE, -1, pread);
+ fail("Exception expected");
+ } catch (IOException ex) {
+ String expectedPrefix = "On-disk size without header provided is "
+ + wrongCompressedSize + ", but block header contains "
+ + b.getOnDiskSizeWithoutHeader() + ".";
+ assertTrue("Invalid exception message: '" + ex.getMessage()
+ + "'.\nMessage is expected to start with: '" + expectedPrefix
+ + "'", ex.getMessage().startsWith(expectedPrefix));
+ }
+ is.close();
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testPreviousOffset() throws IOException {
+ for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
+ for (boolean pread : BOOLEAN_VALUES) {
+ for (boolean cacheOnWrite : BOOLEAN_VALUES) {
+ Random rand = defaultRandom();
+ LOG.info("Compression algorithm: " + algo + ", pread=" + pread);
+ Path path = new Path(HBaseTestingUtility.getTestDir(), "prev_offset");
+ List<Long> expectedOffsets = new ArrayList<Long>();
+ List<Long> expectedPrevOffsets = new ArrayList<Long>();
+ List<BlockType> expectedTypes = new ArrayList<BlockType>();
+ List<ByteBuffer> expectedContents = cacheOnWrite
+ ? new ArrayList<ByteBuffer>() : null;
+ long totalSize = writeBlocks(rand, algo, path, expectedOffsets,
+ expectedPrevOffsets, expectedTypes, expectedContents, true);
+
+ FSDataInputStream is = fs.open(path);
+ HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
+ totalSize);
+ long curOffset = 0;
+ for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
+ if (!pread) {
+ assertEquals(is.getPos(), curOffset + (i == 0 ? 0 :
+ HFileBlock.HEADER_SIZE));
+ }
+
+ assertEquals(expectedOffsets.get(i).longValue(), curOffset);
+
+ LOG.info("Reading block #" + i + " at offset " + curOffset);
+ HFileBlock b = hbr.readBlockData(curOffset, -1, -1, pread);
+ LOG.info("Block #" + i + ": " + b);
+ assertEquals("Invalid block #" + i + "'s type:",
+ expectedTypes.get(i), b.getBlockType());
+ assertEquals("Invalid previous block offset for block " + i
+ + " of " + "type " + b.getBlockType() + ":",
+ (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
+ b.sanityCheck();
+ assertEquals(curOffset, b.getOffset());
+
+ // Now re-load this block knowing the on-disk size. This tests a
+ // different branch in the loader.
+ HFileBlock b2 = hbr.readBlockData(curOffset,
+ b.getOnDiskSizeWithHeader(), -1, pread);
+ b2.sanityCheck();
+
+ assertEquals(b.getBlockType(), b2.getBlockType());
+ assertEquals(b.getOnDiskSizeWithoutHeader(),
+ b2.getOnDiskSizeWithoutHeader());
+ assertEquals(b.getOnDiskSizeWithHeader(),
+ b2.getOnDiskSizeWithHeader());
+ assertEquals(b.getUncompressedSizeWithoutHeader(),
+ b2.getUncompressedSizeWithoutHeader());
+ assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
+ assertEquals(curOffset, b2.getOffset());
+
+ curOffset += b.getOnDiskSizeWithHeader();
+
+ if (cacheOnWrite) {
+ // In the cache-on-write mode we store uncompressed bytes so we
+ // can compare them to what was read by the block reader.
+
+ ByteBuffer bufRead = b.getBufferWithHeader();
+ ByteBuffer bufExpected = expectedContents.get(i);
+ boolean bytesAreCorrect = Bytes.compareTo(bufRead.array(),
+ bufRead.arrayOffset(), bufRead.limit(),
+ bufExpected.array(), bufExpected.arrayOffset(),
+ bufExpected.limit()) == 0;
+ String wrongBytesMsg = "";
+
+ if (!bytesAreCorrect) {
+ // Optimization: only construct an error message in case we
+ // will need it.
+ wrongBytesMsg = "Expected bytes in block #" + i + " (algo="
+ + algo + ", pread=" + pread + "):\n";
+ wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(),
+ bufExpected.arrayOffset(), Math.min(32,
+ bufExpected.limit()))
+ + ", actual:\n"
+ + Bytes.toStringBinary(bufRead.array(),
+ bufRead.arrayOffset(), Math.min(32, bufRead.limit()));
+ }
+
+ assertTrue(wrongBytesMsg, bytesAreCorrect);
+ }
+ }
+
+ assertEquals(curOffset, fs.getFileStatus(path).getLen());
+ is.close();
+ }
+ }
+ }
+ }
+
+ private Random defaultRandom() {
+ return new Random(189237);
+ }
+
+ private class BlockReaderThread implements Callable<Boolean> {
+ private final String clientId;
+ private final HFileBlock.FSReader hbr;
+ private final List<Long> offsets;
+ private final List<BlockType> types;
+ private final long fileSize;
+
+ public BlockReaderThread(String clientId,
+ HFileBlock.FSReader hbr, List<Long> offsets, List<BlockType> types,
+ long fileSize) {
+ this.clientId = clientId;
+ this.offsets = offsets;
+ this.hbr = hbr;
+ this.types = types;
+ this.fileSize = fileSize;
+ }
+
+ @Override
+ public Boolean call() throws Exception {
+ Random rand = new Random(clientId.hashCode());
+ long endTime = System.currentTimeMillis() + 10000;
+ int numBlocksRead = 0;
+ int numPositionalRead = 0;
+ int numWithOnDiskSize = 0;
+ while (System.currentTimeMillis() < endTime) {
+ int blockId = rand.nextInt(NUM_TEST_BLOCKS);
+ long offset = offsets.get(blockId);
+ boolean pread = rand.nextBoolean();
+ boolean withOnDiskSize = rand.nextBoolean();
+ long expectedSize =
+ (blockId == NUM_TEST_BLOCKS - 1 ? fileSize
+ : offsets.get(blockId + 1)) - offset;
+
+ HFileBlock b;
+ try {
+ long onDiskSizeArg = withOnDiskSize ? expectedSize : -1;
+ b = hbr.readBlockData(offset, onDiskSizeArg, -1, pread);
+ } catch (IOException ex) {
+ LOG.error("Error in client " + clientId + " trying to read block at "
+ + offset + ", pread=" + pread + ", withOnDiskSize=" +
+ withOnDiskSize, ex);
+ return false;
+ }
+
+ assertEquals(types.get(blockId), b.getBlockType());
+ assertEquals(expectedSize, b.getOnDiskSizeWithHeader());
+ assertEquals(offset, b.getOffset());
+
+ ++numBlocksRead;
+ if (pread)
+ ++numPositionalRead;
+ if (withOnDiskSize)
+ ++numWithOnDiskSize;
+ }
+ LOG.info("Client " + clientId + " successfully read " + numBlocksRead +
+ " blocks (with pread: " + numPositionalRead + ", with onDiskSize " +
+ "specified: " + numWithOnDiskSize + ")");
+ return true;
+ }
+
+ }
+
+ @Test
+ public void testConcurrentReading() throws Exception {
+ for (Compression.Algorithm compressAlgo : COMPRESSION_ALGORITHMS) {
+ Path path =
+ new Path(HBaseTestingUtility.getTestDir(), "concurrent_reading");
+ Random rand = defaultRandom();
+ List<Long> offsets = new ArrayList<Long>();
+ List<BlockType> types = new ArrayList<BlockType>();
+ writeBlocks(rand, compressAlgo, path, offsets, null, types, null, false);
+ FSDataInputStream is = fs.open(path);
+ long fileSize = fs.getFileStatus(path).getLen();
+ HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, compressAlgo,
+ fileSize);
+
+ Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
+ ExecutorCompletionService<Boolean> ecs =
+ new ExecutorCompletionService<Boolean>(exec);
+
+ for (int i = 0; i < NUM_READER_THREADS; ++i) {
+ ecs.submit(new BlockReaderThread("reader_" + (char) ('A' + i), hbr,
+ offsets, types, fileSize));
+ }
+
+ for (int i = 0; i < NUM_READER_THREADS; ++i) {
+ Future<Boolean> result = ecs.take();
+ assertTrue(result.get());
+ LOG.info(String.valueOf(i + 1)
+ + " reader threads finished successfully (algo=" + compressAlgo
+ + ")");
+ }
+
+ is.close();
+ }
+ }
+
+ private long writeBlocks(Random rand, Compression.Algorithm compressAlgo,
+ Path path, List<Long> expectedOffsets, List<Long> expectedPrevOffsets,
+ List<BlockType> expectedTypes, List<ByteBuffer> expectedContents,
+ boolean detailedLogging) throws IOException {
+ boolean cacheOnWrite = expectedContents != null;
+ FSDataOutputStream os = fs.create(path);
+ HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo);
+ Map<BlockType, Long> prevOffsetByType = new HashMap<BlockType, Long>();
+ long totalSize = 0;
+ for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
+ int blockTypeOrdinal = rand.nextInt(BlockType.values().length);
+ BlockType bt = BlockType.values()[blockTypeOrdinal];
+ DataOutputStream dos = hbw.startWriting(bt, cacheOnWrite);
+ for (int j = 0; j < rand.nextInt(500); ++j) {
+ // This might compress well.
+ dos.writeShort(i + 1);
+ dos.writeInt(j + 1);
+ }
+
+ if (expectedOffsets != null)
+ expectedOffsets.add(os.getPos());
+
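+ // The prev-block offset recorded in each block header is expected to
+ // point to the previous block of the same type, so track offsets per type.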
+ if (expectedPrevOffsets != null) {
+ Long prevOffset = prevOffsetByType.get(bt);
+ expectedPrevOffsets.add(prevOffset != null ? prevOffset : -1);
+ prevOffsetByType.put(bt, os.getPos());
+ }
+
+ expectedTypes.add(bt);
+
+ hbw.writeHeaderAndData(os);
+ totalSize += hbw.getOnDiskSizeWithHeader();
+
+ if (cacheOnWrite)
+ expectedContents.add(hbw.getUncompressedBufferWithHeader());
+
+ if (detailedLogging) {
+ LOG.info("Writing block #" + i + " of type " + bt
+ + ", uncompressed size " + hbw.getUncompressedSizeWithoutHeader()
+ + " at offset " + os.getPos());
+ }
+ }
+ os.close();
+ LOG.info("Created a temporary file at " + path + ", "
+ fs.getFileStatus(path).getLen() + " bytes, compression=" +
+ compressAlgo);
+ return totalSize;
+ }
+
+ @Test
+ public void testBlockHeapSize() {
+ for (int size : new int[] { 100, 256, 12345 }) {
+ byte[] byteArr = new byte[HFileBlock.HEADER_SIZE + size];
+ ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
+ HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
+ true, -1);
+ assertEquals(80, HFileBlock.BYTE_BUFFER_HEAP_SIZE);
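+ // The heap size should account for the HFileBlock object itself, the
+ // wrapping ByteBuffer, and the header plus data bytes, aligned.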
+ long expected = ClassSize.align(ClassSize.estimateBase(HFileBlock.class,
+ true)
+ + ClassSize.estimateBase(buf.getClass(), true)
+ + HFileBlock.HEADER_SIZE + size);
+ assertEquals(expected, block.heapSize());
+ }
+ }
+
+}
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Wed Aug 3 20:30:27 2011
@@ -0,0 +1,602 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
+import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import static org.junit.Assert.*;
+
+@RunWith(Parameterized.class)
+public class TestHFileBlockIndex {
+
+ @Parameters
+ public static Collection<Object[]> compressionAlgorithms() {
+ return HBaseTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
+ }
+
+ public TestHFileBlockIndex(Compression.Algorithm compr) {
+ this.compr = compr;
+ }
+
+ private static final Log LOG = LogFactory.getLog(TestHFileBlockIndex.class);
+
+ private static final int NUM_DATA_BLOCKS = 1000;
+ private static final HBaseTestingUtility TEST_UTIL =
+ new HBaseTestingUtility();
+
+ private static final int SMALL_BLOCK_SIZE = 4096;
+ private static final int NUM_KV = 10000;
+
+ private static FileSystem fs;
+ private Path path;
+ private Random rand;
+ private long rootIndexOffset;
+ private int numRootEntries;
+ private int numLevels;
+ private static final List<byte[]> keys = new ArrayList<byte[]>();
+ private final Compression.Algorithm compr;
+ private byte[] firstKeyInFile;
+ private Configuration conf;
+
+ private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
+ private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
+ private static final int[] UNCOMPRESSED_INDEX_SIZES =
+ { 19187, 21813, 23086 };
+
+ static {
+ assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
+ assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
+ }
+
+ @Before
+ public void setUp() throws IOException {
+ keys.clear();
+ rand = new Random(2389757);
+ firstKeyInFile = null;
+ conf = TEST_UTIL.getConfiguration();
+
+ // This test requires at least HFile format version 2.
+ conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
+
+ fs = FileSystem.get(conf);
+ }
+
+ @Test
+ public void testBlockIndex() throws IOException {
+ path = new Path(HBaseTestingUtility.getTestDir(), "block_index_" + compr);
+ writeWholeIndex();
+ readIndex();
+ }
+
+ /**
+ * A wrapper around a block reader which only caches the results of the last
+ * operation. Not thread-safe.
+ */
+ private static class BlockReaderWrapper implements HFileBlock.BasicReader {
+
+ private HFileBlock.BasicReader realReader;
+ private long prevOffset;
+ private long prevOnDiskSize;
+ private long prevUncompressedSize;
+ private boolean prevPread;
+ private HFileBlock prevBlock;
+
+ public int hitCount = 0;
+ public int missCount = 0;
+
+ public BlockReaderWrapper(HFileBlock.BasicReader realReader) {
+ this.realReader = realReader;
+ }
+
+ @Override
+ public HFileBlock readBlockData(long offset, long onDiskSize,
+ int uncompressedSize, boolean pread) throws IOException {
+ if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
+ uncompressedSize == prevUncompressedSize && pread == prevPread) {
+ hitCount += 1;
+ return prevBlock;
+ }
+
+ missCount += 1;
+ prevBlock = realReader.readBlockData(offset, onDiskSize,
+ uncompressedSize, pread);
+ prevOffset = offset;
+ prevOnDiskSize = onDiskSize;
+ prevUncompressedSize = uncompressedSize;
+ prevPread = pread;
+
+ return prevBlock;
+ }
+ }
+
+ public void readIndex() throws IOException {
+ long fileSize = fs.getFileStatus(path).getLen();
+ LOG.info("Size of " + path + ": " + fileSize);
+
+ FSDataInputStream istream = fs.open(path);
+ HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream,
+ compr, fs.getFileStatus(path).getLen());
+
+ BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
+ HFileBlockIndex.BlockIndexReader indexReader =
+ new HFileBlockIndex.BlockIndexReader(
+ Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);
+
+ indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
+ fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+
+ long prevOffset = -1;
+ int i = 0;
+ int expectedHitCount = 0;
+ int expectedMissCount = 0;
+ LOG.info("Total number of keys: " + keys.size());
+ for (byte[] key : keys) {
+ assertTrue(key != null);
+ assertTrue(indexReader != null);
+ HFileBlock b = indexReader.seekToDataBlock(key, 0, key.length, null);
+ if (Bytes.BYTES_RAWCOMPARATOR.compare(key, firstKeyInFile) < 0) {
+ assertTrue(b == null);
+ ++i;
+ continue;
+ }
+
+ String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);
+
+ assertTrue("seekToDataBlock failed for " + keyStr, b != null);
+
+ if (prevOffset == b.getOffset()) {
+ assertEquals(++expectedHitCount, brw.hitCount);
+ } else {
+ LOG.info("First key in a new block: " + keyStr + ", block offset: "
+ + b.getOffset());
+ assertTrue(b.getOffset() > prevOffset);
+ assertEquals(++expectedMissCount, brw.missCount);
+ prevOffset = b.getOffset();
+ }
+ ++i;
+ }
+
+ istream.close();
+ }
+
+ private void writeWholeIndex() throws IOException {
+ assertEquals(0, keys.size());
+ HFileBlock.Writer hbw = new HFileBlock.Writer(compr);
+ FSDataOutputStream outputStream = fs.create(path);
+ HFileBlockIndex.BlockIndexWriter biw =
+ new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
+
+ for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
+ hbw.startWriting(BlockType.DATA, false).write(
+ String.valueOf(rand.nextInt(1000)).getBytes());
+ long blockOffset = outputStream.getPos();
+ hbw.writeHeaderAndData(outputStream);
+
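+ // Generate 16 keys per data block and record the one at position 8 as
+ // the key for this block's index entry.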
+ byte[] firstKey = null;
+ for (int j = 0; j < 16; ++j) {
+ byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 16 + j);
+ keys.add(k);
+ if (j == 8)
+ firstKey = k;
+ }
+ assertTrue(firstKey != null);
+ if (firstKeyInFile == null)
+ firstKeyInFile = firstKey;
+ biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());
+
+ writeInlineBlocks(hbw, outputStream, biw, false);
+ }
+ writeInlineBlocks(hbw, outputStream, biw, true);
+ rootIndexOffset = biw.writeIndexBlocks(outputStream);
+ outputStream.close();
+
+ numLevels = biw.getNumLevels();
+ numRootEntries = biw.getNumRootEntries();
+
+ LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
+ numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
+ }
+
+ private void writeInlineBlocks(HFileBlock.Writer hbw,
+ FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
+ boolean isClosing) throws IOException {
+ while (biw.shouldWriteBlock(isClosing)) {
+ long offset = outputStream.getPos();
+ biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType(), false));
+ hbw.writeHeaderAndData(outputStream);
+ biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
+ hbw.getUncompressedSizeWithoutHeader());
+ LOG.info("Wrote an inline index block at " + offset + ", size " +
+ hbw.getOnDiskSizeWithHeader());
+ }
+ }
+
+ private static final long getDummyFileOffset(int i) {
+ return i * 185 + 379;
+ }
+
+ private static final int getDummyOnDiskSize(int i) {
+ return i * i * 37 + i * 19 + 13;
+ }
+
+ @Test
+ public void testSecondaryIndexBinarySearch() throws IOException {
+ int numTotalKeys = 99;
+ assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.
+
+ // We only add odd-index keys into the array that we will binary-search.
+ int numSearchedKeys = (numTotalKeys - 1) / 2;
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos);
+
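+ // Hand-craft a non-root index block: the entry count, a secondary index
+ // of entry offsets, and then the entries themselves (file offset,
+ // on-disk size, key).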
+ dos.writeInt(numSearchedKeys);
+ int curAllEntriesSize = 0;
+ int numEntriesAdded = 0;
+
+ // Only odd-index elements of this array are used to keep the secondary
+ // index entries of the corresponding keys.
+ int secondaryIndexEntries[] = new int[numTotalKeys];
+
+ for (int i = 0; i < numTotalKeys; ++i) {
+ byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 2);
+ keys.add(k);
+ String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
+ StringBuilder padding = new StringBuilder();
+ while (msgPrefix.length() + padding.length() < 70)
+ padding.append(' ');
+ msgPrefix += padding;
+ if (i % 2 == 1) {
+ dos.writeInt(curAllEntriesSize);
+ secondaryIndexEntries[i] = curAllEntriesSize;
+ LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
+ ", offset " + curAllEntriesSize);
+ curAllEntriesSize += k.length
+ + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
+ ++numEntriesAdded;
+ } else {
+ secondaryIndexEntries[i] = -1;
+ LOG.info(msgPrefix + "not in the searched array");
+ }
+ }
+
+ // Make sure the keys are increasing.
+ for (int i = 0; i < keys.size() - 1; ++i)
+ assertTrue(Bytes.BYTES_RAWCOMPARATOR.compare(keys.get(i),
+ keys.get(i + 1)) < 0);
+
+ dos.writeInt(curAllEntriesSize);
+ assertEquals(numSearchedKeys, numEntriesAdded);
+ int secondaryIndexOffset = dos.size();
+ assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
+ secondaryIndexOffset);
+
+ for (int i = 1; i <= numTotalKeys - 1; i += 2) {
+ assertEquals(dos.size(),
+ secondaryIndexOffset + secondaryIndexEntries[i]);
+ long dummyFileOffset = getDummyFileOffset(i);
+ int dummyOnDiskSize = getDummyOnDiskSize(i);
+ LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
+ dummyOnDiskSize + " at offset " + dos.size());
+ dos.writeLong(dummyFileOffset);
+ dos.writeInt(dummyOnDiskSize);
+ LOG.debug("Stored key " + ((i - 1) / 2) +" at offset " + dos.size());
+ dos.write(keys.get(i));
+ }
+
+ dos.writeInt(curAllEntriesSize);
+
+ ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
+ for (int i = 0; i < numTotalKeys; ++i) {
+ byte[] searchKey = keys.get(i);
+ byte[] arrayHoldingKey = new byte[searchKey.length +
+ searchKey.length / 2];
+
+ // To make things a bit more interesting, store the key we are looking
+ // for at a non-zero offset in a new array.
+ System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
+ searchKey.length);
+
+ int searchResult = BlockIndexReader.binarySearchNonRootIndex(
+ arrayHoldingKey, searchKey.length / 2, searchKey.length, nonRootIndex,
+ Bytes.BYTES_RAWCOMPARATOR);
+ String lookupFailureMsg = "Failed to look up key #" + i + " ("
+ + Bytes.toStringBinary(searchKey) + ")";
+
+ int expectedResult;
+ int referenceItem;
+
+ if (i % 2 == 1) {
+ // This key is in the array we search as the element (i - 1) / 2. Make
+ // sure we find it.
+ expectedResult = (i - 1) / 2;
+ referenceItem = i;
+ } else {
+ // This key is not in the array but between two elements on the array,
+ // in the beginning, or in the end. The result should be the previous
+ // key in the searched array, or -1 for i = 0.
+ expectedResult = i / 2 - 1;
+ referenceItem = i - 1;
+ }
+
+ assertEquals(lookupFailureMsg, expectedResult, searchResult);
+
+ // Now test that we can get the offset and the on-disk size using a
+ // higher-level API function.
+ boolean locateBlockResult =
+ BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
+ searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR);
+
+ if (i == 0) {
+ assertFalse(locateBlockResult);
+ } else {
+ assertTrue(locateBlockResult);
+ String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
+ assertEquals(errorMsg, getDummyFileOffset(referenceItem),
+ nonRootIndex.getLong());
+ assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
+ nonRootIndex.getInt());
+ }
+ }
+
+ }
+
+ @Test
+ public void testBlockIndexChunk() throws IOException {
+ BlockIndexChunk c = new BlockIndexChunk();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ int N = 1000;
+ int[] numSubEntriesAt = new int[N];
+ int numSubEntries = 0;
+ for (int i = 0; i < N; ++i) {
+ baos.reset();
+ DataOutputStream dos = new DataOutputStream(baos);
+ c.writeNonRoot(dos);
+ assertEquals(c.getNonRootSize(), dos.size());
+
+ baos.reset();
+ dos = new DataOutputStream(baos);
+ c.writeRoot(dos);
+ assertEquals(c.getRootSize(), dos.size());
+
+ byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
+ numSubEntries += rand.nextInt(5) + 1;
+ numSubEntriesAt[i] = numSubEntries; // cumulative count, checked below
+ keys.add(k);
+ c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
+ }
+
+ // Test the ability to look up the entry that contains a particular
+ // deeper-level index block's entry ("sub-entry"), assuming a global
+ // 0-based ordering of sub-entries. This is needed for mid-key calculation.
+ for (int i = 0; i < N; ++i) {
+ for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
+ j < numSubEntriesAt[i];
+ ++j) {
+ assertEquals(i, c.getEntryBySubEntry(j));
+ }
+ }
+ }
+
+ /** Checks if the HeapSize calculator is within reason */
+ @Test
+ public void testHeapSizeForBlockIndex() throws IOException {
+ Class<HFileBlockIndex.BlockIndexReader> cl =
+ HFileBlockIndex.BlockIndexReader.class;
+ long expected = ClassSize.estimateBase(cl, false);
+
+ HFileBlockIndex.BlockIndexReader bi =
+ new HFileBlockIndex.BlockIndexReader(Bytes.BYTES_RAWCOMPARATOR, 1);
+ long actual = bi.heapSize();
+
+ // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
+ // int [] blockDataSizes) are all null they are not going to show up in the
+ // HeapSize calculation, so need to remove those array costs from expected.
+ expected -= ClassSize.align(3 * ClassSize.ARRAY);
+
+ if (expected != actual) {
+ ClassSize.estimateBase(cl, true);
+ assertEquals(expected, actual);
+ }
+ }
+
+ /**
+ * Tests the block index through the HFile writer/reader APIs. Covers setting
+ * the index block size through configuration, intermediate-level index
+ * blocks, and caching index blocks on write.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testHFileWriterAndReader() throws IOException {
+ Path hfilePath = new Path(HBaseTestingUtility.getTestDir(),
+ "hfile_for_block_index");
+ BlockCache blockCache = StoreFile.getBlockCache(conf);
+
+ for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
+ int indexBlockSize = INDEX_CHUNK_SIZES[testI];
+ int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
+ LOG.info("Index block size: " + indexBlockSize + ", compression: "
+ + compr);
+ // Evict all blocks that were cached-on-write by the previous invocation.
+ blockCache.evictBlocksByPrefix(hfilePath.getName()
+ + HFile.CACHE_KEY_SEPARATOR);
+
+ conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
+ Set<String> keyStrSet = new HashSet<String>();
+ byte[][] keys = new byte[NUM_KV][];
+ byte[][] values = new byte[NUM_KV][];
+
+ // Write the HFile
+ {
+ HFile.Writer writer = HFile.getWriterFactory(conf).createWriter(fs,
+ hfilePath, SMALL_BLOCK_SIZE, compr, KeyValue.KEY_COMPARATOR);
+ Random rand = new Random(19231737);
+
+ for (int i = 0; i < NUM_KV; ++i) {
+ byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
+
+ // Key will be interpreted by KeyValue.KEY_COMPARATOR
+ byte[] k = KeyValue.createFirstOnRow(row, 0, row.length, row, 0, 0,
+ row, 0, 0).getKey();
+
+ byte[] v = TestHFileWriterV2.randomValue(rand);
+ writer.append(k, v);
+ keys[i] = k;
+ values[i] = v;
+ keyStrSet.add(Bytes.toStringBinary(k));
+
+ if (i > 0) {
+ assertTrue(KeyValue.KEY_COMPARATOR.compare(keys[i - 1],
+ keys[i]) < 0);
+ }
+ }
+
+ writer.close();
+ }
+
+ // Read the HFile
+ HFile.Reader reader = HFile.createReader(fs, hfilePath, blockCache,
+ false, true);
+ assertEquals(expectedNumLevels,
+ reader.getTrailer().getNumDataIndexLevels());
+
+ assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
+ assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
+ LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));
+
+ for (boolean pread : new boolean[] { false, true }) {
+ HFileScanner scanner = reader.getScanner(true, pread);
+ for (int i = 0; i < NUM_KV; ++i) {
+ checkSeekTo(keys, scanner, i);
+ checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
+ scanner.getValue());
+ }
+ assertTrue(scanner.seekTo());
+ for (int i = NUM_KV - 1; i >= 0; --i) {
+ checkSeekTo(keys, scanner, i);
+ checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
+ scanner.getValue());
+ }
+ }
+
+ // Manually compute the mid-key and validate it.
+ HFileReaderV2 reader2 = (HFileReaderV2) reader;
+ HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();
+
+ HFileBlock.BlockIterator iter = fsReader.blockRange(0,
+ reader.getTrailer().getLoadOnOpenDataOffset());
+ HFileBlock block;
+ List<byte[]> blockKeys = new ArrayList<byte[]>();
+ while ((block = iter.nextBlock()) != null) {
+ if (block.getBlockType() != BlockType.LEAF_INDEX)
+ return;
+ ByteBuffer b = block.getBufferReadOnly();
+ int n = b.getInt();
+ // One int for the number of items, and n + 1 for the secondary index.
+ int entriesOffset = Bytes.SIZEOF_INT * (n + 2);
+
+ // Get all the keys from the leaf index block.
+ for (int i = 0; i < n; ++i) {
+ int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
+ int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
+ int keyLen = nextKeyRelOffset - keyRelOffset;
+ int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
+ HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
+ byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
+ + keyLen);
+ String blockKeyStr = Bytes.toString(blockKey);
+ blockKeys.add(blockKey);
+
+ // If the first key of the block is not among the keys written, we
+ // are not parsing the non-root index block format correctly.
+ assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
+ keyStrSet.contains(blockKeyStr));
+ }
+ }
+
+ // Validate the mid-key.
+ assertEquals(
+ Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
+ Bytes.toStringBinary(reader.midkey()));
+
+ assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
+ reader.getTrailer().getUncompressedDataIndexSize());
+
+ reader.close();
+ }
+ }
+
+ private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
+ throws IOException {
+ assertEquals("Failed to seek to key #" + i + " ("
+ + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(keys[i]));
+ }
+
+ private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
+ ByteBuffer buf) {
+ assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
+ + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
+ arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
+ }
+
+ /** Check a key/value pair after it was read by the reader */
+ private void checkKeyValue(String msgPrefix, byte[] expectedKey,
+ byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
+ if (!msgPrefix.isEmpty())
+ msgPrefix += ". ";
+
+ assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
+ assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
+ valueRead);
+ }
+
+}
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileReaderV1.java Wed Aug 3 20:30:27 2011
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class TestHFileReaderV1 {
+
+ private static final HBaseTestingUtility TEST_UTIL =
+ new HBaseTestingUtility();
+
+ private Configuration conf;
+ private FileSystem fs;
+
+ private static final int N = 1000;
+
+ @Before
+ public void setUp() throws IOException {
+ conf = TEST_UTIL.getConfiguration();
+ fs = FileSystem.get(conf);
+ }
+
+ @Test
+ public void testReadingExistingVersion1HFile() throws IOException {
+ URL url = TestHFileReaderV1.class.getResource(
+ "8e8ab58dcf39412da19833fcd8f687ac");
+ Path existingHFilePath = new Path(url.getPath());
+ HFile.Reader reader =
+ HFile.createReader(fs, existingHFilePath, null, false, false);
+ reader.loadFileInfo();
+ FixedFileTrailer trailer = reader.getTrailer();
+
+ assertEquals(N, reader.getEntries());
+ assertEquals(N, trailer.getEntryCount());
+ assertEquals(1, trailer.getVersion());
+ assertEquals(Compression.Algorithm.GZ, trailer.getCompressionCodec());
+
+ for (boolean pread : new boolean[] { false, true }) {
+ int totalDataSize = 0;
+ int n = 0;
+
+ HFileScanner scanner = reader.getScanner(false, pread);
+ assertTrue(scanner.seekTo());
+ do {
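+ // Each entry contributes its key and value bytes plus the two length ints.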
+ totalDataSize += scanner.getKey().limit() + scanner.getValue().limit()
+ + Bytes.SIZEOF_INT * 2;
+ ++n;
+ } while (scanner.next());
+
+ // Add magic record sizes, one per data block.
+ totalDataSize += 8 * trailer.getDataIndexCount();
+
+ assertEquals(N, n);
+ assertEquals(trailer.getTotalUncompressedBytes(), totalDataSize);
+ }
+ reader.close();
+ }
+
+}
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java Wed Aug 3 20:30:27 2011
@@ -0,0 +1,256 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Testing writing a version 2 {@link HFile}. This is a low-level test written
+ * during the development of {@link HFileWriterV2}.
+ */
+public class TestHFileWriterV2 {
+
+ private static final Log LOG = LogFactory.getLog(TestHFileWriterV2.class);
+
+ private static final HBaseTestingUtility TEST_UTIL =
+ new HBaseTestingUtility();
+
+ private Configuration conf;
+ private FileSystem fs;
+
+ @Before
+ public void setUp() throws IOException {
+ conf = TEST_UTIL.getConfiguration();
+ fs = FileSystem.get(conf);
+ }
+
+ @Test
+ public void testHFileFormatV2() throws IOException {
+ Path hfilePath = new Path(HBaseTestingUtility.getTestDir(),
+ "testHFileFormatV2");
+
+ final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
+ HFileWriterV2 writer = new HFileWriterV2(conf, fs, hfilePath, 4096,
+ COMPRESS_ALGO, KeyValue.KEY_COMPARATOR);
+
+ long totalKeyLength = 0;
+ long totalValueLength = 0;
+
+ Random rand = new Random(9713312); // Just a fixed seed.
+
+ final int ENTRY_COUNT = 10000;
+ List<byte[]> keys = new ArrayList<byte[]>();
+ List<byte[]> values = new ArrayList<byte[]>();
+
+ for (int i = 0; i < ENTRY_COUNT; ++i) {
+ byte[] keyBytes = randomOrderedKey(rand, i);
+
+ // A random-length random value.
+ byte[] valueBytes = randomValue(rand);
+ writer.append(keyBytes, valueBytes);
+
+ totalKeyLength += keyBytes.length;
+ totalValueLength += valueBytes.length;
+
+ keys.add(keyBytes);
+ values.add(valueBytes);
+ }
+
+ // Add in an arbitrary order. They will be sorted lexicographically by
+ // the key.
+ writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
+ writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
+ writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
+
+ writer.close();
+
+ FSDataInputStream fsdis = fs.open(hfilePath);
+
+ // A "manual" version of a new-format HFile reader. This unit test was
+ // written before the V2 reader was fully implemented.
+
+ long fileSize = fs.getFileStatus(hfilePath).getLen();
+ FixedFileTrailer trailer =
+ FixedFileTrailer.readFromStream(fsdis, fileSize);
+
+ assertEquals(2, trailer.getVersion());
+ assertEquals(ENTRY_COUNT, trailer.getEntryCount());
+
+ HFileBlock.FSReader blockReader =
+ new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
+
+ // Counters for the number of key/value pairs and the number of blocks
+ int entriesRead = 0;
+ int blocksRead = 0;
+
+ // Scan blocks the way the reader would scan them
+ fsdis.seek(0);
+ long curBlockPos = 0;
+ while (curBlockPos <= trailer.getLastDataBlockOffset()) {
+ HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
+ assertEquals(BlockType.DATA, block.getBlockType());
+ ByteBuffer buf = block.getBufferWithoutHeader();
+ while (buf.hasRemaining()) {
+ int keyLen = buf.getInt();
+ int valueLen = buf.getInt();
+
+ byte[] key = new byte[keyLen];
+ buf.get(key);
+
+ byte[] value = new byte[valueLen];
+ buf.get(value);
+
+ // A brute-force check to see that all keys and values are correct.
+ assertTrue(Bytes.compareTo(key, keys.get(entriesRead)) == 0);
+ assertTrue(Bytes.compareTo(value, values.get(entriesRead)) == 0);
+
+ ++entriesRead;
+ }
+ ++blocksRead;
+ curBlockPos += block.getOnDiskSizeWithHeader();
+ }
+ LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
+ + blocksRead);
+ assertEquals(ENTRY_COUNT, entriesRead);
+
+ // Meta blocks. We can scan until the load-on-open data offset (which is
+ // the root block index offset in version 2) because we are not testing
+ // intermediate-level index blocks here.
+
+ int metaCounter = 0;
+ while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
+ LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
+ trailer.getLoadOnOpenDataOffset());
+ HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
+ assertEquals(BlockType.META, block.getBlockType());
+ Text t = new Text();
+ block.readInto(t);
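+ // Meta blocks were sorted lexicographically by key, so CAPITAL_OF_FRANCE,
+ // CAPITAL_OF_RUSSIA, CAPITAL_OF_USA come back as Paris, Moscow,
+ // Washington, D.C.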
+ Text expectedText =
+ (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
+ "Moscow") : new Text("Washington, D.C."));
+ assertEquals(expectedText, t);
+ LOG.info("Read meta block data: " + t);
+ ++metaCounter;
+ curBlockPos += block.getOnDiskSizeWithHeader();
+ }
+
+ fsdis.close();
+ }
+
+ // Static stuff used by various HFile v2 unit tests
+
+ private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
+ private static final int MIN_ROW_OR_QUALIFIER_LENGTH = 64;
+ private static final int MAX_ROW_OR_QUALIFIER_LENGTH = 128;
+
+ /**
+ * Generates a random key that is guaranteed to increase as the given index i
+ * increases. The result consists of a prefix, which is a deterministic
+ * increasing function of i, and a random suffix.
+ *
+ * @param rand random number generator to use
+ * @param i index determining the key's position in the overall ordering
+ * @return a key whose fixed-length prefix increases with i
+ */
+ public static byte[] randomOrderedKey(Random rand, int i) {
+ StringBuilder k = new StringBuilder();
+
+ // The fixed-length lexicographically increasing part of the key.
+ for (int bitIndex = 31; bitIndex >= 0; --bitIndex) {
+ if ((i & (1 << bitIndex)) == 0)
+ k.append("a");
+ else
+ k.append("b");
+ }
+
+ // A random-length random suffix of the key.
+ for (int j = 0; j < rand.nextInt(50); ++j)
+ k.append(randomReadableChar(rand));
+
+ byte[] keyBytes = k.toString().getBytes();
+ return keyBytes;
+ }
+
+ public static byte[] randomValue(Random rand) {
+ StringBuilder v = new StringBuilder();
+ for (int j = 0; j < 1 + rand.nextInt(2000); ++j) {
+ v.append((char) (32 + rand.nextInt(95)));
+ }
+
+ byte[] valueBytes = v.toString().getBytes();
+ return valueBytes;
+ }
+
+ public static final char randomReadableChar(Random rand) {
+ int i = rand.nextInt(26 * 2 + 10 + 1);
+ if (i < 26)
+ return (char) ('A' + i);
+ i -= 26;
+
+ if (i < 26)
+ return (char) ('a' + i);
+ i -= 26;
+
+ if (i < 10)
+ return (char) ('0' + i);
+ i -= 10;
+
+ assert i == 0;
+ return '_';
+ }
+
+ public static byte[] randomRowOrQualifier(Random rand) {
+ StringBuilder field = new StringBuilder();
+ int fieldLen = MIN_ROW_OR_QUALIFIER_LENGTH
+ + rand.nextInt(MAX_ROW_OR_QUALIFIER_LENGTH
+ - MIN_ROW_OR_QUALIFIER_LENGTH + 1);
+ for (int i = 0; i < fieldLen; ++i)
+ field.append(randomReadableChar(rand));
+ return field.toString().getBytes();
+ }
+
+ public static KeyValue randomKeyValue(Random rand) {
+ return new KeyValue(randomRowOrQualifier(rand),
+ COLUMN_FAMILY_NAME.getBytes(), randomRowOrQualifier(rand),
+ randomValue(rand));
+ }
+
+}
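The ordering property documented on randomOrderedKey() above comes entirely from the fixed 32-character prefix: each bit of the index i is mapped to 'a' (0) or 'b' (1), most significant bit first, so the prefixes sort lexicographically in the same order as the indexes, and the random suffix never changes the relative order of distinct indexes. A minimal standalone sketch of that property (illustrative only, not part of the committed test class):

public class OrderedKeyPrefixDemo {
  // Reproduces only the deterministic prefix of randomOrderedKey().
  static String prefix(int i) {
    StringBuilder k = new StringBuilder();
    for (int bitIndex = 31; bitIndex >= 0; --bitIndex)
      k.append((i & (1 << bitIndex)) == 0 ? 'a' : 'b');
    return k.toString();
  }

  public static void main(String[] args) {
    // prefix(0) is 32 'a' characters, prefix(1) ends in a single 'b', and
    // in general prefix(i) sorts before prefix(i + 1).
    for (int i = 0; i < 9999; ++i) {
      if (prefix(i).compareTo(prefix(i + 1)) >= 0) {
        throw new AssertionError("prefix ordering broken at i=" + i);
      }
    }
    System.out.println("prefix(5) = " + prefix(5));
  }
}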
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java Wed Aug 3 20:30:27 2011
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2;
+import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
+import org.apache.hadoop.hbase.util.BloomFilterFactory;
+import org.apache.hadoop.hbase.util.ByteBloomFilter;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CompoundBloomFilter;
+import org.apache.hadoop.hbase.util.CompoundBloomFilterBase;
+import org.apache.hadoop.hbase.util.CompoundBloomFilterWriter;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests writing Bloom filter blocks in the same part of the file as data
+ * blocks.
+ */
+public class TestCompoundBloomFilter {
+
+ private static final HBaseTestingUtility TEST_UTIL =
+ new HBaseTestingUtility();
+
+ private static final Log LOG = LogFactory.getLog(
+ TestCompoundBloomFilter.class);
+
+ private static final int NUM_TESTS = 9;
+ private static final BloomType BLOOM_TYPES[] = { BloomType.ROW,
+ BloomType.ROW, BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROW,
+ BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROW };
+
+ private static final int NUM_KV[];
+ static {
+ final int N = 10000; // Only used in initialization.
+ NUM_KV = new int[] { 21870, N, N, N, N, 1000, N, 7500, 7500};
+ assert NUM_KV.length == NUM_TESTS;
+ }
+
+ private static final int BLOCK_SIZES[];
+ static {
+ final int blkSize = 65536;
+ BLOCK_SIZES = new int[] { 512, 1000, blkSize, blkSize, blkSize, 128, 300,
+ blkSize, blkSize };
+ assert BLOCK_SIZES.length == NUM_TESTS;
+ }
+
+ /**
+ * Be careful not to specify too high a Bloom filter block size, otherwise
+ * there will only be one oversized chunk and the observed false positive
+ * rate will be too low.
+ */
+ private static final int BLOOM_BLOCK_SIZES[] = { 1000, 4096, 4096, 4096,
+ 8192, 128, 1024, 600, 600 };
+ static { assert BLOOM_BLOCK_SIZES.length == NUM_TESTS; }
+
+ private static final double TARGET_ERROR_RATES[] = { 0.025, 0.01, 0.015,
+ 0.01, 0.03, 0.01, 0.01, 0.07, 0.07 };
+ static { assert TARGET_ERROR_RATES.length == NUM_TESTS; }
+
+ /** A false positive rate that is obviously too high. */
+ private static final double TOO_HIGH_ERROR_RATE;
+ static {
+ double m = 0;
+ for (double errorRate : TARGET_ERROR_RATES)
+ m = Math.max(m, errorRate);
+ TOO_HIGH_ERROR_RATE = m + 0.03;
+ }
+
+ private static Configuration conf;
+ private FileSystem fs;
+ private BlockCache blockCache;
+
+ /** A message of the form "in test #<number>:" to include in logging. */
+ private String testIdMsg;
+
+ private static final int GENERATION_SEED = 2319;
+ private static final int EVALUATION_SEED = 135;
+
+ @Before
+ public void setUp() throws IOException {
+ conf = TEST_UTIL.getConfiguration();
+
+ // This test requires the most recent HFile format (i.e. v2).
+ conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
+
+ fs = FileSystem.get(conf);
+
+ blockCache = StoreFile.getBlockCache(conf);
+ assertNotNull(blockCache);
+ }
+
+ private List<KeyValue> createSortedKeyValues(Random rand, int n) {
+ List<KeyValue> kvList = new ArrayList<KeyValue>(n);
+ for (int i = 0; i < n; ++i)
+ kvList.add(TestHFileWriterV2.randomKeyValue(rand));
+ Collections.sort(kvList, KeyValue.COMPARATOR);
+ return kvList;
+ }
+
+ @Test
+ public void testCompoundBloomFilter() throws IOException {
+ conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
+ for (int t = 0; t < NUM_TESTS; ++t) {
+ conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE,
+ (float) TARGET_ERROR_RATES[t]);
+
+ testIdMsg = "in test #" + t + ":";
+ Random generationRand = new Random(GENERATION_SEED);
+ List<KeyValue> kvs = createSortedKeyValues(generationRand, NUM_KV[t]);
+ BloomType bt = BLOOM_TYPES[t];
+ Path sfPath = writeStoreFile(t, bt, kvs);
+ readStoreFile(t, bt, kvs, sfPath);
+ }
+ }
+
+ /**
+ * Validates the false positive ratio by computing its z-value and comparing
+ * it to the provided threshold.
+ *
+ * @param falsePosRate experimentally observed false positive rate
+ * @param nTrials the number of calls to
+ * {@link StoreFile.Reader#shouldSeek(Scan, java.util.SortedSet)}.
+ * @param zValueBoundary z-value boundary, positive for an upper bound and
+ * negative for a lower bound
+ * @param cbf the compound Bloom filter we are using
+ * @param additionalMsg additional message to include in log output and
+ * assertion failures
+ */
+ private void validateFalsePosRate(double falsePosRate, int nTrials,
+ double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
+ double p = BloomFilterFactory.getErrorRate(conf);
+ double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);
+
+ String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
+ + falsePosRate + ", nTrials=" + nTrials + ")";
+ LOG.info("z-value is " + zValue + assortedStatsStr);
+
+ boolean isUpperBound = zValueBoundary > 0;
+
+ if (isUpperBound && zValue > zValueBoundary ||
+ !isUpperBound && zValue < zValueBoundary) {
+ String errorMsg = "False positive rate z-value " + zValue + " is "
+ + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
+ + assortedStatsStr + ". Per-chunk stats:\n"
+ + cbf.formatTestingStats();
+ fail(errorMsg + additionalMsg);
+ }
+ }
+
+ private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
+ Path sfPath) throws IOException {
+ StoreFile sf = new StoreFile(fs, sfPath, true, conf, bt, false);
+ StoreFile.Reader r = sf.createReader();
+ final boolean pread = true; // does not really matter
+ StoreFileScanner scanner = r.getStoreFileScanner(true, pread);
+
+ {
+ // Test for false negatives (not allowed).
+ int numChecked = 0;
+ for (KeyValue kv : kvs) {
+ byte[] row = kv.getRow();
+ boolean present = isInBloom(scanner, row, kv.getQualifier());
+ assertTrue(testIdMsg + " Bloom filter false negative on row "
+ + Bytes.toStringBinary(row) + " after " + numChecked
+ + " successful checks", present);
+ ++numChecked;
+ }
+ }
+
+ // Test for false positives (some percentage allowed). We test in two modes:
+ // "fake lookup" which ignores the key distribution, and production mode.
+ for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
+ ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
+ try {
+ String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
+ "enabled" : "disabled");
+ CompoundBloomFilter cbf = (CompoundBloomFilter) r.getBloomFilter();
+ cbf.enableTestingStats();
+ int numFalsePos = 0;
+ Random rand = new Random(EVALUATION_SEED);
+ int nTrials = NUM_KV[t] * 10;
+ for (int i = 0; i < nTrials; ++i) {
+ byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
+ if (isInBloom(scanner, query, bt, rand)) {
+ numFalsePos += 1;
+ }
+ }
+ double falsePosRate = numFalsePos * 1.0 / nTrials;
+ LOG.debug(String.format(testIdMsg
+ + " False positives: %d out of %d (%f)",
+ numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);
+
+ // Check for obvious Bloom filter crashes.
+ assertTrue("False positive is too high: " + falsePosRate + " (greater "
+ + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
+ falsePosRate < TOO_HIGH_ERROR_RATE);
+
+ // Now a more precise check to see if the false positive rate is not
+ // too high. The reason we use a relaxed restriction for the real-world
+ // case as opposed to the "fake lookup" case is that our hash functions
+ // are not completely independent.
+
+ double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
+ validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
+ fakeLookupModeStr);
+
+ // For checking the lower bound we need to eliminate the last chunk,
+ // because it is frequently smaller and the false positive rate in it
+ // is too low. This does not help if there is only one under-sized
+ // chunk, though.
+ int nChunks = cbf.getNumChunks();
+ if (nChunks > 1) {
+ numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
+ nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
+ falsePosRate = numFalsePos * 1.0 / nTrials;
+ LOG.info(testIdMsg + " False positive rate without last chunk is " +
+ falsePosRate + fakeLookupModeStr);
+ }
+
+ validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
+ fakeLookupModeStr);
+ } finally {
+ ByteBloomFilter.setFakeLookupMode(false);
+ }
+ }
+
+ r.close();
+ }
+
+ private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
+ Random rand) {
+ return isInBloom(scanner, row,
+ TestHFileWriterV2.randomRowOrQualifier(rand));
+ }
+
+ private boolean isInBloom(StoreFileScanner scanner, byte[] row,
+ byte[] qualifier) {
+ Scan scan = new Scan(row, row);
+ TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+ columns.add(qualifier);
+ return scanner.shouldSeek(scan, columns);
+ }
+
+ private Path writeStoreFile(int t, BloomType bt, List<KeyValue> kvs)
+ throws IOException {
+ conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
+ BLOOM_BLOCK_SIZES[t]);
+ conf.setBoolean(HFile.CACHE_BLOCKS_ON_WRITE_KEY, true);
+
+ StoreFile.Writer w = StoreFile.createWriter(fs,
+ HBaseTestingUtility.getTestDir(), BLOCK_SIZES[t], null, null, conf,
+ bt, 0);
+
+ assertTrue(w.hasBloom());
+ assertTrue(w.getBloomWriter() instanceof CompoundBloomFilterWriter);
+ CompoundBloomFilterWriter cbbf =
+ (CompoundBloomFilterWriter) w.getBloomWriter();
+
+ int keyCount = 0;
+ KeyValue prev = null;
+ LOG.debug("Total keys/values to insert: " + kvs.size());
+ for (KeyValue kv : kvs) {
+ w.append(kv);
+
+ // Validate the key count in the Bloom filter.
+ boolean newKey = true;
+ if (prev != null) {
+ newKey = !(bt == BloomType.ROW ? KeyValue.COMPARATOR.matchingRows(kv,
+ prev) : KeyValue.COMPARATOR.matchingRowColumn(kv, prev));
+ }
+ if (newKey)
+ ++keyCount;
+ assertEquals(keyCount, cbbf.getKeyCount());
+
+ prev = kv;
+ }
+ w.close();
+
+ return w.getPath();
+ }
+
+ @Test
+ public void testCompoundBloomSizing() {
+ int bloomBlockByteSize = 4096;
+ int bloomBlockBitSize = bloomBlockByteSize * 8;
+ double targetErrorRate = 0.01;
+ long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
+ targetErrorRate);
+
+ long bloomSize1 = bloomBlockByteSize * 8;
+ long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
+ targetErrorRate);
+
+ double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
+ assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
+ }
+
+ @Test
+ public void testCreateKey() {
+ CompoundBloomFilterBase cbfb = new CompoundBloomFilterBase();
+ byte[] row = "myRow".getBytes();
+ byte[] qualifier = "myQualifier".getBytes();
+ byte[] rowKey = cbfb.createBloomKey(row, 0, row.length,
+ row, 0, 0);
+ byte[] rowColKey = cbfb.createBloomKey(row, 0, row.length,
+ qualifier, 0, qualifier.length);
+ KeyValue rowKV = KeyValue.createKeyValueFromKey(rowKey);
+ KeyValue rowColKV = KeyValue.createKeyValueFromKey(rowColKey);
+ assertEquals(rowKV.getTimestamp(), rowColKV.getTimestamp());
+ assertEquals(Bytes.toStringBinary(rowKV.getRow()),
+ Bytes.toStringBinary(rowColKV.getRow()));
+ assertEquals(0, rowKV.getQualifier().length);
+ }
+
+}
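The z-value computed in validateFalsePosRate() above is the standard normal approximation for a binomial proportion: with target error rate p and nTrials independent queries, the observed false positive rate has standard deviation sqrt(p * (1 - p) / nTrials). A small sketch with hypothetical numbers (not taken from any test run) showing how the 1.96 upper bound used in fake-lookup mode behaves:

public class ZValueDemo {
  public static void main(String[] args) {
    double p = 0.01;           // target error rate (hypothetical)
    int nTrials = 100000;      // number of Bloom filter queries (hypothetical)
    double observed = 0.0105;  // observed false positive rate (hypothetical)
    double z = (observed - p) / Math.sqrt(p * (1 - p) / nTrials);
    // Prints roughly z = 1.59, which stays below the 1.96 upper bound;
    // an observed rate of 0.012 would give roughly 6.4 and fail the check.
    System.out.println("z = " + z);
  }
}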
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java?rev=1153647&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestIdLock.java Wed Aug 3 20:30:27 2011
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+
+public class TestIdLock {
+
+ private static final Log LOG = LogFactory.getLog(TestIdLock.class);
+
+ private static final int NUM_IDS = 16;
+ private static final int NUM_THREADS = 128;
+ private static final int NUM_SECONDS = 20;
+
+ private IdLock idLock = new IdLock();
+
+ private Map<Long, String> idOwner = new ConcurrentHashMap<Long, String>();
+
+ private class IdLockTestThread implements Callable<Boolean> {
+
+ private String clientId;
+
+ public IdLockTestThread(String clientId) {
+ this.clientId = clientId;
+ }
+
+ @Override
+ public Boolean call() throws Exception {
+ Thread.currentThread().setName(clientId);
+ Random rand = new Random();
+ long endTime = System.currentTimeMillis() + NUM_SECONDS * 1000;
+ while (System.currentTimeMillis() < endTime) {
+ long id = rand.nextInt(NUM_IDS);
+
+ LOG.info(clientId + " is waiting for id " + id);
+ IdLock.Entry lockEntry = idLock.getLockEntry(id);
+ try {
+ int sleepMs = 1 + rand.nextInt(4);
+ String owner = idOwner.get(id);
+ if (owner != null) {
+ LOG.error("Id " + id + " already taken by " + owner + ", "
+ + clientId + " failed");
+ return false;
+ }
+
+ idOwner.put(id, clientId);
+ LOG.info(clientId + " took id " + id + ", sleeping for " +
+ sleepMs + "ms");
+ Thread.sleep(sleepMs);
+ LOG.info(clientId + " is releasing id " + id);
+ idOwner.remove(id);
+
+ } finally {
+ idLock.releaseLockEntry(lockEntry);
+ }
+ }
+ return true;
+ }
+
+ }
+
+ @Test
+ public void testMultipleClients() throws Exception {
+ ExecutorService exec = Executors.newFixedThreadPool(NUM_THREADS);
+ try {
+ ExecutorCompletionService<Boolean> ecs =
+ new ExecutorCompletionService<Boolean>(exec);
+ for (int i = 0; i < NUM_THREADS; ++i)
+ ecs.submit(new IdLockTestThread("client_" + i));
+ for (int i = 0; i < NUM_THREADS; ++i) {
+ Future<Boolean> result = ecs.take();
+ assertTrue(result.get());
+ }
+ idLock.assertMapEmpty();
+ } finally {
+ exec.shutdown();
+ }
+ }
+
+}