Posted to commits@hbase.apache.org by te...@apache.org on 2011/12/24 22:20:41 UTC

svn commit: r1223020 [5/5] - in /hbase/trunk/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/io/ main/java/org/apache/hadoop/hbase/io/encoding/ main/java/org/apache/hadoop/hbase/io/hfile/ main/java/org/apache/hadoop/hbase/mapr...

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java?rev=1223020&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java Sat Dec 24 21:20:39 2011
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.RedundantKVGenerator;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public class TestHFileDataBlockEncoder {
+  private Configuration conf;
+  private final HBaseTestingUtility TEST_UTIL =
+      new HBaseTestingUtility();
+  private HFileDataBlockEncoderImpl blockEncoder;
+  private RedundantKVGenerator generator = new RedundantKVGenerator();
+  private SchemaConfigured UNKNOWN_TABLE_AND_CF =
+      SchemaConfigured.createUnknown();
+  private boolean includesMemstoreTS;
+
+  /**
+   * Create a test for the given data block encoding configuration.
+   * @param blockEncoder The data block encoding policy that will be used.
+   */
+  public TestHFileDataBlockEncoder(HFileDataBlockEncoderImpl blockEncoder,
+      boolean includesMemstoreTS) {
+    this.blockEncoder = blockEncoder;
+    this.includesMemstoreTS = includesMemstoreTS;
+  }
+
+  /**
+   * Preparation before JUnit test.
+   */
+  @Before
+  public void setUp() {
+    conf = TEST_UTIL.getConfiguration();
+    SchemaMetrics.configureGlobally(conf);
+  }
+
+  /**
+   * Cleanup after JUnit test.
+   */
+  @After
+  public void tearDown() throws IOException {
+    TEST_UTIL.cleanupTestDir();
+  }
+
+  /**
+   * Test putting blocks into the cache and taking them back out,
+   * using different encoding options.
+   */
+  @Test
+  public void testEncodingWithCache() {
+    HFileBlock block = getSampleHFileBlock();
+    LruBlockCache blockCache =
+        new LruBlockCache(8 * 1024 * 1024, 32 * 1024);
+
+    HFileBlock cacheBlock = blockEncoder.beforeBlockCache(block,
+        includesMemstoreTS);
+    BlockCacheKey cacheKey = new BlockCacheKey("test", 0);
+    blockCache.cacheBlock(cacheKey, cacheBlock);
+
+    HeapSize heapSize = blockCache.getBlock(cacheKey, false);
+    assertTrue(heapSize instanceof HFileBlock);
+
+    HFileBlock afterCache = (HFileBlock) heapSize;
+    HFileBlock returnedBlock = blockEncoder.afterBlockCache(afterCache,
+        false, includesMemstoreTS);
+
+    if (!blockEncoder.useEncodedSeek() ||
+        blockEncoder.getInCache() == Algorithm.NONE) {
+      assertEquals(block.getBufferWithHeader(),
+          returnedBlock.getBufferWithHeader());
+    } else {
+      if (BlockType.ENCODED_DATA != returnedBlock.getBlockType()) {
+        System.out.println(blockEncoder);
+      }
+      assertEquals(BlockType.ENCODED_DATA, returnedBlock.getBlockType());
+    }
+  }
+
+  /**
+   * Test writing to disk.
+   */
+  @Test
+  public void testEncodingWritePath() {
+    // usually we have just a block without a header, but keep it simple here
+    HFileBlock block = getSampleHFileBlock();
+    Pair<ByteBuffer, BlockType> result =
+        blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
+            includesMemstoreTS);
+
+    int size = result.getFirst().limit() - HFileBlock.HEADER_SIZE;
+    HFileBlock blockOnDisk = new HFileBlock(result.getSecond(),
+        size, size, -1, result.getFirst(), HFileBlock.FILL_HEADER, 0,
+        includesMemstoreTS);
+
+    if (blockEncoder.getOnDisk() !=
+        DataBlockEncodings.Algorithm.NONE) {
+      assertEquals(BlockType.ENCODED_DATA, blockOnDisk.getBlockType());
+      assertEquals(blockEncoder.getOnDisk().getId(),
+          blockOnDisk.getDataBlockEncodingId());
+    } else {
+      assertEquals(BlockType.DATA, blockOnDisk.getBlockType());
+    }
+  }
+
+  /**
+   * Test reading from disk.
+   */
+  @Test
+  public void testEncodingReadPath() {
+    HFileBlock origBlock = getSampleHFileBlock();
+    HFileBlock afterDisk = blockEncoder.afterReadFromDisk(origBlock);
+    blockEncoder.afterReadFromDiskAndPuttingInCache(afterDisk, false,
+        includesMemstoreTS);
+  }
+
+  private HFileBlock getSampleHFileBlock() {
+    ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer(
+        generator.generateTestKeyValues(60), includesMemstoreTS);
+    int size = keyValues.limit();
+    ByteBuffer buf = ByteBuffer.allocate(size + HFileBlock.HEADER_SIZE);
+    buf.position(HFileBlock.HEADER_SIZE);
+    keyValues.rewind();
+    buf.put(keyValues);
+    HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf,
+        HFileBlock.FILL_HEADER, 0, includesMemstoreTS);
+    UNKNOWN_TABLE_AND_CF.passSchemaMetricsTo(b);
+    return b;
+  }
+
+  /**
+   * @return All possible data block encoding configurations
+   */
+  @Parameters
+  public static Collection<Object[]> getAllConfigurations() {
+    List<Object[]> configurations =
+        new ArrayList<Object[]>();
+
+    for (Algorithm diskAlgo : DataBlockEncodings.Algorithm.values()) {
+      for (Algorithm cacheAlgo : DataBlockEncodings.Algorithm.values()) {
+        for (boolean useEncodedSeek : new boolean[] {false, true}) {
+          for (boolean includesMemstoreTS : new boolean[] {false, true}) {
+            configurations.add( new Object[] {
+                new HFileDataBlockEncoderImpl(diskAlgo, cacheAlgo,
+                    useEncodedSeek),
+                new Boolean(includesMemstoreTS)});
+          }
+        }
+      }
+    }
+
+    return configurations;
+  }
+}
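
The new TestHFileDataBlockEncoder above uses JUnit 4's Parameterized runner to cross every on-disk encoding with every in-cache encoding, the encoded-seek flag, and the memstore-TS flag. For readers unfamiliar with that runner, here is a minimal generic sketch of the pattern (plain JUnit 4; the class and values are invented for illustration and are not part of this commit):

    import java.util.Arrays;
    import java.util.Collection;

    import org.junit.Assert;
    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.junit.runners.Parameterized;
    import org.junit.runners.Parameterized.Parameters;

    @RunWith(Parameterized.class)
    public class SquareTest {
      private final int input;
      private final int expected;

      // Each Object[] returned by the @Parameters method becomes one
      // constructor invocation, i.e. one independent test instance.
      public SquareTest(int input, int expected) {
        this.input = input;
        this.expected = expected;
      }

      @Parameters
      public static Collection<Object[]> data() {
        return Arrays.asList(new Object[][] { { 2, 4 }, { 3, 9 }, { 4, 16 } });
      }

      @Test
      public void testSquare() {
        Assert.assertEquals(expected, input * input);
      }
    }

getAllConfigurations() plays the role of data() here, so every combination of encoder settings gets its own test instance.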

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java Sat Dec 24 21:20:39 2011
@@ -76,7 +76,7 @@ public class TestHFileWriterV2 {
 
     final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
     HFileWriterV2 writer = new HFileWriterV2(conf, new CacheConfig(conf), fs,
-        hfilePath, 4096, COMPRESS_ALGO, KeyValue.KEY_COMPARATOR);
+        hfilePath, 4096, COMPRESS_ALGO, null, KeyValue.KEY_COMPARATOR);
 
     long totalKeyLength = 0;
     long totalValueLength = 0;
@@ -125,10 +125,12 @@ public class TestHFileWriterV2 {
         new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
     // Comparator class name is stored in the trailer in version 2.
     RawComparator<byte []> comparator = trailer.createComparator();
-    HFileBlockIndex.BlockIndexReader dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator,
-        trailer.getNumDataIndexLevels());
-    HFileBlockIndex.BlockIndexReader metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(
-        Bytes.BYTES_RAWCOMPARATOR, 1);
+    HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
+        new HFileBlockIndex.BlockIndexReader(comparator,
+            trailer.getNumDataIndexLevels());
+    HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
+        new HFileBlockIndex.BlockIndexReader(
+            Bytes.BYTES_RAWCOMPARATOR, 1);
 
     HFileBlock.BlockIterator blockIter = blockReader.blockRange(
         trailer.getLoadOnOpenDataOffset(),
@@ -146,8 +148,10 @@ public class TestHFileWriterV2 {
     // File info
     FileInfo fileInfo = new FileInfo();
     fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
-    byte [] keyValueFormatVersion = fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION);
-    boolean includeMemstoreTS = (keyValueFormatVersion != null && Bytes.toInt(keyValueFormatVersion) > 0);
+    byte [] keyValueFormatVersion = fileInfo.get(
+        HFileWriterV2.KEY_VALUE_VERSION);
+    boolean includeMemstoreTS = keyValueFormatVersion != null &&
+        Bytes.toInt(keyValueFormatVersion) > 0;
 
     // Counters for the number of key/value pairs and the number of blocks
     int entriesRead = 0;

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java Sat Dec 24 21:20:39 2011
@@ -133,6 +133,9 @@ public class TestImportExport {
         5, /* versions */
         true /* keep deleted cells */,
         HColumnDescriptor.DEFAULT_COMPRESSION,
+        HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING_ON_DISK,
+        HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING_IN_CACHE,
+        HColumnDescriptor.DEFAULT_ENCODED_DATA_BLOCK_SEEK,
         HColumnDescriptor.DEFAULT_IN_MEMORY,
         HColumnDescriptor.DEFAULT_BLOCKCACHE,
         HColumnDescriptor.DEFAULT_BLOCKSIZE,
@@ -179,6 +182,9 @@ public class TestImportExport {
         5, /* versions */
         true /* keep deleted cells */,
         HColumnDescriptor.DEFAULT_COMPRESSION,
+        HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING_ON_DISK,
+        HColumnDescriptor.DEFAULT_DATA_BLOCK_ENCODING_IN_CACHE,
+        HColumnDescriptor.DEFAULT_ENCODED_DATA_BLOCK_SEEK,
         HColumnDescriptor.DEFAULT_IN_MEMORY,
         HColumnDescriptor.DEFAULT_BLOCKCACHE,
         HColumnDescriptor.DEFAULT_BLOCKSIZE,

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java Sat Dec 24 21:20:39 2011
@@ -184,8 +184,8 @@ public class CreateRandomStoreFile {
     }
 
     StoreFile.Writer sfw = StoreFile.createWriter(fs, outputDir, blockSize,
-        compr, KeyValue.COMPARATOR, conf, new CacheConfig(conf), bloomType, 
-        numKV);
+        compr, null, KeyValue.COMPARATOR, conf, new CacheConfig(conf),
+        bloomType, numKV);
 
     rand = new Random();
     LOG.info("Writing " + numKV + " key/value pairs");

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java?rev=1223020&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java Sat Dec 24 21:20:39 2011
@@ -0,0 +1,585 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
+import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.compress.Compressor;
+import org.apache.hadoop.io.compress.Decompressor;
+
+/**
+ * Tests various algorithms for key compression on an existing HFile. Useful
+ * for testing, debugging and benchmarking.
+ */
+public class DataBlockEncodingTool {
+  private static final Log LOG = LogFactory.getLog(
+      DataBlockEncodingTool.class);
+
+  private static final boolean includesMemstoreTS = true;
+
+  /**
+   * How many times the benchmark should run.
+   * More runs yield better statistics.
+   * It has to be larger than BENCHMARK_N_OMIT.
+   */
+  public static int BENCHMARK_N_TIMES = 12;
+
+  /**
+   * How many initial runs to omit from the benchmark results.
+   * Usually at least one, in order to exclude setup cost.
+   * Has to be 0 or larger.
+   */
+  public static int BENCHMARK_N_OMIT = 2;
+
+  private List<EncodedDataBlock> codecs = new ArrayList<EncodedDataBlock>();
+  private int totalPrefixLength = 0;
+  private int totalKeyLength = 0;
+  private int totalValueLength = 0;
+  private int totalKeyRedundancyLength = 0;
+
+  final private String compressionAlgorithmName;
+  final private Algorithm compressionAlgorithm;
+  final private Compressor compressor;
+  final private Decompressor decompressor;
+
+  /**
+   * @param compressionAlgorithmName What kind of algorithm should be used
+   *                                 as baseline for comparison (e.g. lzo, gz).
+   */
+  public DataBlockEncodingTool(String compressionAlgorithmName) {
+    this.compressionAlgorithmName = compressionAlgorithmName;
+    this.compressionAlgorithm = Compression.getCompressionAlgorithmByName(
+        compressionAlgorithmName);
+    this.compressor = this.compressionAlgorithm.getCompressor();
+    this.decompressor = this.compressionAlgorithm.getDecompressor();
+  }
+  /**
+   * Check statistics for the given HFile for different data block encoders.
+   * @param scanner Scanner over the file which will be compressed.
+   * @param kvLimit Maximum number of KeyValues which will be processed.
+   * @throws IOException thrown if the scanner is invalid
+   */
+  public void checkStatistics(final KeyValueScanner scanner, final int kvLimit)
+      throws IOException {
+    scanner.seek(KeyValue.LOWESTKEY);
+
+    KeyValue currentKv;
+
+    byte[] previousKey = null;
+    byte[] currentKey;
+
+    List<DataBlockEncoder> dataBlockEncoders =
+        DataBlockEncodings.getAllEncoders();
+
+    for (DataBlockEncoder d : dataBlockEncoders) {
+      codecs.add(new EncodedDataBlock(d, includesMemstoreTS));
+    }
+
+    int j = 0;
+    while ((currentKv = scanner.next()) != null && j < kvLimit) {
+      // Iterates through key/value pairs
+      j++;
+      currentKey = currentKv.getKey();
+      if (previousKey != null) {
+        for (int i = 0 ; i < previousKey.length && i < currentKey.length &&
+            previousKey[i] == currentKey[i] ; ++i) {
+          totalKeyRedundancyLength++;
+        }
+      }
+
+      for (EncodedDataBlock codec : codecs) {
+        codec.addKv(currentKv);
+      }
+
+      previousKey = currentKey;
+
+      totalPrefixLength += currentKv.getLength() - currentKv.getKeyLength() -
+          currentKv.getValueLength();
+      totalKeyLength += currentKv.getKeyLength();
+      totalValueLength += currentKv.getValueLength();
+    }
+  }
+
+  /**
+   * Verify that all data block encoders are working properly.
+   *
+   * @param scanner Scanner over the file which was compressed.
+   * @param kvLimit Maximum number of KeyValues which will be processed.
+   * @return true if all data block encoders compressed/decompressed correctly.
+   * @throws IOException thrown if the scanner is invalid
+   */
+  public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit)
+      throws IOException {
+    KeyValue currentKv;
+
+    scanner.seek(KeyValue.LOWESTKEY);
+    List<Iterator<KeyValue>> codecIterators =
+        new ArrayList<Iterator<KeyValue>>();
+    for(EncodedDataBlock codec : codecs) {
+      codecIterators.add(codec.getIterator());
+    }
+
+    int j = 0;
+    while ((currentKv = scanner.next()) != null && j < kvLimit) {
+      // Iterates through key/value pairs
+      ++j;
+      for (Iterator<KeyValue> it : codecIterators) {
+        KeyValue codecKv = it.next();
+        if (codecKv == null || 0 != Bytes.compareTo(
+            codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(),
+            currentKv.getBuffer(), currentKv.getOffset(),
+            currentKv.getLength())) {
+          if (codecKv == null) {
+            LOG.error("There is a bug in codec " + it +
+                " it returned null KeyValue,");
+          } else {
+            int prefix = 0;
+            int limitLength = 2 * Bytes.SIZEOF_INT +
+                Math.min(codecKv.getLength(), currentKv.getLength());
+            while (prefix < limitLength &&
+                codecKv.getBuffer()[prefix + codecKv.getOffset()] ==
+                currentKv.getBuffer()[prefix + currentKv.getOffset()]) {
+              prefix++;
+            }
+
+            LOG.error("There is bug in codec " + it.toString() +
+                "\n on element " + j +
+                "\n codecKv.getKeyLength() " + codecKv.getKeyLength() +
+                "\n codecKv.getValueLength() " + codecKv.getValueLength() +
+                "\n codecKv.getLength() " + codecKv.getLength() +
+                "\n currentKv.getKeyLength() " + currentKv.getKeyLength() +
+                "\n currentKv.getValueLength() " + currentKv.getValueLength() +
+                "\n codecKv.getLength() " + currentKv.getLength() +
+                "\n currentKV rowLength " + currentKv.getRowLength() +
+                " familyName " + currentKv.getFamilyLength() +
+                " qualifier " + currentKv.getQualifierLength() +
+                "\n prefix " + prefix +
+                "\n codecKv   '" + Bytes.toStringBinary(codecKv.getBuffer(),
+                    codecKv.getOffset(), prefix) + "' diff '" +
+                    Bytes.toStringBinary(codecKv.getBuffer(),
+                        codecKv.getOffset() + prefix, codecKv.getLength() -
+                        prefix) + "'" +
+                "\n currentKv '" + Bytes.toStringBinary(
+                   currentKv.getBuffer(),
+                   currentKv.getOffset(), prefix) + "' diff '" +
+                   Bytes.toStringBinary(currentKv.getBuffer(),
+                       currentKv.getOffset() + prefix, currentKv.getLength() -
+                       prefix) + "'"
+                );
+          }
+          return false;
+        }
+      }
+    }
+
+    LOG.info("Verification was successful!");
+
+    return true;
+  }
+
+  /**
+   * Benchmark the speed of all codecs.
+   */
+  public void benchmarkCodecs() {
+    int prevTotalSize = -1;
+    for (EncodedDataBlock codec : codecs) {
+      prevTotalSize = benchmarkEncoder(prevTotalSize, codec);
+    }
+
+    byte[] buffer = codecs.get(0).getRawKeyValues();
+
+    benchmarkDefaultCompression(prevTotalSize, buffer);
+  }
+
+  /**
+   * Benchmark compression/decompression throughput.
+   * @param previousTotalSize Total size used for verification. Use -1 if
+   *          unknown.
+   * @param codec Tested encoder.
+   * @return Size of uncompressed data.
+   */
+  private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) {
+    int prevTotalSize = previousTotalSize;
+    int totalSize = 0;
+
+    // decompression time
+    List<Long> durations = new ArrayList<Long>();
+    for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) {
+      totalSize = 0;
+
+      Iterator<KeyValue> it;
+
+      it = codec.getIterator();
+
+      // count only the algorithm time, without memory allocations
+      // (except the first time)
+      final long startTime = System.nanoTime();
+      while (it.hasNext()) {
+        totalSize += it.next().getLength();
+      }
+      final long finishTime = System.nanoTime();
+      if (itTime >= BENCHMARK_N_OMIT) {
+        durations.add(finishTime - startTime);
+      }
+
+      if (prevTotalSize != -1 && prevTotalSize != totalSize) {
+        throw new IllegalStateException(String.format(
+            "Algorithm '%s' decoded data to different size", codec.toString()));
+      }
+      prevTotalSize = totalSize;
+    }
+
+    // compression time
+    List<Long> compressDurations = new ArrayList<Long>();
+    for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) {
+      final long startTime = System.nanoTime();
+      codec.doCompressData();
+      final long finishTime = System.nanoTime();
+      if (itTime >= BENCHMARK_N_OMIT) {
+        compressDurations.add(finishTime - startTime);
+      }
+    }
+
+    System.out.println(codec.toString() + ":");
+    printBenchmarkResult(totalSize, compressDurations, false);
+    printBenchmarkResult(totalSize, durations, true);
+
+    return prevTotalSize;
+  }
+
+  private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer) {
+    benchmarkAlgorithm(compressionAlgorithm, compressor, decompressor,
+        compressionAlgorithmName.toUpperCase(), rawBuffer, 0, totalSize);
+  }
+
+  /**
+   * Check compression and decompression performance of a given algorithm.
+   * @param algorithm Compression algorithm.
+   * @param compressorCodec Compressor to be tested.
+   * @param decompressorCodec Decompressor of the same algorithm.
+   * @param name Name of algorithm.
+   * @param buffer Buffer to be compressed.
+   * @param offset Position of the beginning of the data.
+   * @param length Length of data in buffer.
+   */
+  public static void benchmarkAlgorithm(
+      Compression.Algorithm algorithm,
+      Compressor compressorCodec,
+      Decompressor decompressorCodec,
+      String name,
+      byte[] buffer, int offset, int length) {
+    System.out.println(name + ":");
+
+    // compress it
+    List<Long> compressDurations = new ArrayList<Long>();
+    ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
+    OutputStream compressingStream;
+    try {
+      for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) {
+        final long startTime = System.nanoTime();
+        compressingStream = algorithm.createCompressionStream(
+            compressedStream, compressorCodec, 0);
+        compressingStream.write(buffer, offset, length);
+        compressingStream.flush();
+        compressedStream.toByteArray();
+
+        final long finishTime = System.nanoTime();
+
+        // add time record
+        if (itTime >= BENCHMARK_N_OMIT) {
+          compressDurations.add(finishTime - startTime);
+        }
+
+        if (itTime + 1 < BENCHMARK_N_TIMES) { // not the last one
+          compressedStream.reset();
+        }
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(String.format(
+          "Benchmark, or encoding algorithm '%s' cause some stream problems",
+          name), e);
+    }
+    printBenchmarkResult(length, compressDurations, false);
+
+
+    byte[] compBuffer = compressedStream.toByteArray();
+
+    // uncompress it several times and measure performance
+    List<Long> durations = new ArrayList<Long>();
+    for (int itTime = 0 ; itTime < BENCHMARK_N_TIMES ; ++itTime) {
+      final long startTime = System.nanoTime();
+      byte[] newBuf = new byte[length + 1];
+
+      try {
+
+        ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer,
+            0, compBuffer.length);
+        InputStream decompressedStream = algorithm.createDecompressionStream(
+            downStream, decompressorCodec, 0);
+
+        int destOffset = 0;
+        int nextChunk;
+        while ((nextChunk = decompressedStream.available()) > 0) {
+          destOffset += decompressedStream.read(newBuf, destOffset, nextChunk);
+        }
+        decompressedStream.close();
+
+        // iterate over KeyValue
+        KeyValue kv;
+        for (int pos = 0 ; pos < length ; pos += kv.getLength()) {
+          kv = new KeyValue(newBuf, pos);
+        }
+
+      } catch (IOException e) {
+        throw new RuntimeException(String.format(
+            "Decoding path in '%s' algorithm cause exception ", name), e);
+      }
+
+      final long finishTime = System.nanoTime();
+
+      // check correctness
+      if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) {
+        int prefix = 0;
+        for( ; prefix < buffer.length && prefix < newBuf.length ; ++prefix) {
+          if (buffer[prefix] != newBuf[prefix]) {
+            break;
+          }
+        }
+        throw new RuntimeException(String.format(
+            "Algorithm '%s' is corrupting the data", name));
+      }
+
+      // add time record
+      if (itTime >= BENCHMARK_N_OMIT) {
+        durations.add(finishTime - startTime);
+      }
+    }
+    printBenchmarkResult(length, durations, true);
+  }
+
+  private static void printBenchmarkResult(int totalSize,
+      List<Long> durationsInNanoSed, boolean isDecompression) {
+    long meanTime = 0;
+    for (long time : durationsInNanoSed) {
+      meanTime += time;
+    }
+    meanTime /= durationsInNanoSed.size();
+
+    long standardDev = 0;
+    for (long time : durationsInNanoSed) {
+      standardDev += (time - meanTime) * (time - meanTime);
+    }
+    standardDev = (long) Math.sqrt(standardDev / durationsInNanoSed.size());
+
+    final double million = 1000.0 * 1000.0 * 1000.0;
+    double mbPerSec = (totalSize * million) / (1024.0 * 1024.0 * meanTime);
+    double mbPerSecDev = (totalSize * million) /
+        (1024.0 * 1024.0 * (meanTime - standardDev));
+
+    System.out.println(String.format(
+        "  %s performance:%s %6.2f MB/s (+/- %.2f MB/s)",
+        isDecompression ? "Decompression" : "Compression",
+        isDecompression ? "" : "  ",
+        mbPerSec, mbPerSecDev - mbPerSec));
+  }
+
+  /**
+   * Display statistics of different compression algorithms.
+   */
+  public void displayStatistics() {
+    int totalLength = totalPrefixLength + totalKeyLength + totalValueLength;
+    compressor.reset();
+
+    for(EncodedDataBlock codec : codecs) {
+      System.out.println(codec.toString());
+      int saved = totalKeyLength + totalPrefixLength + totalValueLength
+          - codec.getSize();
+      System.out.println(
+          String.format("  Saved bytes:            %8d", saved));
+      double keyRatio = (saved * 100.0) / (totalPrefixLength + totalKeyLength);
+      double allRatio = (saved * 100.0) / totalLength;
+      System.out.println(
+          String.format("  Key compression ratio:        %.2f %%", keyRatio));
+      System.out.println(
+          String.format("  All compression ratio:        %.2f %%", allRatio));
+      int compressedSize = codec.checkCompressedSize(compressor);
+      System.out.println(
+          String.format("  %s compressed size:    %8d",
+              compressionAlgorithmName.toUpperCase(), compressedSize));
+      double lzoRatio = 100.0 * (1.0 - compressedSize / (0.0 + totalLength));
+      System.out.println(
+          String.format("  %s compression ratio:        %.2f %%",
+              compressionAlgorithmName.toUpperCase(), lzoRatio));
+    }
+
+    System.out.println(
+        String.format("Total KV prefix length:   %8d", totalPrefixLength));
+    System.out.println(
+        String.format("Total key length:         %8d", totalKeyLength));
+    System.out.println(
+        String.format("Total key redundancy:     %8d",
+            totalKeyRedundancyLength));
+    System.out.println(
+        String.format("Total value length:       %8d", totalValueLength));
+  }
+
+  /**
+   * Test a data block encoder on the given HFile. Output results to console.
+   * @param kvLimit Maximum number of KeyValues which will be analyzed.
+   * @param hfilePath an HFile path on the file system.
+   * @param compressionName Compression algorithm used for comparison.
+   * @param doBenchmark Run performance benchmarks.
+   * @param doVerify Verify correctness.
+   * @throws IOException When hfilePath is incorrect.
+   */
+  public static void testCodecs(int kvLimit, String hfilePath,
+      String compressionName, boolean doBenchmark, boolean doVerify)
+          throws IOException {
+    // create environment
+    Path path = new Path(hfilePath);
+    Configuration conf = HBaseConfiguration.create();
+    CacheConfig cacheConf = new CacheConfig(conf);
+    FileSystem fs = FileSystem.get(conf);
+    StoreFile hsf = new StoreFile(fs, path, conf, cacheConf,
+        StoreFile.BloomType.NONE, new NoOpDataBlockEncoder());
+
+    StoreFile.Reader reader = hsf.createReader();
+    reader.loadFileInfo();
+    KeyValueScanner scanner = reader.getStoreFileScanner(true, true);
+
+    // run the utilities
+    DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
+    comp.checkStatistics(scanner, kvLimit);
+    if (doVerify) {
+      comp.verifyCodecs(scanner, kvLimit);
+    }
+    if (doBenchmark) {
+      comp.benchmarkCodecs();
+    }
+    comp.displayStatistics();
+
+    // cleanup
+    scanner.close();
+    reader.close(cacheConf.shouldEvictOnClose());
+  }
+
+  private static void printUsage(Options options) {
+    System.err.println("Usage:");
+    System.err.println(String.format("./hbase %s <options>",
+        DataBlockEncodingTool.class.getName()));
+    System.err.println("Options:");
+    for (Object it : options.getOptions()) {
+      Option opt = (Option) it;
+      if (opt.hasArg()) {
+        System.err.println(String.format("-%s %s: %s", opt.getOpt(),
+            opt.getArgName(), opt.getDescription()));
+      } else {
+        System.err.println(String.format("-%s: %s", opt.getOpt(),
+            opt.getDescription()));
+      }
+    }
+  }
+
+  /**
+   * A command line interface to the benchmarks.
+   * @param args Should have length at least 1 and hold the path to an HFile.
+   * @throws IOException If the specified file cannot be read.
+   */
+  public static void main(final String[] args) throws IOException {
+    // set up user arguments
+    Options options = new Options();
+    options.addOption("f", true, "HFile to analyse (REQUIRED)");
+    options.getOption("f").setArgName("FILENAME");
+    options.addOption("n", true,
+        "Limit number of KeyValue which will be analysed");
+    options.getOption("n").setArgName("NUMBER");
+    options.addOption("b", false, "Measure read throughput");
+    options.addOption("c", false, "Omit corectness tests.");
+    options.addOption("a", true,
+        "What kind of compression algorithm use for comparison.");
+
+    // parse arguments
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd = null;
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      System.err.println("Could not parse arguments!");
+      System.exit(-1);
+      return; // avoid warning
+    }
+
+    int kvLimit = Integer.MAX_VALUE;
+    if (cmd.hasOption("n")) {
+      kvLimit = Integer.parseInt(cmd.getOptionValue("n"));
+    }
+
+    // basic argument sanity checks
+    if (!cmd.hasOption("f")) {
+      System.err.println("ERROR: Filename is required!");
+      printUsage(options);
+      System.exit(-1);
+    }
+
+    if (!(new File(cmd.getOptionValue("f"))).exists()) {
+      System.err.println(String.format("ERROR: file '%s' doesn't exist!",
+          cmd.getOptionValue("f")));
+      printUsage(options);
+      System.exit(-1);
+    }
+
+    String pathName = cmd.getOptionValue("f");
+    String compressionName = "gz";
+    if (cmd.hasOption("a")) {
+      compressionName = cmd.getOptionValue("a");
+    }
+    boolean doBenchmark = cmd.hasOption("b");
+    boolean doVerify = !cmd.hasOption("c");
+
+    testCodecs(kvLimit, pathName, compressionName, doBenchmark, doVerify);
+  }
+
+}
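
The throughput numbers printed by printBenchmarkResult() above come from simple arithmetic over the collected System.nanoTime() durations: the mean (and standard deviation) in nanoseconds is converted to MB/s. A minimal standalone sketch of that conversion, mirroring the tool's formula (the class and method names below are illustrative and not part of this commit):

    import java.util.List;

    public final class ThroughputMath {
      private ThroughputMath() {}

      /**
       * Mean throughput in MB/s for processing totalSize bytes,
       * given per-run durations measured in nanoseconds.
       */
      public static double meanMbPerSec(int totalSize,
          List<Long> durationsInNanoSec) {
        long meanTime = 0;
        for (long time : durationsInNanoSec) {
          meanTime += time;
        }
        meanTime /= durationsInNanoSec.size();

        final double nanosInSec = 1000.0 * 1000.0 * 1000.0;
        // bytes per nanosecond, scaled to seconds and divided down to megabytes
        return (totalSize * nanosInSec) / (1024.0 * 1024.0 * meanTime);
      }
    }

The "+/- x MB/s" part of the tool's output is obtained the same way, with the mean minus one standard deviation in the denominator.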

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java?rev=1223020&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java Sat Dec 24 21:20:39 2011
@@ -0,0 +1,180 @@
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings.Algorithm;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
+import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
+import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
+
+/**
+ * Test seek performance for encoded data blocks. Read an HFile and do several
+ * random seeks.
+ */
+public class EncodedSeekPerformanceTest {
+  private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0;
+  private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0;
+  /** Default number of seeks which will be used in the benchmark. */
+  public static int DEFAULT_NUMBER_OF_SEEKS = 10000;
+
+  private final HBaseTestingUtility testingUtility = new HBaseTestingUtility();
+  private Configuration configuration = testingUtility.getConfiguration();
+  private CacheConfig cacheConf = new CacheConfig(configuration);
+  private Random randomizer;
+  private int numberOfSeeks;
+
+  /** Use this benchmark with default options */
+  public EncodedSeekPerformanceTest() {
+    configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f);
+    randomizer = new Random(42L);
+    numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS;
+  }
+
+  private List<KeyValue> prepareListOfTestSeeks(Path path) throws IOException {
+    List<KeyValue> allKeyValues = new ArrayList<KeyValue>();
+
+    // read all of the key values
+    StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(),
+        path, configuration, cacheConf, BloomType.NONE, null);
+
+    StoreFile.Reader reader = storeFile.createReader();
+    StoreFileScanner scanner = reader.getStoreFileScanner(true, false);
+    KeyValue current;
+
+    scanner.seek(KeyValue.LOWESTKEY);
+    while (null != (current = scanner.next())) {
+      allKeyValues.add(current);
+    }
+
+    storeFile.closeReader(cacheConf.shouldEvictOnClose());
+
+    // pick seeks by random
+    List<KeyValue> seeks = new ArrayList<KeyValue>();
+    for (int i = 0 ; i < numberOfSeeks ; ++i) {
+      KeyValue keyValue = allKeyValues.get(
+          randomizer.nextInt(allKeyValues.size()));
+      seeks.add(keyValue);
+    }
+
+    clearBlockCache();
+
+    return seeks;
+  }
+
+  private void runTest(Path path, HFileDataBlockEncoder blockEncoder,
+      List<KeyValue> seeks) throws IOException {
+    // read all of the key values
+    StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(),
+        path, configuration, cacheConf, BloomType.NONE, blockEncoder);
+
+    int totalSize = 0;
+
+    StoreFile.Reader reader = storeFile.createReader();
+    StoreFileScanner scanner = reader.getStoreFileScanner(true, false);
+
+    long startReadingTime = System.nanoTime();
+    KeyValue current;
+    scanner.seek(KeyValue.LOWESTKEY);
+    while (null != (current = scanner.next())) { // just iterate it!
+      totalSize += current.getLength();
+    }
+    long finishReadingTime = System.nanoTime();
+
+    // do seeks
+    long startSeeksTime = System.nanoTime();
+    for (KeyValue keyValue : seeks) {
+      scanner.seek(keyValue);
+      KeyValue toVerify = scanner.next();
+      if (!keyValue.equals(toVerify)) {
+        System.out.println(String.format("KeyValue doesn't match:\n" +
+            "Orig key: %s\n" +
+            "Ret key:  %s", keyValue.getKeyString(), toVerify.getKeyString()));
+        break;
+      }
+    }
+    long finishSeeksTime = System.nanoTime();
+
+    // write some stats
+    double readInMbPerSec = (totalSize * NANOSEC_IN_SEC) /
+        (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime));
+    double seeksPerSec = (seeks.size() * NANOSEC_IN_SEC) /
+        (finishSeeksTime - startSeeksTime);
+
+    storeFile.closeReader(cacheConf.shouldEvictOnClose());
+    clearBlockCache();
+
+    System.out.println(blockEncoder);
+    System.out.println(String.format("  Read speed:       %8.2f (MB/s)",
+        readInMbPerSec));
+    System.out.println(String.format("  Seeks per second: %8.2f (#/s)",
+        seeksPerSec));
+  }
+
+  /**
+   * @param path Path to the HFile which will be used.
+   * @param encoders List of encoders which will be used for tests.
+   * @throws IOException if an error occurs while reading from disk
+   */
+  public void runTests(Path path, List<HFileDataBlockEncoder> encoders)
+      throws IOException {
+    List<KeyValue> seeks = prepareListOfTestSeeks(path);
+
+    for (HFileDataBlockEncoder blockEncoder : encoders) {
+      runTest(path, blockEncoder, seeks);
+    }
+  }
+
+  /**
+   * Command line interface:
+   * @param args Takes one argument: the path of the HFile to read.
+   * @throws IOException if an error occurs while reading from disk
+   */
+  public static void main(final String[] args) throws IOException {
+    if (args.length < 1) {
+      printUsage();
+      System.exit(-1);
+    }
+
+    //System.setProperty("org.apache.commons.logging.Log",
+    //    "org.apache.commons.logging.impl.NoOpLog");
+
+    Path path = new Path(args[0]);
+    List<HFileDataBlockEncoder> encoders =
+        new ArrayList<HFileDataBlockEncoder>();
+
+    encoders.add(new HFileDataBlockEncoderImpl(Algorithm.NONE, Algorithm.NONE,
+        false));
+    for (Algorithm encodingAlgo : Algorithm.values()) {
+      encoders.add(new HFileDataBlockEncoderImpl(Algorithm.NONE, encodingAlgo,
+          false));
+    }
+
+    for (Algorithm encodingAlgo : Algorithm.values()) {
+      encoders.add(new HFileDataBlockEncoderImpl(Algorithm.NONE, encodingAlgo,
+          true));
+    }
+
+    EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest();
+    utility.runTests(path, encoders);
+
+    System.exit(0);
+  }
+
+  private static void printUsage() {
+    System.out.println("Usage: one argument, name of the HFile");
+  }
+
+  private void clearBlockCache() {
+    ((LruBlockCache) cacheConf.getBlockCache()).clearCache();
+  }
+}
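
EncodedSeekPerformanceTest reports two rates, both derived from System.nanoTime() deltas in runTest(): sequential read throughput in MB/s and random seeks per second. A hedged sketch of those two conversions (the helper class below is illustrative and not part of this commit):

    public final class SeekRates {
      private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0;
      private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0;

      private SeekRates() {}

      /** Sequential read throughput in MB/s. */
      public static double readMbPerSec(long totalBytes, long startNanos,
          long finishNanos) {
        return (totalBytes * NANOSEC_IN_SEC) /
            (BYTES_IN_MEGABYTES * (finishNanos - startNanos));
      }

      /** Random seek rate in seeks per second. */
      public static double seeksPerSec(int numberOfSeeks, long startNanos,
          long finishNanos) {
        return (numberOfSeeks * NANOSEC_IN_SEC) / (finishNanos - startNanos);
      }
    }

With DEFAULT_NUMBER_OF_SEEKS = 10000 seeks taking, say, two seconds, seeksPerSec() would report 5000 seeks/s.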

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java Sat Dec 24 21:20:39 2011
@@ -102,7 +102,8 @@ public class TestCompactSelection extend
     MockStoreFile(long length, boolean isRef) throws IOException {
       super(TEST_UTIL.getTestFileSystem(), TEST_FILE,
             TEST_UTIL.getConfiguration(),
-            new CacheConfig(TEST_UTIL.getConfiguration()), BloomType.NONE);
+            new CacheConfig(TEST_UTIL.getConfiguration()), BloomType.NONE,
+            null);
       this.length = length;
       this.isRef  = isRef;
     }

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java Sat Dec 24 21:20:39 2011
@@ -24,8 +24,10 @@ import static org.mockito.Mockito.spy;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
-
+import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -33,20 +35,24 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.SmallTests;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings.Algorithm;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
-import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.junit.experimental.categories.Category;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
@@ -61,6 +67,7 @@ public class TestCompaction extends HBas
   private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
 
   private HRegion r = null;
+  private HTableDescriptor htd = null;
   private Path compactionDir = null;
   private Path regionCompactionDir = null;
   private static final byte [] COLUMN_FAMILY = fam1;
@@ -70,7 +77,6 @@ public class TestCompaction extends HBas
   private byte[] firstRowBytes, secondRowBytes, thirdRowBytes;
   final private byte[] col1, col2;
 
-
   /** constructor */
   public TestCompaction() throws Exception {
     super();
@@ -94,7 +100,7 @@ public class TestCompaction extends HBas
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    HTableDescriptor htd = createTableDescriptor(getName());
+    this.htd = createTableDescriptor(getName());
     this.r = createNewHRegion(htd, null, null);
   }
 
@@ -146,6 +152,48 @@ public class TestCompaction extends HBas
    * @throws Exception
    */
   public void testMajorCompaction() throws Exception {
+    majorCompaction();
+  }
+  
+  /**
+   * Test major compaction with data block encoding in the block cache only.
+   * @throws Exception
+   */
+  public void testDataBlockEncodingWithNormalSeek() throws Exception {
+    // block cache only
+    majorCompactionWithDataBlockEncoding(false);
+  }
+
+  /**
+   * Test major compaction with data block encoding and encoded seek.
+   * @throws Exception
+   */
+  public void testDataBlockEncodingWithEncodedSeek() throws Exception {
+    majorCompactionWithDataBlockEncoding(true);
+  }
+
+  private void majorCompactionWithDataBlockEncoding(boolean encodedSeek)
+      throws Exception {
+    Map<Store, HFileDataBlockEncoder> replaceBlockCache =
+        new HashMap<Store, HFileDataBlockEncoder>();
+    for (Entry<byte[], Store> pair : r.getStores().entrySet()) {
+      Store store = pair.getValue();
+      HFileDataBlockEncoder blockEncoder = store.getDataBlockEncoder();
+      replaceBlockCache.put(pair.getValue(), blockEncoder);
+      store.setDataBlockEncoderInTest(new HFileDataBlockEncoderImpl(null,
+          Algorithm.PREFIX, encodedSeek));
+    }
+    
+    majorCompaction();
+    
+    // restore settings
+    for (Entry<Store, HFileDataBlockEncoder> entry :
+        replaceBlockCache.entrySet()) {
+      entry.getKey().setDataBlockEncoderInTest(entry.getValue());
+    }
+  }
+
+  private void majorCompaction() throws Exception {
     createStoreFile(r);
     for (int i = 0; i < compactionThreshold; i++) {
       createStoreFile(r);
@@ -174,10 +222,10 @@ public class TestCompaction extends HBas
       CompactionProgress progress = store.getCompactionProgress();
       if( progress != null ) {
         ++storeCount;
-        assert(progress.currentCompactedKVs > 0);
-        assert(progress.totalCompactingKVs > 0);
+        assertTrue(progress.currentCompactedKVs > 0);
+        assertTrue(progress.totalCompactingKVs > 0);
       }
-      assert(storeCount > 0);
+      assertTrue(storeCount > 0);
     }
 
     // look at the second row
@@ -186,14 +234,20 @@ public class TestCompaction extends HBas
     secondRowBytes[START_KEY_BYTES.length - 1]++;
 
     // Always 3 versions if that is what max versions is.
-    result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null);
-    assertEquals(compactionThreshold, result.size());
+    result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).
+        setMaxVersions(100), null);
+    LOG.debug("Row " + Bytes.toStringBinary(secondRowBytes) + " after " +
+        "initial compaction: " + result);
+    assertEquals("Invalid number of versions of row "
+        + Bytes.toStringBinary(secondRowBytes) + ".", compactionThreshold,
+        result.size());
 
     // Now add deletes to memstore and then flush it.
     // That will put us over
     // the compaction threshold of 3 store files.  Compacting these store files
     // should result in a compacted store file that has no references to the
     // deleted row.
+    LOG.debug("Adding deletes to memstore and flushing");
     Delete delete = new Delete(secondRowBytes, System.currentTimeMillis(), null);
     byte [][] famAndQf = {COLUMN_FAMILY, null};
     delete.deleteFamily(famAndQf[0]);

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java Sat Dec 24 21:20:39 2011
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2;
 import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
@@ -194,7 +195,8 @@ public class TestCompoundBloomFilter {
 
   private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
       Path sfPath) throws IOException {
-    StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
+    StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
+        new NoOpDataBlockEncoder());
     StoreFile.Reader r = sf.createReader();
     final boolean pread = true; // does not really matter
     StoreFileScanner scanner = r.getStoreFileScanner(true, pread);
@@ -294,7 +296,7 @@ public class TestCompoundBloomFilter {
     cacheConf = new CacheConfig(conf);
 
     StoreFile.Writer w = StoreFile.createWriter(fs,
-        TEST_UTIL.getDataTestDir(), BLOCK_SIZES[t], null, null, conf,
+        TEST_UTIL.getDataTestDir(), BLOCK_SIZES[t], null, null, null, conf,
         cacheConf, bt, 0);
 
     assertTrue(w.hasGeneralBloom());

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java Sat Dec 24 21:20:39 2011
@@ -76,7 +76,8 @@ public class TestFSErrorsExposed {
         writer, Bytes.toBytes("cf"), Bytes.toBytes("qual"));
 
     StoreFile sf = new StoreFile(fs, writer.getPath(),
-        util.getConfiguration(), cacheConf, StoreFile.BloomType.NONE);
+        util.getConfiguration(), cacheConf, StoreFile.BloomType.NONE, null);
+
     StoreFile.Reader reader = sf.createReader();
     HFileScanner scanner = reader.getScanner(false, true);
 
@@ -119,7 +120,8 @@ public class TestFSErrorsExposed {
         writer, Bytes.toBytes("cf"), Bytes.toBytes("qual"));
 
     StoreFile sf = new StoreFile(fs, writer.getPath(), util.getConfiguration(),
-        cacheConf, BloomType.NONE);
+        cacheConf, BloomType.NONE, null);
+
     List<StoreFileScanner> scanners = StoreFileScanner.getScannersForStoreFiles(
         Collections.singletonList(sf), false, true, false);
     KeyValueScanner scanner = scanners.get(0);

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java Sat Dec 24 21:20:39 2011
@@ -27,7 +27,6 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
-import java.util.TreeMap;
 import java.util.TreeSet;
 
 import org.apache.commons.logging.Log;
@@ -35,18 +34,23 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.SmallTests;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.Reference.Range;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.CacheStats;
 import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
 import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
 import org.apache.hadoop.hbase.util.BloomFilterFactory;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.junit.experimental.categories.Category;
 import org.mockito.Mockito;
 
@@ -89,7 +93,7 @@ public class TestStoreFile extends HBase
       conf, cacheConf);
     writeStoreFile(writer);
     checkHalfHFile(new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
-        StoreFile.BloomType.NONE));
+        StoreFile.BloomType.NONE, null));
   }
 
   private void writeStoreFile(final StoreFile.Writer writer) throws IOException {
@@ -130,7 +134,7 @@ public class TestStoreFile extends HBase
         conf, cacheConf);
     writeStoreFile(writer);
     StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     StoreFile.Reader reader = hsf.createReader();
     // Split on a row, not in middle of row.  Midkey returned by reader
     // may be in middle of row.  Create new one with empty column and
@@ -142,7 +146,7 @@ public class TestStoreFile extends HBase
     // Make a reference
     Path refPath = StoreFile.split(fs, dir, hsf, midRow, Range.top);
     StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     // Now confirm that I can read from the reference and that it only gets
     // keys from top half of the file.
     HFileScanner s = refHsf.createReader().getScanner(false, false);
@@ -178,10 +182,10 @@ public class TestStoreFile extends HBase
     Path bottomPath = StoreFile.split(this.fs, bottomDir,
       f, midRow, Range.bottom);
     // Make readers on top and bottom.
-    StoreFile.Reader top = new StoreFile(this.fs, topPath, conf, cacheConf,
-        StoreFile.BloomType.NONE).createReader();
-    StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf,
-        StoreFile.BloomType.NONE).createReader();
+    StoreFile.Reader top = new StoreFile(this.fs, topPath,
+        conf, cacheConf).createReader();
+    StoreFile.Reader bottom = new StoreFile(this.fs, bottomPath,
+        conf, cacheConf).createReader();
     ByteBuffer previous = null;
     LOG.info("Midkey: " + midKV.toString());
     ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey);
@@ -237,9 +241,9 @@ public class TestStoreFile extends HBase
       bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey,
         Range.bottom);
       top = new StoreFile(this.fs, topPath, conf, cacheConf,
-          StoreFile.BloomType.NONE).createReader();
+          StoreFile.BloomType.NONE, null).createReader();
       bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf,
-          StoreFile.BloomType.NONE).createReader();
+          StoreFile.BloomType.NONE, null).createReader();
       bottomScanner = bottom.getScanner(false, false);
       int count = 0;
       while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
@@ -282,9 +286,9 @@ public class TestStoreFile extends HBase
       bottomPath = StoreFile.split(this.fs, bottomDir, f, badmidkey,
         Range.bottom);
       top = new StoreFile(this.fs, topPath, conf, cacheConf,
-          StoreFile.BloomType.NONE).createReader();
+          StoreFile.BloomType.NONE, null).createReader();
       bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf,
-          StoreFile.BloomType.NONE).createReader();
+          StoreFile.BloomType.NONE, null).createReader();
       first = true;
       bottomScanner = bottom.getScanner(false, false);
       while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
@@ -340,7 +344,7 @@ public class TestStoreFile extends HBase
     }
     writer.close();
 
-    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf);
+    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, null);
     reader.loadFileInfo();
     reader.loadBloomfilter();
     StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
@@ -379,10 +383,10 @@ public class TestStoreFile extends HBase
 
     // write the file
     Path f = new Path(ROOT_DIR, getName());
-    StoreFile.Writer writer = new StoreFile.Writer(fs, f,
-        StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
-        conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
-
+    StoreFile.Writer writer =
+        new StoreFile.Writer(fs, f, StoreFile.DEFAULT_BLOCKSIZE_SMALL,
+            HFile.DEFAULT_COMPRESSION_ALGORITHM, null, conf, cacheConf,
+            KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
     bloomWriteRead(writer, fs);
   }
 
@@ -399,7 +403,8 @@ public class TestStoreFile extends HBase
 
     StoreFile.Writer writer = new StoreFile.Writer(fs, f,
         StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
-        conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE, 2000);
+        null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE,
+        2000);
 
     // add delete family
     long now = System.currentTimeMillis();
@@ -411,7 +416,7 @@ public class TestStoreFile extends HBase
     }
     writer.close();
 
-    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf);
+    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, null);
     reader.loadFileInfo();
     reader.loadBloomfilter();
 
@@ -466,7 +471,7 @@ public class TestStoreFile extends HBase
       StoreFile.Writer writer = new StoreFile.Writer(fs, f,
           StoreFile.DEFAULT_BLOCKSIZE_SMALL,
           HFile.DEFAULT_COMPRESSION_ALGORITHM,
-          conf, cacheConf, KeyValue.COMPARATOR, bt[x], expKeys[x]);
+          null, conf, cacheConf, KeyValue.COMPARATOR, bt[x], expKeys[x]);
 
       long now = System.currentTimeMillis();
       for (int i = 0; i < rowCount*2; i += 2) { // rows
@@ -483,7 +488,7 @@ public class TestStoreFile extends HBase
       }
       writer.close();
 
-      StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf);
+      StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, null);
       reader.loadFileInfo();
       reader.loadBloomfilter();
       StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
@@ -536,7 +541,8 @@ public class TestStoreFile extends HBase
     // this should not create a bloom because the max keys is too small
     StoreFile.Writer writer = new StoreFile.Writer(fs, f,
         StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
-        conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW, 2000);
+        null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW,
+        2000);
     assertFalse(writer.hasGeneralBloom());
     writer.close();
     fs.delete(f, true);
@@ -559,7 +565,7 @@ public class TestStoreFile extends HBase
     // because Java can't create a contiguous array > MAX_INT
     writer = new StoreFile.Writer(fs, f,
         StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
-        conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW,
+        null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.ROW,
         Integer.MAX_VALUE);
     assertFalse(writer.hasGeneralBloom());
     writer.close();
@@ -664,7 +670,7 @@ public class TestStoreFile extends HBase
     writer.close();
 
     StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     StoreFile.Reader reader = hsf.createReader();
     StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
     TreeSet<byte[]> columns = new TreeSet<byte[]>();
@@ -708,7 +714,7 @@ public class TestStoreFile extends HBase
     Path pathCowOff = new Path(baseDir, "123456789");
     StoreFile.Writer writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
     StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     LOG.debug(hsf.getPath().toString());
 
     // Read this file, we should see 3 misses
@@ -730,7 +736,7 @@ public class TestStoreFile extends HBase
     Path pathCowOn = new Path(baseDir, "123456788");
     writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
     hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
 
     // Read this file, we should see 3 hits
     reader = hsf.createReader();
@@ -746,13 +752,13 @@ public class TestStoreFile extends HBase
 
     // Let's read back the two files to ensure the blocks exactly match
     hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     StoreFile.Reader readerOne = hsf.createReader();
     readerOne.loadFileInfo();
     StoreFileScanner scannerOne = readerOne.getStoreFileScanner(true, true);
     scannerOne.seek(KeyValue.LOWESTKEY);
     hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     StoreFile.Reader readerTwo = hsf.createReader();
     readerTwo.loadFileInfo();
     StoreFileScanner scannerTwo = readerTwo.getStoreFileScanner(true, true);
@@ -783,7 +789,7 @@ public class TestStoreFile extends HBase
     conf.setBoolean("hbase.rs.evictblocksonclose", true);
     cacheConf = new CacheConfig(conf);
     hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     reader = hsf.createReader();
     reader.close(cacheConf.shouldEvictOnClose());
 
@@ -797,7 +803,7 @@ public class TestStoreFile extends HBase
     conf.setBoolean("hbase.rs.evictblocksonclose", false);
     cacheConf = new CacheConfig(conf);
     hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
-        StoreFile.BloomType.NONE);
+        StoreFile.BloomType.NONE, null);
     reader = hsf.createReader();
     reader.close(cacheConf.shouldEvictOnClose());
 
@@ -824,7 +830,8 @@ public class TestStoreFile extends HBase
     int blockSize = totalSize / numBlocks;
     StoreFile.Writer writer = new StoreFile.Writer(fs, path, blockSize,
         HFile.DEFAULT_COMPRESSION_ALGORITHM,
-        conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE, 2000);
+        null, conf, cacheConf, KeyValue.COMPARATOR, StoreFile.BloomType.NONE,
+        2000);
     // We'll write N-1 KVs to ensure we don't write an extra block
     kvs.remove(kvs.size()-1);
     for (KeyValue kv : kvs) {
@@ -835,6 +842,43 @@ public class TestStoreFile extends HBase
     return writer;
   }
 
+  /**
+   * Check if data block encoding information is saved correctly in HFile's
+   * file info.
+   */
+  public void testDataBlockEncodingMetaData() throws IOException {
+    Path dir = new Path(new Path(this.testDir, "regionname"), "familyname");
+    Path path = new Path(dir, "1234567890");
+
+    DataBlockEncodings.Algorithm dataBlockEncoderAlgo =
+        DataBlockEncodings.Algorithm.FAST_DIFF;
+    HFileDataBlockEncoder dataBlockEncoder =
+        new HFileDataBlockEncoderImpl(
+            dataBlockEncoderAlgo,
+            DataBlockEncodings.Algorithm.NONE,
+            false);
+    cacheConf = new CacheConfig(conf);
+    StoreFile.Writer writer = new StoreFile.Writer(fs,
+        path, HFile.DEFAULT_BLOCKSIZE,
+        HFile.DEFAULT_COMPRESSION_ALGORITHM,
+        dataBlockEncoder,
+        conf,
+        cacheConf,
+        KeyValue.COMPARATOR,
+        StoreFile.BloomType.NONE,
+        2000);
+    writer.close();
+    
+    StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf,
+        cacheConf, BloomType.NONE, dataBlockEncoder);
+    StoreFile.Reader reader = storeFile.createReader();
+    
+    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
+    byte[] value = fileInfo.get(StoreFile.DATA_BLOCK_ENCODING);
+
+    assertTrue(Bytes.equals(dataBlockEncoderAlgo.getNameInBytes(), value));
+  }
+
   @org.junit.Rule
   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();

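For reference, the sketch below strings together the writer and reader calls that testDataBlockEncodingMetaData exercises, using only the signatures visible in the diff above. The class name, file path and configuration are hypothetical; this is an illustration of how a caller opts into data block encoding (elsewhere in this patch the existing call sites simply pass null for the encoder to keep the old behaviour), not part of the patch itself.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;

public class StoreFileEncodingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    Path path = new Path("/tmp/storefile-encoding-sketch"); // hypothetical location

    // Mirrors the three-argument constructor used in testDataBlockEncodingMetaData
    // above; the first algorithm is the one recorded under DATA_BLOCK_ENCODING.
    HFileDataBlockEncoder encoder = new HFileDataBlockEncoderImpl(
        DataBlockEncodings.Algorithm.FAST_DIFF,
        DataBlockEncodings.Algorithm.NONE,
        false);

    StoreFile.Writer writer = new StoreFile.Writer(fs, path,
        StoreFile.DEFAULT_BLOCKSIZE_SMALL, HFile.DEFAULT_COMPRESSION_ALGORITHM,
        encoder, conf, cacheConf, KeyValue.COMPARATOR,
        StoreFile.BloomType.NONE, 2000);
    writer.close();

    StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf, cacheConf,
        StoreFile.BloomType.NONE, encoder);
    StoreFile.Reader reader = storeFile.createReader();
    byte[] encodingName = reader.loadFileInfo().get(StoreFile.DATA_BLOCK_ENCODING);
    System.out.println("Stored data block encoding: " + Bytes.toString(encodingName));
  }
}
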
Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java?rev=1223020&r1=1223019&r2=1223020&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java Sat Dec 24 21:20:39 2011
@@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.HConstant
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.PerformanceEvaluation;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings;
 import org.apache.hadoop.hbase.io.hfile.Compression;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 
@@ -72,6 +73,12 @@ public class LoadTestTool extends Abstra
 
   private static final String OPT_BLOOM = "bloom";
   private static final String OPT_COMPRESSION = "compression";
+  private static final String OPT_DATA_BLOCK_ENCODING = "data_block_encoding";
+  private static final String OPT_DATA_BLOCK_ENCODING_CACHE_ONLY =
+      "data_block_encoding_cache_only";
+  private static final String OPT_ENCODED_DATA_BLOCK_SEEK =
+      "encoded_data_block_seek";
+
   private static final String OPT_KEY_WINDOW = "key_window";
   private static final String OPT_WRITE = "write";
   private static final String OPT_MAX_READ_ERRORS = "max_read_errors";
@@ -82,6 +89,8 @@ public class LoadTestTool extends Abstra
   private static final String OPT_TABLE_NAME = "tn";
   private static final String OPT_ZK_QUORUM = "zk";
 
+  private static final long DEFAULT_START_KEY = 0;
+
   /** This will be removed as we factor out the dependency on command line */
   private CommandLine cmd;
 
@@ -108,7 +117,7 @@ public class LoadTestTool extends Abstra
   public void createTables() throws IOException {
     HBaseTestingUtility.createPreSplitLoadTestTable(conf, tableName,
         COLUMN_FAMILY);
-    applyBloomFilterAndCompression(tableName, COLUMN_FAMILIES);
+    applyColumnFamilyOptions(tableName, COLUMN_FAMILIES);
   }
 
   private String[] splitColonSeparated(String option,
@@ -129,9 +138,10 @@ public class LoadTestTool extends Abstra
   }
 
   /**
-   * Apply the given Bloom filter type to all column families we care about.
+   * Apply column family options such as Bloom filters, compression, and data
+   * block encoding.
    */
-  private void applyBloomFilterAndCompression(byte[] tableName,
+  private void applyColumnFamilyOptions(byte[] tableName,
       byte[][] columnFamilies) throws IOException {
     String bloomStr = cmd.getOptionValue(OPT_BLOOM);
     StoreFile.BloomType bloomType = bloomStr == null ? null :
@@ -141,8 +151,16 @@ public class LoadTestTool extends Abstra
     Compression.Algorithm compressAlgo = compressStr == null ? null :
         Compression.Algorithm.valueOf(compressStr);
 
-    if (bloomStr == null && compressStr == null)
+    String dataBlockEncodingStr = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
+    DataBlockEncodings.Algorithm dataBlockEncodingAlgo =
+        dataBlockEncodingStr == null ? null :
+        DataBlockEncodings.Algorithm.valueOf(dataBlockEncodingStr);
+
+    if (bloomStr == null && compressStr == null
+        && dataBlockEncodingStr == null) {
+      // No reason to disable/enable the table.
       return;
+    }
 
     HBaseAdmin admin = new HBaseAdmin(conf);
     HTableDescriptor tableDesc = admin.getTableDescriptor(tableName);
@@ -150,10 +168,21 @@ public class LoadTestTool extends Abstra
     admin.disableTable(tableName);
     for (byte[] cf : columnFamilies) {
       HColumnDescriptor columnDesc = tableDesc.getFamily(cf);
-      if (bloomStr != null)
+      if (bloomStr != null) {
         columnDesc.setBloomFilterType(bloomType);
-      if (compressStr != null)
+      }
+      if (compressStr != null) {
         columnDesc.setCompressionType(compressAlgo);
+      }
+      if (dataBlockEncodingAlgo != null) {
+        columnDesc.setDataBlockEncodingOnDisk(
+            cmd.hasOption(OPT_DATA_BLOCK_ENCODING_CACHE_ONLY) ?
+            DataBlockEncodings.Algorithm.NONE :
+            dataBlockEncodingAlgo);
+        columnDesc.setDataBlockEncodingInCache(dataBlockEncodingAlgo);
+        columnDesc.setEncodedDataBlockSeek(
+            cmd.hasOption(OPT_ENCODED_DATA_BLOCK_SEEK));
+      }
       admin.modifyColumn(tableName, columnDesc);
     }
     LOG.info("Enabling table " + Bytes.toString(tableName));
@@ -169,17 +198,29 @@ public class LoadTestTool extends Abstra
     addOptWithArg(OPT_READ, OPT_USAGE_READ);
     addOptWithArg(OPT_BLOOM, OPT_USAGE_BLOOM);
     addOptWithArg(OPT_COMPRESSION, OPT_USAGE_COMPRESSION);
+    addOptWithArg(OPT_DATA_BLOCK_ENCODING, "Encoding algorithm (e.g. prefix "
+        + "compression) to use for data blocks in the test column family, "
+        + "one of " +
+        Arrays.toString(DataBlockEncodings.Algorithm.values()) + ".");
     addOptWithArg(OPT_MAX_READ_ERRORS, "The maximum number of read errors " +
         "to tolerate before terminating all reader threads. The default is " +
         MultiThreadedReader.DEFAULT_MAX_ERRORS + ".");
     addOptWithArg(OPT_KEY_WINDOW, "The 'key window' to maintain between " +
         "reads and writes for concurrent write/read workload. The default " +
         "is " + MultiThreadedReader.DEFAULT_KEY_WINDOW + ".");
+
     addOptNoArg(OPT_MULTIPUT, "Whether to use multi-puts as opposed to " +
         "separate puts for every column in a row");
+    addOptNoArg(OPT_DATA_BLOCK_ENCODING_CACHE_ONLY, "If using a data block " +
+        "encoding, this flag will only enable encoding in cache but not on " +
+        "disk.");
+    addOptNoArg(OPT_ENCODED_DATA_BLOCK_SEEK, "If using a data block " +
+        "encoding, this will enable doing seek operations on encoded blocks.");
 
     addRequiredOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write");
-    addRequiredOptWithArg(OPT_START_KEY, "The first key to read/write");
+    addOptWithArg(OPT_START_KEY, "The first key to read/write " +
+        "(a 0-based index). The default value is " +
+        DEFAULT_START_KEY + ".");
   }
 
   @Override
@@ -188,8 +229,8 @@ public class LoadTestTool extends Abstra
 
     tableName = Bytes.toBytes(cmd.getOptionValue(OPT_TABLE_NAME,
         DEFAULT_TABLE_NAME));
-    startKey = parseLong(cmd.getOptionValue(OPT_START_KEY), 0,
-        Long.MAX_VALUE);
+    startKey = parseLong(cmd.getOptionValue(OPT_START_KEY,
+        String.valueOf(DEFAULT_START_KEY)), 0, Long.MAX_VALUE);
     long numKeys = parseLong(cmd.getOptionValue(OPT_NUM_KEYS), 1,
         Long.MAX_VALUE - startKey);
     endKey = startKey + numKeys;
@@ -248,7 +289,7 @@ public class LoadTestTool extends Abstra
       System.out.println("Reader threads: " + numReaderThreads);
     }
 
-    System.out.println("Key range: " + startKey + ".." + (endKey - 1));
+    System.out.println("Key range: [" + startKey + ".." + (endKey - 1) + "]");
   }
 
   @Override

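The new LoadTestTool options (data_block_encoding, data_block_encoding_cache_only, encoded_data_block_seek) do nothing more than flip per-column-family switches through HBaseAdmin, as applyColumnFamilyOptions above shows. Below is a rough, stand-alone sketch of the equivalent direct calls, assuming the HColumnDescriptor setters this patch introduces; the class, table and family names are made up, and FAST_DIFF is only an example algorithm.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncodings;
import org.apache.hadoop.hbase.util.Bytes;

public class ColumnFamilyEncodingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);

    byte[] tableName = Bytes.toBytes("load_test_tn"); // hypothetical table
    byte[] family = Bytes.toBytes("test_cf");         // hypothetical family

    HTableDescriptor tableDesc = admin.getTableDescriptor(tableName);
    HColumnDescriptor columnDesc = tableDesc.getFamily(family);

    admin.disableTable(tableName);

    // data_block_encoding FAST_DIFF: encode blocks on disk and in the block cache.
    columnDesc.setDataBlockEncodingOnDisk(DataBlockEncodings.Algorithm.FAST_DIFF);
    columnDesc.setDataBlockEncodingInCache(DataBlockEncodings.Algorithm.FAST_DIFF);

    // data_block_encoding_cache_only would instead keep the on-disk format plain:
    //   columnDesc.setDataBlockEncodingOnDisk(DataBlockEncodings.Algorithm.NONE);

    // encoded_data_block_seek: allow seek operations directly on encoded blocks.
    columnDesc.setEncodedDataBlockSeek(true);

    admin.modifyColumn(tableName, columnDesc);
    admin.enableTable(tableName);
  }
}
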
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java?rev=1223020&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java Sat Dec 24 21:20:39 2011
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.io.WritableUtils;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(SmallTests.class)
+public class TestByteBufferUtils {
+
+  private byte[] array;
+
+  /**
+   * Create an array with sample data.
+   */
+  @Before
+  public void setUp() {
+    array = new byte[8];
+    for (int i = 0 ; i < array.length ; ++i) {
+      array[i] = (byte) ('a' + i);
+    }
+  }
+
+  private static final int MAX_VLONG_LENGTH = 9;
+  private static final Collection<Long> testNumbers;
+
+  private static void addNumber(Set<Long> a, long l) {
+    if (l != Long.MIN_VALUE) {
+      a.add(l - 1);
+    }
+    a.add(l);
+    if (l != Long.MAX_VALUE) {
+      a.add(l + 1);
+    }
+    for (long divisor = 3; divisor <= 10; ++divisor) {
+      for (long delta = -1; delta <= 1; ++delta) {
+        a.add(l / divisor + delta);
+      }
+    }
+  }
+
+  static {
+    SortedSet<Long> a = new TreeSet<Long>();
+    for (int i = 0; i <= 63; ++i) {
+      long v = (-1L) << i;
+      assertTrue(v < 0);
+      addNumber(a, v);
+      v = (1L << i) - 1;
+      assertTrue(v >= 0);
+      addNumber(a, v);
+    }
+
+    testNumbers = Collections.unmodifiableSet(a);
+    System.err.println("Testing variable-length long serialization using: "
+        + testNumbers + " (count: " + testNumbers.size() + ")");
+    assertEquals(1753, testNumbers.size());
+    assertEquals(Long.MIN_VALUE, a.first().longValue());
+    assertEquals(Long.MAX_VALUE, a.last().longValue());
+  }
+
+  @Test
+  public void testReadWriteVLong() {
+    for (long l : testNumbers) {
+      ByteBuffer b = ByteBuffer.allocate(MAX_VLONG_LENGTH);
+      ByteBufferUtils.writeVLong(b, l);
+      b.flip();
+      assertEquals(l, ByteBufferUtils.readVLong(b));
+    }
+  }
+
+  @Test
+  public void testConsistencyWithHadoopVLong() throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    for (long l : testNumbers) {
+      baos.reset();
+      ByteBuffer b = ByteBuffer.allocate(MAX_VLONG_LENGTH);
+      ByteBufferUtils.writeVLong(b, l);
+      String bufStr = Bytes.toStringBinary(b.array(),
+          b.arrayOffset(), b.position());
+      WritableUtils.writeVLong(dos, l);
+      String baosStr = Bytes.toStringBinary(baos.toByteArray());
+      assertEquals(baosStr, bufStr);
+    }
+  }
+
+  /**
+   * Test copying to stream from buffer.
+   */
+  @Test
+  public void testCopyToStream() {
+    ByteBuffer buffer = ByteBuffer.wrap(array);
+
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+    try {
+      ByteBufferUtils.copyToStream(bos, buffer, array.length);
+    } catch (IOException e) {
+      fail("IOException in testCopyToStream()");
+    }
+    assertArrayEquals(bos.toByteArray(), array);
+  }
+
+  /**
+   * Test copying single bytes to a stream.
+   * @throws IOException On test failure.
+   */
+  @Test
+  public void testCopyToStreamByte() throws IOException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+    for (int i = 0 ; i < array.length ; ++i) {
+      ByteBufferUtils.copyToStream(bos, array[i]);
+    }
+    byte[] actual = bos.toByteArray();
+    for (int i = 0 ; i < array.length ; ++i) {
+      assertEquals(array[i], actual[i]);
+    }
+  }
+
+  /**
+   * Test copying to stream from buffer with offset.
+   * @throws IOException On test failure.
+   */
+  @Test
+  public void testCopyToStreamWithOffset() throws IOException {
+    ByteBuffer buffer = ByteBuffer.wrap(array);
+
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+    ByteBufferUtils.copyToStream(bos, buffer, array.length / 2,
+        array.length / 2);
+
+    byte[] returnedArray = bos.toByteArray();
+    for (int i = 0 ; i < array.length / 2 ; ++i) {
+      int pos = array.length / 2 + i;
+      assertEquals(returnedArray[i], array[pos]);
+    }
+  }
+
+  /**
+   * Test copying data from stream.
+   * @throws IOException On test failure.
+   */
+  @Test
+  public void testCopyFromStream() throws IOException {
+    ByteBuffer buffer = ByteBuffer.allocate(array.length);
+    ByteArrayInputStream bis = new ByteArrayInputStream(array);
+    DataInputStream dis = new DataInputStream(bis);
+
+    ByteBufferUtils.copyFromStream(dis, buffer, array.length / 2);
+    ByteBufferUtils.copyFromStream(dis, buffer,
+        array.length - array.length / 2);
+    for (int i = 0 ; i < array.length ; ++i) {
+      assertEquals(array[i], buffer.get(i));
+    }
+  }
+
+  /**
+   * Test copying from buffer.
+   */
+  @Test
+  public void testCopyFromBuffer() {
+    ByteBuffer srcBuffer = ByteBuffer.allocate(array.length);
+    ByteBuffer dstBuffer = ByteBuffer.allocate(array.length);
+    srcBuffer.put(array);
+
+    ByteBufferUtils.copyFromBuffer(srcBuffer, dstBuffer,
+        array.length / 2, array.length / 4);
+    for (int i = 0 ; i < array.length / 4 ; ++i) {
+      assertEquals(srcBuffer.get(i + array.length / 2),
+          dstBuffer.get(i));
+    }
+  }
+
+  /**
+   * Test 7-bit encoding of integers.
+   * @throws IOException On test failure.
+   */
+  @Test
+  public void testCompressedInt() throws IOException {
+    testCompressedInt(0);
+    testCompressedInt(Integer.MAX_VALUE);
+    testCompressedInt(Integer.MIN_VALUE);
+
+    for (int i = 0 ; i < 3 ; i++) {
+      testCompressedInt((128 << i) - 1);
+    }
+
+    for (int i = 0 ; i < 3 ; i++) {
+      testCompressedInt((128 << i));
+    }
+  }
+
+  /**
+   * Test fitting an integer into fewer bytes.
+   */
+  @Test
+  public void testFixedInt() {
+    testFixedInt(0, 1);
+    testFixedInt(Integer.MAX_VALUE, 4);
+    testFixedInt(Integer.MIN_VALUE, 4);
+
+    for (int i = 0 ; i < 3 ; i++) {
+      testFixedInt((128 << i) - 1, i + 1);
+    }
+
+    for (int i = 0 ; i < 3 ; i++) {
+      testFixedInt((128 << i), 2 + i);
+    }
+  }
+
+  /**
+   * Test how many bytes we need to store an integer.
+   */
+  @Test
+  public void testIntFitsIn() {
+    assertEquals(1, ByteBufferUtils.intFitsIn(0));
+    assertEquals(1, ByteBufferUtils.intFitsIn(1));
+    assertEquals(2, ByteBufferUtils.intFitsIn(1 << 8));
+    assertEquals(3, ByteBufferUtils.intFitsIn(1 << 16));
+    assertEquals(4, ByteBufferUtils.intFitsIn(-1));
+    assertEquals(4, ByteBufferUtils.intFitsIn(Integer.MAX_VALUE));
+    assertEquals(4, ByteBufferUtils.intFitsIn(Integer.MIN_VALUE));
+  }
+
+  /**
+   * Test how many bytes we need to store a long.
+   */
+  @Test
+  public void testLongFitsIn() {
+    assertEquals(1, ByteBufferUtils.longFitsIn(0));
+    assertEquals(1, ByteBufferUtils.longFitsIn(1));
+    assertEquals(3, ByteBufferUtils.longFitsIn(1L << 16));
+    assertEquals(5, ByteBufferUtils.longFitsIn(1L << 32));
+    assertEquals(8, ByteBufferUtils.longFitsIn(-1));
+    assertEquals(8, ByteBufferUtils.longFitsIn(Long.MIN_VALUE));
+    assertEquals(8, ByteBufferUtils.longFitsIn(Long.MAX_VALUE));
+  }
+
+  /**
+   * Test comparing parts of a buffer for equality.
+   */
+  @Test
+  public void testArePartEqual() {
+    byte[] array = new byte[] { 1, 2, 3, 4, 5, 1, 2, 3, 4 };
+    ByteBuffer buffer = ByteBuffer.wrap(array);
+    assertTrue(ByteBufferUtils.arePartsEqual(buffer, 0, 4, 5, 4));
+    assertTrue(ByteBufferUtils.arePartsEqual(buffer, 1, 2, 6, 2));
+    assertFalse(ByteBufferUtils.arePartsEqual(buffer, 1, 2, 6, 3));
+    assertFalse(ByteBufferUtils.arePartsEqual(buffer, 1, 3, 6, 2));
+    assertFalse(ByteBufferUtils.arePartsEqual(buffer, 0, 3, 6, 3));
+  }
+
+  /**
+   * Test serializing an int to bytes.
+   */
+  @Test
+  public void testPutInt() {
+    testPutInt(0);
+    testPutInt(Integer.MAX_VALUE);
+
+    for (int i = 0 ; i < 3 ; i++) {
+      testPutInt((128 << i) - 1);
+    }
+
+    for (int i = 0 ; i < 3 ; i++) {
+      testPutInt((128 << i));
+    }
+  }
+
+  // Utility methods invoked from test methods
+
+  private void testCompressedInt(int value) throws IOException {
+    int parsedValue = 0;
+
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    ByteBufferUtils.putCompressedInt(bos, value);
+
+    ByteArrayInputStream bis = new ByteArrayInputStream(
+        bos.toByteArray());
+    parsedValue = ByteBufferUtils.readCompressedInt(bis);
+
+    assertEquals(value, parsedValue);
+  }
+
+  private void testFixedInt(int value, int width) {
+    int parsedValue = 0;
+    byte[] bValue = new byte[Bytes.SIZEOF_INT];
+    Bytes.putInt(bValue, 0, value);
+
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    bos.write(bValue, Bytes.SIZEOF_INT - width, width);
+
+    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
+    try {
+      parsedValue = ByteBufferUtils.readCompressedInt(bis, width);
+    } catch (IOException e) {
+      fail("While reading fixed int");
+    }
+
+    assertEquals(value, parsedValue);
+  }
+
+  private void testPutInt(int value) {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    try {
+      ByteBufferUtils.putInt(baos, value);
+    } catch (IOException e) {
+      throw new RuntimeException("Bug in putIn()", e);
+    }
+
+    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+    DataInputStream dis = new DataInputStream(bais);
+    try {
+      assertEquals(dis.readInt(), value);
+    } catch (IOException e) {
+      throw new RuntimeException("Bug in test!", e);
+    }
+  }
+}
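
As a quick illustration of the two serialization helpers these tests focus on, the sketch below round-trips a variable-length long through a ByteBuffer and a 7-bit compressed int through plain byte streams, using only ByteBufferUtils methods exercised above. The class name and sample values are arbitrary.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.util.ByteBufferUtils;

public class ByteBufferUtilsSketch {
  public static void main(String[] args) throws Exception {
    // Variable-length long: same wire format as WritableUtils.writeVLong
    // (see testConsistencyWithHadoopVLong), written into a ByteBuffer.
    long value = 123456789L;
    ByteBuffer buf = ByteBuffer.allocate(9); // 9 bytes is the vlong maximum
    ByteBufferUtils.writeVLong(buf, value);
    buf.flip();
    System.out.println(ByteBufferUtils.readVLong(buf)); // prints 123456789

    // 7-bit "compressed" int: round trip through a byte stream.
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    ByteBufferUtils.putCompressedInt(bos, 4242);
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
    System.out.println(ByteBufferUtils.readCompressedInt(bis)); // prints 4242
  }
}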