Posted to commits@hbase.apache.org by ra...@apache.org on 2015/06/09 08:33:47 UTC

[2/2] hbase git commit: HBASE-13451 - Make the HFileBlockIndex blockKeys to Cells so that it could be easy to use in the CellComparators (Ram)

HBASE-13451 - Make the HFileBlockIndex blockKeys to Cells so that it could
be easy to use in the CellComparators (Ram)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/487e4aa7
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/487e4aa7
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/487e4aa7

Branch: refs/heads/master
Commit: 487e4aa74fcc6ef4201f6ffdcfd1a7169c754562
Parents: c62b396
Author: ramkrishna <ra...@gmail.com>
Authored: Tue Jun 9 12:03:01 2015 +0530
Committer: ramkrishna <ra...@gmail.com>
Committed: Tue Jun 9 12:03:01 2015 +0530

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hbase/CellUtil.java  |   6 +-
 .../org/apache/hadoop/hbase/util/Bytes.java     |  46 +-
 .../hadoop/hbase/io/HalfStoreFileReader.java    |  10 +-
 .../hbase/io/hfile/CompoundBloomFilter.java     | 201 +++++++
 .../hbase/io/hfile/CompoundBloomFilterBase.java |  71 +++
 .../io/hfile/CompoundBloomFilterWriter.java     | 277 ++++++++++
 .../org/apache/hadoop/hbase/io/hfile/HFile.java |   4 +-
 .../hadoop/hbase/io/hfile/HFileBlockIndex.java  | 523 ++++++++++++-------
 .../hbase/io/hfile/HFilePrettyPrinter.java      |   2 +-
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java  |  26 +-
 .../hbase/regionserver/HRegionFileSystem.java   |   5 +-
 .../hadoop/hbase/regionserver/HStore.java       |   9 +-
 .../hadoop/hbase/regionserver/StoreFile.java    |  18 +-
 .../hadoop/hbase/util/BloomFilterChunk.java     |   2 +-
 .../hadoop/hbase/util/BloomFilterFactory.java   |   3 +
 .../hadoop/hbase/util/CompoundBloomFilter.java  | 197 -------
 .../hbase/util/CompoundBloomFilterBase.java     |  70 ---
 .../hbase/util/CompoundBloomFilterWriter.java   | 276 ----------
 .../org/apache/hadoop/hbase/util/HBaseFsck.java |   8 +-
 .../hbase/io/TestHalfStoreFileReader.java       |  12 +-
 .../hbase/io/hfile/TestHFileBlockIndex.java     |  13 +-
 .../hadoop/hbase/io/hfile/TestHFileSeek.java    |   2 +-
 .../hbase/io/hfile/TestHFileWriterV2.java       |   8 +-
 .../hbase/io/hfile/TestHFileWriterV3.java       |   8 +-
 .../regionserver/TestCompoundBloomFilter.java   |   5 +-
 .../hbase/regionserver/TestStoreFile.java       |  10 +-
 26 files changed, 969 insertions(+), 843 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
index 7ddcfe6..f276449 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
@@ -770,8 +770,10 @@ public final class CellUtil {
     sb.append(KeyValue.humanReadableTimestamp(cell.getTimestamp()));
     sb.append('/');
     sb.append(Type.codeToType(cell.getTypeByte()));
-    sb.append("/vlen=");
-    sb.append(cell.getValueLength());
+    if (!(cell instanceof KeyValue.KeyOnlyKeyValue)) {
+      sb.append("/vlen=");
+      sb.append(cell.getValueLength());
+    }
     sb.append("/seqid=");
     sb.append(cell.getSequenceId());
     return sb.toString();

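A note on the guard added above: a KeyValue.KeyOnlyKeyValue wraps only the serialized key, so it has no meaningful value length, and printing "/vlen=" for it would be noise. Below is a minimal sketch of the distinction, assuming only the KeyValue APIs that appear elsewhere in this diff (the inline comments describe expected behaviour, not captured output):

    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class KeyOnlyCellSketch {
      public static void main(String[] args) {
        // A full KeyValue carries row/family/qualifier/timestamp/type plus a value.
        KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
            Bytes.toBytes("q"), 1L, Bytes.toBytes("value"));

        // A KeyOnlyKeyValue wraps just the key portion of those bytes; the
        // patched toString path above skips "/vlen=" for such cells.
        KeyValue.KeyOnlyKeyValue keyOnly = new KeyValue.KeyOnlyKeyValue(
            kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());

        System.out.println(kv);       // includes a real /vlen=5
        System.out.println(keyOnly);  // key-only: no meaningful value length
      }
    }
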
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index 7d678fd..9ac6912 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -2075,24 +2075,6 @@ public class Bytes implements Comparable<Bytes> {
   }
 
   /**
-   * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR
-   *
-   * @param arr array of byte arrays to search for
-   * @param key the key you want to find
-   * @param offset the offset in the key you want to find
-   * @param length the length of the key
-   * @return zero-based index of the key, if the key is present in the array.
-   *         Otherwise, a value -(i + 1) such that the key is between arr[i -
-   *         1] and arr[i] non-inclusively, where i is in [0, i], if we define
-   *         arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
-   *         means that this function can return 2N + 1 different values
-   *         ranging from -(N + 1) to N - 1.
-   */
-  public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
-    return binarySearch(arr, key, offset, length, (CellComparator) null);
-  }
-
-  /**
    * Binary search for keys in indexes.
    *
    * @param arr array of byte arrays to search for
@@ -2111,17 +2093,16 @@ public class Bytes implements Comparable<Bytes> {
   @Deprecated
   public static int binarySearch(byte [][]arr, byte []key, int offset,
       int length, RawComparator<?> comparator) {
-    return binarySearch(arr, key, offset, length, (CellComparator)null);
+    return binarySearch(arr, key, offset, length);
   }
 
   /**
-   * Binary search for keys in indexes.
+   * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR.
    *
    * @param arr array of byte arrays to search for
    * @param key the key you want to find
    * @param offset the offset in the key you want to find
    * @param length the length of the key
-   * @param comparator a comparator to compare.
    * @return zero-based index of the key, if the key is present in the array.
    *         Otherwise, a value -(i + 1) such that the key is between arr[i -
    *         1] and arr[i] non-inclusively, where i is in [0, N], if we define
@@ -2129,23 +2110,18 @@ public class Bytes implements Comparable<Bytes> {
    *         means that this function can return 2N + 1 different values
    *         ranging from -(N + 1) to N - 1.
    */
-  public static int binarySearch(byte [][]arr, byte []key, int offset,
-      int length, CellComparator comparator) {
+  public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) {
     int low = 0;
     int high = arr.length - 1;
 
     KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
     r.setKey(key, offset, length);
     while (low <= high) {
-      int mid = (low+high) >>> 1;
+      int mid = (low + high) >>> 1;
       // we have to compare in this order, because the comparator order
       // has special logic when the 'left side' is a special key.
-      int cmp = 0;
-      if (comparator != null) {
-        cmp = comparator.compare(r, arr[mid], 0, arr[mid].length);
-      } else {
-        cmp = Bytes.BYTES_RAWCOMPARATOR.compare(key, offset, length, arr[mid], 0, arr[mid].length);
-      }
+      int cmp = Bytes.BYTES_RAWCOMPARATOR
+          .compare(key, offset, length, arr[mid], 0, arr[mid].length);
       // key lives above the midpoint
       if (cmp > 0)
         low = mid + 1;
@@ -2156,7 +2132,7 @@ public class Bytes implements Comparable<Bytes> {
       else
         return mid;
     }
-    return - (low+1);
+    return -(low + 1);
   }
 
   /**
@@ -2172,7 +2148,7 @@ public class Bytes implements Comparable<Bytes> {
    *         means that this function can return 2N + 1 different values
    *         ranging from -(N + 1) to N - 1.
    * @return the index of the block
-   * @deprecated Use {@link Bytes#binarySearch(byte[][], Cell, Comparator)}
+   * @deprecated Use {@link Bytes#binarySearch(Cell[], Cell, CellComparator)}
    */
   @Deprecated
   public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) {
@@ -2212,16 +2188,14 @@ public class Bytes implements Comparable<Bytes> {
    *         ranging from -(N + 1) to N - 1.
    * @return the index of the block
    */
-  public static int binarySearch(byte[][] arr, Cell key, Comparator<Cell> comparator) {
+  public static int binarySearch(Cell[] arr, Cell key, CellComparator comparator) {
     int low = 0;
     int high = arr.length - 1;
-    KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue();
     while (low <= high) {
       int mid = (low+high) >>> 1;
       // we have to compare in this order, because the comparator order
       // has special logic when the 'left side' is a special key.
-      r.setKey(arr[mid], 0, arr[mid].length);
-      int cmp = comparator.compare(key, r);
+      int cmp = comparator.compare(key, arr[mid]);
       // key lives above the midpoint
       if (cmp > 0)
         low = mid + 1;

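The -(i + 1) return convention documented above is the same one java.util.Arrays.binarySearch uses, which makes it easy to demonstrate in isolation; note how an N-element array yields 2N + 1 distinct results, from -(N + 1) to N - 1:

    import java.util.Arrays;

    public class BinarySearchEncodingDemo {
      public static void main(String[] args) {
        int[] arr = {10, 20, 30};                         // N = 3 sorted keys

        System.out.println(Arrays.binarySearch(arr, 20)); // 1: exact hit, zero-based index
        System.out.println(Arrays.binarySearch(arr, 15)); // -2: -(i + 1) with i = 1,
                                                          //     since 10 < 15 < 20
        System.out.println(Arrays.binarySearch(arr, 5));  // -1: below arr[0], i = 0
        System.out.println(Arrays.binarySearch(arr, 35)); // -4: above arr[2], i = N = 3,
                                                          //     the -(N + 1) extreme
        // Index readers recover the "containing block" from a miss with:
        //   int i = -pos - 1;  return i - 1;
      }
    }
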
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
index 78c6734..7a4a333 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
@@ -59,7 +59,7 @@ public class HalfStoreFileReader extends StoreFile.Reader {
 
   protected final Cell splitCell;
 
-  private byte[] firstKey = null;
+  private Cell firstKey = null;
 
   private boolean firstKeySeeked = false;
 
@@ -262,7 +262,7 @@ public class HalfStoreFileReader extends StoreFile.Reader {
       @Override
       public boolean seekBefore(Cell key) throws IOException {
         if (top) {
-          Cell fk = new KeyValue.KeyOnlyKeyValue(getFirstKey(), 0, getFirstKey().length);
+          Cell fk = getFirstKey();
           if (getComparator().compareKeyIgnoresMvcc(key, fk) <= 0) {
             return false;
           }
@@ -319,18 +319,18 @@ public class HalfStoreFileReader extends StoreFile.Reader {
   }
 
   @Override
-  public byte[] midkey() throws IOException {
+  public Cell midkey() throws IOException {
     // Returns null to indicate file is not splittable.
     return null;
   }
 
   @Override
-  public byte[] getFirstKey() {
+  public Cell getFirstKey() {
     if (!firstKeySeeked) {
       HFileScanner scanner = getScanner(true, true, false);
       try {
         if (scanner.seekTo()) {
-          this.firstKey = Bytes.toBytes(scanner.getKey());
+          this.firstKey = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(scanner.getKey()));
         }
         firstKeySeeked = true;
       } catch (IOException e) {

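The getFirstKey() change above is the recurring pattern of this patch: rather than caching raw key bytes, wrap them once in a KeyValue.KeyOnlyKeyValue so later comparisons (compareKeyIgnoresMvcc in seekBefore, for instance) operate on Cells directly. A hedged, self-contained sketch of that conversion, with a throwaway KeyValue standing in for a positioned HFileScanner:

    import java.nio.ByteBuffer;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class FirstKeyAsCellSketch {
      /** The conversion getFirstKey() now performs on the scanner's key bytes. */
      static Cell wrapAsKeyOnlyCell(ByteBuffer scannerKey) {
        byte[] keyBytes = Bytes.toBytes(scannerKey);
        return new KeyValue.KeyOnlyKeyValue(keyBytes, 0, keyBytes.length);
      }

      public static void main(String[] args) {
        // Stand-in for HFileScanner.getKey(): the serialized key of some cell.
        KeyValue kv = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"),
            Bytes.toBytes("q"), 42L, Bytes.toBytes("v"));
        ByteBuffer key = ByteBuffer.wrap(kv.getBuffer(), kv.getKeyOffset(),
            kv.getKeyLength());

        Cell firstKey = wrapAsKeyOnlyCell(key);
        System.out.println(firstKey); // comparable with a CellComparator from here on
      }
    }
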
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
new file mode 100644
index 0000000..11436ce
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
@@ -0,0 +1,201 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.BloomFilter;
+import org.apache.hadoop.hbase.util.BloomFilterUtil;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * A Bloom filter implementation built on top of 
+ * {@link org.apache.hadoop.hbase.util.BloomFilterChunk}, encapsulating
+ * a set of fixed-size Bloom filters written out at the time of
+ * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
+ * block stream, and loaded on demand at query time. This class only provides
+ * reading capabilities.
+ */
+@InterfaceAudience.Private
+public class CompoundBloomFilter extends CompoundBloomFilterBase
+    implements BloomFilter {
+
+  /** Used to load chunks on demand */
+  private HFile.Reader reader;
+
+  private HFileBlockIndex.BlockIndexReader index;
+
+  private int hashCount;
+  private Hash hash;
+
+  private long[] numQueriesPerChunk;
+  private long[] numPositivesPerChunk;
+
+  /**
+   * De-serialization for compound Bloom filter metadata. Must be consistent
+   * with what {@link CompoundBloomFilterWriter} does.
+   *
+   * @param meta serialized Bloom filter metadata without any magic blocks
+   * @throws IOException
+   */
+  public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
+      throws IOException {
+    this.reader = reader;
+
+    totalByteSize = meta.readLong();
+    hashCount = meta.readInt();
+    hashType = meta.readInt();
+    totalKeyCount = meta.readLong();
+    totalMaxKeys = meta.readLong();
+    numChunks = meta.readInt();
+    byte[] comparatorClassName = Bytes.readByteArray(meta);
+    // The writer would have written 0 as the vint length in the case of
+    // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize the comparator;
+    // it can be null
+    if (comparatorClassName.length != 0) {
+      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
+    }
+
+    hash = Hash.getInstance(hashType);
+    if (hash == null) {
+      throw new IllegalArgumentException("Invalid hash type: " + hashType);
+    }
+    // We will pass null for ROW block
+    if(comparator == null) {
+      index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
+    } else {
+      index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1);
+    }
+    index.readRootIndex(meta, numChunks);
+  }
+
+  @Override
+  public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuffer bloom) {
+    // We try to store the result in this variable so we can update stats for
+    // testing, but when an error happens, we log a message and return.
+
+    int block = index.rootBlockContainingKey(key, keyOffset,
+        keyLength);
+    return checkContains(key, keyOffset, keyLength, block);
+  }
+
+  private boolean checkContains(byte[] key, int keyOffset, int keyLength, int block) {
+    boolean result;
+    if (block < 0) {
+      result = false; // This key is not in the file.
+    } else {
+      HFileBlock bloomBlock;
+      try {
+        // We cache the block and use a positional read.
+        bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
+            index.getRootBlockDataSize(block), true, true, false, true,
+            BlockType.BLOOM_CHUNK, null);
+      } catch (IOException ex) {
+        // The Bloom filter is broken, turn it off.
+        throw new IllegalArgumentException(
+            "Failed to load Bloom block for key "
+                + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
+      }
+
+      ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
+      result = BloomFilterUtil.contains(key, keyOffset, keyLength,
+          bloomBuf, bloomBlock.headerSize(),
+          bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
+    }
+
+    if (numQueriesPerChunk != null && block >= 0) {
+      // Update statistics. Only used in unit tests.
+      ++numQueriesPerChunk[block];
+      if (result)
+        ++numPositivesPerChunk[block];
+    }
+
+    return result;
+  }
+
+  @Override
+  public boolean contains(Cell keyCell, ByteBuffer bloom) {
+    // We try to store the result in this variable so we can update stats for
+    // testing, but when an error happens, we log a message and return.
+    int block = index.rootBlockContainingKey(keyCell);
+    // TODO : Will be true KeyValue for now.
+    // When Offheap comes in we can add an else condition to work
+    // on the bytes in offheap
+    KeyValue kvKey = (KeyValue) keyCell;
+    return checkContains(kvKey.getBuffer(), kvKey.getKeyOffset(), kvKey.getKeyLength(), block);
+  }
+
+  public boolean supportsAutoLoading() {
+    return true;
+  }
+
+  public int getNumChunks() {
+    return numChunks;
+  }
+
+  public void enableTestingStats() {
+    numQueriesPerChunk = new long[numChunks];
+    numPositivesPerChunk = new long[numChunks];
+  }
+
+  public String formatTestingStats() {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < numChunks; ++i) {
+      sb.append("chunk #");
+      sb.append(i);
+      sb.append(": queries=");
+      sb.append(numQueriesPerChunk[i]);
+      sb.append(", positives=");
+      sb.append(numPositivesPerChunk[i]);
+      sb.append(", positiveRatio=");
+      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
+      sb.append(";\n");
+    }
+    return sb.toString();
+  }
+
+  public long getNumQueriesForTesting(int chunk) {
+    return numQueriesPerChunk[chunk];
+  }
+
+  public long getNumPositivesForTesting(int chunk) {
+    return numPositivesPerChunk[chunk];
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(BloomFilterUtil.formatStats(this));
+    sb.append(BloomFilterUtil.STATS_RECORD_SEP + 
+        "Number of chunks: " + numChunks);
+    sb.append(BloomFilterUtil.STATS_RECORD_SEP + 
+        ((comparator != null) ? "Comparator: "
+        + comparator.getClass().getSimpleName() : "Comparator: "
+        + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
+    return sb.toString();
+  }
+
+}

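The read path above is two steps: a root-index floor lookup picks the chunk whose first key is the greatest one <= the query key, then only that chunk's bit array is tested. Here is a toy, self-contained sketch of the same shape using plain Java collections (BitSet stands in for a real Bloom chunk, and the single hash function is illustrative; HBase applies several hashes per key against serialized chunks):

    import java.util.BitSet;
    import java.util.Map;
    import java.util.TreeMap;

    public class CompoundFilterSketch {
      private static final int BITS_PER_CHUNK = 1024;

      // Root index: first key of each chunk -> that chunk's bit array.
      private final TreeMap<String, BitSet> chunksByFirstKey =
          new TreeMap<String, BitSet>();

      void add(String key) {
        // Writer side: one chunk per leading character, purely for illustration.
        String firstKeyOfChunk = key.substring(0, 1);
        BitSet chunk = chunksByFirstKey.get(firstKeyOfChunk);
        if (chunk == null) {
          chunk = new BitSet(BITS_PER_CHUNK);
          chunksByFirstKey.put(firstKeyOfChunk, chunk);
        }
        chunk.set(Math.floorMod(key.hashCode(), BITS_PER_CHUNK));
      }

      boolean mightContain(String key) {
        // Reader side: the floor lookup mirrors rootBlockContainingKey();
        // a miss below the first chunk means the key cannot be in the file.
        Map.Entry<String, BitSet> entry = chunksByFirstKey.floorEntry(key);
        if (entry == null) {
          return false;
        }
        return entry.getValue().get(Math.floorMod(key.hashCode(), BITS_PER_CHUNK));
      }

      public static void main(String[] args) {
        CompoundFilterSketch f = new CompoundFilterSketch();
        f.add("apple");
        f.add("banana");
        System.out.println(f.mightContain("apple"));   // true
        System.out.println(f.mightContain("0cherry")); // false: below the first chunk
      }
    }
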
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java
new file mode 100644
index 0000000..c42ee20
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterBase.java
@@ -0,0 +1,71 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.BloomFilterBase;
+
+import org.apache.hadoop.hbase.CellComparator;
+
+@InterfaceAudience.Private
+public class CompoundBloomFilterBase implements BloomFilterBase {
+
+  /**
+   * At read time, the total number of chunks. At write time, the number of
+   * chunks created so far. The first chunk has an ID of 0, and the current
+   * chunk has the ID of numChunks - 1.
+   */
+  protected int numChunks;
+
+  /**
+   * The Bloom filter version. There used to be a DynamicByteBloomFilter which
+   * had version 2.
+   */
+  public static final int VERSION = 3;
+
+  /** Target error rate for configuring the filter and for information */
+  protected float errorRate;
+
+  /** The total number of keys in all chunks */
+  protected long totalKeyCount;
+  protected long totalByteSize;
+  protected long totalMaxKeys;
+
+  /** Hash function type to use, as defined in {@link org.apache.hadoop.hbase.util.Hash} */
+  protected int hashType;
+  /** Comparator used to compare Bloom filter keys */
+  protected CellComparator comparator;
+
+  @Override
+  public long getMaxKeys() {
+    return totalMaxKeys;
+  }
+
+  @Override
+  public long getKeyCount() {
+    return totalKeyCount;
+  }
+
+  @Override
+  public long getByteSize() {
+    return totalByteSize;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java
new file mode 100644
index 0000000..cd60562
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilterWriter.java
@@ -0,0 +1,277 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.Queue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.util.BloomFilterChunk;
+import org.apache.hadoop.hbase.util.BloomFilterUtil;
+import org.apache.hadoop.hbase.util.BloomFilterWriter;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Adds methods required for writing a compound Bloom filter to the data
+ * section of an {@link org.apache.hadoop.hbase.io.hfile.HFile} to the
+ * {@link CompoundBloomFilter} class.
+ */
+@InterfaceAudience.Private
+public class CompoundBloomFilterWriter extends CompoundBloomFilterBase
+    implements BloomFilterWriter, InlineBlockWriter {
+
+  private static final Log LOG =
+    LogFactory.getLog(CompoundBloomFilterWriter.class);
+
+  /** The current chunk being written to */
+  private BloomFilterChunk chunk;
+
+  /** Previous chunk, so that we can create another similar chunk */
+  private BloomFilterChunk prevChunk;
+
+  /** Maximum fold factor */
+  private int maxFold;
+
+  /** The size of individual Bloom filter chunks to create */
+  private int chunkByteSize;
+
+  /** A Bloom filter chunk enqueued for writing */
+  private static class ReadyChunk {
+    int chunkId;
+    byte[] firstKey;
+    BloomFilterChunk chunk;
+  }
+
+  private Queue<ReadyChunk> readyChunks = new LinkedList<ReadyChunk>();
+
+  /** The first key in the current Bloom filter chunk. */
+  private byte[] firstKeyInChunk = null;
+
+  private HFileBlockIndex.BlockIndexWriter bloomBlockIndexWriter =
+      new HFileBlockIndex.BlockIndexWriter();
+
+  /** Whether to cache-on-write compound Bloom filter chunks */
+  private boolean cacheOnWrite;
+
+  /**
+   * @param chunkByteSizeHint
+   *          each chunk's size in bytes. The real chunk size might be different
+   *          as required by the fold factor.
+   * @param errorRate
+   *          target false positive rate
+   * @param hashType
+   *          hash function type to use
+   * @param maxFold
+   *          maximum degree of folding allowed
+   */
+  public CompoundBloomFilterWriter(int chunkByteSizeHint, float errorRate,
+      int hashType, int maxFold, boolean cacheOnWrite,
+      CellComparator comparator) {
+    chunkByteSize = BloomFilterUtil.computeFoldableByteSize(
+        chunkByteSizeHint * 8L, maxFold);
+
+    this.errorRate = errorRate;
+    this.hashType = hashType;
+    this.maxFold = maxFold;
+    this.cacheOnWrite = cacheOnWrite;
+    this.comparator = comparator;
+  }
+
+  @Override
+  public boolean shouldWriteBlock(boolean closing) {
+    enqueueReadyChunk(closing);
+    return !readyChunks.isEmpty();
+  }
+
+  /**
+   * Enqueue the current chunk if it is ready to be written out.
+   *
+   * @param closing true if we are closing the file, so we do not expect new
+   *        keys to show up
+   */
+  private void enqueueReadyChunk(boolean closing) {
+    if (chunk == null ||
+        (chunk.getKeyCount() < chunk.getMaxKeys() && !closing)) {
+      return;
+    }
+
+    if (firstKeyInChunk == null) {
+      throw new NullPointerException("Trying to enqueue a chunk, " +
+          "but first key is null: closing=" + closing + ", keyCount=" +
+          chunk.getKeyCount() + ", maxKeys=" + chunk.getMaxKeys());
+    }
+
+    ReadyChunk readyChunk = new ReadyChunk();
+    readyChunk.chunkId = numChunks - 1;
+    readyChunk.chunk = chunk;
+    readyChunk.firstKey = firstKeyInChunk;
+    readyChunks.add(readyChunk);
+
+    long prevMaxKeys = chunk.getMaxKeys();
+    long prevByteSize = chunk.getByteSize();
+
+    chunk.compactBloom();
+
+    if (LOG.isTraceEnabled() && prevByteSize != chunk.getByteSize()) {
+      LOG.trace("Compacted Bloom chunk #" + readyChunk.chunkId + " from ["
+          + prevMaxKeys + " max keys, " + prevByteSize + " bytes] to ["
+          + chunk.getMaxKeys() + " max keys, " + chunk.getByteSize()
+          + " bytes]");
+    }
+
+    totalMaxKeys += chunk.getMaxKeys();
+    totalByteSize += chunk.getByteSize();
+
+    firstKeyInChunk = null;
+    prevChunk = chunk;
+    chunk = null;
+  }
+
+  /**
+   * Adds a Bloom filter key. This key must be greater than the previous key,
+   * as defined by the comparator this compound Bloom filter is configured
+   * with. For efficiency, key monotonicity is not checked here. See
+   * {@link org.apache.hadoop.hbase.regionserver.StoreFile.Writer#append(
+   * org.apache.hadoop.hbase.Cell)} for the details of deduplication.
+   */
+  @Override
+  public void add(byte[] bloomKey, int keyOffset, int keyLength) {
+    if (bloomKey == null)
+      throw new NullPointerException();
+
+    enqueueReadyChunk(false);
+
+    if (chunk == null) {
+      if (firstKeyInChunk != null) {
+        throw new IllegalStateException("First key in chunk already set: "
+            + Bytes.toStringBinary(firstKeyInChunk));
+      }
+      firstKeyInChunk = Arrays.copyOfRange(bloomKey, keyOffset, keyOffset
+          + keyLength);
+
+      if (prevChunk == null) {
+        // First chunk
+        chunk = BloomFilterUtil.createBySize(chunkByteSize, errorRate,
+            hashType, maxFold);
+      } else {
+        // Use the same parameters as the last chunk, but a new array and
+        // a zero key count.
+        chunk = prevChunk.createAnother();
+      }
+
+      if (chunk.getKeyCount() != 0) {
+        throw new IllegalStateException("keyCount=" + chunk.getKeyCount()
+            + " > 0");
+      }
+
+      chunk.allocBloom();
+      ++numChunks;
+    }
+
+    chunk.add(bloomKey, keyOffset, keyLength);
+    ++totalKeyCount;
+  }
+
+  @Override
+  public void writeInlineBlock(DataOutput out) throws IOException {
+    // We don't remove the chunk from the queue here, because we might need it
+    // again for cache-on-write.
+    ReadyChunk readyChunk = readyChunks.peek();
+
+    BloomFilterChunk readyChunkBloom = readyChunk.chunk;
+    readyChunkBloom.writeBloom(out);
+  }
+
+  @Override
+  public void blockWritten(long offset, int onDiskSize, int uncompressedSize) {
+    ReadyChunk readyChunk = readyChunks.remove();
+    bloomBlockIndexWriter.addEntry(readyChunk.firstKey, offset, onDiskSize);
+  }
+
+  @Override
+  public BlockType getInlineBlockType() {
+    return BlockType.BLOOM_CHUNK;
+  }
+
+  private class MetaWriter implements Writable {
+    protected MetaWriter() {}
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      throw new IOException("Cant read with this class.");
+    }
+
+    /**
+     * This is modeled after {@link BloomFilterChunk.MetaWriter} for simplicity,
+     * although the two metadata formats do not have to be consistent. This
+     * does have to be consistent with how {@link
+     * CompoundBloomFilter#CompoundBloomFilter(DataInput,
+     * org.apache.hadoop.hbase.io.hfile.HFile.Reader)} reads fields.
+     */
+    @Override
+    public void write(DataOutput out) throws IOException {
+      out.writeInt(VERSION);
+
+      out.writeLong(getByteSize());
+      out.writeInt(prevChunk.getHashCount());
+      out.writeInt(prevChunk.getHashType());
+      out.writeLong(getKeyCount());
+      out.writeLong(getMaxKeys());
+
+      // Fields that don't have equivalents in ByteBloomFilter.
+      out.writeInt(numChunks);
+      if (comparator != null) {
+        Bytes.writeByteArray(out, Bytes.toBytes(comparator.getClass().getName()));
+      } else {
+        // Internally writes a 0 vint if the byte[] is null
+        Bytes.writeByteArray(out, null);
+      }
+
+      // Write a single-level index without compression or block header.
+      bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");
+    }
+  }
+
+  @Override
+  public void compactBloom() {
+  }
+
+  @Override
+  public Writable getMetaWriter() {
+    return new MetaWriter();
+  }
+
+  @Override
+  public Writable getDataWriter() {
+    return null;
+  }
+
+  @Override
+  public boolean getCacheOnWrite() {
+    return cacheOnWrite;
+  }
+}

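CompoundBloomFilterWriter plugs into HFile's inline-block protocol: keys are appended, and whenever a chunk fills (or the file closes) it is drained through shouldWriteBlock(), writeInlineBlock(), and blockWritten(). Below is a hedged driver-loop sketch built only from the methods shown above; the sizing parameters and the comparator constant are illustrative stand-ins for what BloomFilterFactory would normally supply:

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.hbase.CellComparator;
    import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.Hash;

    public class BloomWriterDriverSketch {
      public static void main(String[] args) throws IOException {
        // Parameters are illustrative; real callers get them from
        // BloomFilterFactory / column family configuration.
        CompoundBloomFilterWriter writer = new CompoundBloomFilterWriter(
            4 * 1024,             // chunk byte size hint
            0.01f,                // target false positive rate
            Hash.MURMUR_HASH,     // hash type
            7,                    // maximum fold factor
            false,                // cache-on-write
            CellComparator.COMPARATOR); // assumed constant in this era of master

        long fakeOffset = 0;
        for (int i = 0; i < 100000; i++) {
          byte[] key = Bytes.toBytes(String.format("row-%08d", i));
          writer.add(key, 0, key.length);

          // Mimic the HFile writer's inline-block check after each append.
          while (writer.shouldWriteBlock(false)) {
            ByteArrayOutputStream chunkBytes = new ByteArrayOutputStream();
            writer.writeInlineBlock(new DataOutputStream(chunkBytes));
            // Report where the chunk "landed" so the writer can index it.
            writer.blockWritten(fakeOffset, chunkBytes.size(), chunkBytes.size());
            fakeOffset += chunkBytes.size();
          }
        }
        // At close, the partial tail chunk is flushed via shouldWriteBlock(true)
        // before getMetaWriter() serializes the filter metadata.
      }
    }
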
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 6c8260d..35458a2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -394,13 +394,13 @@ public class HFile {
 
     byte[] getLastKey();
 
-    byte[] midkey() throws IOException;
+    Cell midkey() throws IOException;
 
     long length();
 
     long getEntries();
 
-    byte[] getFirstKey();
+    Cell getFirstKey();
 
     long indexSize();
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index e6e1fff..50a5baa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueUtil;
@@ -51,10 +52,11 @@ import org.apache.hadoop.util.StringUtils;
 
 /**
  * Provides functionality to write ({@link BlockIndexWriter}) and read
- * ({@link BlockIndexReader}) single-level and multi-level block indexes.
+ * ({@link org.apache.hadoop.hbase.io.hfile.BlockIndexReader})
+ * single-level and multi-level block indexes.
  *
  * Examples of how to use the block index writer can be found in
- * {@link org.apache.hadoop.hbase.util.CompoundBloomFilterWriter} and
+ * {@link org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter} and
  *  {@link HFileWriterImpl}. Examples of how to use the reader can be
  *  found in {@link HFileWriterImpl} and TestHFileBlockIndex.
  */
@@ -96,124 +98,179 @@ public class HFileBlockIndex {
       2 * Bytes.SIZEOF_INT;
 
   /**
-   * The reader will always hold the root level index in the memory. Index
-   * blocks at all other levels will be cached in the LRU cache in practice,
-   * although this API does not enforce that.
-   *
-   * All non-root (leaf and intermediate) index blocks contain what we call a
-   * "secondary index": an array of offsets to the entries within the block.
-   * This allows us to do binary search for the entry corresponding to the
-   * given key without having to deserialize the block.
+   * An implementation of the BlockIndexReader that deals with block keys which are plain
+   * byte[], like the MetaBlock or the Bloom block for ROW blooms.
+   * It does not need a comparator; it works on Bytes.BYTES_RAWCOMPARATOR order.
    */
-  public static class BlockIndexReader implements HeapSize {
-    /** Needed doing lookup on blocks. */
-    private final CellComparator comparator;
+   static class ByteArrayKeyBlockIndexReader extends BlockIndexReader {
 
-    // Root-level data.
-    // TODO : Convert these to Cells (ie) KeyValue.KeyOnlyKV
     private byte[][] blockKeys;
-    private long[] blockOffsets;
-    private int[] blockDataSizes;
-    private int rootCount = 0;
 
-    // Mid-key metadata.
-    private long midLeafBlockOffset = -1;
-    private int midLeafBlockOnDiskSize = -1;
-    private int midKeyEntry = -1;
+    public ByteArrayKeyBlockIndexReader(final int treeLevel,
+        final CachingBlockReader cachingBlockReader) {
+      this(treeLevel);
+      this.cachingBlockReader = cachingBlockReader;
+    }
 
-    /** Pre-computed mid-key */
-    private AtomicReference<byte[]> midKey = new AtomicReference<byte[]>();
+    public ByteArrayKeyBlockIndexReader(final int treeLevel) {
+      // Can be null for METAINDEX block
+      searchTreeLevel = treeLevel;
+    }
+
+    protected long calculateHeapSizeForBlockKeys(long heapSize) {
+      // Calculating the size of blockKeys
+      if (blockKeys != null) {
+        heapSize += ClassSize.REFERENCE;
+        // Adding array + references overhead
+        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);
+
+        // Adding bytes
+        for (byte[] key : blockKeys) {
+          heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
+        }
+      }
+      return heapSize;
+    }
+
+    @Override
+    public boolean isEmpty() {
+      return blockKeys.length == 0;
+    }
 
     /**
-     * The number of levels in the block index tree. One if there is only root
-     * level, two for root and leaf levels, etc.
+     * @param i
+     *          from 0 to {@link #getRootBlockCount() - 1}
      */
-    private int searchTreeLevel;
+    public byte[] getRootBlockKey(int i) {
+      return blockKeys[i];
+    }
 
-    /** A way to read {@link HFile} blocks at a given offset */
-    private CachingBlockReader cachingBlockReader;
+    @Override
+    public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
+        boolean cacheBlocks, boolean pread, boolean isCompaction,
+        DataBlockEncoding expectedDataBlockEncoding) throws IOException {
+      // Not needed here; this byte[]-keyed reader never loads data blocks.
+      return null;
+    }
+
+    @Override
+    public Cell midkey() throws IOException {
+      // Not needed here
+      return null;
+    }
+
+    protected void initialize(int numEntries) {
+      blockKeys = new byte[numEntries][];
+    }
+
+    protected void add(final byte[] key, final long offset, final int dataSize) {
+      blockOffsets[rootCount] = offset;
+      blockKeys[rootCount] = key;
+      blockDataSizes[rootCount] = dataSize;
+      rootCount++;
+    }
+
+    @Override
+    public int rootBlockContainingKey(byte[] key, int offset, int length, CellComparator comp) {
+      int pos = Bytes.binarySearch(blockKeys, key, offset, length);
+      // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
+      // binarySearch's javadoc.
+
+      if (pos >= 0) {
+        // This means this is an exact match with an element of blockKeys.
+        assert pos < blockKeys.length;
+        return pos;
+      }
+
+      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
+      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
+      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
+      // key < blockKeys[0], meaning the file does not contain the given key.
+
+      int i = -pos - 1;
+      assert 0 <= i && i <= blockKeys.length;
+      return i - 1;
+    }
 
-    public BlockIndexReader(final CellComparator c, final int treeLevel,
+    @Override
+    public int rootBlockContainingKey(Cell key) {
+      // Should not be called on this because here it deals only with byte[]
+      throw new UnsupportedOperationException(
+          "Cannot search for a key that is of Cell type. Only plain byte array keys " +
+          "can be searched for");
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      sb.append("size=" + rootCount).append("\n");
+      for (int i = 0; i < rootCount; i++) {
+        sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
+            .append("\n  offset=").append(blockOffsets[i])
+            .append(", dataSize=" + blockDataSizes[i]).append("\n");
+      }
+      return sb.toString();
+    }
+
+  }
+
+  /**
+   * An implementation of the BlockIndexReader that deals with block keys which are the key
+   * part of a cell, as in the data block index or the ROW_COL bloom blocks.
+   * It needs a CellComparator to work with the Cells.
+   */
+   static class CellBasedKeyBlockIndexReader extends BlockIndexReader {
+
+    private Cell[] blockKeys;
+    /** Pre-computed mid-key */
+    private AtomicReference<Cell> midKey = new AtomicReference<Cell>();
+    /** Needed doing lookup on blocks. */
+    private CellComparator comparator;
+
+    public CellBasedKeyBlockIndexReader(final CellComparator c, final int treeLevel,
         final CachingBlockReader cachingBlockReader) {
       this(c, treeLevel);
       this.cachingBlockReader = cachingBlockReader;
     }
 
-    public BlockIndexReader(final CellComparator c, final int treeLevel)
-    {
+    public CellBasedKeyBlockIndexReader(final CellComparator c, final int treeLevel) {
       // Can be null for METAINDEX block
       comparator = c;
       searchTreeLevel = treeLevel;
     }
 
-    /**
-     * @return true if the block index is empty.
-     */
-    public boolean isEmpty() {
-      return blockKeys.length == 0;
-    }
+    protected long calculateHeapSizeForBlockKeys(long heapSize) {
+      if (blockKeys != null) {
+        heapSize += ClassSize.REFERENCE;
+        // Adding array + references overhead
+        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);
 
-    /**
-     * Verifies that the block index is non-empty and throws an
-     * {@link IllegalStateException} otherwise.
-     */
-    public void ensureNonEmpty() {
-      if (blockKeys.length == 0) {
-        throw new IllegalStateException("Block index is empty or not loaded");
+        // Adding blockKeys
+        for (Cell key : blockKeys) {
+          heapSize += ClassSize.align(CellUtil.estimatedHeapSizeOf(key));
+        }
       }
+      // Add the comparator and the midKey AtomicReference
+      heapSize += 2 * ClassSize.REFERENCE;
+      return heapSize;
     }
 
-    /**
-     * Return the data block which contains this key. This function will only
-     * be called when the HFile version is larger than 1.
-     *
-     * @param key the key we are looking for
-     * @param currentBlock the current block, to avoid re-reading the same block
-     * @param cacheBlocks
-     * @param pread
-     * @param isCompaction
-     * @param expectedDataBlockEncoding the data block encoding the caller is
-     *          expecting the data block to be in, or null to not perform this
-     *          check and return the block irrespective of the encoding
-     * @return reader a basic way to load blocks
-     * @throws IOException
-     */
-    public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,
-        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
-        throws IOException {
-      BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
-          cacheBlocks,
-          pread, isCompaction, expectedDataBlockEncoding);
-      if (blockWithScanInfo == null) {
-        return null;
-      } else {
-        return blockWithScanInfo.getHFileBlock();
-      }
+    @Override
+    public boolean isEmpty() {
+      return blockKeys.length == 0;
     }
 
     /**
-     * Return the BlockWithScanInfo which contains the DataBlock with other scan
-     * info such as nextIndexedKey. This function will only be called when the
-     * HFile version is larger than 1.
-     * 
-     * @param key
-     *          the key we are looking for
-     * @param currentBlock
-     *          the current block, to avoid re-reading the same block
-     * @param cacheBlocks
-     * @param pread
-     * @param isCompaction
-     * @param expectedDataBlockEncoding the data block encoding the caller is
-     *          expecting the data block to be in, or null to not perform this
-     *          check and return the block irrespective of the encoding.
-     * @return the BlockWithScanInfo which contains the DataBlock with other
-     *         scan info such as nextIndexedKey.
-     * @throws IOException
+     * @param i
+     *          from 0 to {@link #getRootBlockCount() - 1}
      */
+    public Cell getRootBlockKey(int i) {
+      return blockKeys[i];
+    }
+    @Override
     public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
-        boolean cacheBlocks,
-        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
-        throws IOException {
+        boolean cacheBlocks, boolean pread, boolean isCompaction,
+        DataBlockEncoding expectedDataBlockEncoding) throws IOException {
       int rootLevelIndex = rootBlockContainingKey(key);
       if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
         return null;
@@ -227,7 +284,7 @@ public class HFileBlockIndex {
       int currentOnDiskSize = blockDataSizes[rootLevelIndex];
 
       if (rootLevelIndex < blockKeys.length - 1) {
-        nextIndexedKey = new KeyValue.KeyOnlyKeyValue(blockKeys[rootLevelIndex + 1]);
+        nextIndexedKey = blockKeys[rootLevelIndex + 1];
       } else {
         nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
       }
@@ -314,18 +371,12 @@ public class HFileBlockIndex {
       return blockWithScanInfo;
     }
 
-    /**
-     * An approximation to the {@link HFile}'s mid-key. Operates on block
-     * boundaries, and does not go inside blocks. In other words, returns the
-     * first key of the middle block of the file.
-     *
-     * @return the first key of the middle block
-     */
-    public byte[] midkey() throws IOException {
+    @Override
+    public Cell midkey() throws IOException {
       if (rootCount == 0)
         throw new IOException("HFile empty");
 
-      byte[] targetMidKey = this.midKey.get();
+      Cell targetMidKey = this.midKey.get();
       if (targetMidKey != null) {
         return targetMidKey;
       }
@@ -348,7 +399,8 @@ public class HFileBlockIndex {
             keyRelOffset;
         int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset
             + SECONDARY_INDEX_ENTRY_OVERHEAD;
-        targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
+        byte[] bytes = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
+        targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
       } else {
         // The middle of the root-level index.
         targetMidKey = blockKeys[rootCount / 2];
@@ -358,14 +410,175 @@ public class HFileBlockIndex {
       return targetMidKey;
     }
 
+    protected void initialize(int numEntries) {
+      blockKeys = new Cell[numEntries];
+    }
+
     /**
-     * @param i from 0 to {@link #getRootBlockCount() - 1}
+     * Adds a new entry in the root block index. Only used when reading.
+     *
+     * @param key Last key in the block
+     * @param offset file offset where the block is stored
+     * @param dataSize the uncompressed data size
      */
-    public byte[] getRootBlockKey(int i) {
-      return blockKeys[i];
+    protected void add(final byte[] key, final long offset, final int dataSize) {
+      blockOffsets[rootCount] = offset;
+      // Create the blockKeys as Cells once when the reader is opened
+      blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
+      blockDataSizes[rootCount] = dataSize;
+      rootCount++;
+    }
+
+    @Override
+    public int rootBlockContainingKey(final byte[] key, int offset, int length,
+        CellComparator comp) {
+      // This should always be called with a Cell, not with a byte[] key
+      throw new UnsupportedOperationException("Cannot search for a plain byte array key. " +
+          "Only Cell-based keys can be searched for");
+    }
+
+    @Override
+    public int rootBlockContainingKey(Cell key) {
+      // Here the comparator should not be null as this happens for the root-level block
+      int pos = Bytes.binarySearch(blockKeys, key, comparator);
+      // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
+      // binarySearch's javadoc.
+
+      if (pos >= 0) {
+        // This means this is an exact match with an element of blockKeys.
+        assert pos < blockKeys.length;
+        return pos;
+      }
+
+      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
+      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
+      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
+      // key < blockKeys[0], meaning the file does not contain the given key.
+
+      int i = -pos - 1;
+      assert 0 <= i && i <= blockKeys.length;
+      return i - 1;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      sb.append("size=" + rootCount).append("\n");
+      for (int i = 0; i < rootCount; i++) {
+        sb.append("key=").append((blockKeys[i]))
+            .append("\n  offset=").append(blockOffsets[i])
+            .append(", dataSize=" + blockDataSizes[i]).append("\n");
+      }
+      return sb.toString();
+    }
+  }
+   /**
+   * The reader will always hold the root level index in the memory. Index
+   * blocks at all other levels will be cached in the LRU cache in practice,
+   * although this API does not enforce that.
+   *
+   * All non-root (leaf and intermediate) index blocks contain what we call a
+   * "secondary index": an array of offsets to the entries within the block.
+   * This allows us to do binary search for the entry corresponding to the
+   * given key without having to deserialize the block.
+   */
+   static abstract class BlockIndexReader implements HeapSize {
+
+    protected long[] blockOffsets;
+    protected int[] blockDataSizes;
+    protected int rootCount = 0;
+
+    // Mid-key metadata.
+    protected long midLeafBlockOffset = -1;
+    protected int midLeafBlockOnDiskSize = -1;
+    protected int midKeyEntry = -1;
+
+    /**
+     * The number of levels in the block index tree. One if there is only root
+     * level, two for root and leaf levels, etc.
+     */
+    protected int searchTreeLevel;
+
+    /** A way to read {@link HFile} blocks at a given offset */
+    protected CachingBlockReader cachingBlockReader;
+
+    /**
+     * @return true if the block index is empty.
+     */
+    public abstract boolean isEmpty();
+
+    /**
+     * Verifies that the block index is non-empty and throws an
+     * {@link IllegalStateException} otherwise.
+     */
+    public void ensureNonEmpty() {
+      if (isEmpty()) {
+        throw new IllegalStateException("Block index is empty or not loaded");
+      }
     }
 
     /**
+     * Return the data block which contains this key. This function will only
+     * be called when the HFile version is larger than 1.
+     *
+     * @param key the key we are looking for
+     * @param currentBlock the current block, to avoid re-reading the same block
+     * @param cacheBlocks
+     * @param pread
+     * @param isCompaction
+     * @param expectedDataBlockEncoding the data block encoding the caller is
+     *          expecting the data block to be in, or null to not perform this
+     *          check and return the block irrespective of the encoding
+     * @return reader a basic way to load blocks
+     * @throws IOException
+     */
+    public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks,
+        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
+        throws IOException {
+      BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
+          cacheBlocks,
+          pread, isCompaction, expectedDataBlockEncoding);
+      if (blockWithScanInfo == null) {
+        return null;
+      } else {
+        return blockWithScanInfo.getHFileBlock();
+      }
+    }
+
+    /**
+     * Return the BlockWithScanInfo which contains the DataBlock with other scan
+     * info such as nextIndexedKey. This function will only be called when the
+     * HFile version is larger than 1.
+     * 
+     * @param key
+     *          the key we are looking for
+     * @param currentBlock
+     *          the current block, to avoid re-reading the same block
+     * @param cacheBlocks
+     * @param pread
+     * @param isCompaction
+     * @param expectedDataBlockEncoding the data block encoding the caller is
+     *          expecting the data block to be in, or null to not perform this
+     *          check and return the block irrespective of the encoding.
+     * @return the BlockWithScanInfo which contains the DataBlock with other
+     *         scan info such as nextIndexedKey.
+     * @throws IOException
+     */
+    public abstract BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
+        boolean cacheBlocks,
+        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
+        throws IOException;
+
+    /**
+     * An approximation to the {@link HFile}'s mid-key. Operates on block
+     * boundaries, and does not go inside blocks. In other words, returns the
+     * first key of the middle block of the file.
+     *
+     * @return the first key of the middle block
+     */
+    public abstract Cell midkey() throws IOException;
+
+    /**
      * @param i from 0 to {@link #getRootBlockCount() - 1}
      */
     public long getRootBlockOffset(int i) {
@@ -402,27 +615,8 @@ public class HFileBlockIndex {
     // When we want to find the meta index block or bloom block for ROW bloom
     // type Bytes.BYTES_RAWCOMPARATOR would be enough. For the ROW_COL bloom case we need the
     // CellComparator.
-    public int rootBlockContainingKey(final byte[] key, int offset, int length,
-        CellComparator comp) {
-      int pos = Bytes.binarySearch(blockKeys, key, offset, length, comp);
-      // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
-      // binarySearch's javadoc.
-
-      if (pos >= 0) {
-        // This means this is an exact match with an element of blockKeys.
-        assert pos < blockKeys.length;
-        return pos;
-      }
-
-      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
-      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
-      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
-      // key < blockKeys[0], meaning the file does not contain the given key.
-
-      int i = -pos - 1;
-      assert 0 <= i && i <= blockKeys.length;
-      return i - 1;
-    }
+    public abstract int rootBlockContainingKey(final byte[] key, int offset, int length,
+        CellComparator comp);
 
     /**
      * Finds the root-level index block containing the given key.
@@ -438,7 +632,7 @@ public class HFileBlockIndex {
     // Bytes.BYTES_RAWCOMPARATOR would be enough. For the ROW_COL bloom case we
     // need the CellComparator.
     public int rootBlockContainingKey(final byte[] key, int offset, int length) {
-      return rootBlockContainingKey(key, offset, length, comparator);
+      return rootBlockContainingKey(key, offset, length, null);
     }
 
     /**
@@ -447,41 +641,7 @@ public class HFileBlockIndex {
      * @param key
      *          Key to find
      */
-    public int rootBlockContainingKey(final Cell key) {
-      // Here the comparator should not be null as this happens for the root-level block
-      int pos = Bytes.binarySearch(blockKeys, key, comparator);
-      // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see
-      // binarySearch's javadoc.
-
-      if (pos >= 0) {
-        // This means this is an exact match with an element of blockKeys.
-        assert pos < blockKeys.length;
-        return pos;
-      }
-
-      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
-      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
-      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
-      // key < blockKeys[0], meaning the file does not contain the given key.
-
-      int i = -pos - 1;
-      assert 0 <= i && i <= blockKeys.length;
-      return i - 1;
-    }
-
-    /**
-     * Adds a new entry in the root block index. Only used when reading.
-     *
-     * @param key Last key in the block
-     * @param offset file offset where the block is stored
-     * @param dataSize the uncompressed data size
-     */
-    private void add(final byte[] key, final long offset, final int dataSize) {
-      blockOffsets[rootCount] = offset;
-      blockKeys[rootCount] = key;
-      blockDataSizes[rootCount] = dataSize;
-      rootCount++;
-    }
+    public abstract int rootBlockContainingKey(final Cell key);
 
     /**
      * The indexed key at the ith position in the nonRootIndex. The position starts at 0.
@@ -489,7 +649,7 @@ public class HFileBlockIndex {
      * @param i the ith position
      * @return The indexed key at the ith position in the nonRootIndex.
      */
-    private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
+    protected byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
       int numEntries = nonRootIndex.getInt(0);
       if (i < 0 || i >= numEntries) {
         return null;
@@ -653,10 +813,9 @@ public class HFileBlockIndex {
      * @param numEntries the number of root-level index entries
      * @throws IOException
      */
-    public void readRootIndex(DataInput in, final int numEntries)
-        throws IOException {
+    public void readRootIndex(DataInput in, final int numEntries) throws IOException {
       blockOffsets = new long[numEntries];
-      blockKeys = new byte[numEntries][];
+      initialize(numEntries);
       blockDataSizes = new int[numEntries];
 
       // If index size is zero, no index was written.
@@ -670,6 +829,10 @@ public class HFileBlockIndex {
       }
     }
 
+    protected abstract void initialize(int numEntries);
+
+    protected abstract void add(final byte[] key, final long offset, final int dataSize);
+
     /**
      * Read in the root-level index from the given input stream. Must match
      * what was written into the root level by
@@ -712,36 +875,15 @@ public class HFileBlockIndex {
     }
 
     @Override
-    public String toString() {
-      StringBuilder sb = new StringBuilder();
-      sb.append("size=" + rootCount).append("\n");
-      for (int i = 0; i < rootCount; i++) {
-        sb.append("key=").append(KeyValue.keyToString(blockKeys[i]))
-            .append("\n  offset=").append(blockOffsets[i])
-            .append(", dataSize=" + blockDataSizes[i]).append("\n");
-      }
-      return sb.toString();
-    }
-
-    @Override
     public long heapSize() {
-      long heapSize = ClassSize.align(6 * ClassSize.REFERENCE +
+      // The BlockIndexReader base class no longer holds blockKeys, the comparator or the midkey atomic reference
+      long heapSize = ClassSize.align(3 * ClassSize.REFERENCE +
           2 * Bytes.SIZEOF_INT + ClassSize.OBJECT);
 
       // Mid-key metadata.
       heapSize += MID_KEY_METADATA_SIZE;
 
-      // Calculating the size of blockKeys
-      if (blockKeys != null) {
-        // Adding array + references overhead
-        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length
-            * ClassSize.REFERENCE);
-
-        // Adding bytes
-        for (byte[] key : blockKeys) {
-          heapSize += ClassSize.align(ClassSize.ARRAY + key.length);
-        }
-      }
+      heapSize = calculateHeapSizeForBlockKeys(heapSize);
 
       if (blockOffsets != null) {
         heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length
@@ -756,6 +898,7 @@ public class HFileBlockIndex {
       return ClassSize.align(heapSize);
     }
 
+    protected abstract long calculateHeapSizeForBlockKeys(long heapSize);
   }
 
   /**

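Note on the refactor above: the binary-search contract that the removed root-level
lookup implemented is now the contract every concrete BlockIndexReader
(CellBasedKeyBlockIndexReader for data blocks, ByteArrayKeyBlockIndexReader for
meta blocks) must honor. A minimal standalone sketch of that return convention,
using a hypothetical String[] stand-in for the real block keys:

    import java.util.Arrays;

    public class RootBlockLookupSketch {
      /**
       * Returns the index j such that keys[j] <= key < keys[j + 1],
       * or -1 if key < keys[0] (the file cannot contain the key).
       * Mirrors the contract of BlockIndexReader#rootBlockContainingKey.
       */
      static int rootBlockContainingKey(String[] keys, String key) {
        int pos = Arrays.binarySearch(keys, key);
        if (pos >= 0) {
          return pos;              // exact match with a block's first key
        }
        int i = -pos - 1;          // insertion point: keys[i - 1] < key < keys[i]
        return i - 1;              // block whose key range covers 'key'
      }

      public static void main(String[] args) {
        String[] blockKeys = { "b", "f", "k" };
        System.out.println(rootBlockContainingKey(blockKeys, "a")); // -1: before file
        System.out.println(rootBlockContainingKey(blockKeys, "f")); //  1: exact match
        System.out.println(rootBlockContainingKey(blockKeys, "h")); //  1: inside block 1
      }
    }
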
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
index aac10f2..cb7dc62 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
@@ -410,7 +410,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
     }
 
     try {
-      System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
+      System.out.println("Mid-key: " + (CellUtil.getCellKeyAsString(reader.midkey())));
     } catch (Exception e) {
       System.out.println ("Unable to retrieve the midkey");
     }

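Since midkey() now returns a Cell instead of a serialized key, the printer
renders it through CellUtil rather than Bytes. A minimal sketch of the new
rendering, using a hand-built KeyValue as a stand-in for reader.midkey():

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class MidKeyPrintSketch {
      public static void main(String[] args) {
        // Stand-in for reader.midkey(), which now hands back a Cell
        Cell mid = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
            Bytes.toBytes("q"), 1L, KeyValue.Type.Put);
        // Renders row/family/qualifier/timestamp/type instead of raw key bytes
        System.out.println("Mid-key: " + CellUtil.getCellKeyAsString(mid));
      }
    }
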
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index d184d42..642b6c7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -39,7 +39,6 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueUtil;
 import org.apache.hadoop.hbase.NoTagsKeyValue;
-import org.apache.hadoop.hbase.KeyValue.KVComparator;
 import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
 import org.apache.hadoop.hbase.io.compress.Compression;
@@ -73,10 +72,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
   private static final Log LOG = LogFactory.getLog(HFileReaderImpl.class);
 
   /** Data block index reader keeping the root data index in memory */
-  private HFileBlockIndex.BlockIndexReader dataBlockIndexReader;
+  private HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader;
 
   /** Meta block index reader -- always single level */
-  private HFileBlockIndex.BlockIndexReader metaBlockIndexReader;
+  private HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader;
 
   private final FixedFileTrailer trailer;
 
@@ -189,10 +188,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
 
     // Comparator class name is stored in the trailer in version 2.
     comparator = trailer.createComparator();
-    dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator,
+    dataBlockIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator,
         trailer.getNumDataIndexLevels(), this);
-    metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(
-        null, 1);
+    metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
 
     // Parse load-on-open data.
 
@@ -309,7 +307,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
   }
 
   private String toStringFirstKey() {
-    return KeyValue.keyToString(getFirstKey());
+    if (getFirstKey() == null)
+      return null;
+    return CellUtil.getCellKeyAsString(getFirstKey());
   }
 
   private String toStringLastKey() {
@@ -341,7 +341,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
    *         first KeyValue.
    */
   @Override
-  public byte [] getFirstKey() {
+  public Cell getFirstKey() {
     if (dataBlockIndexReader == null) {
       throw new BlockIndexNotLoadedException();
     }
@@ -357,8 +357,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
    */
   @Override
   public byte[] getFirstRowKey() {
-    byte[] firstKey = getFirstKey();
-    return firstKey == null? null: KeyValueUtil.createKeyValueFromKey(firstKey).getRow();
+    Cell firstKey = getFirstKey();
+    // We have to copy the row part to form the row key alone
+    return firstKey == null? null: CellUtil.cloneRow(firstKey);
   }
 
   /**
@@ -1215,7 +1216,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     // Per meta key from any given file, synchronize reads for said block. This
     // is OK to do for meta blocks because the meta block index is always
     // single-level.
-    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
+    synchronized (metaBlockIndexReader
+        .getRootBlockKey(block)) {
       // Check cache for block. If found return.
       long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
       BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset);
@@ -1387,7 +1389,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
    * @throws IOException
    */
   @Override
-  public byte[] midkey() throws IOException {
+  public Cell midkey() throws IOException {
     return dataBlockIndexReader.midkey();
   }
 

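getFirstRowKey() now has to copy the row bytes out of the Cell, since the cell
may be backed by a larger shared buffer. A small self-contained sketch of that
pattern (firstRowKey is a hypothetical helper mirroring the new method):

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class FirstRowKeySketch {
      // Null for an empty file, otherwise a fresh copy of just the row
      // portion of the first key, never a slice of the backing buffer.
      static byte[] firstRowKey(Cell firstKey) {
        return firstKey == null ? null : CellUtil.cloneRow(firstKey);
      }

      public static void main(String[] args) {
        Cell first = new KeyValue(Bytes.toBytes("row-0001"), Bytes.toBytes("cf"),
            Bytes.toBytes("q"), 42L, KeyValue.Type.Put);
        System.out.println(Bytes.toString(firstRowKey(first))); // row-0001
        System.out.println(firstRowKey(null));                  // null
      }
    }
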
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
index ee2644d..f4eaaf9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -597,12 +598,12 @@ public class HRegionFileSystem {
         } else {
           //check if smaller than first key
           KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
-          byte[] firstKey = f.createReader().getFirstKey();
+          Cell firstKey = f.createReader().getFirstKey();
           // If firstKey is null means storefile is empty.
           if (firstKey == null) {
             return null;
           }
-          if (f.getReader().getComparator().compare(splitKey, firstKey, 0, firstKey.length) < 0) {
+          if (f.getReader().getComparator().compare(splitKey, firstKey) < 0) {
             return null;
           }
         }

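The guard above rejects a bottom reference whose key range would be empty: if
the last possible key on the split row sorts before the file's first key, no
key in the file belongs to the bottom half. A condensed sketch of the check
(bottomHalfEmpty is an illustrative helper, and CellComparator.COMPARATOR is
assumed as the stock comparator on this branch):

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellComparator;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.KeyValueUtil;
    import org.apache.hadoop.hbase.util.Bytes;

    public class SplitGuardSketch {
      // True when a split at splitRow would leave the bottom half empty,
      // i.e. every key in the file sorts after the split row.
      static boolean bottomHalfEmpty(Cell firstKey, byte[] splitRow,
          CellComparator comparator) {
        if (firstKey == null) {
          return true; // empty store file
        }
        KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
        return comparator.compare(splitKey, firstKey) < 0;
      }

      public static void main(String[] args) {
        Cell first = new KeyValue(Bytes.toBytes("mmm"), Bytes.toBytes("cf"),
            Bytes.toBytes("q"), 1L, KeyValue.Type.Put);
        System.out.println(bottomHalfEmpty(first, Bytes.toBytes("aaa"),
            CellComparator.COMPARATOR)); // true: split row before first key
        System.out.println(bottomHalfEmpty(first, Bytes.toBytes("zzz"),
            CellComparator.COMPARATOR)); // false
      }
    }
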
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 4de61ae..33d4e1e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -1838,9 +1838,8 @@ public class HStore implements Store {
       return false;
     }
     // TODO: Cache these keys rather than make each time?
-    byte [] fk = r.getFirstKey();
-    if (fk == null) return false;
-    KeyValue firstKV = KeyValueUtil.createKeyValueFromKey(fk, 0, fk.length);
+    Cell firstKV = r.getFirstKey();
+    if (firstKV == null) return false;
     byte [] lk = r.getLastKey();
     KeyValue lastKV = KeyValueUtil.createKeyValueFromKey(lk, 0, lk.length);
     KeyValue firstOnRow = state.getTargetKey();
@@ -1884,9 +1883,9 @@ public class HStore implements Store {
    */
   private boolean seekToScanner(final HFileScanner scanner,
                                 final KeyValue firstOnRow,
-                                final KeyValue firstKV)
+                                final Cell firstKV)
       throws IOException {
-    KeyValue kv = firstOnRow;
+    Cell kv = firstOnRow;
     // If firstOnRow < firstKV, set to firstKV
     if (this.comparator.compareRows(firstKV, firstOnRow) == 0) kv = firstKV;
     int result = scanner.seekTo(kv);

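seekToScanner() now takes the file's first key as a Cell; when the caller's
target row is the same as the first key's row, it seeks directly to the first
cell instead. A sketch of just that decision (seekTarget is a hypothetical
extraction of the branch above):

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellComparator;
    import org.apache.hadoop.hbase.KeyValue;

    public class SeekTargetSketch {
      // Chooses the cell to seek to: the caller's target, or the file's
      // first cell when both fall on the same row, matching the HStore logic.
      static Cell seekTarget(KeyValue firstOnRow, Cell firstKV,
          CellComparator comparator) {
        return comparator.compareRows(firstKV, firstOnRow) == 0 ? firstOnRow == null
            ? firstKV : firstKV : firstOnRow;
      }
    }

Seeking straight to the concrete first cell avoids a pointless seek to a
fake key that cannot precede anything in the file.
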
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
index eba3689..fc94d3d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
@@ -678,21 +678,20 @@ public class StoreFile {
     // Get first, last, and mid keys.  Midkey is the key that starts block
     // in middle of hfile.  Has column and timestamp.  Need to return just
     // the row we want to split on as midkey.
-    byte [] midkey = this.reader.midkey();
+    Cell midkey = this.reader.midkey();
     if (midkey != null) {
-      KeyValue mk = KeyValueUtil.createKeyValueFromKey(midkey, 0, midkey.length);
-      byte [] fk = this.reader.getFirstKey();
-      KeyValue firstKey = KeyValueUtil.createKeyValueFromKey(fk, 0, fk.length);
+      Cell firstKey = this.reader.getFirstKey();
       byte [] lk = this.reader.getLastKey();
       KeyValue lastKey = KeyValueUtil.createKeyValueFromKey(lk, 0, lk.length);
       // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
-      if (comparator.compareRows(mk, firstKey) == 0 || comparator.compareRows(mk, lastKey) == 0) {
+      if (comparator.compareRows(midkey, firstKey) == 0
+          || comparator.compareRows(midkey, lastKey) == 0) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("cannot split because midkey is the same as first or last row");
         }
         return null;
       }
-      return mk.getRow();
+      return CellUtil.cloneRow(midkey);
     }
     return null;
   }
@@ -1371,8 +1370,7 @@ public class StoreFile {
           .createLastOnRow(scan.getStartRow()) : KeyValueUtil.createLastOnRow(scan
           .getStopRow());
       // TODO this is in hot path? Optimize and avoid 2 extra object creations.
-      KeyValue.KeyOnlyKeyValue firstKeyKV = 
-          new KeyValue.KeyOnlyKeyValue(this.getFirstKey(), 0, this.getFirstKey().length);
+      Cell firstKeyKV = this.getFirstKey();
       KeyValue.KeyOnlyKeyValue lastKeyKV = 
           new KeyValue.KeyOnlyKeyValue(this.getLastKey(), 0, this.getLastKey().length);
       boolean nonOverLapping = ((getComparator().compare(firstKeyKV, largestScanKeyValue)) > 0
@@ -1493,7 +1491,7 @@ public class StoreFile {
       return reader.getLastRowKey();
     }
 
-    public byte[] midkey() throws IOException {
+    public Cell midkey() throws IOException {
       return reader.midkey();
     }
 
@@ -1513,7 +1511,7 @@ public class StoreFile {
       return deleteFamilyCnt;
     }
 
-    public byte[] getFirstKey() {
+    public Cell getFirstKey() {
       return reader.getFirstKey();
     }
 

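getSplitPoint() is now Cell-based end to end: take the mid key, refuse to split
when its row matches the first or last key's row (either half would be empty),
otherwise return a copy of the mid row. A condensed sketch of that logic as a
hypothetical helper:

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellComparator;
    import org.apache.hadoop.hbase.CellUtil;

    public class SplitPointSketch {
      // Returns the row to split on, or null when the mid key shares a row
      // with the first or last key, in which case the region cannot be split.
      static byte[] splitRow(Cell midkey, Cell firstKey, Cell lastKey,
          CellComparator comparator) {
        if (midkey == null) {
          return null;
        }
        if (comparator.compareRows(midkey, firstKey) == 0
            || comparator.compareRows(midkey, lastKey) == 0) {
          return null;
        }
        return CellUtil.cloneRow(midkey);
      }
    }

Comparing rows rather than whole keys is what prevents a split point from
landing inside a single row.
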
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
index a80a201..5b6cb36 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterChunk.java
@@ -29,7 +29,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import com.google.common.annotations.VisibleForTesting;
 
 /**
- * The basic building block for the {@link CompoundBloomFilter}
+ * The basic building block for the {@link org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter}
  */
 @InterfaceAudience.Private
 public class BloomFilterChunk implements BloomFilterBase {

http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
index aecbdf8..99951f0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
@@ -28,6 +28,9 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilter;
+import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterBase;
+import org.apache.hadoop.hbase.io.hfile.CompoundBloomFilterWriter;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.regionserver.BloomType;
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
deleted file mode 100644
index 984742f..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.util;
-
-import java.io.DataInput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.io.hfile.BlockType;
-import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileBlock;
-import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
-
-/**
- * A Bloom filter implementation built on top of {@link BloomFilterChunk},
- * encapsulating a set of fixed-size Bloom filters written out at the time of
- * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
- * block stream, and loaded on demand at query time. This class only provides
- * reading capabilities.
- */
-@InterfaceAudience.Private
-public class CompoundBloomFilter extends CompoundBloomFilterBase
-    implements BloomFilter {
-
-  /** Used to load chunks on demand */
-  private HFile.Reader reader;
-
-  private HFileBlockIndex.BlockIndexReader index;
-
-  private int hashCount;
-  private Hash hash;
-
-  private long[] numQueriesPerChunk;
-  private long[] numPositivesPerChunk;
-
-  /**
-   * De-serialization for compound Bloom filter metadata. Must be consistent
-   * with what {@link CompoundBloomFilterWriter} does.
-   *
-   * @param meta serialized Bloom filter metadata without any magic blocks
-   * @throws IOException
-   */
-  public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
-      throws IOException {
-    this.reader = reader;
-
-    totalByteSize = meta.readLong();
-    hashCount = meta.readInt();
-    hashType = meta.readInt();
-    totalKeyCount = meta.readLong();
-    totalMaxKeys = meta.readLong();
-    numChunks = meta.readInt();
-    byte[] comparatorClassName = Bytes.readByteArray(meta);
-    // The writer would have returned 0 as the vint length for the case of
-    // Bytes.BYTES_RAWCOMPARATOR.  In such cases do not initialize comparator, it can be
-    // null
-    if (comparatorClassName.length != 0) {
-      comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName));
-    }
-
-    hash = Hash.getInstance(hashType);
-    if (hash == null) {
-      throw new IllegalArgumentException("Invalid hash type: " + hashType);
-    }
-    // We will pass null for ROW block
-    index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
-    index.readRootIndex(meta, numChunks);
-  }
-
-  @Override
-  public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuffer bloom) {
-    // We try to store the result in this variable so we can update stats for
-    // testing, but when an error happens, we log a message and return.
-
-    int block = index.rootBlockContainingKey(key, keyOffset,
-        keyLength);
-    return checkContains(key, keyOffset, keyLength, block);
-  }
-
-  private boolean checkContains(byte[] key, int keyOffset, int keyLength, int block) {
-    boolean result;
-    if (block < 0) {
-      result = false; // This key is not in the file.
-    } else {
-      HFileBlock bloomBlock;
-      try {
-        // We cache the block and use a positional read.
-        bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
-            index.getRootBlockDataSize(block), true, true, false, true,
-            BlockType.BLOOM_CHUNK, null);
-      } catch (IOException ex) {
-        // The Bloom filter is broken, turn it off.
-        throw new IllegalArgumentException(
-            "Failed to load Bloom block for key "
-                + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
-      }
-
-      ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
-      result = BloomFilterUtil.contains(key, keyOffset, keyLength,
-          bloomBuf, bloomBlock.headerSize(),
-          bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
-    }
-
-    if (numQueriesPerChunk != null && block >= 0) {
-      // Update statistics. Only used in unit tests.
-      ++numQueriesPerChunk[block];
-      if (result)
-        ++numPositivesPerChunk[block];
-    }
-
-    return result;
-  }
-
-  @Override
-  public boolean contains(Cell keyCell, ByteBuffer bloom) {
-    // We try to store the result in this variable so we can update stats for
-    // testing, but when an error happens, we log a message and return.
-    int block = index.rootBlockContainingKey(keyCell);
-    // TODO : Will be true KeyValue for now.
-    // When Offheap comes in we can add an else condition to work
-    // on the bytes in offheap
-    KeyValue kvKey = (KeyValue) keyCell;
-    return checkContains(kvKey.getBuffer(), kvKey.getKeyOffset(), kvKey.getKeyLength(), block);
-  }
-
-  public boolean supportsAutoLoading() {
-    return true;
-  }
-
-  public int getNumChunks() {
-    return numChunks;
-  }
-
-  public void enableTestingStats() {
-    numQueriesPerChunk = new long[numChunks];
-    numPositivesPerChunk = new long[numChunks];
-  }
-
-  public String formatTestingStats() {
-    StringBuilder sb = new StringBuilder();
-    for (int i = 0; i < numChunks; ++i) {
-      sb.append("chunk #");
-      sb.append(i);
-      sb.append(": queries=");
-      sb.append(numQueriesPerChunk[i]);
-      sb.append(", positives=");
-      sb.append(numPositivesPerChunk[i]);
-      sb.append(", positiveRatio=");
-      sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
-      sb.append(";\n");
-    }
-    return sb.toString();
-  }
-
-  public long getNumQueriesForTesting(int chunk) {
-    return numQueriesPerChunk[chunk];
-  }
-
-  public long getNumPositivesForTesting(int chunk) {
-    return numPositivesPerChunk[chunk];
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append(BloomFilterUtil.formatStats(this));
-    sb.append(BloomFilterUtil.STATS_RECORD_SEP + 
-        "Number of chunks: " + numChunks);
-    sb.append(BloomFilterUtil.STATS_RECORD_SEP + 
-        ((comparator != null) ? "Comparator: "
-        + comparator.getClass().getSimpleName() : "Comparator: "
-        + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName()));
-    return sb.toString();
-  }
-
-}

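The class is deleted here because it moves to org.apache.hadoop.hbase.io.hfile
(see the new CompoundBloomFilter files added by this commit); the lookup flow
itself is unchanged: route the key to one fixed-size chunk via the single-level
block index, then probe that chunk's bits. A schematic sketch of the flow, with
illustrative interfaces standing in for the real index, reader and
BloomFilterUtil:

    import java.nio.ByteBuffer;

    public class CompoundBloomLookupSketch {
      // Schematic of CompoundBloomFilter#contains: route the key to one
      // fixed-size chunk, then run the ordinary Bloom probe on that chunk.
      interface ChunkIndex {
        int rootBlockContainingKey(byte[] key, int offset, int length);
      }
      interface ChunkLoader {
        ByteBuffer loadChunk(int chunk); // reads (and caches) one Bloom block
      }
      interface BloomProbe {
        boolean mightContain(byte[] key, int offset, int length, ByteBuffer bits);
      }

      static boolean contains(byte[] key, int off, int len,
          ChunkIndex index, ChunkLoader loader, BloomProbe probe) {
        int chunk = index.rootBlockContainingKey(key, off, len);
        if (chunk < 0) {
          return false; // key sorts before the first chunk: not in the file
        }
        return probe.mightContain(key, off, len, loader.loadChunk(chunk));
      }
    }

The per-chunk query and positive counters in the real class exist only for
unit-test statistics and are omitted above.
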
http://git-wip-us.apache.org/repos/asf/hbase/blob/487e4aa7/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
deleted file mode 100644
index 7c29ab2..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.util;
-
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-
-import org.apache.hadoop.hbase.CellComparator;
-
-@InterfaceAudience.Private
-public class CompoundBloomFilterBase implements BloomFilterBase {
-
-  /**
-   * At read time, the total number of chunks. At write time, the number of
-   * chunks created so far. The first chunk has an ID of 0, and the current
-   * chunk has the ID of numChunks - 1.
-   */
-  protected int numChunks;
-
-  /**
-   * The Bloom filter version. There used to be a DynamicByteBloomFilter which
-   * had version 2.
-   */
-  public static final int VERSION = 3;
-
-  /** Target error rate for configuring the filter and for information */
-  protected float errorRate;
-
-  /** The total number of keys in all chunks */
-  protected long totalKeyCount;
-  protected long totalByteSize;
-  protected long totalMaxKeys;
-
-  /** Hash function type to use, as defined in {@link Hash} */
-  protected int hashType;
-  /** Comparator used to compare Bloom filter keys */
-  protected CellComparator comparator;
-
-  @Override
-  public long getMaxKeys() {
-    return totalMaxKeys;
-  }
-
-  @Override
-  public long getKeyCount() {
-    return totalKeyCount;
-  }
-
-  @Override
-  public long getByteSize() {
-    return totalByteSize;
-  }
-
-}