Posted to commits@hbase.apache.org by li...@apache.org on 2014/02/11 19:28:12 UTC

svn commit: r1567261 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/io/hfile/ main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/util/ test/java/org/apache/hadoop/hb...

Author: liyin
Date: Tue Feb 11 18:28:11 2014
New Revision: 1567261

URL: http://svn.apache.org/r1567261
Log:
[HBASE-10342] RowKey Prefix Bloom Filter

Author: liyintang

Summary:
When designing an HBase schema for some use cases, it is quite common to combine multiple pieces of information within a RowKey. For instance, assume that the RowKey is defined as md5(id1) + id1 + id2, and the user wants to scan all the RowKeys that start with a given id1. In such a case, the RowKey Prefix Bloom Filter can cut unnecessary seeks during the scan.
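
A minimal sketch of that layout (the ids, lengths, and class name below are made up for illustration; Bytes and MessageDigest are the standard HBase/JDK utilities):

  import java.security.MessageDigest;
  import org.apache.hadoop.hbase.util.Bytes;

  public class RowKeyPrefixSketch {
    public static void main(String[] args) throws Exception {
      byte[] id1 = Bytes.toBytes(42L);                            // 8 bytes
      byte[] id2 = Bytes.toBytes(7L);                             // 8 bytes
      byte[] md5 = MessageDigest.getInstance("MD5").digest(id1);  // 16 bytes

      // Full RowKey: md5(id1) + id1 + id2 (32 bytes).
      byte[] rowKey = Bytes.add(md5, id1, id2);

      // Every row for this id1 shares the same 24-byte prefix, so a Bloom
      // filter built over the first 24 bytes of each RowKey can rule out
      // store files that contain no rows for this id1.
      byte[] scanPrefix = Bytes.add(md5, id1);
      System.out.println(Bytes.toStringBinary(rowKey));
      System.out.println(Bytes.toStringBinary(scanPrefix));
    }
  }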

This diff implements the RowKey Prefix Bloom Filter in HBase. The user can define the length of the RowKey prefix through the HTable schema.
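
As a sketch of the schema side, using the setter this diff adds to HColumnDescriptor (the table and family names are hypothetical):

  import org.apache.hadoop.hbase.HColumnDescriptor;
  import org.apache.hadoop.hbase.HTableDescriptor;
  import org.apache.hadoop.hbase.util.Bytes;

  public class PrefixBloomSchemaSketch {
    public static HTableDescriptor buildSchema() {
      HTableDescriptor htd = new HTableDescriptor("mytable");
      HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes("family"));
      // Build the RowKey Prefix Bloom filter over the first 24 bytes of
      // each RowKey (md5(id1) + id1 in the example above); non-positive
      // lengths are ignored and the filter stays disabled.
      hcd.setRowKeyPrefixLengthForBloom(24);
      htd.addFamily(hcd);
      return htd;
    }
  }

The filter is additionally gated by the io.storefile.rowkey.prefix.bloom.enabled master switch added to BloomFilterFactory (on by default), and it requires HFile V2.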

Test Plan: Run through all the unit tests

Reviewers: manukranthk, adela

Reviewed By: manukranthk

CC: hbase-dev@, hbase-eng@, arice, daviddeng

Differential Revision: https://phabricator.fb.com/D1141078

Task ID: 3548019

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java Tue Feb 11 18:28:11 2014
@@ -19,15 +19,8 @@
  */
 package org.apache.hadoop.hbase;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.Compression;
@@ -38,6 +31,11 @@ import org.apache.hadoop.hbase.util.Byte
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.*;
+
 /**
  * An HColumnDescriptor contains information about a column family such as the
  * number of versions, compression settings, etc.
@@ -48,6 +46,7 @@ import org.apache.hadoop.io.WritableComp
  * deleted when the column is deleted.
  */
 public class HColumnDescriptor implements WritableComparable<HColumnDescriptor> {
+  static final Log LOG = LogFactory.getLog(HColumnDescriptor.class);
   // For future backward compatibility
 
   // Version 3 was when column names become byte arrays and when we picked up
@@ -96,6 +95,7 @@ public class HColumnDescriptor implement
   public static final String BLOOMFILTER_ERRORRATE = "BLOOMFILTER_ERRORRATE";
   public static final String FOREVER = "FOREVER";
   public static final String REPLICATION_SCOPE = "REPLICATION_SCOPE";
+  public static final String ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER = "ROWKEY_PREFIX_LENGTH";
 
   /**
    * Default compression type.
@@ -147,6 +147,11 @@ public class HColumnDescriptor implement
   public static final String DEFAULT_BLOOMFILTER = StoreFile.BloomType.NONE.toString();
 
   /**
+   * Default setting for the RowKey Prefix Length for the Bloomfilter.
+   */
+  public static final int DEFAULT_ROWKEY_PREFIX_LENGTH_FOR_BLOOM = -1;
+
+  /**
    * Default value for bloom filter error rate.
    */
   public static final float DEFAULT_BLOOMFILTER_ERROR_RATE = 0.01f;
@@ -189,6 +194,8 @@ public class HColumnDescriptor implement
           String.valueOf(DEFAULT_ENCODE_ON_DISK));
       DEFAULT_VALUES.put(DATA_BLOCK_ENCODING,
           String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
+      DEFAULT_VALUES.put(ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER,
+          String.valueOf(DEFAULT_ROWKEY_PREFIX_LENGTH_FOR_BLOOM));
       for (String s : DEFAULT_VALUES.keySet()) {
         RESERVED_KEYWORDS.add(new ImmutableBytesWritable(Bytes.toBytes(s)));
       }
@@ -694,6 +701,28 @@ public class HColumnDescriptor implement
   }
 
   /**
+   * @return the number of leading RowKey bytes used as the prefix for the Bloom filter
+   */
+  public int getRowPrefixLengthForBloom() {
+    String n = getValue(ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER);
+    int prefixLength = DEFAULT_ROWKEY_PREFIX_LENGTH_FOR_BLOOM;
+    if (n != null) {
+      try {
+        prefixLength = Integer.valueOf(n);
+      } catch (NumberFormatException e) {
+        LOG.error("Cannot parse " + n + " as the RowKey Prefix Length", e);
+      }
+    }
+    return prefixLength;
+  }
+
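+  /**
+   * @param prefixLength the number of leading RowKey bytes to use for the
+   * RowKey Prefix Bloom filter; non-positive values are ignored
+   */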
+  public void setRowKeyPrefixLengthForBloom(int prefixLength) {
+    if (prefixLength > 0) {
+      setValue(ROWKEY_PREFIX_LENGTH_FOR_BLOOMFILTER, String.valueOf(prefixLength));
+    }
+  }
+
+  /**
    * @param bt bloom filter type
    * @return this (for chained invocation)
    */

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Tue Feb 11 18:28:11 2014
@@ -19,14 +19,14 @@
  */
 package org.apache.hadoop.hbase;
 
-import java.nio.ByteBuffer;
-
 import org.apache.hadoop.hbase.io.hfile.Compression;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.regionserver.CompactionManager;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.nativeio.NativeIO;
 
+import java.nio.ByteBuffer;
+
 /**
  * HConstants holds a bunch of HBase-related constants
  */
@@ -742,6 +742,8 @@ public final class HConstants {
 
   public static final String DELETE_COLUMN_BLOOM_FILTER = "delete_column_bloom_filter";
 
+  public static final String ROWKEY_PREFIX_BLOOM_FILTER = "rowkey_prefix_bloom_filter";
+
   /**
    * This will enable/disable the usage of delete col bloom filter. Note that
    * this won't enable/disable the delete bloom filter for being written/read.

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java Tue Feb 11 18:28:11 2014
@@ -19,14 +19,14 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import org.apache.hadoop.hbase.util.Bytes;
+
 import java.io.DataInputStream;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
 
-import org.apache.hadoop.hbase.util.Bytes;
-
 /**
  * Various types of {@link HFile} blocks. Ordinal values of these enum constants
  * must not be relied upon. The values in the enum appear in the order they
@@ -78,6 +78,9 @@ public enum BlockType {
   /** Delete Column Bloom filter metadata, version 2 */
   DELETE_COLUMN_BLOOM_META("DCBLMET2", BlockCategory.BLOOM),
 
+  /** RowKey Prefix Bloom filter metadata, version 2 */
+  ROWKEY_PREFIX_BLOOM_META("RPBLMET2", BlockCategory.BLOOM),
+
   // Trailer
 
   /** Fixed file trailer, both versions (always just a magic string) */

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java Tue Feb 11 18:28:11 2014
@@ -19,33 +19,13 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
-import java.io.Closeable;
-import java.io.DataInput;
-import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-
+import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.io.WriteOptions;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.KeyValue.KeyComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueContext;
 import org.apache.hadoop.hbase.io.HbaseMapWritable;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.regionserver.metrics.PercentileMetric;
@@ -56,8 +36,18 @@ import org.apache.hadoop.hbase.util.FSUt
 import org.apache.hadoop.hbase.util.Histogram;
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WriteOptions;
 
-import com.google.common.base.Preconditions;
+import java.io.Closeable;
+import java.io.DataInput;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 
 /**
  * File format for hbase.
@@ -284,6 +274,12 @@ public class HFile {
     void addDeleteColumnBloomFilter(BloomFilterWriter bfw) throws IOException;
 
     /**
+     * Store the RowKey Prefix Bloom filter in the file, which is only supported in
+     * HFile V2.
+     */
+    void addRowKeyPrefixBloomFilter(BloomFilterWriter bfw) throws IOException;
+
+    /**
      * Set whether compaction is in progress or not
      */
     void setCompactionWriter(boolean isCompaction);
@@ -526,6 +522,14 @@ public class HFile {
      */
     DataInput getDeleteColumnBloomFilterMetadata() throws IOException;
 
+    /**
+     * Retrieves RowKey Prefix Bloom filter metadata as appropriate for each
+     * {@link HFile} version.
+     * Knows nothing about how metadata is structured.
+     * @throws IOException
+     */
+    DataInput getRowKeyPrefixBloomFilterMetadata() throws IOException;
+
     Path getPath();
 
     /** Close method with optional evictOnClose */

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java Tue Feb 11 18:28:11 2014
@@ -19,19 +19,7 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
-import java.io.DataInput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.apache.commons.cli.PosixParser;
+import org.apache.commons.cli.*;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -45,14 +33,16 @@ import org.apache.hadoop.hbase.io.hfile.
 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
-import org.apache.hadoop.hbase.util.BloomFilter;
-import org.apache.hadoop.hbase.util.BloomFilterFactory;
-import org.apache.hadoop.hbase.util.ByteBloomFilter;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hbase.util.*;
 import org.codehaus.jackson.map.ObjectMapper;
 
+import java.io.DataInput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 /**
  * Implements pretty-printing functionality for {@link HFile}s.
  */
@@ -401,6 +391,8 @@ public class HFilePrettyPrinter {
       return reader.getDeleteBloomFilterMetadata();
     } else if (bloomFilter.equals(HConstants.DELETE_COLUMN_BLOOM_FILTER)) {
       return reader.getDeleteColumnBloomFilterMetadata();
+    } else if (bloomFilter.equals(HConstants.ROWKEY_PREFIX_BLOOM_FILTER)) {
+      return reader.getRowKeyPrefixBloomFilterMetadata();
     }
     return null;
   }

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java Tue Feb 11 18:28:11 2014
@@ -19,13 +19,7 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.concurrent.TimeUnit;
-
+import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -42,7 +36,12 @@ import org.apache.hadoop.hbase.util.Byte
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.RawComparator;
 
-import com.google.common.base.Preconditions;
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.concurrent.TimeUnit;
 
 /**
  * {@link HFile} reader for version 1. Does not support data block encoding,
@@ -750,4 +749,9 @@ public class HFileReaderV1 extends Abstr
     return null;
   }
 
+  @Override
+  public DataInput getRowKeyPrefixBloomFilterMetadata() {
+    return null;
+  }
+
 }

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Tue Feb 11 18:28:11 2014
@@ -19,16 +19,6 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
-import java.io.DataInput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.locks.ReentrantLock;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -44,12 +34,20 @@ import org.apache.hadoop.hbase.io.hfile.
 import org.apache.hadoop.hbase.ipc.HBaseServer.Call;
 import org.apache.hadoop.hbase.ipc.ProfilingData;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
-import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.BlockMetricType;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.IdLock;
 import org.apache.hadoop.io.WritableUtils;
 
+import java.io.DataInput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.ReentrantLock;
+
 /**
  * {@link HFile} reader for version 2.
  */
@@ -1573,19 +1571,26 @@ public class HFileReaderV2 extends Abstr
     return this.getBloomFilterMetadata(BlockType.DELETE_COLUMN_BLOOM_META);
   }
 
+  @Override
+  public DataInput getRowKeyPrefixBloomFilterMetadata() throws IOException {
+    return this.getBloomFilterMetadata(BlockType.ROWKEY_PREFIX_BLOOM_META);
+  }
+
   private DataInput getBloomFilterMetadata(BlockType blockType)
   throws IOException {
     if (blockType != BlockType.GENERAL_BLOOM_META &&
         blockType != BlockType.DELETE_FAMILY_BLOOM_META &&
-        blockType != BlockType.DELETE_COLUMN_BLOOM_META) {
+        blockType != BlockType.DELETE_COLUMN_BLOOM_META &&
+        blockType != BlockType.ROWKEY_PREFIX_BLOOM_META) {
       throw new RuntimeException("Block Type: " + blockType.toString() +
           " is not supported") ;
     }
 
-    for (HFileBlock b : loadOnOpenBlocks)
+    for (HFileBlock b : loadOnOpenBlocks) {
       if (b.getBlockType() == blockType) {
         return b.getByteStream();
       }
+    }
     return null;
   }
 

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java Tue Feb 11 18:28:11 2014
@@ -416,6 +416,12 @@ public class HFileWriterV1 extends Abstr
     throw new IOException("Delete Bloom filter is not supported in HFile V1");
   }
 
+  @Override
+  public void addRowKeyPrefixBloomFilter(BloomFilterWriter bfw)
+    throws IOException {
+    throw new IOException("RowKeyPrefix Bloom filter is not supported in HFile V1");
+  }
+
   /**
    * Write out the index in the version 1 format. This conforms to the legacy
    * version 1 format, but can still be read by

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java Tue Feb 11 18:28:11 2014
@@ -20,12 +20,6 @@
 
 package org.apache.hadoop.hbase.io.hfile;
 
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -39,6 +33,12 @@ import org.apache.hadoop.hbase.util.Bloo
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.Writable;
 
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Writes HFile format version 2.
  */
@@ -487,6 +487,11 @@ public class HFileWriterV2 extends Abstr
     this.addBloomFilter(bfw, BlockType.DELETE_COLUMN_BLOOM_META);
   }
 
+  @Override
+  public void addRowKeyPrefixBloomFilter(BloomFilterWriter bfw) {
+    this.addBloomFilter(bfw, BlockType.ROWKEY_PREFIX_BLOOM_META);
+  }
+
   private void addBloomFilter(final BloomFilterWriter bfw,
       final BlockType blockType) {
     if (bfw.getKeyCount() <= 0)
@@ -494,7 +499,8 @@ public class HFileWriterV2 extends Abstr
 
     if (blockType != BlockType.GENERAL_BLOOM_META &&
         blockType != BlockType.DELETE_FAMILY_BLOOM_META &&
-        blockType != BlockType.DELETE_COLUMN_BLOOM_META) {
+        blockType != BlockType.DELETE_COLUMN_BLOOM_META &&
+        blockType != BlockType.ROWKEY_PREFIX_BLOOM_META) {
       throw new RuntimeException("Block Type: " + blockType.toString() +
           "is not supported");
     }

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java Tue Feb 11 18:28:11 2014
@@ -947,6 +947,7 @@ public class Store extends SchemaConfigu
             .withDataBlockEncoder(dataBlockEncoder)
             .withComparator(comparator)
             .withBloomType(family.getBloomFilterType())
+            .withRowKeyPrefixLengthForBloom(family.getRowPrefixLengthForBloom())
             .withMaxKeyCount(maxKeyCount)
             .withFavoredNodes(region.getFavoredNodes())
             .withCompression(compression)

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java Tue Feb 11 18:28:11 2014
@@ -124,6 +124,10 @@ public class StoreFile extends SchemaCon
   static final byte[] BLOOM_FILTER_TYPE_KEY =
       Bytes.toBytes("BLOOM_FILTER_TYPE");
 
+  /** RowKey prefix length in FileInfo */
+  static final byte[] ROWKEY_PREFIX_LENGTH =
+    Bytes.toBytes("ROWKEY_PREFIX_LENGTH");
+
   /** Delete Family Count in FileInfo */
   public static final byte[] DELETE_FAMILY_COUNT =
       Bytes.toBytes("DELETE_FAMILY_COUNT");
@@ -654,6 +658,7 @@ public class StoreFile extends SchemaCon
     private HFileDataBlockEncoder dataBlockEncoder;
     private KeyValue.KVComparator comparator = KeyValue.COMPARATOR;
     private BloomType bloomType = BloomType.NONE;
+    private int rowKeyPrefixLength = -1;
     private long maxKeyCount = 0;
     private Path dir;
     private Path filePath;
@@ -730,6 +735,11 @@ public class StoreFile extends SchemaCon
       return this;
     }
 
+    public WriterBuilder withRowKeyPrefixLengthForBloom(int prefixLength) {
+      this.rowKeyPrefixLength = prefixLength;
+      return this;
+    }
+
     /**
      * @param maxKeyCount estimated maximum number of keys we expect to add
      * @return this (for chained invocation)
@@ -876,6 +886,8 @@ public class StoreFile extends SchemaCon
     private final BloomFilterWriter generalBloomFilterWriter;
     private final BloomFilterWriter deleteFamilyBloomFilterWriter;
     private final BloomFilterWriter deleteColumnBloomFilterWriter;
+    private final BloomFilterWriter rowKeyPrefixBloomFilterWriter;
+    private int rowKeyPrefixLength = 0;
     private final BloomType bloomType;
     private byte[] lastBloomKey;
     private int lastBloomKeyOffset, lastBloomKeyLen;
@@ -883,6 +895,7 @@ public class StoreFile extends SchemaCon
     private KeyValue lastKv = null;
     private KeyValue lastDeleteFamilyKV = null;
     private KeyValue lastDeleteColumnKV = null;
+    private KeyValue lastPrefixKV = null;
     private long deleteFamilyCnt = 0;
     private long deleteColumnCnt = 0;
 
@@ -929,30 +942,62 @@ public class StoreFile extends SchemaCon
         this.bloomType = BloomType.NONE;
       }
 
-      // initialize delete family Bloom filter when there is NO RowCol Bloom
-      // filter
-      if (this.bloomType != BloomType.ROWCOL) {
+      // Initialize delete family Bloom filter when there is NO RowCol Bloom
+      // filter and it has been enabled in the configuration
+      if (this.bloomType != BloomType.ROWCOL &&
+        BloomFilterFactory.isDeleteFamilyBloomEnabled(wb.conf)) {
         this.deleteFamilyBloomFilterWriter = BloomFilterFactory
-            .createDeleteBloomAtWrite(wb.conf, wb.cacheConf,
-                (int) Math.min(wb.maxKeyCount, Integer.MAX_VALUE), writer,
-                wb.bloomErrorRate, HConstants.DELETE_FAMILY_BLOOM_FILTER);
+            .createBloomFilterWriter(wb.conf, wb.cacheConf, writer, wb.bloomErrorRate);
       } else {
-        deleteFamilyBloomFilterWriter = null;
+        this.deleteFamilyBloomFilterWriter = null;
       }
+
       if (deleteFamilyBloomFilterWriter != null) {
         LOG.info("Delete Family Bloom filter type for " + wb.filePath + ": "
-            + deleteFamilyBloomFilterWriter.getClass().getSimpleName());
+          + deleteFamilyBloomFilterWriter.getClass().getSimpleName());
+      } else {
+        LOG.info("Delete Family Bloom filters are disabled by configuration or ROWCOL BF has " +
+          "been already enabled for " + writer.getPath() + (wb.conf == null ? " " +
+          "(configurations null)" : ""));
+      }
+
+      // Initialize the delete column bloom filter if the conf is enabled
+      if (BloomFilterFactory.isDeleteColumnBloomEnabled(wb.conf)) {
+        // initialize DeleteColumn bloom filter
+        this.deleteColumnBloomFilterWriter = BloomFilterFactory
+            .createBloomFilterWriter(wb.conf, wb.cacheConf, writer, wb.bloomErrorRate);
+      } else {
+        this.deleteColumnBloomFilterWriter = null;
       }
-      // initialize DeleteColumn bloom filter
-      this.deleteColumnBloomFilterWriter = BloomFilterFactory
-          .createDeleteBloomAtWrite(wb.conf, wb.cacheConf,
-              (int) Math.min(wb.maxKeyCount, Integer.MAX_VALUE), writer,
-              wb.bloomErrorRate, HConstants.DELETE_COLUMN_BLOOM_FILTER);
 
       if (deleteColumnBloomFilterWriter != null) {
         LOG.info("Delete Column Family filter type for " + wb.filePath + ": "
-            + deleteColumnBloomFilterWriter.getClass().getSimpleName());
+          + deleteColumnBloomFilterWriter.getClass().getSimpleName());
+      } else {
+        LOG.info("Delete Column Bloom filters are disabled by configuration for "
+          + writer.getPath() + (wb.conf == null ? " (configuration is null)" : ""));
       }
+
+      // Initialize the RowKey Prefix Bloom filters
+      if (wb.rowKeyPrefixLength > 0 && BloomFilterFactory.isRowKeyPrefixBloomEnabled(wb.conf)) {
+        // Create the bloom filter
+        this.rowKeyPrefixBloomFilterWriter = BloomFilterFactory.createBloomFilterWriter(
+          wb.conf, wb.cacheConf, writer, wb.bloomErrorRate);
+
+        // Cache the prefix length
+        rowKeyPrefixLength = wb.rowKeyPrefixLength;
+      } else {
+        this.rowKeyPrefixBloomFilterWriter = null;
+      }
+
+      if (rowKeyPrefixBloomFilterWriter != null) {
+        LOG.info("BloomFilter for the RowKeyPrefix " + rowKeyPrefixLength + " created " + wb
+          .filePath + ": " + rowKeyPrefixBloomFilterWriter.getClass().getSimpleName());
+      } else {
+        LOG.info("RowKeyPrefix Bloom filters are disabled for "
+          + writer.getPath() + (wb.conf == null ? " (configuration is null)" : ""));
+      }
+
     }
 
     /**
@@ -1137,6 +1182,24 @@ public class StoreFile extends SchemaCon
       }
     }
 
+    private void appendRowKeyPrefixBloomFilter(final KeyValue kv)
+      throws IOException {
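+      // Skip if the filter is disabled or the row key is shorter than the
+      // configured prefix length; such rows are never added to the filter.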
+      if (this.rowKeyPrefixBloomFilterWriter == null || kv.getRowLength() < rowKeyPrefixLength) {
+        return;
+      }
+
+      if (lastPrefixKV != null &&
+          (rowKeyPrefixBloomFilterWriter.getComparator().compare(
+            kv.getBuffer(), kv.getRowOffset(), rowKeyPrefixLength,
+            lastPrefixKV.getBuffer(), lastPrefixKV.getRowOffset(), rowKeyPrefixLength) == 0)) {
+        // return directly if the current prefix matches with the previous prefix
+        return;
+      }
+
+      this.rowKeyPrefixBloomFilterWriter.add(kv.getBuffer(), kv.getRowOffset(), rowKeyPrefixLength);
+      this.lastPrefixKV = kv;
+    }
+
     public void append(final KeyValue kv) throws IOException {
       append(kv, null);
     }
@@ -1145,7 +1208,7 @@ public class StoreFile extends SchemaCon
       appendGeneralBloomfilter(kv);
       appendDeleteFamilyBloomFilter(kv);
       appendDeleteColumnBloomFilter(kv);
-
+      appendRowKeyPrefixBloomFilter(kv);
       writer.append(kv, cv);
       includeInTimeRangeTracker(kv);
     }
@@ -1181,6 +1244,10 @@ public class StoreFile extends SchemaCon
       return deleteColumnBloomFilterWriter;
     }
 
+    BloomFilterWriter getRowKeyPrefixBloomFilterWriter() {
+      return rowKeyPrefixBloomFilterWriter;
+    }
+
     private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException {
       boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0);
       if (haveBloom) {
@@ -1206,6 +1273,17 @@ public class StoreFile extends SchemaCon
       return hasGeneralBloom;
     }
 
+    private boolean closeRowKeyPrefixBloomFilter() throws IOException {
+      boolean hasPrefixBloom = closeBloomFilter(rowKeyPrefixBloomFilterWriter);
+
+      // add the rowkey prefix bloom filter writer and append file info
+      if (hasPrefixBloom) {
+        writer.addRowKeyPrefixBloomFilter(rowKeyPrefixBloomFilterWriter);
+      }
+      writer.appendFileInfo(ROWKEY_PREFIX_LENGTH, Bytes.toBytes(this.rowKeyPrefixLength));
+      return hasPrefixBloom;
+    }
+
     private boolean closeDeleteFamilyBloomFilter() throws IOException {
       boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter);
 
@@ -1213,11 +1291,10 @@ public class StoreFile extends SchemaCon
       if (hasDeleteFamilyBloom) {
         writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter);
       }
-
       // append file info about the number of delete family kvs
       // even if there is no delete family Bloom.
       writer.appendFileInfo(DELETE_FAMILY_COUNT,
-          Bytes.toBytes(this.deleteFamilyCnt));
+        Bytes.toBytes(this.deleteFamilyCnt));
 
       return hasDeleteFamilyBloom;
     }
@@ -1244,14 +1321,15 @@ public class StoreFile extends SchemaCon
       boolean hasGeneralBloom = this.closeGeneralBloomFilter();
       boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter();
       boolean hasDeleteColumnBloom = this.closeDeleteColumnBloomFilter();
-
+      boolean hasRowKeyPrefixBloom = this.closeRowKeyPrefixBloomFilter();
       writer.close();
 
       // Log final Bloom filter statistics. This needs to be done after close()
       // because compound Bloom filters might be finalized as part of closing.
       StoreFile.LOG.info((hasGeneralBloom ? "" : "NO ") + "General Bloom and "
           + (hasDeleteFamilyBloom ? "" : "NO ") + "DeleteFamily and "
-          + (hasDeleteColumnBloom ? "" : "NO ") + "DeleteColumn"
+          + (hasDeleteColumnBloom ? "" : "NO ") + "DeleteColumn and "
+          + (hasRowKeyPrefixBloom ? "" : "NO ") + "RowKeyPrefix"
           + " was added to HFile (" + getPath() + ") ");
 
     }
@@ -1275,6 +1353,7 @@ public class StoreFile extends SchemaCon
     protected BloomFilter generalBloomFilter = null;
     protected BloomFilter deleteFamilyBloomFilter = null;
     protected BloomFilter deleteColumnBloomFilter = null;
+    protected BloomFilter rowKeyPrefixBloomFilter = null;
     protected BloomType bloomFilterType;
     private final HFile.Reader reader;
     protected TimeRangeTracker timeRangeTracker = null;
@@ -1282,6 +1361,7 @@ public class StoreFile extends SchemaCon
     private byte[] lastBloomKey;
     private long deleteFamilyCnt = -1;
     private long deleteColumnCnt = -1;
+    private int rowKeyPrefixLength = -1;
 
     public Reader(FileSystem fs, Path path, CacheConfig cacheConf,
         DataBlockEncoding preferredEncodingInCache) throws IOException {
@@ -1482,6 +1562,49 @@ public class StoreFile extends SchemaCon
     }
 
     /**
+     * This function checks whether the RowKeyPrefix Bloom filter covers the given RowKey prefix.
+     * @param buffer
+     * @param rowPrefixOffset
+     * @param rowPrefixLength
+     * @return false if the RowKeyPrefix Bloom filter doesn't cover the given RowKey prefix.
+   * Otherwise, returns true.
+     */
+    public boolean passesRowKeyPrefixBloomFilter(byte[] buffer, int rowPrefixOffset,
+                                                 int rowPrefixLength) {
+
+      // Sanity check the parameters
+      if (buffer == null || rowPrefixOffset + rowPrefixLength > buffer.length
+          || rowPrefixLength <= 0) {
+        return false;
+      }
+
+      // Cache Bloom filter as a local variable in case it is set to null by
+      // another thread on an IO error.
+      BloomFilter bloomFilter = this.rowKeyPrefixBloomFilter;
+
+      // Empty file: nothing can match, so no seek is needed
+      if (reader.getTrailer().getEntryCount() == 0) {
+        return false; // No need to seek
+      }
+
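+      // If the queried prefix is shorter than the prefix length this file's
+      // Bloom filter was built with, the filter cannot answer the query and
+      // the scan has to seek into the file.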
+      if (bloomFilter == null || this.rowKeyPrefixLength > rowPrefixLength) {
+        return true; // Have to seek into the file
+      }
+
+      try {
+        if (!bloomFilter.supportsAutoLoading()) {
+          return true;
+        }
+        return bloomFilter.contains(buffer, rowPrefixOffset, this.rowKeyPrefixLength, null);
+      } catch (IllegalArgumentException e) {
+        LOG.error("Bad RowKey Prefix  bloom filter data -- proceeding without", e);
+        setRowKeyPrefixBloomFilterFaulty();
+      }
+
+      return true;
+    }
+
+    /**
      * A method for checking Bloom filters. Called directly from
      * {@link StoreFileScanner} in case of a multi-column query.
      *
@@ -1592,6 +1715,11 @@ public class StoreFile extends SchemaCon
       return true;
     }
 
+    /**
+     * Load the file info from the HFile format
+     * @return the file info mapping
+     * @throws IOException
+     */
     public Map<byte[], byte[]> loadFileInfo() throws IOException {
       Map<byte [], byte []> fi = reader.loadFileInfo();
 
@@ -1610,6 +1738,11 @@ public class StoreFile extends SchemaCon
         deleteColumnCnt = Bytes.toLong(cnt);
       }
 
+      cnt = fi.get(ROWKEY_PREFIX_LENGTH);
+      if (cnt != null) {
+        rowKeyPrefixLength = Bytes.toInt(cnt);
+      }
+
       return fi;
     }
 
@@ -1617,6 +1750,7 @@ public class StoreFile extends SchemaCon
       this.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
       this.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
       this.loadBloomfilter(BlockType.DELETE_COLUMN_BLOOM_META);
+      this.loadBloomfilter(BlockType.ROWKEY_PREFIX_BLOOM_META);
     }
 
     private void loadBloomfilter(BlockType blockType) {
@@ -1662,6 +1796,19 @@ public class StoreFile extends SchemaCon
                 + deleteColumnBloomFilter.getClass().getSimpleName()
                 + ") metadata for " + reader.getName());
           }
+        } else if (blockType == BlockType.ROWKEY_PREFIX_BLOOM_META) {
+          if (this.rowKeyPrefixBloomFilter != null) {
+            return; // Bloom has been loaded
+          }
+
+          DataInput bloomMeta = reader.getRowKeyPrefixBloomFilterMetadata();
+          if (bloomMeta != null) {
+            rowKeyPrefixBloomFilter = BloomFilterFactory.createFromMeta(
+              bloomMeta, reader);
+            LOG.info("Loaded RowKey Prefix Bloom ("
+              + rowKeyPrefixBloomFilter.getClass().getSimpleName()
+              + ") metadata for " + reader.getName());
+          }
         } else {
           throw new RuntimeException("Block Type: " + blockType.toString()
               + "is not supported for Bloom filter");
@@ -1684,6 +1831,8 @@ public class StoreFile extends SchemaCon
         setDeleteFamilyBloomFilterFaulty();
       } else if (blockType == BlockType.DELETE_COLUMN_BLOOM_META) {
         setDeleteColumnBloomFilterFaulty();
+      } else if (blockType == BlockType.ROWKEY_PREFIX_BLOOM_META) {
+        setRowKeyPrefixBloomFilterFaulty();
       }
     }
 
@@ -1711,6 +1860,10 @@ public class StoreFile extends SchemaCon
       this.deleteColumnBloomFilter = null;
     }
 
+    public void setRowKeyPrefixBloomFilterFaulty() {
+      this.rowKeyPrefixBloomFilter = null;
+    }
+
     public byte[] getLastKey() {
       return reader.getLastKey();
     }
@@ -1735,6 +1888,14 @@ public class StoreFile extends SchemaCon
       return deleteColumnCnt;
     }
 
+    public long getRowKeyPrefixLength() {
+      return rowKeyPrefixLength;
+    }
+
+    public BloomFilter getRowKeyPrefixBloomFilter() {
+      return rowKeyPrefixBloomFilter;
+    }
+
     public byte[] getFirstKey() {
       return reader.getFirstKey();
     }
@@ -1782,9 +1943,10 @@ public class StoreFile extends SchemaCon
     }
 
     void disableBloomFilterForTesting() {
-      generalBloomFilter = null;
+      this.generalBloomFilter = null;
       this.deleteFamilyBloomFilter = null;
       this.deleteColumnBloomFilter = null;
+      this.rowKeyPrefixBloomFilter = null;
     }
 
     public long getMaxTimestamp() {

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java Tue Feb 11 18:28:11 2014
@@ -20,19 +20,18 @@
 
 package org.apache.hadoop.hbase.util;
 
-import java.io.DataInput;
-import java.io.IOException;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
 
+import java.io.DataInput;
+import java.io.IOException;
+
 /**
  * Handles Bloom filter initialization based on configuration and serialized
  * metadata in the reader and writer of {@link StoreFile}.
@@ -78,6 +77,10 @@ public final class BloomFilterFactory {
   public static final String IO_STOREFILE_DELETECOLUMN_BLOOM_ENABLED =
       "io.storefile.delete.column.bloom.enabled";
 
+  /** Master switch to enable RowKey Prefix Bloom filters */
+  public static final String IO_STOREFILE_ROWKEYPREFIX_BLOOM_ENABLED =
+    "io.storefile.rowkey.prefix.bloom.enabled";
+
   /**
    * Target Bloom block size. Bloom filter blocks of approximately this size
    * are interleaved with data blocks.
@@ -132,7 +135,7 @@ public final class BloomFilterFactory {
    * @return true if Delete Family Bloom filters are enabled in the given configuration
    */
   public static boolean isDeleteFamilyBloomEnabled(Configuration conf) {
-    return conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
+    return conf != null && conf.getBoolean(IO_STOREFILE_DELETEFAMILY_BLOOM_ENABLED, true);
   }
 
   /**
@@ -140,7 +143,15 @@ public final class BloomFilterFactory {
    * configuration
    */
   public static boolean isDeleteColumnBloomEnabled(Configuration conf) {
-    return conf.getBoolean(IO_STOREFILE_DELETECOLUMN_BLOOM_ENABLED, false);
+    return conf != null && conf.getBoolean(IO_STOREFILE_DELETECOLUMN_BLOOM_ENABLED, false);
+  }
+
+  /**
+   * @return true if rowkey prefix bloom filters are enabled in the given
+   * configuration
+   */
+  public static boolean isRowKeyPrefixBloomEnabled(Configuration conf) {
+    return conf != null && conf.getBoolean(IO_STOREFILE_ROWKEYPREFIX_BLOOM_ENABLED, true);
   }
 
   /**
@@ -252,30 +263,17 @@ public final class BloomFilterFactory {
   }
 
   /**
-   * Creates a new Delete Family/Column Bloom filter at the time of
+   * Creates a new Bloom filter at the time of
    * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
    * @param conf
-   * @param maxKeys an estimate of the number of keys we expect to insert.
-   *        Irrelevant if compound Bloom filters are enabled.
    * @param writer the HFile writer
    * @param bloomErrorRate
    * @return the new Bloom filter, or null in case Bloom filters are disabled
    *         or when failed to create one.
    */
-  public static BloomFilterWriter createDeleteBloomAtWrite(
-      Configuration conf, CacheConfig cacheConf, int maxKeys,
-      HFile.Writer writer, float bloomErrorRate, String deleteBloomType) {
-    if (deleteBloomType.equals(HConstants.DELETE_FAMILY_BLOOM_FILTER) && !isDeleteFamilyBloomEnabled(conf)) {
-      LOG.info("Delete Family Bloom filters are disabled by configuration for "
-          + writer.getPath()
-          + (conf == null ? " (configuration is null)" : ""));
-      return null;
-    } else if (deleteBloomType.equals(HConstants.DELETE_COLUMN_BLOOM_FILTER) && !isDeleteColumnBloomEnabled(conf)) {
-      LOG.info("Delete Column Bloom filters are disabled by configuration for "
-          + writer.getPath()
-          + (conf == null ? " (configuration is null)" : ""));
-      return null;
-    }
+  public static BloomFilterWriter createBloomFilterWriter(
+    Configuration conf, CacheConfig cacheConf,
+    HFile.Writer writer, float bloomErrorRate) {
 
     if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
       int maxFold = getMaxFold(conf);
@@ -291,4 +289,5 @@ public final class BloomFilterFactory {
       return null;
     }
   }
+
 };

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java?rev=1567261&r1=1567260&r2=1567261&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java Tue Feb 11 18:28:11 2014
@@ -19,16 +19,9 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeSet;
-
+import com.google.common.base.Joiner;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -41,25 +34,20 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.Reference.Range;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.BlockCache;
-import org.apache.hadoop.hbase.io.hfile.CacheConfig;
-import org.apache.hadoop.hbase.io.hfile.CacheTestHelper;
-import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
-import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
-import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.io.hfile.*;
 import org.apache.hadoop.hbase.io.hfile.LruBlockCache.CacheStats;
-import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
 import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
 import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
 import org.apache.hadoop.hbase.util.BloomFilterFactory;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.Assert;
 import org.mockito.Mockito;
 
-import com.google.common.base.Joiner;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.security.MessageDigest;
+import java.util.*;
 
 /**
  * Test HStoreFile
@@ -420,6 +408,159 @@ public class TestStoreFile extends HBase
     bloomWriteRead(writer, fs);
   }
 
+  public void testRowKeyPrefixBloomFilter() throws Exception {
+    FileSystem fs = FileSystem.getLocal(conf);
+    final float ERROR_RATE = (float) 0.01;
+    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, ERROR_RATE);
+    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_ROWKEYPREFIX_BLOOM_ENABLED, true);
+
+    // Create a StoreFile
+    final int ROWKEY_PREFIX_LENGTH = Bytes.SIZEOF_INT;
+    Path f = new Path(ROOT_DIR, getName() + "-prefix-" + ROWKEY_PREFIX_LENGTH);
+    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf,
+      fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
+      .withFilePath(f)
+      .withRowKeyPrefixLengthForBloom(ROWKEY_PREFIX_LENGTH)
+      .build();
+
+    // Generate the prefixes
+    final int PREFIX_NUM = 10000;
+    byte[][] prefixArray = new byte[PREFIX_NUM][];
+    for (int i = 0; i < PREFIX_NUM; i++) {
+      prefixArray[i] = Bytes.toBytes(i);
+      Assert.assertEquals(ROWKEY_PREFIX_LENGTH, prefixArray[i].length);
+      if (i != 0) {
+        Assert.assertTrue(Bytes.BYTES_RAWCOMPARATOR.compare(prefixArray[i],
+          prefixArray[i-1]) > 0);
+      }
+    }
+
+    final byte[] shorterRowKey = new byte[Bytes.SIZEOF_INT - 1];
+    System.arraycopy(prefixArray[0], 0, shorterRowKey, 0, Bytes.SIZEOF_INT - 1);
+
+    // Add key values with the prefixes into store file
+    final int KV_PER_PREFIX = 10;
+    final int KV_NUM = KV_PER_PREFIX * PREFIX_NUM;
+    final byte[] FAMILY = Bytes.toBytes("family");
+    final byte[] COL = Bytes.toBytes("col");
+
+    // Generate the suffix
+    TreeSet<byte[]> suffix = new TreeSet<byte[]>(Bytes.BYTES_RAWCOMPARATOR);
+    MessageDigest m = MessageDigest.getInstance("MD5");
+    for (int i = 0; i < KV_NUM; i++) {
+      m.reset();
+      m.update(Bytes.toBytes("Test" + i));
+      suffix.add(m.digest()); // Make sure the suffix is sorted as byte order
+    }
+
+    // Add a row key whose length is shorter than the prefix; This row key shall not exist in the
+    // RowKeyPrefix Bloom filter
+    writer.append(new KeyValue(shorterRowKey, FAMILY, COL, Bytes.toBytes("value")));
+
+    // Add all the prepared KVs by combining the prefix and suffix.
+    Iterator<byte[]> iterator = suffix.iterator();
+    for (int i = 0; i < KV_NUM; i++) {
+      // Generate row key
+      byte[] row = Bytes.add(prefixArray[i / KV_PER_PREFIX], iterator.next());
+      KeyValue kv = new KeyValue(row, FAMILY, COL, Bytes.toBytes("value" + i));
+      writer.append(kv);
+    }
+
+    // Verify the bloom filter writer
+    Assert.assertNotNull(writer.getRowKeyPrefixBloomFilterWriter());
+    writer.close();
+
+    // Open the file and load the bloom filters
+    StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, DataBlockEncoding.NONE);
+    reader.loadFileInfo();
+    reader.loadBloomfilter();
+
+    // Verify the prefix length
+    Assert.assertEquals(ROWKEY_PREFIX_LENGTH, reader.getRowKeyPrefixLength());
+
+    // Verify the RowKey Prefix Bloom filter has been loaded
+    Assert.assertNotNull(reader.getRowKeyPrefixBloomFilter());
+
+    // Verify the number of keys in the RowKey Prefix Bloom filter
+    Assert.assertEquals(PREFIX_NUM, reader.getRowKeyPrefixBloomFilter().getKeyCount());
+
+    // Verify all the prefixes have been covered in the Bloom
+    for (byte[] prefix : prefixArray) {
+      // The bloom filter contains the bloom key
+      assertTrue(reader.getRowKeyPrefixBloomFilter().contains(prefix, 0, prefix.length,
+        null));
+      assertTrue(reader.passesRowKeyPrefixBloomFilter(prefix, 0, prefix.length));
+    }
+
+    // Verify that longer row keys sharing a covered prefix still pass the Bloom
+    int falsePositive = 0;
+    for (byte[] prefix : prefixArray) {
+      byte[] longerPrefix = Bytes.add(prefix, Bytes.toBytes(10));
+
+      assertTrue(reader.getRowKeyPrefixBloomFilter().contains(
+        longerPrefix, 0, prefix.length, null));
+      assertTrue(reader.passesRowKeyPrefixBloomFilter(longerPrefix, 0, longerPrefix.length));
+
+      if (reader.getRowKeyPrefixBloomFilter().contains(
+        longerPrefix, 0,  longerPrefix.length, null)) {
+        falsePositive++;
+      }
+    }
+
+    // Verify the error rate
+    assertTrue(PREFIX_NUM * ERROR_RATE >= falsePositive);
+
+    // Create a new store file with a new row key prefix
+    final int ROWKEY_PREFIX_LENGTH2 = ROWKEY_PREFIX_LENGTH + 1;
+    f = new Path(ROOT_DIR, getName() + "-prefix-" + ROWKEY_PREFIX_LENGTH2);
+    writer = new StoreFile.WriterBuilder(conf, cacheConf,
+      fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL)
+      .withFilePath(f)
+      .withRowKeyPrefixLengthForBloom(ROWKEY_PREFIX_LENGTH2)
+      .build();
+
+    // Write the same data into this new store file
+    iterator = suffix.iterator();
+    for (int i = 0; i < KV_NUM; i++) {
+      // Generate row key
+      byte[] row = Bytes.add(prefixArray[i / KV_PER_PREFIX], iterator.next());
+      KeyValue kv = new KeyValue(row, FAMILY, COL, Bytes.toBytes("value" + i));
+      writer.append(kv);
+    }
+
+    // Verify the bloom filter writer
+    Assert.assertNotNull(writer.getRowKeyPrefixBloomFilterWriter());
+    writer.close();
+
+    // Open the file and load the bloom filter
+    reader = new StoreFile.Reader(fs, f, cacheConf, DataBlockEncoding.NONE);
+    reader.loadFileInfo();
+    reader.loadBloomfilter();
+
+    // Verify the prefix length
+    Assert.assertEquals(ROWKEY_PREFIX_LENGTH2, reader.getRowKeyPrefixLength());
+
+    // Verify the RowKey Prefix Bloom filter has been loaded
+    Assert.assertNotNull(reader.getRowKeyPrefixBloomFilter());
+
+    // Verify the number of keys in the RowKey Prefix Bloom filter is larger than PREFIX_NUM
+    Assert.assertTrue(reader.getRowKeyPrefixBloomFilter().getKeyCount() > PREFIX_NUM);
+
+    falsePositive = 0;
+    for (byte[] prefix : prefixArray) {
+      // The existing 4-byte prefixes should (up to the error rate) not be found
+      // in this new Bloom filter, whose prefix length is one byte longer.
+      if (reader.getRowKeyPrefixBloomFilter().contains(prefix, 0, prefix.length, null)) {
+        falsePositive++;
+      }
+      // But passesRowKeyPrefixBloomFilter returns true because the queried prefix
+      // is shorter than the file's stored prefix length, which forces a seek
+      Assert.assertTrue(reader.passesRowKeyPrefixBloomFilter(prefix, 0, prefix.length));
+    }
+
+    // Verify the error rate
+    assertTrue(PREFIX_NUM * ERROR_RATE >= falsePositive);
+  }
+
   public void testDeleteFamilyBloomFilter() throws Exception {
     FileSystem fs = FileSystem.getLocal(conf);
     conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE,