You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by na...@apache.org on 2016/07/11 17:28:54 UTC

[2/5] incubator-quickstep git commit: WIP version of BloomFilter hash fn change

WIP version of BloomFilter hash fn change


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/b59f1f09
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/b59f1f09
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/b59f1f09

Branch: refs/heads/expt_bloom_filter
Commit: b59f1f0903416b0ab611117948fbf5155c5d874f
Parents: 44c9985
Author: Navneet Potti <na...@gmail.com>
Authored: Sun Jul 10 16:52:48 2016 -0500
Committer: Navneet Potti <na...@cs.wisc.edu>
Committed: Mon Jul 11 12:28:25 2016 -0500

----------------------------------------------------------------------
 storage/BloomFilterIndexSubBlock.cpp   |   4 +-
 storage/BloomFilterIndexSubBlock.hpp   |  10 +-
 storage/HashTable.hpp                  |  16 +-
 utility/BloomFilter.hpp                | 229 +++++++++++++++-------------
 utility/BloomFilter.proto              |   2 +-
 utility/tests/BloomFilter_unittest.cpp |   6 +-
 6 files changed, 151 insertions(+), 116 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b59f1f09/storage/BloomFilterIndexSubBlock.cpp
----------------------------------------------------------------------
diff --git a/storage/BloomFilterIndexSubBlock.cpp b/storage/BloomFilterIndexSubBlock.cpp
index e806217..e5c6d11 100644
--- a/storage/BloomFilterIndexSubBlock.cpp
+++ b/storage/BloomFilterIndexSubBlock.cpp
@@ -55,7 +55,7 @@ BloomFilterIndexSubBlock::BloomFilterIndexSubBlock(const TupleStorageSubBlock &t
                     sub_block_memory_size),
       is_initialized_(false),
       is_consistent_(false),
-      random_seed_(kBloomFilterSeed),
+      // random_seed_(kBloomFilterSeed),
       bit_array_size_in_bytes_(description.GetExtension(
                                    BloomFilterIndexSubBlockDescription::bloom_filter_size)) {
   CHECK(DescriptionIsValid(relation_, description_))
@@ -74,7 +74,7 @@ BloomFilterIndexSubBlock::BloomFilterIndexSubBlock(const TupleStorageSubBlock &t
   const std::uint32_t salt_count = description.GetExtension(BloomFilterIndexSubBlockDescription::number_of_hashes);
 
   // Initialize the bloom_filter_ data structure to operate on bit_array.
-  bloom_filter_.reset(new BloomFilter(random_seed_,
+  bloom_filter_.reset(new BloomFilter(// random_seed_,
                                       salt_count,
                                       bit_array_size_in_bytes_,
                                       bit_array_.get(),

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b59f1f09/storage/BloomFilterIndexSubBlock.hpp
----------------------------------------------------------------------
diff --git a/storage/BloomFilterIndexSubBlock.hpp b/storage/BloomFilterIndexSubBlock.hpp
index 4925673..5d8b97a 100644
--- a/storage/BloomFilterIndexSubBlock.hpp
+++ b/storage/BloomFilterIndexSubBlock.hpp
@@ -65,10 +65,10 @@ class BloomFilterIndexSubBlock : public IndexSubBlock {
     kSelectivityNone
   };
 
-  /**
-   * @brief A random seed to initialize the bloom filter hash functions.
-   **/
-  static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
+  // /**
+  //  * @brief A random seed to initialize the bloom filter hash functions.
+  //  **/
+  // static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
 
   BloomFilterIndexSubBlock(const TupleStorageSubBlock &tuple_store,
                            const IndexSubBlockDescription &description,
@@ -179,7 +179,7 @@ class BloomFilterIndexSubBlock : public IndexSubBlock {
  private:
   bool is_initialized_;
   bool is_consistent_;
-  const std::uint64_t random_seed_;
+  // const std::uint64_t random_seed_;
   const std::uint64_t bit_array_size_in_bytes_;
   std::vector<attribute_id> indexed_attribute_ids_;
   std::unique_ptr<unsigned char> bit_array_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b59f1f09/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index 740e678..59601e4 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -1481,7 +1481,7 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
     }
     std::unique_ptr<BloomFilter> thread_local_bloom_filter;
     if (has_build_side_bloom_filter_) {
-      thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getRandomSeed(),
+      thread_local_bloom_filter.reset(new BloomFilter(//build_bloom_filter_->getRandomSeed(),
                                                       build_bloom_filter_->getNumberOfHashes(),
                                                       build_bloom_filter_->getBitArraySize()));
     }
@@ -2253,8 +2253,12 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
       bloom_filter_adapter.reset(
           new BloomFilterAdapter(probe_bloom_filters_, probe_attribute_ids_));
     }
+    std::size_t numFalsePositives = 0;
+    std::size_t numMisses = 0;
+    std::size_t numHits = 0;
     while (accessor->next()) {
       if (has_probe_side_bloom_filter_ && bloom_filter_adapter->miss(accessor)) {
+        numMisses++;
         continue;  // On a bloom filter miss, probing the hash table can be skipped.
       }
 
@@ -2268,13 +2272,23 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_
                                                                : true_hash;
       std::size_t entry_num = 0;
       const ValueT *value;
+      bool wasHit = false;
       while (this->getNextEntryForKey(key, adjusted_hash, &value, &entry_num)) {
+        wasHit = true;
         (*functor)(*accessor, *value);
         if (!allow_duplicate_keys) {
           break;
         }
       }
+      if (!wasHit)
+        numFalsePositives++;
+      else
+        numHits++;
+
     }
+    std::cerr << "numFalsePositives: " << numFalsePositives
+              << " numMisses: " << numMisses
+              << " numHits: " << numHits << "\n";
   });
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b59f1f09/utility/BloomFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.hpp b/utility/BloomFilter.hpp
index b93df84..787a0bb 100644
--- a/utility/BloomFilter.hpp
+++ b/utility/BloomFilter.hpp
@@ -51,6 +51,7 @@ namespace quickstep {
 class BloomFilter {
  public:
   static const uint32_t kNumBitsPerByte = 8;
+  static const uint32_t kMaxNumHashFns = 32;
 
   /**
    * @brief Constructor.
@@ -61,17 +62,17 @@ class BloomFilter {
    * @param hash_fn_count The number of hash functions used by this bloom filter.
    * @param bit_array_size_in_bytes Size of the bit array.
    **/
-  BloomFilter(const std::uint64_t random_seed,
+  BloomFilter(// const std::uint64_t random_seed,
               const std::size_t hash_fn_count,
               const std::uint64_t bit_array_size_in_bytes)
-      : random_seed_(random_seed),
+      : // random_seed_(random_seed),
         hash_fn_count_(hash_fn_count),
         array_size_in_bytes_(bit_array_size_in_bytes),
         array_size_(array_size_in_bytes_ * kNumBitsPerByte),
         bit_array_(new std::uint8_t[array_size_in_bytes_]),
         is_bit_array_owner_(true) {
     reset();
-    generate_unique_hash_fn();
+    // generate_unique_hash_fn();
   }
 
   /**
@@ -86,12 +87,12 @@ class BloomFilter {
    * @param is_initialized A boolean that indicates whether to zero-out the region
    *                       before use or not.
    **/
-  BloomFilter(const std::uint64_t random_seed,
+  BloomFilter(// const std::uint64_t random_seed,
               const std::size_t hash_fn_count,
               const std::uint64_t bit_array_size_in_bytes,
               std::uint8_t *bit_array,
               const bool is_initialized)
-      : random_seed_(random_seed),
+      : // random_seed_(random_seed),
         hash_fn_count_(hash_fn_count),
         array_size_in_bytes_(bit_array_size_in_bytes),
         array_size_(bit_array_size_in_bytes * kNumBitsPerByte),
@@ -100,7 +101,7 @@ class BloomFilter {
     if (!is_initialized) {
       reset();
     }
-    generate_unique_hash_fn();
+    // generate_unique_hash_fn();
   }
 
   /**
@@ -112,14 +113,14 @@ class BloomFilter {
    *        bloom filter configuration.
    **/
   explicit BloomFilter(const serialization::BloomFilter &bloom_filter_proto)
-      : random_seed_(bloom_filter_proto.bloom_filter_seed()),
+      : // random_seed_(bloom_filter_proto.bloom_filter_seed()),
         hash_fn_count_(bloom_filter_proto.number_of_hashes()),
         array_size_in_bytes_(bloom_filter_proto.bloom_filter_size()),
         array_size_(array_size_in_bytes_ * kNumBitsPerByte),
         bit_array_(new std::uint8_t[array_size_in_bytes_]),
         is_bit_array_owner_(true) {
     reset();
-    generate_unique_hash_fn();
+    // generate_unique_hash_fn();
   }
 
   /**
@@ -146,14 +147,14 @@ class BloomFilter {
     inserted_element_count_ = 0;
   }
 
-  /**
-   * @brief Get the random seed that was used to initialize this bloom filter.
-   *
-   * @return Returns the random seed.
-   **/
-  inline std::uint64_t getRandomSeed() const {
-    return random_seed_;
-  }
+  // /**
+  //  * @brief Get the random seed that was used to initialize this bloom filter.
+  //  *
+  //  * @return Returns the random seed.
+  //  **/
+  // inline std::uint64_t getRandomSeed() const {
+  //   return random_seed_;
+  // }
 
   /**
    * @brief Get the number of hash functions used in this bloom filter.
@@ -198,7 +199,7 @@ class BloomFilter {
 
     // Determine all the bit positions that are required to be set.
     for (std::size_t i = 0; i < hash_fn_count_; ++i) {
-      compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
+      compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit);
       modified_bit_positions.push_back(std::make_pair(bit_index, bit));
     }
 
@@ -243,7 +244,7 @@ class BloomFilter {
     std::size_t bit = 0;
 
     for (std::size_t i = 0; i < hash_fn_count_; ++i) {
-      compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
+      compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit);
       (bit_array_.get())[bit_index / kNumBitsPerByte] |= (1 << bit);
     }
 
@@ -265,7 +266,7 @@ class BloomFilter {
     std::size_t bit_index = 0;
     std::size_t bit = 0;
     for (std::size_t i = 0; i < hash_fn_count_; ++i) {
-      compute_indices(hash_ap(key_begin, length, hash_fn_[i]), &bit_index, &bit);
+      compute_indices(hash_multiplicative(key_begin, length, hash_fn_[i]), &bit_index, &bit);
       if (((bit_array_.get())[bit_index / kNumBitsPerByte] & (1 << bit)) != (1 << bit)) {
         return false;
       }
@@ -301,101 +302,121 @@ class BloomFilter {
     *bit = *bit_index % kNumBitsPerByte;
   }
 
-  void generate_unique_hash_fn() {
-    hash_fn_.reserve(hash_fn_count_);
-    const std::uint32_t predef_hash_fn_count = 128;
-    static const std::uint32_t predef_hash_fn[predef_hash_fn_count] = {
-       0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
-       0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
-       0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
-       0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
-       0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
-       0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
-       0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
-       0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
-       0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
-       0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
-       0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
-       0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
-       0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
-       0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
-       0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
-       0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
-       0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
-       0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
-       0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
-       0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
-       0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
-       0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
-       0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
-       0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
-       0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
-       0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
-       0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
-       0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
-       0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
-       0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
-       0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
-       0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
-    };
-    if (hash_fn_count_ <= predef_hash_fn_count) {
-      std::copy(predef_hash_fn, predef_hash_fn + hash_fn_count_, hash_fn_.begin());
-      for (std::uint32_t i = 0; i < hash_fn_.size(); ++i) {
-        hash_fn_[i] = hash_fn_[i] * hash_fn_[(i + 3) % hash_fn_count_] + static_cast<std::uint32_t>(random_seed_);
-      }
-    } else {
-      LOG(FATAL) << "Requested number of hash functions is too large.";
-    }
-  }
-
-  inline std::uint32_t hash_ap(const std::uint8_t *begin, std::size_t remaining_length, std::uint32_t hash) const {
-    const std::uint8_t *itr = begin;
-    std::uint32_t loop = 0;
-    while (remaining_length >= 8) {
-      const std::uint32_t &i1 = *(reinterpret_cast<const std::uint32_t*>(itr)); itr += sizeof(std::uint32_t);
-      const std::uint32_t &i2 = *(reinterpret_cast<const std::uint32_t*>(itr)); itr += sizeof(std::uint32_t);
-      hash ^= (hash <<  7) ^  i1 * (hash >> 3) ^ (~((hash << 11) + (i2 ^ (hash >> 5))));
-      remaining_length -= 8;
-    }
-    if (remaining_length) {
-      if (remaining_length >= 4) {
-        const std::uint32_t &i = *(reinterpret_cast<const std::uint32_t*>(itr));
-        if (loop & 0x01) {
-          hash ^= (hash <<  7) ^  i * (hash >> 3);
-        } else {
-          hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
-        }
-        ++loop;
-        remaining_length -= 4;
-        itr += sizeof(std::uint32_t);
-      }
-      if (remaining_length >= 2) {
-        const std::uint16_t &i = *(reinterpret_cast<const std::uint16_t*>(itr));
-        if (loop & 0x01) {
-          hash ^= (hash <<  7) ^  i * (hash >> 3);
-        } else {
-          hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
-        }
-        ++loop;
-        remaining_length -= 2;
-        itr += sizeof(std::uint16_t);
-      }
-      if (remaining_length) {
-        hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
-      }
-    }
+    // void generate_unique_hash_fn() {
+    // hash_fn_.reserve(hash_fn_count_);
+    // const std::uint32_t predef_hash_fn_count = 128;
+    // static const std::uint32_t predef_hash_fn[predef_hash_fn_count] = {
+    //    0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
+    //    0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
+    //    0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
+    //    0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
+    //    0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
+    //    0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
+    //    0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
+    //    0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
+    //    0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
+    //    0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
+    //    0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
+    //    0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
+    //    0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
+    //    0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
+    //    0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
+    //    0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
+    //    0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
+    //    0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
+    //    0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
+    //    0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
+    //    0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
+    //    0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
+    //    0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
+    //    0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
+    //    0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
+    //    0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
+    //    0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
+    //    0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
+    //    0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
+    //    0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
+    //    0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
+    //    0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
+    // };
+    // if (hash_fn_count_ <= predef_hash_fn_count) {
+    //   std::copy(predef_hash_fn, predef_hash_fn + hash_fn_count_, hash_fn_.begin());
+    //   for (std::uint32_t i = 0; i < hash_fn_.size(); ++i) {
+    //     hash_fn_[i] = hash_fn_[i] * hash_fn_[(i + 3) % hash_fn_count_] + static_cast<std::uint32_t>(random_seed_);
+    //   }
+    // } else {
+    //   LOG(FATAL) << "Requested number of hash functions is too large.";
+    // }
+    // }
+
+  // inline std::uint32_t hash_ap(const std::uint8_t *begin, std::size_t remaining_length, std::uint32_t hash) const {
+  //   const std::uint8_t *itr = begin;
+  //   std::uint32_t loop = 0;
+  //   while (remaining_length >= 8) {
+  //     const std::uint32_t &i1 = *(reinterpret_cast<const std::uint32_t*>(itr)); itr += sizeof(std::uint32_t);
+  //     const std::uint32_t &i2 = *(reinterpret_cast<const std::uint32_t*>(itr)); itr += sizeof(std::uint32_t);
+  //     hash ^= (hash <<  7) ^  i1 * (hash >> 3) ^ (~((hash << 11) + (i2 ^ (hash >> 5))));
+  //     remaining_length -= 8;
+  //   }
+  //   if (remaining_length) {
+  //     if (remaining_length >= 4) {
+  //       const std::uint32_t &i = *(reinterpret_cast<const std::uint32_t*>(itr));
+  //       if (loop & 0x01) {
+  //         hash ^= (hash <<  7) ^  i * (hash >> 3);
+  //       } else {
+  //         hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
+  //       }
+  //       ++loop;
+  //       remaining_length -= 4;
+  //       itr += sizeof(std::uint32_t);
+  //     }
+  //     if (remaining_length >= 2) {
+  //       const std::uint16_t &i = *(reinterpret_cast<const std::uint16_t*>(itr));
+  //       if (loop & 0x01) {
+  //         hash ^= (hash <<  7) ^  i * (hash >> 3);
+  //       } else {
+  //         hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
+  //       }
+  //       ++loop;
+  //       remaining_length -= 2;
+  //       itr += sizeof(std::uint16_t);
+  //     }
+  //     if (remaining_length) {
+  //       hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
+  //     }
+  //   }
+  //   return hash;
+  // }
+
+  inline std::uint32_t hash_multiplicative(
+      const std::uint8_t *begin,
+      const std::size_t remaining_length,
+      const std::size_t multiplier) const {
+    std::uint32_t hash = 0;
+    const std::uint8_t *end = begin + remaining_length;
+    for (const std::uint8_t *i = begin; i != end; i++)
+      hash = multiplier * hash + static_cast<std::uint32_t>(*i);
     return hash;
   }
 
  private:
-  const std::uint64_t random_seed_;
-  std::vector<std::uint32_t> hash_fn_;
+  // const std::uint64_t random_seed_;
   const std::uint32_t hash_fn_count_;
   std::uint64_t array_size_in_bytes_;
   std::uint64_t array_size_;
   std::unique_ptr<std::uint8_t> bit_array_;
   std::uint32_t inserted_element_count_;
   const bool is_bit_array_owner_;
+  const std::uint32_t hash_fn_[kMaxNumHashFns] = { // hash_fn_[i] is 2**(i+1) - 1
+    0x00000001, 0x00000003, 0x00000007, 0x0000000f,
+    0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
+    0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+    0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
+    0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff,
+    0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
+    0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff,
+    0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff
+  };
 
   alignas(kCacheLineBytes) mutable SpinSharedMutex<false> bloom_filter_insert_mutex_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b59f1f09/utility/BloomFilter.proto
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.proto b/utility/BloomFilter.proto
index 8dd9163..6100122 100644
--- a/utility/BloomFilter.proto
+++ b/utility/BloomFilter.proto
@@ -24,7 +24,7 @@ message BloomFilter {
   // - Default seed for initializing family of hashes = 0xA5A5A5A55A5A5A5A.
   // - Default bloom filter size = 10 KB.
   // - Default number of hash functions used in bloom filter = 5.
-  optional fixed64 bloom_filter_seed = 1 [default = 0xA5A5A5A55A5A5A5A];
+  //optional fixed64 bloom_filter_seed = 1 [default = 0xA5A5A5A55A5A5A5A];
   optional uint32 bloom_filter_size = 2 [default = 10000];
   optional uint32 number_of_hashes = 3 [default = 5];
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/b59f1f09/utility/tests/BloomFilter_unittest.cpp
----------------------------------------------------------------------
diff --git a/utility/tests/BloomFilter_unittest.cpp b/utility/tests/BloomFilter_unittest.cpp
index 4d8beae..000198b 100644
--- a/utility/tests/BloomFilter_unittest.cpp
+++ b/utility/tests/BloomFilter_unittest.cpp
@@ -28,7 +28,7 @@ namespace quickstep {
 
 class BloomFilterTest : public ::testing::Test {
  public:
-  static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
+  // static const std::uint64_t kBloomFilterSeed = 0xA5A5A5A55A5A5A5AULL;
   static const std::uint32_t kNumberOfHashFunctions = 20;
   static const std::uint32_t kBloomFilterSize = 100000;  // in bytes.
 };
@@ -44,7 +44,7 @@ TEST_F(BloomFilterTest, BloomFilterInsertTest) {
   const std::uint32_t bloom_filter_size = 1;
 
   bit_array.reset(new std::uint8_t[bloom_filter_size]);
-  bloom_filter.reset(new BloomFilter(kBloomFilterSeed,
+  bloom_filter.reset(new BloomFilter(//kBloomFilterSeed,
                                      num_hashes,
                                      bloom_filter_size,
                                      bit_array.get(),
@@ -61,7 +61,7 @@ TEST_F(BloomFilterTest, BloomFilterContainsTest) {
   std::unique_ptr<BloomFilter> bloom_filter;
 
   bit_array.reset(new std::uint8_t[kBloomFilterSize]);
-  bloom_filter.reset(new BloomFilter(kBloomFilterSeed,
+  bloom_filter.reset(new BloomFilter(// kBloomFilterSeed,
                                      kNumberOfHashFunctions,
                                      kBloomFilterSize,
                                      bit_array.get(),