You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by na...@apache.org on 2016/07/13 04:49:36 UTC

[2/5] incubator-quickstep git commit: Try to speed up Blocked Bloom Filter

Try to speed up Blocked Bloom Filter


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/36d0f4c9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/36d0f4c9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/36d0f4c9

Branch: refs/heads/expt_bloom_filter_hash_fn
Commit: 36d0f4c9ec17cb5eaf1361f5a7975626ab0c9901
Parents: dd7d7b0
Author: Navneet Potti <na...@gmail.com>
Authored: Tue Jul 12 19:28:15 2016 -0500
Committer: Navneet Potti <na...@gmail.com>
Committed: Tue Jul 12 19:28:15 2016 -0500

----------------------------------------------------------------------
 query_optimizer/ExecutionHeuristics.cpp |   2 +-
 utility/BloomFilter.hpp                 | 106 +++++++++++++++++++--------
 2 files changed, 75 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36d0f4c9/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
index 38b7c50..6b30d83 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -116,7 +116,7 @@ void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter *b
                                                    const std::size_t hash_join_index) {
   auto cardinality = hash_joins_[hash_join_index].estimated_build_relation_cardinality_;
   bloom_filter_proto->set_bloom_filter_size(
-      (FLAGS_bloom_num_bits_per_tuple * cardinality)/kNumBitsPerByte);
+      BloomFilter::getNearestAllowedSize((FLAGS_bloom_num_bits_per_tuple * cardinality)/kNumBitsPerByte));
   bloom_filter_proto->set_number_of_hashes(FLAGS_bloom_num_hash_fns);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/36d0f4c9/utility/BloomFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilter.hpp b/utility/BloomFilter.hpp
index 2acd2a9..49d00e7 100644
--- a/utility/BloomFilter.hpp
+++ b/utility/BloomFilter.hpp
@@ -26,6 +26,7 @@
 #include <algorithm>
 #include <cstddef>
 #include <cstdint>
+#include <cstring>
 #include <memory>
 #include <utility>
 #include <vector>
@@ -60,7 +61,7 @@ typedef BloomFilterBlocked BloomFilter;
 class BloomFilterBlocked {
  public:
   static const std::uint8_t kNumBitsPerByte = 8;
-  static const std::uint8_t kMaxNumHashFns = 8;
+  static const std::uint8_t kMaxNumHashFns = 4;
 
   // This union allows us to read/write position in convenient fashion,
   // through nested structs and their bitfield members
@@ -80,6 +81,11 @@ class BloomFilterBlocked {
     } byte_pos;
   };
 
+  static std::uint64_t getNearestAllowedSize(const std::uint64_t approx_size) {
+    return (approx_size / kCacheLineBytes) * kCacheLineBytes;
+  }
+
+
   /**
    * @brief Constructor.
    * @note When no bit_array is being passed to the constructor,
@@ -189,7 +195,7 @@ class BloomFilterBlocked {
   }
 
   template <typename T>
-  void insert(const T value) {
+  void insert(const T &value) {
     insert(reinterpret_cast<const std::uint8_t *>(&value), sizeof(T));
   }
 
@@ -200,18 +206,12 @@ class BloomFilterBlocked {
    * @param length Size of the value being inserted in bytes.
    */
   inline void insert(const std::uint8_t *key_begin, const std::size_t length) {
-    Position positions[kMaxNumHashFns];
-    getPositions(key_begin, length, positions);
-    {
       SpinSharedMutexExclusiveLock<false> exclusive_writer_lock(bloom_filter_insert_mutex_);
-      for (std::uint8_t i = 0; i < hash_fn_count_; ++i)
-        setBitAtPosition(positions[i]);
-    }
-    ++inserted_element_count_;
+      insertUnSafe(key_begin, length);
   }
 
   template <typename T>
-  void insertUnSafe(const T value) {
+  void insertUnSafe(const T &value) {
     insertUnSafe(reinterpret_cast<const std::uint8_t *>(&value), sizeof(T));
   }
 
@@ -224,15 +224,19 @@ class BloomFilterBlocked {
    * @param length Size of the value being inserted in bytes.
    */
   inline void insertUnSafe(const std::uint8_t *key_begin, const std::size_t length) {
-    Position positions[kMaxNumHashFns];
-    getPositions(key_begin, length, positions);
-    for (std::uint8_t i = 0; i < hash_fn_count_; ++i)
-      setBitAtPosition(positions[i]);
+    Position first_pos = getFirstPosition(key_begin, length);
+    if (!getBitAtPosition(first_pos))
+      setBitAtPosition(first_pos);
+    Position other_pos;
+    for (std::uint8_t i = 1; i <hash_fn_count_; ++i) {
+      other_pos = getOtherPosition(key_begin, length, first_pos, i);
+      setBitAtPosition(other_pos);
+    }
     ++inserted_element_count_;
   }
 
   template <typename T>
-  bool contains(const T value) {
+  bool contains(const T &value) {
     return contains(reinterpret_cast<const std::uint8_t *>(&value), sizeof(T));
   }
 
@@ -247,12 +251,20 @@ class BloomFilterBlocked {
    * @param key_begin A pointer to the value being tested for membership.
    * @param length Size of the value being inserted in bytes.
    */
-  inline bool contains(const std::uint8_t *key_begin, const std::size_t length) const {
-    Position positions[kMaxNumHashFns];
-    getPositions(key_begin, length, positions);
-    for (std::uint8_t i = 0; i < hash_fn_count_; ++i)
-      if (!getBitAtPosition(positions[i]))
+  inline bool contains(
+      const std::uint8_t *__restrict__ key_begin,
+      const std::size_t length) const {
+    Position first_pos = getFirstPosition(key_begin, length);
+    if (!getBitAtPosition(first_pos)) {
+      return false;
+    }
+    Position other_pos;
+    for (std::uint8_t i = 1; i < hash_fn_count_; ++i) {
+      other_pos = getOtherPosition(key_begin, length, first_pos, i);
+      if (!getBitAtPosition(other_pos)) {
         return false;
+      }
+    }
     return true;
   }
 
@@ -279,14 +291,33 @@ class BloomFilterBlocked {
   }
 
  protected:
-  void getPositions(
+  Position getFirstPosition(const std::uint8_t *begin, std::size_t length) const {
+    Position pos;
+    pos.hash = hash_identity(begin, length);
+    return pos;
+  }
+
+  Position getOtherPosition(
+      const std::uint8_t *begin,
+      std::size_t length,
+      const Position first_pos,
+      const std::uint8_t index) const {
+    Position pos;
+    pos.hash = hash_multiplicative(begin, length, hash_fn_[index-1]);
+    pos.cache_line_pos.line_num = first_pos.cache_line_pos.line_num;
+    return pos;
+  }
+
+  void fillPosition(
       const std::uint8_t *begin,
       std::size_t length,
+      const std::uint8_t index,
       Position positions[]) const {
-    positions[0].hash = hash_multiplicative(begin, length, hash_fn_[0]);
-    for (std::size_t i = 1; i < hash_fn_count_; ++i) {
-      positions[i].hash = hash_multiplicative(begin, length, hash_fn_[i]);
-      positions[i].cache_line_pos.line_num = positions[0].cache_line_pos.line_num;
+    if (index == 0)
+      positions[0].hash = hash_identity(begin, length);
+    else {
+      positions[index].hash = hash_multiplicative(begin, length, hash_fn_[index-1]);
+      positions[index].cache_line_pos.line_num = positions[0].cache_line_pos.line_num;
     }
   }
 
@@ -298,8 +329,19 @@ class BloomFilterBlocked {
     return (bit_array_.get())[pos.byte_pos.byte_num] & (1 << pos.byte_pos.index_in_byte);
   }
 
+  inline std::uint32_t hash_identity(
+      const std::uint8_t *__restrict__ begin,
+      std::size_t length) const {
+    std::uint32_t hash;
+    if (length >= 4)
+      hash = *reinterpret_cast<const std::uint32_t*> (begin);
+    else
+      std::memcpy(&hash, begin, length);
+    return hash % (array_size_in_bytes_ * kNumBitsPerByte);
+  }
+
   inline std::uint32_t hash_multiplicative(
-      const std::uint8_t *begin,
+      const std::uint8_t *__restrict__ begin,
       std::size_t length,
       const std::uint64_t multiplier) const {
     std::uint32_t hash = 0;
@@ -313,10 +355,10 @@ class BloomFilterBlocked {
     }
     while (bytes_hashed < length) {
       std::uint8_t val = *(begin + bytes_hashed);
-      hash += (multiplier * val) >> 32;
+      hash += (multiplier * val) >> 24;
       bytes_hashed++;
     }
-    return hash % (array_size_in_bytes_ * kNumBitsPerByte);
+    return hash;//  % (array_size_in_bytes_ * kNumBitsPerByte);
   }
 
  private:
@@ -328,13 +370,13 @@ class BloomFilterBlocked {
   static constexpr std::uint64_t kKnuthGoldenRatioNumber = 2654435761;
   const std::uint64_t hash_fn_[kMaxNumHashFns] = { // hash_fn_[i] is 2**(i+1) - 1
     0x00000001 * kKnuthGoldenRatioNumber, // 0x00000003, 0x00000007, 0x0000000f,
-    0x0000001f * kKnuthGoldenRatioNumber, // 0x0000003f, 0x0000007f, 0x000000ff,
+    // 0x0000001f * kKnuthGoldenRatioNumber, // 0x0000003f, 0x0000007f, 0x000000ff,
     0x000001ff * kKnuthGoldenRatioNumber, // 0x000003ff, 0x000007ff, 0x00000fff,
-    0x00001fff * kKnuthGoldenRatioNumber, // 0x00003fff, 0x00007fff, 0x0000ffff,
+    // 0x00001fff * kKnuthGoldenRatioNumber, // 0x00003fff, 0x00007fff, 0x0000ffff,
     0x0001ffff * kKnuthGoldenRatioNumber, // 0x0003ffff, 0x0007ffff, 0x000fffff,
-    0x001fffff * kKnuthGoldenRatioNumber, // 0x003fffff, 0x007fffff, 0x00ffffff,
+    // 0x001fffff * kKnuthGoldenRatioNumber, // 0x003fffff, 0x007fffff, 0x00ffffff,
     0x01ffffff * kKnuthGoldenRatioNumber, // 0x03ffffff, 0x07ffffff, 0x0fffffff,
-    0x1fffffff * kKnuthGoldenRatioNumber  // 0x3fffffff, 0x7fffffff, 0xffffffff
+    // 0x1fffffff * kKnuthGoldenRatioNumber  // 0x3fffffff, 0x7fffffff, 0xffffffff
     };
 
   alignas(kCacheLineBytes) std::unique_ptr<std::uint8_t> bit_array_;