You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2020/03/10 21:41:53 UTC
[kudu] 02/03: [util] Import "Or" function to BlockBloomFilter from
Impala
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 885d7946c771ea875919d479c67b2ddac5c5103e
Author: Bankim Bhavsar <ba...@cloudera.com>
AuthorDate: Tue Mar 3 16:18:17 2020 -0800
[util] Import "Or" function to BlockBloomFilter from Impala
Impala will be switching to using the Block Bloom filter from kudu-util.
"Or" function was missing and this change adds it.
Note that original implementation for OrEqualArrayAvx() in Impala is
targeted for AVX and not AVX2, however AVX2 is super-set of AVX instructions
and there is already provision in the Block Bloom filter to separate out
AVX2 v/s non-AVX2 (SSE) code. Hence don't see need to add separate AVX
specific file/implementation.
Change-Id: Ibe5f9311f73dcff883dd2cce18fd558e7d57d14f
Reviewed-on: http://gerrit.cloudera.org:8080/15373
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/util/block_bloom_filter-test.cc | 33 +++++++++++++++++++
src/kudu/util/block_bloom_filter.cc | 56 ++++++++++++++++++++++++++++++++
src/kudu/util/block_bloom_filter.h | 28 ++++++++++++++--
src/kudu/util/block_bloom_filter_avx2.cc | 20 +++++++++++-
4 files changed, 134 insertions(+), 3 deletions(-)
diff --git a/src/kudu/util/block_bloom_filter-test.cc b/src/kudu/util/block_bloom_filter-test.cc
index 4c4d418..b9ed9f2 100644
--- a/src/kudu/util/block_bloom_filter-test.cc
+++ b/src/kudu/util/block_bloom_filter-test.cc
@@ -286,4 +286,37 @@ TEST_F(BlockBloomFilterTest, MinSpaceForFpp) {
}
}
}
+
+TEST_F(BlockBloomFilterTest, Or) {
+ BlockBloomFilter* bf1 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
+ BlockBloomFilter* bf2 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
+
+ for (int i = 60; i < 80; ++i) bf2->Insert(i);
+ for (int i = 0; i < 10; ++i) bf1->Insert(i);
+
+ ASSERT_OK(bf1->Or(*bf2));
+ for (int i = 0; i < 10; ++i) ASSERT_TRUE(bf1->Find(i)) << i;
+ for (int i = 60; i < 80; ++i) ASSERT_TRUE(bf1->Find(i)) << i;
+
+ // Insert another value to aggregated BloomFilter.
+ for (int i = 11; i < 50; ++i) bf1->Insert(i);
+
+ for (int i = 11; i < 50; ++i) ASSERT_TRUE(bf1->Find(i)) << i;
+ ASSERT_FALSE(bf1->Find(81));
+
+ // Check that AlwaysFalse() is updated correctly.
+ BlockBloomFilter* bf3 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
+ BlockBloomFilter* always_false = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
+ ASSERT_OK(bf3->Or(*always_false));
+ EXPECT_TRUE(bf3->AlwaysFalse());
+ ASSERT_OK(bf3->Or(*bf2));
+ EXPECT_FALSE(bf3->AlwaysFalse());
+
+ // Invalid argument test cases.
+ BlockBloomFilter* bf4 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
+ BlockBloomFilter* bf5 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100000, 0.01));
+ Status s = bf4->Or(*bf5);
+ ASSERT_TRUE(s.IsInvalidArgument());
+ ASSERT_STR_CONTAINS(s.ToString(), "Directory size don't match");
+}
} // namespace kudu
diff --git a/src/kudu/util/block_bloom_filter.cc b/src/kudu/util/block_bloom_filter.cc
index e09a7cc..acaf358 100644
--- a/src/kudu/util/block_bloom_filter.cc
+++ b/src/kudu/util/block_bloom_filter.cc
@@ -47,6 +47,9 @@ namespace kudu {
constexpr uint32_t BlockBloomFilter::kRehash[8] __attribute__((aligned(32)));
const base::CPU BlockBloomFilter::kCpu = base::CPU();
+// constexpr data member requires initialization in the class declaration.
+// Hence no duplicate initialization in the definition here.
+constexpr BlockBloomFilter* const BlockBloomFilter::kAlwaysTrueFilter;
BlockBloomFilter::BlockBloomFilter(BlockBloomFilterBufferAllocatorIf* buffer_allocator) :
always_false_(true),
@@ -60,13 +63,16 @@ BlockBloomFilter::BlockBloomFilter(BlockBloomFilterBufferAllocatorIf* buffer_all
if (has_avx2()) {
bucket_insert_func_ptr_ = &BlockBloomFilter::BucketInsertAVX2;
bucket_find_func_ptr_ = &BlockBloomFilter::BucketFindAVX2;
+ or_equal_array_func_ptr_ = &BlockBloomFilter::OrEqualArrayAVX2;
} else {
bucket_insert_func_ptr_ = &BlockBloomFilter::BucketInsert;
bucket_find_func_ptr_ = &BlockBloomFilter::BucketFind;
+ or_equal_array_func_ptr_ = &BlockBloomFilter::OrEqualArray;
}
#else
bucket_insert_func_ptr_ = &BlockBloomFilter::BucketInsert;
bucket_find_func_ptr_ = &BlockBloomFilter::BucketFind;
+ or_equal_array_func_ptr_ = &BlockBloomFilter::OrEqualArray;
#endif
}
@@ -259,6 +265,56 @@ bool BlockBloomFilter::operator!=(const BlockBloomFilter& rhs) const {
return !(rhs == *this);
}
+void BlockBloomFilter::OrEqualArray(size_t n, const uint8_t* __restrict__ in,
+ uint8_t* __restrict__ out) {
+ // The trivial loop out[i] |= in[i] should auto-vectorize with gcc at -O3, but it is not
+ // written in a way that is very friendly to auto-vectorization. Instead, we manually
+ // vectorize, increasing the speed by up to 56x.
+ const __m128i* simd_in = reinterpret_cast<const __m128i*>(in);
+ const __m128i* const simd_in_end = reinterpret_cast<const __m128i*>(in + n);
+ __m128i* simd_out = reinterpret_cast<__m128i*>(out);
+ // in.directory has a size (in bytes) that is a multiple of 32. Since sizeof(__m128i)
+ // == 16, we can do two _mm_or_si128's in each iteration without checking array
+ // bounds.
+ while (simd_in != simd_in_end) {
+ for (int i = 0; i < 2; ++i, ++simd_in, ++simd_out) {
+ _mm_storeu_si128(
+ simd_out, _mm_or_si128(_mm_loadu_si128(simd_out), _mm_loadu_si128(simd_in)));
+ }
+ }
+}
+
+Status BlockBloomFilter::Or(const BlockBloomFilter& other) {
+ // AlwaysTrueFilter is a special case implemented with a nullptr.
+ // Hence Or'ing with an AlwaysTrueFilter will result in a Bloom filter that also
+ // always returns true which'll require destructing this Bloom filter.
+ // Moreover for a reference "other" to be an AlwaysTrueFilter the reference needs
+ // to be created from a nullptr and so we get into undefined behavior territory.
+ // Comparing AlwaysTrueFilter with "&other" results in a compiler warning for
+ // comparing a non-null argument "other" with NULL [-Wnonnull-compare].
+ // For above reasons, guard against it.
+ CHECK_NE(kAlwaysTrueFilter, &other);
+
+ if (this == &other) {
+ // No op.
+ return Status::OK();
+ }
+ if (directory_size() != other.directory_size()) {
+ return Status::InvalidArgument(Substitute("Directory size don't match. this: $0, other: $1",
+ directory_size(), other.directory_size()));
+ }
+ if (other.AlwaysFalse()) {
+ // Nothing to do.
+ return Status::OK();
+ }
+
+ (*or_equal_array_func_ptr_)(directory_size(),
+ reinterpret_cast<uint8_t*>(other.directory_),
+ reinterpret_cast<uint8_t*>(directory_));
+ always_false_ = false;
+ return Status::OK();
+}
+
shared_ptr<DefaultBlockBloomFilterBufferAllocator>
DefaultBlockBloomFilterBufferAllocator::GetSingletonSharedPtr() {
// Meyer's Singleton.
diff --git a/src/kudu/util/block_bloom_filter.h b/src/kudu/util/block_bloom_filter.h
index a218dec..8a0fdf8 100644
--- a/src/kudu/util/block_bloom_filter.h
+++ b/src/kudu/util/block_bloom_filter.h
@@ -148,6 +148,21 @@ class BlockBloomFilter {
bool operator==(const BlockBloomFilter& rhs) const;
bool operator!=(const BlockBloomFilter& rhs) const;
+ // Computes the logical OR of this filter with 'other' and stores the result in this
+ // filter.
+ // Notes:
+ // - The directory sizes of the Bloom filters must match.
+ // - Or'ing with kAlwaysTrueFilter is disallowed.
+ Status Or(const BlockBloomFilter& other);
+
+ // Returns whether the Bloom filter is empty and hence would return false for all lookups.
+ bool AlwaysFalse() const {
+ return always_false_;
+ }
+
+ // Representation of a filter which allows all elements to pass.
+ static constexpr BlockBloomFilter* const kAlwaysTrueFilter = nullptr;
+
private:
// always_false_ is true when the bloom filter hasn't had any elements inserted.
bool always_false_;
@@ -190,7 +205,7 @@ class BlockBloomFilter {
// Helper function for public Init() variants.
Status InitInternal(int log_space_bytes, HashAlgorithm hash_algorithm, uint32_t hash_seed);
- // Same as Insert(), but skips the CPU check and assumes that AVX is not available.
+ // Same as Insert(), but skips the CPU check and assumes that AVX2 is not available.
void InsertNoAvx2(uint32_t hash) noexcept;
// Does the actual work of Insert(). bucket_idx is the index of the bucket to insert
@@ -199,8 +214,11 @@ class BlockBloomFilter {
bool BucketFind(uint32_t bucket_idx, uint32_t hash) const noexcept;
+ // Computes out[i] |= in[i] for the arrays 'in' and 'out' of length 'n'.
+ static void OrEqualArray(size_t n, const uint8_t* __restrict__ in, uint8_t* __restrict__ out);
+
#ifdef USE_AVX2
- // Same as Insert(), but skips the CPU check and assumes that AVX is available.
+ // Same as Insert(), but skips the CPU check and assumes that AVX2 is available.
void InsertAvx2(uint32_t hash) noexcept __attribute__((__target__("avx2")));
// A faster SIMD version of BucketInsert().
@@ -210,12 +228,18 @@ class BlockBloomFilter {
// A faster SIMD version of BucketFind().
bool BucketFindAVX2(uint32_t bucket_idx, uint32_t hash) const noexcept
__attribute__((__target__("avx2")));
+
+ // Computes out[i] |= in[i] for the arrays 'in' and 'out' of length 'n' using AVX2
+ // instructions. 'n' must be a multiple of 32.
+ static void OrEqualArrayAVX2(size_t n, const uint8_t* __restrict__ in,
+ uint8_t* __restrict__ out) __attribute__((target("avx2")));
#endif
// Function pointers initialized in constructor to avoid run-time cost
// in hot-path of Find and Insert operations.
decltype(&BlockBloomFilter::BucketInsert) bucket_insert_func_ptr_;
decltype(&BlockBloomFilter::BucketFind) bucket_find_func_ptr_;
+ decltype(&BlockBloomFilter::OrEqualArray) or_equal_array_func_ptr_;
// Returns amount of space used in log2 bytes.
int log_space_bytes() const {
diff --git a/src/kudu/util/block_bloom_filter_avx2.cc b/src/kudu/util/block_bloom_filter_avx2.cc
index e10b6cc..93c3ff6 100644
--- a/src/kudu/util/block_bloom_filter_avx2.cc
+++ b/src/kudu/util/block_bloom_filter_avx2.cc
@@ -24,9 +24,14 @@
#include "kudu/util/block_bloom_filter.h"
-#include <cstdint>
#include <immintrin.h>
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
+
+#include <glog/logging.h>
+
#include "kudu/gutil/port.h"
namespace kudu {
@@ -75,4 +80,17 @@ void BlockBloomFilter::InsertAvx2(const uint32_t hash) noexcept {
BucketInsertAVX2(bucket_idx, hash);
}
+void BlockBloomFilter::OrEqualArrayAVX2(size_t n, const uint8_t* __restrict__ in,
+ uint8_t* __restrict__ out) {
+ constexpr size_t kAVXRegisterBytes = sizeof(__m256d);
+ DCHECK_EQ(n % kAVXRegisterBytes, 0) << "Invalid Bloom filter directory size";
+ const uint8_t* const in_end = in + n;
+ for (; in != in_end; (in += kAVXRegisterBytes), (out += kAVXRegisterBytes)) {
+ const double* double_in = reinterpret_cast<const double*>(in);
+ double* double_out = reinterpret_cast<double*>(out);
+ _mm256_storeu_pd(double_out,
+ _mm256_or_pd(_mm256_loadu_pd(double_out), _mm256_loadu_pd(double_in)));
+ }
+}
+
} // namespace kudu