You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2020/03/14 09:21:03 UTC
[kudu] branch master updated: [util] Changes to allow
serializing/de-serializing BlockBloomFilter to a custom format
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 1394746 [util] Changes to allow serializing/de-serializing BlockBloomFilter to a custom format
1394746 is described below
commit 1394746becb905ff460e0fcc615376d967bdc4c1
Author: Bankim Bhavsar <ba...@cloudera.com>
AuthorDate: Tue Mar 10 12:32:21 2020 -0700
[util] Changes to allow serializing/de-serializing BlockBloomFilter to a custom format
Consumers of the BlockBloomFilter like Impala would prefer using its own
serialization/deserializing message format or library.
Hence following set of changes:
- InitFromDirectory() that allows initialization from a populated directory
structure.
- log_space_bytes() and directory() public functions to allow serialization.
- Renamed AlwaysFalse() to always_false() since it's a simple get function
providing access to data.
Change-Id: I79504813024abaf452e2c2b244391024b442d6f7
Reviewed-on: http://gerrit.cloudera.org:8080/15396
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
---
src/kudu/util/block_bloom_filter-test.cc | 4 ++--
src/kudu/util/block_bloom_filter.cc | 34 ++++++++++++++++++++------------
src/kudu/util/block_bloom_filter.h | 22 +++++++++++++++------
3 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/src/kudu/util/block_bloom_filter-test.cc b/src/kudu/util/block_bloom_filter-test.cc
index b9ed9f2..8e12099 100644
--- a/src/kudu/util/block_bloom_filter-test.cc
+++ b/src/kudu/util/block_bloom_filter-test.cc
@@ -308,9 +308,9 @@ TEST_F(BlockBloomFilterTest, Or) {
BlockBloomFilter* bf3 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
BlockBloomFilter* always_false = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
ASSERT_OK(bf3->Or(*always_false));
- EXPECT_TRUE(bf3->AlwaysFalse());
+ EXPECT_TRUE(bf3->always_false());
ASSERT_OK(bf3->Or(*bf2));
- EXPECT_FALSE(bf3->AlwaysFalse());
+ EXPECT_FALSE(bf3->always_false());
// Invalid argument test cases.
BlockBloomFilter* bf4 = CreateBloomFilter(BlockBloomFilter::MinLogSpace(100, 0.01));
diff --git a/src/kudu/util/block_bloom_filter.cc b/src/kudu/util/block_bloom_filter.cc
index acaf358..40a3d64 100644
--- a/src/kudu/util/block_bloom_filter.cc
+++ b/src/kudu/util/block_bloom_filter.cc
@@ -118,24 +118,32 @@ Status BlockBloomFilter::Init(const int log_space_bytes, HashAlgorithm hash_algo
return Status::OK();
}
+Status BlockBloomFilter::InitFromDirectory(int log_space_bytes,
+ const Slice& directory,
+ bool always_false,
+ HashAlgorithm hash_algorithm,
+ uint32_t hash_seed) {
+ RETURN_NOT_OK(InitInternal(log_space_bytes, hash_algorithm, hash_seed));
+ DCHECK_NOTNULL(directory_);
+
+ if (directory_size() != directory.size()) {
+ return Status::InvalidArgument(
+ Substitute("Mismatch in BlockBloomFilter source directory size $0 and expected size $1",
+ directory.size(), directory_size()));
+ }
+ memcpy(directory_, directory.data(), directory.size());
+ always_false_ = always_false;
+ return Status();
+}
+
Status BlockBloomFilter::InitFromPB(const BlockBloomFilterPB& bf_src) {
if (!bf_src.has_log_space_bytes() || !bf_src.has_bloom_data() ||
!bf_src.has_hash_algorithm() || !bf_src.has_always_false()) {
return Status::InvalidArgument("Missing arguments to initialize BlockBloomFilter.");
}
- RETURN_NOT_OK(InitInternal(bf_src.log_space_bytes(), bf_src.hash_algorithm(),
- bf_src.hash_seed()));
- DCHECK_NOTNULL(directory_);
-
- if (directory_size() != bf_src.bloom_data().size()) {
- return Status::InvalidArgument(
- Substitute("Mismatch in BlockBloomFilter source directory size $0 and expected size $1",
- bf_src.bloom_data().size(), directory_size()));
- }
- memcpy(directory_, bf_src.bloom_data().data(), bf_src.bloom_data().size());
- always_false_ = bf_src.always_false();
- return Status::OK();
+ return InitFromDirectory(bf_src.log_space_bytes(), bf_src.bloom_data(), bf_src.always_false(),
+ bf_src.hash_algorithm(), bf_src.hash_seed());
}
Status BlockBloomFilter::Clone(BlockBloomFilterBufferAllocatorIf* allocator,
@@ -303,7 +311,7 @@ Status BlockBloomFilter::Or(const BlockBloomFilter& other) {
return Status::InvalidArgument(Substitute("Directory size don't match. this: $0, other: $1",
directory_size(), other.directory_size()));
}
- if (other.AlwaysFalse()) {
+ if (other.always_false()) {
// Nothing to do.
return Status::OK();
}
diff --git a/src/kudu/util/block_bloom_filter.h b/src/kudu/util/block_bloom_filter.h
index 8a0fdf8..6a9a991 100644
--- a/src/kudu/util/block_bloom_filter.h
+++ b/src/kudu/util/block_bloom_filter.h
@@ -91,6 +91,10 @@ class BlockBloomFilter {
Status Init(int log_space_bytes, HashAlgorithm hash_algorithm, uint32_t hash_seed);
// Initialize the BlockBloomFilter by de-serializing the protobuf message.
Status InitFromPB(const BlockBloomFilterPB& bf_src);
+ // Initialize the BlockBloomFilter from a populated "directory" structure.
+ // Useful for initializing the BlockBloomFilter by de-serializing a custom protobuf message.
+ Status InitFromDirectory(int log_space_bytes, const Slice& directory, bool always_false,
+ HashAlgorithm hash_algorithm, uint32_t hash_seed);
// Clones the BlockBloomFilter using the supplied "allocator". The allocator is expected
// to remain valid during the lifetime of the cloned BlockBloomFilter.
@@ -156,10 +160,21 @@ class BlockBloomFilter {
Status Or(const BlockBloomFilter& other);
// Returns whether the Bloom filter is empty and hence would return false for all lookups.
- bool AlwaysFalse() const {
+ bool always_false() const {
return always_false_;
}
+ // Returns amount of space used in log2 bytes.
+ int log_space_bytes() const {
+ return log_num_buckets_ + kLogBucketByteSize;
+ }
+
+ // Returns the directory structure. Useful for serializing the BlockBloomFilter to
+ // a custom protobuf message.
+ Slice directory() const {
+ return Slice(reinterpret_cast<const uint8_t*>(directory_), directory_size());
+ }
+
// Representation of a filter which allows all elements to pass.
static constexpr BlockBloomFilter* const kAlwaysTrueFilter = nullptr;
@@ -241,11 +256,6 @@ class BlockBloomFilter {
decltype(&BlockBloomFilter::BucketFind) bucket_find_func_ptr_;
decltype(&BlockBloomFilter::OrEqualArray) or_equal_array_func_ptr_;
- // Returns amount of space used in log2 bytes.
- int log_space_bytes() const {
- return log_num_buckets_ + kLogBucketByteSize;
- }
-
// Size of the internal directory structure in bytes.
int64_t directory_size() const {
return 1ULL << log_space_bytes();