You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/06/17 06:50:59 UTC

[GitHub] [incubator-doris] adonis0147 commented on a diff in pull request #10195: [Improvement] Use IOBufAsZeroCopyInputStream for bloom filter

adonis0147 commented on code in PR #10195:
URL: https://github.com/apache/incubator-doris/pull/10195#discussion_r899795015


##########
be/src/exprs/block_bloom_filter.hpp:
##########
@@ -44,8 +47,8 @@ class BlockBloomFilter {
     Status init(int log_space_bytes, uint32_t hash_seed);
     // Initialize the BlockBloomFilter from a populated "directory" structure.
     // Useful for initializing the BlockBloomFilter by de-serializing a custom protobuf message.
-    Status init_from_directory(int log_space_bytes, const Slice& directory, bool always_false,
-                               uint32_t hash_seed);
+    Status init_from_directory(int log_space_bytes, butil::IOBufAsZeroCopyInputStream* data,
+                               const size_t& data_size, bool always_false, uint32_t hash_seed);

Review Comment:
   Using `const size_t` is enough.
   ```suggestion
                                  const size_t data_size, bool always_false, uint32_t hash_seed);
   ```



##########
be/src/exprs/block_bloom_filter.hpp:
##########
@@ -177,7 +180,7 @@ class BlockBloomFilter {
 
 #endif
     // Size of the internal directory structure in bytes.
-    int64_t directory_size() const { return 1ULL << log_space_bytes(); }
+    size_t directory_size() const { return 1ULL << log_space_bytes(); }

Review Comment:
   Why use platform related size here? Using `uint64_t` is better.
   
   ```suggestion
       uint64_t directory_size() const { return 1ULL << log_space_bytes(); }
   Write Preview
   ```



##########
be/src/exprs/block_bloom_filter_impl.cc:
##########
@@ -83,17 +85,26 @@ Status BlockBloomFilter::init(const int log_space_bytes, uint32_t hash_seed) {
     return Status::OK();
 }
 
-Status BlockBloomFilter::init_from_directory(int log_space_bytes, const Slice& directory,
-                                             bool always_false, uint32_t hash_seed) {
+Status BlockBloomFilter::init_from_directory(int log_space_bytes,
+                                             butil::IOBufAsZeroCopyInputStream* data,
+                                             const size_t& data_size, bool always_false,
+                                             uint32_t hash_seed) {
     RETURN_IF_ERROR(init_internal(log_space_bytes, hash_seed));
     DCHECK(_directory);
 
-    if (directory_size() != directory.size) {
+    if (directory_size() != data_size) {
         return Status::InvalidArgument(fmt::format(
                 "Mismatch in BlockBloomFilter source directory size {} and expected size {}",
-                directory.size, directory_size()));
+                data_size, directory_size()));
+    }
+    int size = 0;
+    char* tmp;
+    const void** ptr = (const void**)&tmp;
+    char* data_ptr = reinterpret_cast<char*>(_directory);
+    while (data->Next(ptr, &size)) {
+        memcpy(data_ptr, *ptr, size);
+        data_ptr += size;

Review Comment:
   If you create other `Slice`, you can define a member function to hide this logic which looks a little bit complex.



##########
be/src/exprs/block_bloom_filter_impl.cc:
##########
@@ -83,17 +85,26 @@ Status BlockBloomFilter::init(const int log_space_bytes, uint32_t hash_seed) {
     return Status::OK();
 }
 
-Status BlockBloomFilter::init_from_directory(int log_space_bytes, const Slice& directory,
-                                             bool always_false, uint32_t hash_seed) {
+Status BlockBloomFilter::init_from_directory(int log_space_bytes,
+                                             butil::IOBufAsZeroCopyInputStream* data,
+                                             const size_t& data_size, bool always_false,
+                                             uint32_t hash_seed) {

Review Comment:
   It may not be a good idea to change the interface like that which increases the number of arguments. `data` and `data_size` are related and users will use them at the same time here. Try to create some `struct` like `Slice`.



##########
be/src/exprs/block_bloom_filter.hpp:
##########
@@ -202,6 +205,13 @@ class BlockBloomFilter {
         return (static_cast<uint64_t>(hash) * m + a) >> 32U;
     }
 
+#ifdef DISALLOW_COPY_AND_ASSIGN
+#undef DISALLOW_COPY_AND_ASSIGN
+#endif
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+    TypeName(const TypeName&) = delete;    \
+    void operator=(const TypeName&) = delete
+

Review Comment:
   To much similar macros in the codebase, try to unify them rather than create a new one.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org