You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/06/17 07:16:32 UTC

[GitHub] [incubator-doris] Gabriel39 commented on a diff in pull request #10195: [Improvement] Use IOBufAsZeroCopyInputStream for bloom filter

Gabriel39 commented on code in PR #10195:
URL: https://github.com/apache/incubator-doris/pull/10195#discussion_r899832593


##########
be/src/exprs/block_bloom_filter_impl.cc:
##########
@@ -83,17 +85,26 @@ Status BlockBloomFilter::init(const int log_space_bytes, uint32_t hash_seed) {
     return Status::OK();
 }
 
-Status BlockBloomFilter::init_from_directory(int log_space_bytes, const Slice& directory,
-                                             bool always_false, uint32_t hash_seed) {
+Status BlockBloomFilter::init_from_directory(int log_space_bytes,
+                                             butil::IOBufAsZeroCopyInputStream* data,
+                                             const size_t& data_size, bool always_false,
+                                             uint32_t hash_seed) {
     RETURN_IF_ERROR(init_internal(log_space_bytes, hash_seed));
     DCHECK(_directory);
 
-    if (directory_size() != directory.size) {
+    if (directory_size() != data_size) {
         return Status::InvalidArgument(fmt::format(
                 "Mismatch in BlockBloomFilter source directory size {} and expected size {}",
-                directory.size, directory_size()));
+                data_size, directory_size()));
+    }
+    int size = 0;
+    char* tmp;
+    const void** ptr = (const void**)&tmp;
+    char* data_ptr = reinterpret_cast<char*>(_directory);
+    while (data->Next(ptr, &size)) {
+        memcpy(data_ptr, *ptr, size);
+        data_ptr += size;

Review Comment:
   Data blocks in IOBufAsZeroCopyInputStream is non-continuous. This is why we use another memcpy before. This PR delete this redundant memcpy and so we can't use Slice here.



##########
be/src/runtime/runtime_filter_mgr.h:
##########
@@ -68,7 +72,8 @@ class RuntimeFilterMgr {
                          const TQueryOptions& options, int node_id = -1);
 
     // update filter by remote
-    Status update_filter(const PPublishFilterRequest* request, const char* data);
+    Status update_filter(const PPublishFilterRequest* request,
+                         butil::IOBufAsZeroCopyInputStream* data);

Review Comment:
   `IOBufAsZeroCopyInputStream.Next` is a non-const method so we can't make it const here



##########
be/src/exprs/block_bloom_filter.hpp:
##########
@@ -177,7 +180,7 @@ class BlockBloomFilter {
 
 #endif
     // Size of the internal directory structure in bytes.
-    int64_t directory_size() const { return 1ULL << log_space_bytes(); }
+    size_t directory_size() const { return 1ULL << log_space_bytes(); }

Review Comment:
   It's actually s size_t and we use it to compare with other variables with size_t type everywhere. So I just want to keep consistency with other variables here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org