You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2023/06/07 01:55:24 UTC

[doris] branch branch-1.2-lts updated (8c89f14bd6 -> 8ea3dfcd10)

This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a change to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


    from 8c89f14bd6 [Fix](Planner) Fix function do not have equal function and substitution failed (#20479)
     new bdb66e84ef [improvement](merge-on-write) move segment check to delta writer (#18643)
     new 7e02ee9a02 [enhancement](merge-on-write) avoid unnecessary pk index iteration (#19620)
     new 5f7e7d7cb4 [enhancement](merge-on-write) Avoiding unnecessary primary key index traversal (#19746)
     new 8ea3dfcd10 [Enhancement](merge-on-write) optimize bloom filter for primary key index (#20182)

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/olap/delta_writer.cpp                       | 14 +++++
 be/src/olap/primary_key_index.cpp                  |  8 ++-
 .../segment_v2/bloom_filter_index_writer.cpp       | 59 +++++++++++++++++++++-
 .../rowset/segment_v2/bloom_filter_index_writer.h  | 39 ++++++++++++++
 be/src/olap/tablet.cpp                             |  6 ++-
 5 files changed, 121 insertions(+), 5 deletions(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 02/04: [enhancement](merge-on-write) avoid unnecessary pk index iteration (#19620)

Posted by zh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 7e02ee9a02a6ff9e7f0b707d125c942dc74f42fd
Author: zhannngchen <48...@users.noreply.github.com>
AuthorDate: Tue May 16 17:05:14 2023 +0800

    [enhancement](merge-on-write) avoid unnecessary pk index iteration (#19620)
---
 be/src/olap/delta_writer.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 75ec2eea43..480e09d925 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -405,8 +405,11 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& slave_tablet_nodes,
             SchemaChangeHandler::tablet_in_converting(_tablet->tablet_id())) {
             return Status::OK();
         }
-        RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), segments, nullptr,
-                                                    _delete_bitmap, _cur_max_version, true));
+        if (segments.size() > 1) {
+            RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), segments, nullptr,
+                                                        _delete_bitmap, _cur_max_version, true));
+        }
+
         _storage_engine->txn_manager()->set_txn_related_delete_bitmap(
                 _req.partition_id, _req.txn_id, _tablet->tablet_id(), _tablet->schema_hash(),
                 _tablet->tablet_uid(), true, _delete_bitmap, _rowset_ids);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 01/04: [improvement](merge-on-write) move segment check to delta writer (#18643)

Posted by zh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit bdb66e84ef0f49bf12c76374e626c681260a697a
Author: yixiutt <10...@users.noreply.github.com>
AuthorDate: Mon Apr 17 19:59:44 2023 +0800

    [improvement](merge-on-write) move segment check to delta writer (#18643)
---
 be/src/olap/delta_writer.cpp | 11 +++++++++++
 be/src/olap/tablet.cpp       |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index ce3ad6a6b9..75ec2eea43 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -23,6 +23,7 @@
 #include "olap/data_dir.h"
 #include "olap/memtable.h"
 #include "olap/memtable_flush_executor.h"
+#include "olap/rowset/beta_rowset.h"
 #include "olap/rowset/beta_rowset_writer.h"
 #include "olap/rowset/rowset_writer_context.h"
 #include "olap/schema.h"
@@ -396,6 +397,16 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& slave_tablet_nodes,
         return res;
     }
     if (_tablet->enable_unique_key_merge_on_write()) {
+        auto beta_rowset = reinterpret_cast<BetaRowset*>(_cur_rowset.get());
+        std::vector<segment_v2::SegmentSharedPtr> segments;
+        RETURN_IF_ERROR(beta_rowset->load_segments(&segments));
+        // tablet is under alter process. The delete bitmap will be calculated after conversion.
+        if (_tablet->tablet_state() == TABLET_NOTREADY &&
+            SchemaChangeHandler::tablet_in_converting(_tablet->tablet_id())) {
+            return Status::OK();
+        }
+        RETURN_IF_ERROR(_tablet->calc_delete_bitmap(beta_rowset->rowset_id(), segments, nullptr,
+                                                    _delete_bitmap, _cur_max_version, true));
         _storage_engine->txn_manager()->set_txn_related_delete_bitmap(
                 _req.partition_id, _req.txn_id, _tablet->tablet_id(), _tablet->schema_hash(),
                 _tablet->tablet_uid(), true, _delete_bitmap, _rowset_ids);
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 7bba605b67..9b1c68f4e1 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2242,7 +2242,7 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, const TabletT
     }
 
     RETURN_IF_ERROR(calc_delete_bitmap(rowset->rowset_id(), segments, &rowset_ids_to_add,
-                                       delete_bitmap, cur_version - 1, true));
+                                       delete_bitmap, cur_version - 1, false));
 
     // update version without write lock, compaction and publish_txn
     // will update delete bitmap, handle compaction with _rowset_update_lock


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 03/04: [enhancement](merge-on-write) Avoiding unnecessary primary key index traversal (#19746)

Posted by zh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 5f7e7d7cb4ae823c001b830f7d6365643fa8647e
Author: Xin Liao <li...@126.com>
AuthorDate: Thu May 18 08:41:49 2023 +0800

    [enhancement](merge-on-write) Avoiding unnecessary primary key index traversal (#19746)
---
 be/src/olap/tablet.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 9b1c68f4e1..06c0572d80 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2241,8 +2241,10 @@ Status Tablet::update_delete_bitmap(const RowsetSharedPtr& rowset, const TabletT
         delete_bitmap->remove({to_del, 0, 0}, {to_del, UINT32_MAX, INT64_MAX});
     }
 
-    RETURN_IF_ERROR(calc_delete_bitmap(rowset->rowset_id(), segments, &rowset_ids_to_add,
-                                       delete_bitmap, cur_version - 1, false));
+    if (!rowset_ids_to_add.empty()) {
+        RETURN_IF_ERROR(calc_delete_bitmap(rowset->rowset_id(), segments, &rowset_ids_to_add,
+                                           delete_bitmap, cur_version - 1, false));
+    }
 
     // update version without write lock, compaction and publish_txn
     // will update delete bitmap, handle compaction with _rowset_update_lock


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 04/04: [Enhancement](merge-on-write) optimize bloom filter for primary key index (#20182)

Posted by zh...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8ea3dfcd101ec60eee963a80a59180796ebfca1f
Author: Xin Liao <li...@126.com>
AuthorDate: Wed May 31 09:49:15 2023 +0800

    [Enhancement](merge-on-write) optimize bloom filter for primary key index (#20182)
---
 be/src/olap/primary_key_index.cpp                  |  8 ++-
 .../segment_v2/bloom_filter_index_writer.cpp       | 59 +++++++++++++++++++++-
 .../rowset/segment_v2/bloom_filter_index_writer.h  | 39 ++++++++++++++
 3 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp
index 7c2c5fe16a..6d5dc0edd2 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -19,6 +19,9 @@
 
 #include "common/config.h"
 #include "io/fs/file_reader.h"
+#include "olap/olap_common.h"
+#include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
+#include "olap/rowset/segment_v2/bloom_filter_index_writer.h"
 #include "olap/rowset/segment_v2/encoding_info.h"
 
 namespace doris {
@@ -36,8 +39,9 @@ Status PrimaryKeyIndexBuilder::init() {
             new segment_v2::IndexedColumnWriter(options, type_info, _file_writer));
     RETURN_IF_ERROR(_primary_key_index_builder->init());
 
-    return segment_v2::BloomFilterIndexWriter::create(segment_v2::BloomFilterOptions(), type_info,
-                                                      &_bloom_filter_index_builder);
+    _bloom_filter_index_builder.reset(new segment_v2::PrimaryKeyBloomFilterIndexWriterImpl(
+            segment_v2::BloomFilterOptions(), type_info));
+    return Status::OK();
 }
 
 Status PrimaryKeyIndexBuilder::add_item(const Slice& key) {
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 5542a6068d..c8f8cc8c50 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -25,7 +25,6 @@
 #include "olap/rowset/segment_v2/encoding_info.h"
 #include "olap/rowset/segment_v2/indexed_column_writer.h"
 #include "olap/types.h"
-#include "runtime/mem_pool.h"
 #include "util/faststring.h"
 #include "util/slice.h"
 
@@ -170,6 +169,64 @@ private:
 
 } // namespace
 
+void PrimaryKeyBloomFilterIndexWriterImpl::add_values(const void* values, size_t count) {
+    const Slice* v = (const Slice*)values;
+    for (int i = 0; i < count; ++i) {
+        Slice new_value;
+        _type_info->deep_copy(&new_value, v, &_pool);
+        _values.push_back(new_value);
+        ++v;
+    }
+}
+
+Status PrimaryKeyBloomFilterIndexWriterImpl::flush() {
+    std::unique_ptr<BloomFilter> bf;
+    RETURN_IF_ERROR(BloomFilter::create(BLOCK_BLOOM_FILTER, &bf));
+    RETURN_IF_ERROR(bf->init(_values.size(), _bf_options.fpp, _bf_options.strategy));
+    bf->set_has_null(_has_null);
+    for (auto& v : _values) {
+        Slice* s = (Slice*)&v;
+        bf->add_bytes(s->data, s->size);
+    }
+    _bf_buffer_size += bf->size();
+    _bfs.push_back(std::move(bf));
+    _values.clear();
+    _has_null = false;
+    return Status::OK();
+}
+
+Status PrimaryKeyBloomFilterIndexWriterImpl::finish(io::FileWriter* file_writer,
+                                                    ColumnIndexMetaPB* index_meta) {
+    if (_values.size() > 0) {
+        RETURN_IF_ERROR(flush());
+    }
+    index_meta->set_type(BLOOM_FILTER_INDEX);
+    BloomFilterIndexPB* meta = index_meta->mutable_bloom_filter_index();
+    meta->set_hash_strategy(_bf_options.strategy);
+    meta->set_algorithm(BLOCK_BLOOM_FILTER);
+
+    // write bloom filters
+    const auto* bf_type_info = get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>();
+    IndexedColumnWriterOptions options;
+    options.write_ordinal_index = true;
+    options.write_value_index = false;
+    options.encoding = PLAIN_ENCODING;
+    IndexedColumnWriter bf_writer(options, bf_type_info, file_writer);
+    RETURN_IF_ERROR(bf_writer.init());
+    for (auto& bf : _bfs) {
+        Slice data(bf->data(), bf->size());
+        bf_writer.add(&data);
+    }
+    RETURN_IF_ERROR(bf_writer.finish(meta->mutable_bloom_filter()));
+    return Status::OK();
+}
+
+uint64_t PrimaryKeyBloomFilterIndexWriterImpl::size() {
+    uint64_t total_size = _bf_buffer_size;
+    total_size += _pool.total_allocated_bytes();
+    return total_size;
+}
+
 // TODO currently we don't support bloom filter index for tinyint/hll/float/double
 Status BloomFilterIndexWriter::create(const BloomFilterOptions& bf_options,
                                       const TypeInfo* type_info,
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
index 8b9a945e1a..52df34ec68 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.h
@@ -23,6 +23,10 @@
 #include "common/status.h"
 #include "gen_cpp/segment_v2.pb.h"
 #include "gutil/macros.h"
+#include "olap/rowset/segment_v2/bloom_filter.h"
+#include "runtime/mem_pool.h"
+#include "util/slice.h"
+#include "vec/common/arena.h"
 
 namespace doris {
 
@@ -58,5 +62,40 @@ private:
     DISALLOW_COPY_AND_ASSIGN(BloomFilterIndexWriter);
 };
 
+// For unique-key tables with merge-on-write, the data in each segment is already deduplicated.
+// This bloom filter writer therefore does not need a `set` for deduplication, unlike
+// `BloomFilterIndexWriterImpl`; it buffers the values in a vector instead, which is faster.
+class PrimaryKeyBloomFilterIndexWriterImpl : public BloomFilterIndexWriter {
+public:
+    explicit PrimaryKeyBloomFilterIndexWriterImpl(const BloomFilterOptions& bf_options,
+                                                  const TypeInfo* type_info)
+            : _bf_options(bf_options),
+              _type_info(type_info),
+              _has_null(false),
+              _bf_buffer_size(0) {}
+
+    ~PrimaryKeyBloomFilterIndexWriterImpl() override = default;
+
+    void add_values(const void* values, size_t count) override;
+
+    void add_nulls(uint32_t count) override { _has_null = true; }
+
+    Status flush() override;
+
+    Status finish(io::FileWriter* file_writer, ColumnIndexMetaPB* index_meta) override;
+
+    uint64_t size() override;
+
+private:
+    BloomFilterOptions _bf_options;
+    const TypeInfo* _type_info;
+    MemPool _pool;
+    bool _has_null;
+    uint64_t _bf_buffer_size;
+    // distinct values
+    std::vector<Slice> _values;
+    std::vector<std::unique_ptr<BloomFilter>> _bfs;
+};
+
 } // namespace segment_v2
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org