You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2023/06/20 11:51:12 UTC

[doris] branch master updated: [enhancement](merge-on-write) add primary key data page size config (#20961)

This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9eade148dd [enhancement](merge-on-write) add primary key data page size config (#20961)
9eade148dd is described below

commit 9eade148ddcea0dec2ab847cff6c7f01964d89df
Author: Xin Liao <li...@126.com>
AuthorDate: Tue Jun 20 19:51:02 2023 +0800

    [enhancement](merge-on-write) add primary key data page size config (#20961)
---
 be/src/common/config.cpp                                | 2 ++
 be/src/common/config.h                                  | 2 ++
 be/src/olap/primary_key_index.cpp                       | 1 +
 be/src/olap/rowset/segment_v2/indexed_column_reader.cpp | 3 +++
 be/src/olap/rowset/segment_v2/indexed_column_writer.cpp | 1 +
 be/src/olap/rowset/segment_v2/indexed_column_writer.h   | 1 +
 6 files changed, 10 insertions(+)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 58975767ea..1230318c5c 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -306,6 +306,8 @@ DEFINE_Bool(disable_storage_row_cache, "true");
 
 // Cache for mow primary key storage page size
 DEFINE_String(pk_storage_page_cache_limit, "10%");
+// data page size for primary key index
+DEFINE_Int32(primary_key_data_page_size, "32768");
 
 DEFINE_Bool(enable_low_cardinality_optimize, "true");
 DEFINE_Bool(enable_low_cardinality_cache_code, "true");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 577f96a194..374994b943 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -347,6 +347,8 @@ DECLARE_Bool(disable_storage_row_cache);
 // Cache for mow primary key storage page size, it's separated from
 // storage_page_cache_limit
 DECLARE_String(pk_storage_page_cache_limit);
+// data page size for primary key index
+DECLARE_Int32(primary_key_data_page_size);
 
 DECLARE_Bool(enable_low_cardinality_optimize);
 DECLARE_Bool(enable_low_cardinality_cache_code);
diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp
index c2a597b152..3f30529732 100644
--- a/be/src/olap/primary_key_index.cpp
+++ b/be/src/olap/primary_key_index.cpp
@@ -38,6 +38,7 @@ Status PrimaryKeyIndexBuilder::init() {
     segment_v2::IndexedColumnWriterOptions options;
     options.write_ordinal_index = true;
     options.write_value_index = true;
+    options.data_page_size = config::primary_key_data_page_size;
     options.encoding = segment_v2::EncodingInfo::get_default_encoding(type_info, true);
     // TODO(liaoxin) test to confirm whether it needs to be compressed
     options.compression = segment_v2::NO_COMPRESSION; // currently not compressed
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index eaba30eba2..258d5ce4cd 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -37,6 +37,8 @@ using namespace ErrorCode;
 namespace segment_v2 {
 
 static bvar::Adder<uint64_t> g_index_reader_bytes("doris_pk", "index_reader_bytes");
+static bvar::Adder<uint64_t> g_index_reader_compressed_bytes("doris_pk",
+                                                             "index_reader_compressed_bytes");
 static bvar::PerSecond<bvar::Adder<uint64_t>> g_index_reader_bytes_per_second(
         "doris_pk", "index_reader_bytes_per_second", &g_index_reader_bytes, 60);
 static bvar::Adder<uint64_t> g_index_reader_pages("doris_pk", "index_reader_pages");
@@ -119,6 +121,7 @@ Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle,
     opts.pre_decode = pre_decode;
 
     auto st = PageIO::read_and_decompress_page(opts, handle, body, footer);
+    g_index_reader_compressed_bytes << pp.size;
     g_index_reader_bytes << footer->uncompressed_size();
     g_index_reader_pages << 1;
     return st;
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
index 0afbcc349c..28a44b7b75 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
@@ -63,6 +63,7 @@ Status IndexedColumnWriter::init() {
     PageBuilder* data_page_builder = nullptr;
     PageBuilderOptions builder_option;
     builder_option.need_check_bitmap = false;
+    builder_option.data_page_size = _options.data_page_size;
     RETURN_IF_ERROR(encoding_info->create_page_builder(builder_option, &data_page_builder));
     _data_page_builder.reset(data_page_builder);
 
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
index ab6812b5e5..a95a9fce7f 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
@@ -47,6 +47,7 @@ class PageBuilder;
 
 struct IndexedColumnWriterOptions {
     size_t index_page_size = 64 * 1024;
+    size_t data_page_size = 1024 * 1024;
     bool write_ordinal_index = false;
     bool write_value_index = false;
     EncodingTypePB encoding = DEFAULT_ENCODING;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org