You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/07/03 15:20:35 UTC
[doris] branch master updated: [feature-wip](unique-key-merge-on-write) add primary key index (#10529)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 91fca49df4 [feature-wip](unique-key-merge-on-write) add primary key index (#10529)
91fca49df4 is described below
commit 91fca49df47be07c68ba0b39a8b07847fb300581
Author: Xin Liao <li...@126.com>
AuthorDate: Sun Jul 3 23:20:30 2022 +0800
[feature-wip](unique-key-merge-on-write) add primary key index (#10529)
---
be/src/olap/CMakeLists.txt | 1 +
be/src/olap/primary_key_index.cpp | 66 +++++++++++++++++
be/src/olap/primary_key_index.h | 84 ++++++++++++++++++++++
be/test/CMakeLists.txt | 1 +
be/test/olap/primary_key_index_test.cpp | 123 ++++++++++++++++++++++++++++++++
5 files changed, 275 insertions(+)
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index 615a9fff44..c003c2bdc4 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -90,6 +90,7 @@ add_library(Olap STATIC
types.cpp
utils.cpp
wrapper_field.cpp
+ primary_key_index.cpp
rowset/segment_v2/bitmap_index_reader.cpp
rowset/segment_v2/bitmap_index_writer.cpp
rowset/segment_v2/bitshuffle_page.cpp
diff --git a/be/src/olap/primary_key_index.cpp b/be/src/olap/primary_key_index.cpp
new file mode 100644
index 0000000000..996dfa755c
--- /dev/null
+++ b/be/src/olap/primary_key_index.cpp
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/primary_key_index.h"
+
+#include "olap/rowset/segment_v2/encoding_info.h"
+
+namespace doris {
+
+Status PrimaryKeyIndexBuilder::init() {
+ // TODO(liaoxin) using the column type directly if there's only one column in unique key columns
+ const auto* type_info = get_scalar_type_info<OLAP_FIELD_TYPE_VARCHAR>();
+ segment_v2::IndexedColumnWriterOptions options;
+ options.write_ordinal_index = false;
+ options.write_value_index = true;
+ options.encoding = segment_v2::EncodingInfo::get_default_encoding(type_info, true);
+ // TODO(liaoxin) test to confirm whether it needs to be compressed
+ options.compression = segment_v2::NO_COMPRESSION; // currently not compressed
+ _index_builder.reset(new segment_v2::IndexedColumnWriter(options, type_info, _wblock));
+ return _index_builder->init();
+}
+
+Status PrimaryKeyIndexBuilder::add_item(const Slice& key) {
+ _index_builder->add(&key);
+ // the key is already sorted, so the first key is min_key, and
+ // the last key is max_key.
+ if (UNLIKELY(_num_rows == 0)) {
+ _min_key.append(key.get_data(), key.get_size());
+ }
+ _max_key.clear();
+ _max_key.append(key.get_data(), key.get_size());
+ _num_rows++;
+ _size += key.get_size();
+ return Status::OK();
+}
+
+Status PrimaryKeyIndexBuilder::finalize(segment_v2::IndexedColumnMetaPB* meta) {
+ // finish primary key index
+ return _index_builder->finish(meta);
+}
+
+Status PrimaryKeyIndexReader::parse(const FilePathDesc& path_desc,
+ const segment_v2::IndexedColumnMetaPB& meta) {
+ // parse primary key index
+ _index_reader.reset(new segment_v2::IndexedColumnReader(path_desc, meta));
+ RETURN_IF_ERROR(_index_reader->load(_use_page_cache, _kept_in_memory));
+
+ _parsed = true;
+ return Status::OK();
+}
+
+} // namespace doris
diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h
new file mode 100644
index 0000000000..fdddb1fab1
--- /dev/null
+++ b/be/src/olap/primary_key_index.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "gen_cpp/segment_v2.pb.h"
+#include "olap/rowset/segment_v2/indexed_column_reader.h"
+#include "olap/rowset/segment_v2/indexed_column_writer.h"
+#include "util/faststring.h"
+#include "util/slice.h"
+
+namespace doris {
+
+// Build index for primary key.
+// The primary key index is designed in a similar way like RocksDB
+// Partitioned Index, which is created in the segment file when MemTable flushes.
+// Index is stored in multiple pages to leverage the IndexedColumnWriter.
+class PrimaryKeyIndexBuilder {
+public:
+ PrimaryKeyIndexBuilder(fs::WritableBlock* wblock) : _wblock(wblock), _num_rows(0), _size(0) {}
+
+ Status init();
+
+ Status add_item(const Slice& key);
+
+ uint32_t num_rows() const { return _num_rows; }
+
+ uint64_t size() { return _size; }
+
+ Slice min_key() { return Slice(_min_key); }
+ Slice max_key() { return Slice(_max_key); }
+
+ Status finalize(segment_v2::IndexedColumnMetaPB* meta);
+
+private:
+ fs::WritableBlock* _wblock = nullptr;
+ uint32_t _num_rows;
+ uint64_t _size;
+
+ faststring _min_key;
+ faststring _max_key;
+ std::unique_ptr<segment_v2::IndexedColumnWriter> _index_builder;
+};
+
+class PrimaryKeyIndexReader {
+public:
+ PrimaryKeyIndexReader() : _parsed(false) {}
+
+ Status parse(const FilePathDesc& path_desc, const segment_v2::IndexedColumnMetaPB& meta);
+
+ Status new_iterator(std::unique_ptr<segment_v2::IndexedColumnIterator>* index_iterator) const {
+ DCHECK(_parsed);
+ index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get()));
+ return Status::OK();
+ }
+
+ uint32_t num_rows() const {
+ DCHECK(_parsed);
+ return _index_reader->num_values();
+ }
+
+private:
+ bool _parsed;
+ bool _use_page_cache = true;
+ bool _kept_in_memory = true;
+ std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader;
+};
+
+} // namespace doris
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index 49c14c49f6..6b8648b9ba 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -199,6 +199,7 @@ set(OLAP_TEST_FILES
olap/options_test.cpp
olap/fs/file_block_manager_test.cpp
olap/common_test.cpp
+ olap/primary_key_index_test.cpp
# olap/memtable_flush_executor_test.cpp
# olap/push_handler_test.cpp
)
diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp
new file mode 100644
index 0000000000..da8a63bc41
--- /dev/null
+++ b/be/test/olap/primary_key_index_test.cpp
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/primary_key_index.h"
+
+#include <gtest/gtest.h>
+
+#include "olap/fs/block_manager.h"
+#include "olap/fs/fs_util.h"
+#include "olap/row_cursor.h"
+#include "olap/tablet_schema_helper.h"
+#include "util/debug_util.h"
+#include "util/file_utils.h"
+
+namespace doris {
+
+class PrimaryKeyIndexTest : public testing::Test {
+public:
+ PrimaryKeyIndexTest() {}
+ virtual ~PrimaryKeyIndexTest() {}
+
+ void SetUp() override {
+ if (FileUtils::check_exist(kTestDir)) {
+ EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok());
+ }
+ EXPECT_TRUE(FileUtils::create_dir(kTestDir).ok());
+ }
+ void TearDown() override {
+ if (FileUtils::check_exist(kTestDir)) {
+ EXPECT_TRUE(FileUtils::remove_all(kTestDir).ok());
+ }
+ }
+
+private:
+ const std::string kTestDir = "./ut_dir/primary_key_index_test";
+};
+
+TEST_F(PrimaryKeyIndexTest, builder) {
+ std::string filename = kTestDir + "/builder";
+ std::unique_ptr<fs::WritableBlock> wblock;
+ fs::CreateBlockOptions opts(filename);
+ std::string storage_name;
+ EXPECT_TRUE(fs::fs_util::block_manager(storage_name)->create_block(opts, &wblock).ok());
+
+ PrimaryKeyIndexBuilder builder(wblock.get());
+ builder.init();
+ size_t num_rows = 0;
+ std::vector<std::string> keys;
+ for (int i = 1000; i < 10000; i += 2) {
+ keys.push_back(std::to_string(i));
+ builder.add_item(std::to_string(i));
+ num_rows++;
+ }
+ EXPECT_EQ("1000", builder.min_key().to_string());
+ EXPECT_EQ("9998", builder.max_key().to_string());
+ segment_v2::IndexedColumnMetaPB index_meta;
+ EXPECT_TRUE(builder.finalize(&index_meta));
+ EXPECT_TRUE(wblock->close().ok());
+ EXPECT_EQ(num_rows, builder.num_rows());
+
+ FilePathDesc path_desc(filename);
+ PrimaryKeyIndexReader index_reader;
+ EXPECT_TRUE(index_reader.parse(path_desc, index_meta).ok());
+ EXPECT_EQ(num_rows, index_reader.num_rows());
+
+ std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+ EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+ bool exact_match = false;
+ uint32_t row_id;
+ for (size_t i = 0; i < keys.size(); i++) {
+ auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_TRUE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i, row_id);
+ }
+ // find a non-existing key "8701"
+ {
+ string key("8701");
+ Slice slice(key);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_FALSE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(3851, row_id);
+ }
+
+ // find prefix "87"
+ {
+ string key("87");
+ Slice slice(key);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_FALSE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(3850, row_id);
+ }
+
+ // find prefix "9999"
+ {
+ string key("9999");
+ Slice slice(key);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_FALSE(exact_match);
+ EXPECT_TRUE(status.is_not_found());
+ }
+}
+
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org