You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/12/30 15:02:01 UTC

[doris] branch master updated: [refactor](non-vec) remove non vec code for indexed column reader (#15409)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new cc7a9d92ad [refactor](non-vec) remove non vec code for indexed column reader (#15409)
cc7a9d92ad is described below

commit cc7a9d92ad06072bb4f3dc326e01b937eb4978ff
Author: Xin Liao <li...@126.com>
AuthorDate: Fri Dec 30 23:01:54 2022 +0800

    [refactor](non-vec) remove non vec code for indexed column reader (#15409)
---
 .../olap/rowset/segment_v2/binary_prefix_page.cpp  |  19 ++++
 be/src/olap/rowset/segment_v2/binary_prefix_page.h |   6 +-
 .../olap/rowset/segment_v2/bitmap_index_reader.cpp |  13 ++-
 .../segment_v2/bloom_filter_index_reader.cpp       |  19 ++--
 .../rowset/segment_v2/indexed_column_reader.cpp    |  35 +++++++
 .../olap/rowset/segment_v2/indexed_column_reader.h |   3 +
 be/src/olap/rowset/segment_v2/page_decoder.h       |   4 +-
 be/src/olap/rowset/segment_v2/segment.cpp          |  19 ++--
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  17 ++-
 be/src/olap/rowset/segment_v2/zone_map_index.cpp   |  17 ++-
 be/src/olap/tablet.cpp                             |  23 ++---
 be/src/vec/data_types/data_type_factory.hpp        |   4 +
 .../rowset/segment_v2/binary_prefix_page_test.cpp  | 114 +++++++++++++++++++++
 13 files changed, 228 insertions(+), 65 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp
index a80d4e6620..8ced998f57 100644
--- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp
@@ -275,5 +275,24 @@ Status BinaryPrefixPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
     return Status::OK();
 }
 
+// Appends up to *n values from the current position to dst; *n returns the count appended.
+Status BinaryPrefixPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
+    DCHECK(_parsed);
+    if (PREDICT_FALSE(*n == 0 || _cur_pos >= _num_values)) {
+        *n = 0; // nothing requested, or no values left in this page
+        return Status::OK();
+    }
+    const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_values - _cur_pos));
+    // Copy the already-decoded current value into dst, then decode the following one.
+    for (size_t i = 0; i < max_fetch; ++i) {
+        const auto* value = reinterpret_cast<const char*>(_current_value.data());
+        dst->insert_data(value, _current_value.size());
+        _read_next_value(); // NOTE(review): result, if any, is not checked - confirm
+        _cur_pos++;
+    }
+    *n = max_fetch;
+    return Status::OK();
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.h b/be/src/olap/rowset/segment_v2/binary_prefix_page.h
index bec5f3f7b4..53e966c191 100644
--- a/be/src/olap/rowset/segment_v2/binary_prefix_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.h
@@ -113,9 +113,7 @@ public:
 
     Status next_batch(size_t* n, ColumnBlockView* dst) override;
 
-    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override {
-        return Status::NotSupported("binary prefix page not implement vec op now");
-    };
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override;
 
     size_t count() const override {
         DCHECK(_parsed);
@@ -173,4 +171,4 @@ private:
 };
 
 } // namespace segment_v2
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp
index eb108f8e8d..ce8e164d58 100644
--- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.cpp
@@ -18,6 +18,7 @@
 #include "olap/rowset/segment_v2/bitmap_index_reader.h"
 
 #include "olap/types.h"
+#include "vec/data_types/data_type_factory.hpp"
 
 namespace doris {
 namespace segment_v2 {
@@ -49,18 +50,16 @@ Status BitmapIndexIterator::read_bitmap(rowid_t ordinal, roaring::Roaring* resul
     DCHECK(0 <= ordinal && ordinal < _reader->bitmap_nums());
 
     size_t num_to_read = 1;
-    std::unique_ptr<ColumnVectorBatch> cvb;
-    RETURN_IF_ERROR(
-            ColumnVectorBatch::create(num_to_read, false, _reader->type_info(), nullptr, &cvb));
-    ColumnBlock block(cvb.get(), _pool.get());
-    ColumnBlockView column_block_view(&block);
+    auto data_type = vectorized::DataTypeFactory::instance().create_data_type(
+            _reader->type_info()->type(), 1, 0);
+    auto column = data_type->create_column();
 
     RETURN_IF_ERROR(_bitmap_column_iter.seek_to_ordinal(ordinal));
     size_t num_read = num_to_read;
-    RETURN_IF_ERROR(_bitmap_column_iter.next_batch(&num_read, &column_block_view));
+    RETURN_IF_ERROR(_bitmap_column_iter.next_batch(&num_read, column));
     DCHECK(num_to_read == num_read);
 
-    *result = roaring::Roaring::read(reinterpret_cast<const Slice*>(block.data())->data, false);
+    *result = roaring::Roaring::read(column->get_data_at(0).data, false);
     _pool->clear();
     return Status::OK();
 }
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp
index 1f6a733776..cb94f029b0 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp
@@ -19,6 +19,7 @@
 
 #include "olap/rowset/segment_v2/bloom_filter.h"
 #include "olap/types.h"
+#include "vec/data_types/data_type_factory.hpp"
 
 namespace doris {
 namespace segment_v2 {
@@ -39,24 +40,22 @@ Status BloomFilterIndexReader::new_iterator(std::unique_ptr<BloomFilterIndexIter
 Status BloomFilterIndexIterator::read_bloom_filter(rowid_t ordinal,
                                                    std::unique_ptr<BloomFilter>* bf) {
     size_t num_to_read = 1;
-    std::unique_ptr<ColumnVectorBatch> cvb;
-    RETURN_IF_ERROR(
-            ColumnVectorBatch::create(num_to_read, false, _reader->type_info(), nullptr, &cvb));
-    ColumnBlock block(cvb.get(), _pool.get());
-    ColumnBlockView column_block_view(&block);
+    auto data_type = vectorized::DataTypeFactory::instance().create_data_type(
+            _reader->type_info()->type(), 1, 0);
+    auto column = data_type->create_column(); // receives the single value read below
 
     RETURN_IF_ERROR(_bloom_filter_iter.seek_to_ordinal(ordinal));
     size_t num_read = num_to_read;
-    RETURN_IF_ERROR(_bloom_filter_iter.next_batch(&num_read, &column_block_view));
+    RETURN_IF_ERROR(_bloom_filter_iter.next_batch(&num_read, column));
     DCHECK(num_to_read == num_read);
     // construct bloom filter
-    const Slice* value_ptr = reinterpret_cast<const Slice*>(block.data());
-    BloomFilter::create(_reader->_bloom_filter_index_meta->algorithm(), bf, value_ptr->size);
-    RETURN_IF_ERROR((*bf)->init(value_ptr->data, value_ptr->size,
+    StringRef value = column->get_data_at(0); // bytes of the value just read
+    BloomFilter::create(_reader->_bloom_filter_index_meta->algorithm(), bf, value.size);
+    RETURN_IF_ERROR((*bf)->init(value.data, value.size,
                                 _reader->_bloom_filter_index_meta->hash_strategy()));
     _pool->clear();
     return Status::OK();
 }
 
 } // namespace segment_v2
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 424e1b7141..bac8b7b51a 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -258,5 +258,40 @@ Status IndexedColumnIterator::next_batch(size_t* n, ColumnBlockView* column_view
     return Status::OK();
 }
 
+// Appends up to *n values from the current ordinal to dst; on return *n is the
+// number of values actually read.
+Status IndexedColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
+    DCHECK(_seeked);
+    if (_current_ordinal == _reader->num_values()) {
+        *n = 0;
+        return Status::OK();
+    }
+
+    size_t todo = *n;
+    while (todo > 0) {
+        if (!_data_page.has_remaining()) {
+            // Current page is drained: move to the next data page, or stop when
+            // there is no index page or the page iterator has no further entry.
+            if (!_reader->_has_index_page || !_current_iter->move_next()) {
+                break;
+            }
+            RETURN_IF_ERROR(_read_data_page(_current_iter->current_page_pointer()));
+        }
+
+        const size_t want = std::min(_data_page.remaining(), todo);
+        size_t got = want;
+        RETURN_IF_ERROR(_data_page.data_decoder->next_batch(&got, dst));
+        DCHECK(want == got);
+
+        // advance the in-page cursor and the global ordinal by what was decoded
+        _data_page.offset_in_page += got;
+        _current_ordinal += got;
+        todo -= got;
+    }
+    *n -= todo;
+    _seeked = false; // the DCHECK above demands a new seek before the next call
+    return Status::OK();
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h
index 3546fe728e..c7e46d2bf2 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h
@@ -131,6 +131,9 @@ public:
     // from Arena
     Status next_batch(size_t* n, ColumnBlockView* column_view);
 
+    // After one seek, we can only call this function once to read data
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst);
+
 private:
     Status _read_data_page(const PagePointer& pp);
 
diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h
index 7c817a08ac..9207825de3 100644
--- a/be/src/olap/rowset/segment_v2/page_decoder.h
+++ b/be/src/olap/rowset/segment_v2/page_decoder.h
@@ -82,9 +82,7 @@ public:
     // allocated in the column_vector_view's mem_pool.
     virtual Status next_batch(size_t* n, ColumnBlockView* dst) = 0;
 
-    virtual Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
-        return Status::NotSupported("not implement vec op now");
-    }
+    virtual Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) = 0;
 
     virtual Status read_by_rowids(const rowid_t* rowids, ordinal_t page_first_ordinal, size_t* n,
                                   vectorized::MutableColumnPtr& dst) {
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 76b1c82a0c..afe1b14890 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -36,6 +36,7 @@
 #include "olap/tablet_schema.h"
 #include "util/crc32c.h"
 #include "util/slice.h" // Slice
+#include "vec/data_types/data_type_factory.hpp"
 #include "vec/olap/vgeneric_iterators.h"
 
 namespace doris {
@@ -316,20 +317,18 @@ Status Segment::lookup_row_key(const Slice& key, RowLocation* row_location) {
     row_location->segment_id = _segment_id;
 
     if (has_seq_col) {
-        MemPool pool;
         size_t num_to_read = 1;
-        std::unique_ptr<ColumnVectorBatch> cvb;
-        RETURN_IF_ERROR(ColumnVectorBatch::create(num_to_read, false, _pk_index_reader->type_info(),
-                                                  nullptr, &cvb));
-        ColumnBlock block(cvb.get(), &pool);
-        ColumnBlockView column_block_view(&block);
+        auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
+                _pk_index_reader->type_info()->type(), 1, 0);
+        auto index_column = index_type->create_column();
         size_t num_read = num_to_read;
-        RETURN_IF_ERROR(index_iterator->next_batch(&num_read, &column_block_view));
+        RETURN_IF_ERROR(index_iterator->next_batch(&num_read, index_column));
         DCHECK(num_to_read == num_read);
 
-        const Slice* sought_key = reinterpret_cast<const Slice*>(cvb->cell_ptr(0));
+        Slice sought_key =
+                Slice(index_column->get_data_at(0).data, index_column->get_data_at(0).size);
         Slice sought_key_without_seq =
-                Slice(sought_key->get_data(), sought_key->get_size() - seq_col_length);
+                Slice(sought_key.get_data(), sought_key.get_size() - seq_col_length);
 
         // compare key
         if (key_without_seq.compare(sought_key_without_seq) != 0) {
@@ -340,7 +339,7 @@ Status Segment::lookup_row_key(const Slice& key, RowLocation* row_location) {
         Slice sequence_id =
                 Slice(key.get_data() + key_without_seq.get_size() + 1, seq_col_length - 1);
         Slice previous_sequence_id = Slice(
-                sought_key->get_data() + sought_key_without_seq.get_size() + 1, seq_col_length - 1);
+                sought_key.get_data() + sought_key_without_seq.get_size() + 1, seq_col_length - 1);
         if (sequence_id.compare(previous_sequence_id) < 0) {
             return Status::AlreadyExist("key with higher sequence id exists");
         }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f6da5ce4be..3ca445cf77 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -32,6 +32,7 @@
 #include "util/doris_metrics.h"
 #include "util/key_util.h"
 #include "util/simd/bits.h"
+#include "vec/data_types/data_type_factory.hpp"
 #include "vec/data_types/data_type_number.h"
 #include "vec/exprs/vliteral.h"
 
@@ -808,20 +809,18 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
                 _segment->_tablet_schema->column(_segment->_tablet_schema->sequence_col_idx())
                         .length() +
                 1;
-        MemPool pool;
+        auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
+                _segment->_pk_index_reader->type_info()->type(), 1, 0);
+        auto index_column = index_type->create_column();
         size_t num_to_read = 1;
-        std::unique_ptr<ColumnVectorBatch> cvb;
-        RETURN_IF_ERROR(ColumnVectorBatch::create(
-                num_to_read, false, _segment->_pk_index_reader->type_info(), nullptr, &cvb));
-        ColumnBlock block(cvb.get(), &pool);
-        ColumnBlockView column_block_view(&block);
         size_t num_read = num_to_read;
-        RETURN_IF_ERROR(index_iterator->next_batch(&num_read, &column_block_view));
+        RETURN_IF_ERROR(index_iterator->next_batch(&num_read, index_column));
         DCHECK(num_to_read == num_read);
 
-        const Slice* sought_key = reinterpret_cast<const Slice*>(cvb->cell_ptr(0));
+        Slice sought_key =
+                Slice(index_column->get_data_at(0).data, index_column->get_data_at(0).size);
         Slice sought_key_without_seq =
-                Slice(sought_key->get_data(), sought_key->get_size() - seq_col_length);
+                Slice(sought_key.get_data(), sought_key.get_size() - seq_col_length);
 
         // compare key
         if (Slice(index_key).compare(sought_key_without_seq) == 0) {
diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp
index c6ed824b4d..ea21db3fbe 100644
--- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp
+++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp
@@ -17,8 +17,6 @@
 
 #include "olap/rowset/segment_v2/zone_map_index.h"
 
-#include "olap/column_block.h"
-#include "olap/olap_define.h"
 #include "olap/rowset/segment_v2/encoding_info.h"
 #include "olap/rowset/segment_v2/indexed_column_reader.h"
 #include "olap/rowset/segment_v2/indexed_column_writer.h"
@@ -144,19 +142,18 @@ Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
     // read and cache all page zone maps
     for (int i = 0; i < reader.num_values(); ++i) {
         size_t num_to_read = 1;
-        std::unique_ptr<ColumnVectorBatch> cvb;
-        RETURN_IF_ERROR(
-                ColumnVectorBatch::create(num_to_read, false, reader.type_info(), nullptr, &cvb));
-        ColumnBlock block(cvb.get(), &pool);
-        ColumnBlockView column_block_view(&block);
+        // The reader's field type is OLAP_FIELD_TYPE_OBJECT, for which a ColumnBitmap
+        // would be created. What we actually need here is a ColumnString, so create
+        // one directly.
+        vectorized::MutableColumnPtr column = vectorized::ColumnString::create();
 
         RETURN_IF_ERROR(iter.seek_to_ordinal(i));
         size_t num_read = num_to_read;
-        RETURN_IF_ERROR(iter.next_batch(&num_read, &column_block_view));
+        RETURN_IF_ERROR(iter.next_batch(&num_read, column));
         DCHECK(num_to_read == num_read);
 
-        Slice* value = reinterpret_cast<Slice*>(cvb->data());
-        if (!_page_zone_maps[i].ParseFromArray(value->data, value->size)) {
+        if (!_page_zone_maps[i].ParseFromArray(column->get_data_at(0).data,
+                                               column->get_data_at(0).size)) {
             return Status::Corruption("Failed to parse zone map");
         }
         pool.clear();
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index d1c3ac4029..f54a8f567c 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -65,6 +65,7 @@
 #include "util/scoped_cleanup.h"
 #include "util/time.h"
 #include "util/trace.h"
+#include "vec/data_types/data_type_factory.hpp"
 
 namespace doris {
 using namespace ErrorCode;
@@ -1926,36 +1927,34 @@ Status Tablet::calc_delete_bitmap(RowsetId rowset_id,
         bool exact_match = false;
         std::string last_key;
         int batch_size = 1024;
-        MemPool pool;
         while (remaining > 0) {
             std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
             RETURN_IF_ERROR(pk_idx->new_iterator(&iter));
 
             size_t num_to_read = std::min(batch_size, remaining);
-            std::unique_ptr<ColumnVectorBatch> cvb;
-            RETURN_IF_ERROR(ColumnVectorBatch::create(num_to_read, false, pk_idx->type_info(),
-                                                      nullptr, &cvb));
-            ColumnBlock block(cvb.get(), &pool);
-            ColumnBlockView column_block_view(&block);
+            auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
+                    pk_idx->type_info()->type(), 1, 0);
+            auto index_column = index_type->create_column();
             Slice last_key_slice(last_key);
             RETURN_IF_ERROR(iter->seek_at_or_after(&last_key_slice, &exact_match));
 
             size_t num_read = num_to_read;
-            RETURN_IF_ERROR(iter->next_batch(&num_read, &column_block_view));
+            RETURN_IF_ERROR(iter->next_batch(&num_read, index_column));
             DCHECK(num_to_read == num_read);
-            last_key = (reinterpret_cast<const Slice*>(cvb->cell_ptr(num_read - 1)))->to_string();
+            last_key = index_column->get_data_at(num_read - 1).to_string();
 
             // exclude last_key, last_key will be read in next batch.
             if (num_read == batch_size && num_read != remaining) {
                 num_read -= 1;
             }
             for (size_t i = 0; i < num_read; i++) {
-                const Slice* key = reinterpret_cast<const Slice*>(cvb->cell_ptr(i));
+                Slice key =
+                        Slice(index_column->get_data_at(i).data, index_column->get_data_at(i).size);
                 RowLocation loc;
                 // first check if exist in pre segment
                 if (check_pre_segments) {
-                    auto st = _check_pk_in_pre_segments(rowset_id, pre_segments, *key,
-                                                        dummy_version, delete_bitmap, &loc);
+                    auto st = _check_pk_in_pre_segments(rowset_id, pre_segments, key, dummy_version,
+                                                        delete_bitmap, &loc);
                     if (st.ok()) {
                         delete_bitmap->add({rowset_id, loc.segment_id, dummy_version.first},
                                            loc.row_id);
@@ -1969,7 +1968,7 @@ Status Tablet::calc_delete_bitmap(RowsetId rowset_id,
                 }
 
                 if (specified_rowset_ids != nullptr && !specified_rowset_ids->empty()) {
-                    auto st = lookup_row_key(*key, specified_rowset_ids, &loc,
+                    auto st = lookup_row_key(key, specified_rowset_ids, &loc,
                                              dummy_version.first - 1);
                     CHECK(st.ok() || st.is<NOT_FOUND>() || st.is<ALREADY_EXIST>());
                     if (st.is<NOT_FOUND>()) {
diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp
index 696ab9ee54..ed270b40ea 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -124,6 +124,10 @@ public:
         return create_data_type(TypeDescriptor::from_thrift(raw_type), raw_type.is_nullable);
     }
 
+    DataTypePtr create_data_type(const FieldType& type, int precision, int scale) {
+        return _create_primitive_data_type(type, precision, scale); // public pass-through
+    }
+
 private:
     DataTypePtr _create_primitive_data_type(const FieldType& type, int precision, int scale) const;
 
diff --git a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
index 6db57f19a7..f7d6b63ce1 100644
--- a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
@@ -29,6 +29,7 @@
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
 #include "util/debug_util.h"
+#include "vec/data_types/data_type_factory.hpp"
 
 namespace doris {
 using namespace ErrorCode;
@@ -150,6 +151,118 @@ public:
         EXPECT_TRUE(!exact_match);
     }
 
+    // Round-trips 38 sorted strings through the prefix page and reads them back via columns.
+    void test_encode_and_decode_vec() {
+        std::vector<std::string> test_data;
+        for (int i = 1000; i < 1038; ++i) {
+            test_data.emplace_back(std::to_string(i));
+        }
+        std::vector<Slice> slices;
+        for (const auto& data : test_data) {
+            slices.emplace_back(Slice(data));
+        }
+        // encode
+        PageBuilderOptions options;
+        BinaryPrefixPageBuilder page_builder(options);
+        size_t count = slices.size();
+        const Slice* ptr = &slices[0];
+        Status ret = page_builder.add(reinterpret_cast<const uint8_t*>(ptr), &count);
+        EXPECT_TRUE(ret.ok());
+
+        OwnedSlice dict_slice = page_builder.finish();
+        EXPECT_EQ(slices.size(), page_builder.count());
+        EXPECT_FALSE(page_builder.is_page_full());
+
+        // check first value and last value
+        Slice first_value;
+        page_builder.get_first_value(&first_value);
+        EXPECT_EQ(slices[0], first_value);
+        Slice last_value;
+        page_builder.get_last_value(&last_value);
+        EXPECT_EQ(slices[count - 1], last_value);
+
+        PageDecoderOptions dict_decoder_options;
+        std::unique_ptr<BinaryPrefixPageDecoder> page_decoder(
+                new BinaryPrefixPageDecoder(dict_slice.slice(), dict_decoder_options));
+        ret = page_decoder->init();
+        EXPECT_TRUE(ret.ok());
+        // because every slice is unique
+        EXPECT_EQ(slices.size(), page_decoder->count());
+        auto type_info = get_scalar_type_info(OLAP_FIELD_TYPE_VARCHAR);
+        size_t size = slices.size();
+
+        {
+            // read the whole page in one call and check every value
+            auto data_type = vectorized::DataTypeFactory::instance().create_data_type(
+                    type_info->type(), 1, 0);
+            auto column = data_type->create_column();
+
+            ret = page_decoder->next_batch(&size, column);
+            EXPECT_TRUE(ret.ok());
+            EXPECT_EQ(slices.size(), size);
+            for (int i = 1000; i < 1038; ++i) {
+                EXPECT_EQ(std::to_string(i), column->get_data_at(i - 1000).to_string());
+            }
+        }
+
+        {
+            ret = page_decoder->seek_to_position_in_page(0);
+            EXPECT_TRUE(ret.ok());
+            int n = 0;
+            while (true) {
+                // read in batches of 6 into a fresh column until the decoder reports 0
+                auto data_type = vectorized::DataTypeFactory::instance().create_data_type(
+                        type_info->type(), 1, 0);
+                auto column = data_type->create_column();
+                size_t fetched = 6;
+                ret = page_decoder->next_batch(&fetched, column);
+                EXPECT_TRUE(ret.ok());
+                if (fetched == 0) {
+                    break;
+                }
+                for (size_t i = 0; i < fetched; ++i) {
+                    EXPECT_EQ(std::to_string(1000 + 6 * n + i), column->get_data_at(i).to_string());
+                }
+                n++;
+            }
+        }
+
+        {
+            auto data_type = vectorized::DataTypeFactory::instance().create_data_type(
+                    type_info->type(), 1, 0);
+            auto column = data_type->create_column();
+            ret = page_decoder->seek_to_position_in_page(15);
+            EXPECT_TRUE(ret.ok());
+
+            ret = page_decoder->next_batch(&size, column); // size is still 38 from the first read
+            EXPECT_TRUE(ret.ok());
+            EXPECT_EQ(23, size);
+            for (int i = 1015; i < 1038; ++i) {
+                EXPECT_EQ(std::to_string(i), column->get_data_at(i - 1015).to_string());
+            }
+        }
+
+        Slice v1 = Slice("1039");
+        bool exact_match = false;
+        ret = page_decoder->seek_at_or_after_value(&v1, &exact_match);
+        EXPECT_TRUE(ret.is<NOT_FOUND>());
+
+        Slice v2 = Slice("1000");
+        ret = page_decoder->seek_at_or_after_value(&v2, &exact_match);
+        EXPECT_TRUE(ret.ok());
+        EXPECT_TRUE(exact_match);
+
+        Slice v3 = Slice("1037");
+        ret = page_decoder->seek_at_or_after_value(&v3, &exact_match);
+        EXPECT_TRUE(ret.ok());
+        EXPECT_TRUE(exact_match);
+
+        Slice v4 = Slice("100");
+        ret = page_decoder->seek_at_or_after_value(&v4, &exact_match);
+        EXPECT_TRUE(ret.ok());
+        EXPECT_TRUE(!exact_match);
+    }
+
     void test_encode_and_decode2() {
         std::vector<std::string> test_data;
         test_data.push_back("ab");
@@ -185,6 +298,7 @@ public:
 
 TEST_F(BinaryPrefixPageTest, TestEncodeAndDecode) {
     test_encode_and_decode();
+    test_encode_and_decode_vec(); // same round trip through the MutableColumnPtr overload
 }
 
 TEST_F(BinaryPrefixPageTest, TestEncodeAndDecode2) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org