You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/09/24 06:39:15 UTC

[incubator-doris] branch master updated: Add default value column iterator #1834 (#1835)

This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d0fee7  Add default value column iterator #1834 (#1835)
8d0fee7 is described below

commit 8d0fee7e6427aad17d574a7b3b56bb3ec5f26427
Author: kangpinghuang <ka...@126.com>
AuthorDate: Tue Sep 24 14:39:10 2019 +0800

    Add default value column iterator #1834 (#1835)
---
 be/src/olap/rowset/segment_v2/column_reader.cpp |  30 +++++
 be/src/olap/rowset/segment_v2/column_reader.h   |  41 +++++++
 be/src/olap/rowset/segment_v2/segment.cpp       |  12 +-
 be/test/olap/rowset/segment_v2/segment_test.cpp | 156 ++++++++++++++++++++++++
 be/test/olap/tablet_schema_helper.h             |   6 +-
 5 files changed, 242 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 40ad287..a34e21d 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -431,5 +431,35 @@ Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, Pars
     return Status::OK();
 }
 
+Status DefaultValueColumnIterator::init() {
+    // Resolve the default once; "NULL" on a nullable column is a null default (as in segment v1).
+    if (_default_value == "NULL" && _is_nullable) {
+        _is_default_value_null = true;
+        return Status::OK();
+    }
+    TypeInfo* type_info = get_type_info(_type);
+    _value_size = type_info->size();
+    // resize, not reserve: the parsed value must lie within the buffer's size, not just capacity
+    _mem_value.resize(_value_size);
+    if (type_info->from_string(_mem_value.data(), _default_value) != OLAP_SUCCESS) {
+        return Status::InternalError("get value of type from default value failed.");
+    }
+    return Status::OK();
+}
+
+Status DefaultValueColumnIterator::next_batch(size_t* n, ColumnBlock* dst) {
+    // Fills the first *n rows of dst with the default: every row is marked
+    // null for a null default, otherwise each cell gets a copy of the parsed
+    // value. *n is left unchanged, so all requested rows are always produced.
+    for (size_t i = 0; i < *n; ++i) {
+        dst->set_is_null(i, _is_default_value_null);
+        if (!_is_default_value_null) {
+            memcpy(dst->mutable_cell_ptr(i), _mem_value.data(), _value_size);
+        }
+    }
+    _current_rowid += *n; // keep get_current_ordinal() in step with rows read
+    return Status::OK();
+}
+
 }
 }
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index b41e8eb..6d425cb 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -24,6 +24,7 @@
 #include "common/status.h" // for Status
 #include "gen_cpp/segment_v2.pb.h" // for ColumnMetaPB
 #include "olap/olap_cond.h" // for CondColumn
+#include "olap/tablet_schema.h"
 #include "olap/rowset/segment_v2/common.h" // for rowid_t
 #include "olap/rowset/segment_v2/ordinal_page_index.h" // for OrdinalPageIndexIterator
 #include "olap/rowset/segment_v2/column_zone_map.h" // for ColumnZoneMap
@@ -117,6 +118,8 @@ public:
     ColumnIterator() { }
     virtual ~ColumnIterator() { }
 
+    virtual Status init() { return Status::OK(); }
+
     // Seek to the first entry in the column.
     virtual Status seek_to_first() = 0;
 
@@ -194,5 +197,43 @@ private:
     rowid_t _current_rowid = 0;
 };
 
+// Iterator for a column that is served from its default value
+class DefaultValueColumnIterator : public ColumnIterator {
+public:
+    DefaultValueColumnIterator(const std::string& default_value, bool is_nullable, FieldType type)
+        : _default_value(default_value), _is_nullable(is_nullable), _type(type) { }
+
+    // Prepares the value handed out by next_batch(); see column_reader.cpp.
+    Status init() override;
+
+    // Seeking only records the requested position.
+    Status seek_to_first() override {
+        _current_rowid = 0;
+        return Status::OK();
+    }
+
+    Status seek_to_ordinal(rowid_t ord_idx) override {
+        _current_rowid = ord_idx;
+        return Status::OK();
+    }
+
+    Status next_batch(size_t* n, ColumnBlock* dst) override;
+
+    rowid_t get_current_ordinal() const override { return _current_rowid; }
+
+private:
+    // column default as text, plus the column properties init() consults
+    std::string _default_value;
+    bool _is_nullable;
+    FieldType _type;
+    // set by init(): whether the default is null, else its size and bytes
+    bool _is_default_value_null = false;
+    size_t _value_size = 0;
+    faststring _mem_value;
+
+    // current rowid
+    rowid_t _current_rowid = 0;
+};
+
 }
 }
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 86d4200..1d59a70 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -153,8 +153,16 @@ Status Segment::_initial_column_readers() {
 
 Status Segment::new_column_iterator(uint32_t cid, ColumnIterator** iter) {
     if (_column_readers[cid] == nullptr) {
-        // TODO(zc): create a DefaultValueIterator for this column
-        // create
+        // no reader for this column: fall back to an iterator over its default value
+        const TabletColumn& column = _tablet_schema->column(cid);
+        if (!column.has_default_value()) {
+            return Status::InternalError("invalid nonexistent column without default value.");
+        }
+        std::unique_ptr<DefaultValueColumnIterator> fallback(new DefaultValueColumnIterator(
+                column.default_value(), column.is_nullable(), column.type()));
+        RETURN_IF_ERROR(fallback->init());
+        *iter = fallback.release();
+        return Status::OK();
     }
     return _column_readers[cid]->new_iterator(iter);
 }
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index 730bfbb..80ce4ff 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -450,6 +450,162 @@ TEST_F(SegmentReaderWriterTest, estimate_segment_size) {
     FileUtils::remove_all(dname);
 }
 
+TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
+    size_t num_rows_per_block = 10;
+
+    std::shared_ptr<TabletSchema> tablet_schema(new TabletSchema());
+    tablet_schema->_num_columns = 4;
+    tablet_schema->_num_key_columns = 3;
+    tablet_schema->_num_short_key_columns = 2;
+    tablet_schema->_num_rows_per_row_block = num_rows_per_block;
+    tablet_schema->_cols.push_back(create_int_key(1));
+    tablet_schema->_cols.push_back(create_int_key(2));
+    tablet_schema->_cols.push_back(create_int_key(3));
+    tablet_schema->_cols.push_back(create_int_value(4));
+
+    // segment write: the file gets 4 columns and is later read back through 5-column schemas
+    std::string dname = "./ut_dir/segment_test";
+    FileUtils::create_dir(dname);
+
+    SegmentWriterOptions opts;
+    opts.num_rows_per_block = num_rows_per_block;
+
+    std::string fname = dname + "/int_case";
+    SegmentWriter writer(fname, 0, tablet_schema.get(), opts);
+    auto st = writer.init(10);
+    ASSERT_TRUE(st.ok());
+
+    RowCursor row;
+    auto olap_st = row.init(*tablet_schema);
+    ASSERT_EQ(OLAP_SUCCESS, olap_st);
+
+    // 4096 rows are written; the cell in row i, column j holds i * 10 + j:
+    //   0, 1, 2, 3
+    //   10, 11, 12, 13  (then 20, 21, 22, 23, ...)
+    for (int i = 0; i < 4096; ++i) {
+        for (int j = 0; j < 4; ++j) {
+            auto cell = row.cell(j);
+            cell.set_not_null();
+            *(int*)cell.mutable_cell_ptr() = i * 10 + j;
+        }
+        writer.append_row(row);
+    }
+
+    uint64_t file_size = 0;
+    st = writer.finalize(&file_size);
+    ASSERT_TRUE(st.ok());
+
+    // reopen the segment with a schema that adds a fifth, nullable column whose default is "NULL"
+    {
+        std::shared_ptr<TabletSchema> new_tablet_schema_1(new TabletSchema());
+        new_tablet_schema_1->_num_columns = 5;
+        new_tablet_schema_1->_num_key_columns = 3;
+        new_tablet_schema_1->_num_short_key_columns = 2;
+        new_tablet_schema_1->_num_rows_per_row_block = num_rows_per_block;
+        new_tablet_schema_1->_cols.push_back(create_int_key(1));
+        new_tablet_schema_1->_cols.push_back(create_int_key(2));
+        new_tablet_schema_1->_cols.push_back(create_int_key(3));
+        new_tablet_schema_1->_cols.push_back(create_int_value(4));
+        new_tablet_schema_1->_cols.push_back(
+            create_int_value(5, OLAP_FIELD_AGGREGATION_SUM, true, "NULL"));
+
+        std::shared_ptr<Segment> segment(new Segment(fname, 0, new_tablet_schema_1.get()));
+        st = segment->open();
+        ASSERT_TRUE(st.ok());
+        ASSERT_EQ(4096, segment->num_rows());
+        Schema schema(*new_tablet_schema_1);
+        // scan all rows in batches of up to 1024: the added column (cid 4) must be null, the rest as written
+        {
+            StorageReadOptions read_opts;
+            std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
+
+            RowBlockV2 block(schema, 1024);
+
+            int left = 4096;
+
+            int rowid = 0;
+            while (left > 0) {
+                int rows_read = left > 1024 ? 1024 : left;
+                block.clear();
+                st = iter->next_batch(&block);
+                ASSERT_TRUE(st.ok());
+                ASSERT_EQ(rows_read, block.num_rows());
+                left -= rows_read;
+
+                for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
+                    auto cid = block.schema()->column_ids()[j];
+                    auto column_block = block.column_block(j);
+                    for (int i = 0; i < rows_read; ++i) {
+                        int rid = rowid + i;
+                        if (cid == 4) {
+                            ASSERT_TRUE(BitmapTest(column_block.null_bitmap(), i));
+                        } else {
+                            ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
+                            ASSERT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
+                        }
+                    }
+                }
+                rowid += rows_read;
+            }
+        }
+    }
+
+    // reopen the segment with a schema whose added fifth column has the default "10086"
+    {
+        std::shared_ptr<TabletSchema> new_tablet_schema_1(new TabletSchema());
+        new_tablet_schema_1->_num_columns = 5;
+        new_tablet_schema_1->_num_key_columns = 3;
+        new_tablet_schema_1->_num_short_key_columns = 2;
+        new_tablet_schema_1->_num_rows_per_row_block = num_rows_per_block;
+        new_tablet_schema_1->_cols.push_back(create_int_key(1));
+        new_tablet_schema_1->_cols.push_back(create_int_key(2));
+        new_tablet_schema_1->_cols.push_back(create_int_key(3));
+        new_tablet_schema_1->_cols.push_back(create_int_value(4));
+        new_tablet_schema_1->_cols.push_back(create_int_value(5, OLAP_FIELD_AGGREGATION_SUM, true, "10086"));
+
+        std::shared_ptr<Segment> segment(new Segment(fname, 0, new_tablet_schema_1.get()));
+        st = segment->open();
+        ASSERT_TRUE(st.ok());
+        ASSERT_EQ(4096, segment->num_rows());
+        Schema schema(*new_tablet_schema_1);
+        // scan all rows: the added column (cid 4) must read 10086 and be non-null, the rest as written
+        {
+            StorageReadOptions read_opts;
+            std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
+
+            RowBlockV2 block(schema, 1024);
+
+            int left = 4096;
+
+            int rowid = 0;
+            while (left > 0) {
+                int rows_read = left > 1024 ? 1024 : left;
+                block.clear();
+                st = iter->next_batch(&block);
+                ASSERT_TRUE(st.ok());
+                ASSERT_EQ(rows_read, block.num_rows());
+                left -= rows_read;
+
+                for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
+                    auto cid = block.schema()->column_ids()[j];
+                    auto column_block = block.column_block(j);
+                    for (int i = 0; i < rows_read; ++i) {
+                        int rid = rowid + i;
+                        if (cid == 4) {
+                            ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
+                            ASSERT_EQ(10086, *(int*)column_block.cell_ptr(i));
+                        } else {
+                            ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
+                            ASSERT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
+                        }
+                    }
+                }
+                rowid += rows_read;
+            }
+        }
+    }
+}
+
 }
 }
 
diff --git a/be/test/olap/tablet_schema_helper.h b/be/test/olap/tablet_schema_helper.h
index 01f43bc..8e22b33 100644
--- a/be/test/olap/tablet_schema_helper.h
+++ b/be/test/olap/tablet_schema_helper.h
@@ -38,7 +38,7 @@ TabletColumn create_int_key(int32_t id, bool is_nullable = true) {
 TabletColumn create_int_value(
         int32_t id,
         FieldAggregationMethod agg_method = OLAP_FIELD_AGGREGATION_SUM,
-        bool is_nullable = true) {
+        bool is_nullable = true, const std::string default_value = "") {
     TabletColumn column;
     column._unique_id = id;
     column._col_name = std::to_string(id);
@@ -48,6 +48,10 @@ TabletColumn create_int_value(
     column._is_nullable = is_nullable;
     column._length = 4;
     column._index_length = 4;
+    if (default_value != "") {
+        column._has_default_value = true;
+        column._default_value = default_value;
+    }
     return column;
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org