You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/09/24 06:39:15 UTC
[incubator-doris] branch master updated: Add default value column iterator #1834 (#1835)
This is an automated email from the ASF dual-hosted git repository.
zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8d0fee7 Add default value column iterator #1834 (#1835)
8d0fee7 is described below
commit 8d0fee7e6427aad17d574a7b3b56bb3ec5f26427
Author: kangpinghuang <ka...@126.com>
AuthorDate: Tue Sep 24 14:39:10 2019 +0800
Add default value column iterator #1834 (#1835)
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 30 +++++
be/src/olap/rowset/segment_v2/column_reader.h | 41 +++++++
be/src/olap/rowset/segment_v2/segment.cpp | 12 +-
be/test/olap/rowset/segment_v2/segment_test.cpp | 156 ++++++++++++++++++++++++
be/test/olap/tablet_schema_helper.h | 6 +-
5 files changed, 242 insertions(+), 3 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 40ad287..a34e21d 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -431,5 +431,35 @@ Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, Pars
return Status::OK();
}
+// Prepares the value this iterator hands out for every row.
+//
+// A default of "NULL" on a nullable column marks every row as null. Any other
+// default string is parsed once by the column type's TypeInfo into _mem_value;
+// _value_size records how many bytes that value occupies.
+//
+// Returns Status::InternalError when the default string cannot be converted to
+// a value of the column type, Status::OK() otherwise.
+Status DefaultValueColumnIterator::init() {
+    // be consistent with segment v1
+    if (_default_value == "NULL" && _is_nullable) {
+        _is_default_value_null = true;
+        return Status::OK();
+    }
+
+    TypeInfo* type_info = get_type_info(_type);
+    _value_size = type_info->size();
+    // resize() rather than reserve(): the parsed value is written through data(), so those
+    // bytes must belong to the buffer's contents (size()), not only to its spare capacity.
+    _mem_value.resize(_value_size);
+    OLAPStatus s = type_info->from_string(_mem_value.data(), _default_value);
+    if (s != OLAP_SUCCESS) {
+        return Status::InternalError("get value of type from default value failed.");
+    }
+    return Status::OK();
+}
+
+// Fills rows [0, *n) of dst with the column's default value.
+//
+// When the default is null, each row's null flag is set; otherwise the parsed
+// default bytes are copied into every cell and the null flag is cleared.
+// *n is not modified. Always returns Status::OK().
+Status DefaultValueColumnIterator::next_batch(size_t* n, ColumnBlock* dst) {
+    // Unsigned index: *n is a size_t, so an int counter would mix signedness in the comparison.
+    const size_t num_rows = *n;
+    if (_is_default_value_null) {
+        for (size_t i = 0; i < num_rows; ++i) {
+            dst->set_is_null(i, true);
+        }
+    } else {
+        for (size_t i = 0; i < num_rows; ++i) {
+            memcpy(dst->mutable_cell_ptr(i), _mem_value.data(), _value_size);
+            dst->set_is_null(i, false);
+        }
+    }
+    // Advance the position by the rows just produced so that get_current_ordinal()
+    // does not keep reporting the last seek target.
+    _current_rowid += static_cast<rowid_t>(num_rows);
+    return Status::OK();
+}
+
}
}
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index b41e8eb..6d425cb 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -24,6 +24,7 @@
#include "common/status.h" // for Status
#include "gen_cpp/segment_v2.pb.h" // for ColumnMetaPB
#include "olap/olap_cond.h" // for CondColumn
+#include "olap/tablet_schema.h"
#include "olap/rowset/segment_v2/common.h" // for rowid_t
#include "olap/rowset/segment_v2/ordinal_page_index.h" // for OrdinalPageIndexIterator
#include "olap/rowset/segment_v2/column_zone_map.h" // for ColumnZoneMap
@@ -117,6 +118,8 @@ public:
ColumnIterator() { }
virtual ~ColumnIterator() { }
+ virtual Status init() { return Status::OK(); }
+
// Seek to the first entry in the column.
virtual Status seek_to_first() = 0;
@@ -194,5 +197,43 @@ private:
rowid_t _current_rowid = 0;
};
+// Column iterator used to read a default value column. It keeps the
+// default-value state declared below together with a row position and
+// overrides the ColumnIterator interface on top of them.
+class DefaultValueColumnIterator : public ColumnIterator {
+public:
+    // Stores the arguments as given; the default string is interpreted by init().
+    DefaultValueColumnIterator(const std::string& default_value, bool is_nullable, FieldType type)
+            : _default_value(default_value), _is_nullable(is_nullable), _type(type) { }
+
+    Status init() override;
+
+    // Seeking only records the requested position.
+    Status seek_to_first() override {
+        _current_rowid = 0;
+        return Status::OK();
+    }
+
+    Status seek_to_ordinal(rowid_t ord_idx) override {
+        _current_rowid = ord_idx;
+        return Status::OK();
+    }
+
+    Status next_batch(size_t* n, ColumnBlock* dst) override;
+
+    rowid_t get_current_ordinal() const override { return _current_rowid; }
+
+private:
+    // Constructor inputs.
+    std::string _default_value;
+    bool _is_nullable;
+    FieldType _type;
+
+    // State filled in by init().
+    bool _is_default_value_null = false;
+    size_t _value_size = 0;
+    faststring _mem_value;
+
+    // Current row position, as reported by get_current_ordinal().
+    rowid_t _current_rowid = 0;
+};
+
}
}
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 86d4200..1d59a70 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -153,8 +153,16 @@ Status Segment::_initial_column_readers() {
Status Segment::new_column_iterator(uint32_t cid, ColumnIterator** iter) {
if (_column_readers[cid] == nullptr) {
- // TODO(zc): create a DefaultValueIterator for this column
- // create
+ const TabletColumn& tablet_column = _tablet_schema->column(cid); // no reader for this column id: consult the schema's column definition instead
+ if (!tablet_column.has_default_value()) { // without a default value there is nothing to build rows from
+ return Status::InternalError("invalid nonexistent column without default value.");
+ }
+ std::unique_ptr<DefaultValueColumnIterator> default_value_iter(
+ new DefaultValueColumnIterator(tablet_column.default_value(),
+ tablet_column.is_nullable(), tablet_column.type()));
+ RETURN_IF_ERROR(default_value_iter->init()); // presumably returns early on a non-OK status; the unique_ptr then frees the iterator
+ *iter = default_value_iter.release(); // ownership passes to the caller as a raw pointer
+ return Status::OK();
}
return _column_readers[cid]->new_iterator(iter);
}
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index 730bfbb..80ce4ff 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -450,6 +450,162 @@ TEST_F(SegmentReaderWriterTest, estimate_segment_size) {
FileUtils::remove_all(dname);
}
+TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) { // writes a 4-column segment, then reads it back through 5-column schemas whose fifth column has only a default value
+ size_t num_rows_per_block = 10;
+
+ std::shared_ptr<TabletSchema> tablet_schema(new TabletSchema());
+ tablet_schema->_num_columns = 4;
+ tablet_schema->_num_key_columns = 3;
+ tablet_schema->_num_short_key_columns = 2;
+ tablet_schema->_num_rows_per_row_block = num_rows_per_block;
+ tablet_schema->_cols.push_back(create_int_key(1));
+ tablet_schema->_cols.push_back(create_int_key(2));
+ tablet_schema->_cols.push_back(create_int_key(3));
+ tablet_schema->_cols.push_back(create_int_value(4));
+
+ // segment write
+ std::string dname = "./ut_dir/segment_test";
+ FileUtils::create_dir(dname);
+
+ SegmentWriterOptions opts;
+ opts.num_rows_per_block = num_rows_per_block;
+
+ std::string fname = dname + "/int_case";
+ SegmentWriter writer(fname, 0, tablet_schema.get(), opts);
+ auto st = writer.init(10);
+ ASSERT_TRUE(st.ok());
+
+ RowCursor row;
+ auto olap_st = row.init(*tablet_schema);
+ ASSERT_EQ(OLAP_SUCCESS, olap_st);
+
+ // 0, 1, 2, 3
+ // 10, 11, 12, 13
+ // 20, 21, 22, 23
+ for (int i = 0; i < 4096; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ auto cell = row.cell(j);
+ cell.set_not_null();
+ *(int*)cell.mutable_cell_ptr() = i * 10 + j; // row i, column j holds i * 10 + j
+ }
+ writer.append_row(row);
+ }
+
+ uint64_t file_size = 0;
+ st = writer.finalize(&file_size);
+ ASSERT_TRUE(st.ok());
+
+ // add a column with null default value
+ {
+ std::shared_ptr<TabletSchema> new_tablet_schema_1(new TabletSchema());
+ new_tablet_schema_1->_num_columns = 5;
+ new_tablet_schema_1->_num_key_columns = 3;
+ new_tablet_schema_1->_num_short_key_columns = 2;
+ new_tablet_schema_1->_num_rows_per_row_block = num_rows_per_block;
+ new_tablet_schema_1->_cols.push_back(create_int_key(1));
+ new_tablet_schema_1->_cols.push_back(create_int_key(2));
+ new_tablet_schema_1->_cols.push_back(create_int_key(3));
+ new_tablet_schema_1->_cols.push_back(create_int_value(4));
+ new_tablet_schema_1->_cols.push_back(
+ create_int_value(5, OLAP_FIELD_AGGREGATION_SUM, true, "NULL")); // nullable fifth column whose default value is the string "NULL"
+
+ std::shared_ptr<Segment> segment(new Segment(fname, 0, new_tablet_schema_1.get())); // same file as written above, opened with the 5-column schema
+ st = segment->open();
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(4096, segment->num_rows());
+ Schema schema(*new_tablet_schema_1);
+ // scan all rows
+ {
+ StorageReadOptions read_opts;
+ std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
+
+ RowBlockV2 block(schema, 1024);
+
+ int left = 4096;
+
+ int rowid = 0;
+ while (left > 0) {
+ int rows_read = left > 1024 ? 1024 : left; // expected batch size, capped at the block capacity of 1024
+ block.clear();
+ st = iter->next_batch(&block);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(rows_read, block.num_rows());
+ left -= rows_read;
+
+ for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
+ auto cid = block.schema()->column_ids()[j];
+ auto column_block = block.column_block(j);
+ for (int i = 0; i < rows_read; ++i) {
+ int rid = rowid + i;
+ if (cid == 4) { // index of the added fifth column, which was never written to the file
+ ASSERT_TRUE(BitmapTest(column_block.null_bitmap(), i)); // every row of it must be flagged null
+ } else {
+ ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
+ ASSERT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
+ }
+ }
+ }
+ rowid += rows_read;
+ }
+ }
+ }
+
+ // add a column with non-null default value
+ {
+ std::shared_ptr<TabletSchema> new_tablet_schema_1(new TabletSchema());
+ new_tablet_schema_1->_num_columns = 5;
+ new_tablet_schema_1->_num_key_columns = 3;
+ new_tablet_schema_1->_num_short_key_columns = 2;
+ new_tablet_schema_1->_num_rows_per_row_block = num_rows_per_block;
+ new_tablet_schema_1->_cols.push_back(create_int_key(1));
+ new_tablet_schema_1->_cols.push_back(create_int_key(2));
+ new_tablet_schema_1->_cols.push_back(create_int_key(3));
+ new_tablet_schema_1->_cols.push_back(create_int_value(4));
+ new_tablet_schema_1->_cols.push_back(create_int_value(5, OLAP_FIELD_AGGREGATION_SUM, true, "10086")); // same layout as the previous case, but the default value is "10086"
+
+ std::shared_ptr<Segment> segment(new Segment(fname, 0, new_tablet_schema_1.get()));
+ st = segment->open();
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(4096, segment->num_rows());
+ Schema schema(*new_tablet_schema_1);
+ // scan all rows
+ {
+ StorageReadOptions read_opts;
+ std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
+
+ RowBlockV2 block(schema, 1024);
+
+ int left = 4096;
+
+ int rowid = 0;
+ while (left > 0) {
+ int rows_read = left > 1024 ? 1024 : left;
+ block.clear();
+ st = iter->next_batch(&block);
+ ASSERT_TRUE(st.ok());
+ ASSERT_EQ(rows_read, block.num_rows());
+ left -= rows_read;
+
+ for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
+ auto cid = block.schema()->column_ids()[j];
+ auto column_block = block.column_block(j);
+ for (int i = 0; i < rows_read; ++i) {
+ int rid = rowid + i;
+ if (cid == 4) { // the added fifth column again
+ ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
+ ASSERT_EQ(10086, *(int*)column_block.cell_ptr(i)); // every row must carry the parsed default
+ } else {
+ ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
+ ASSERT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
+ }
+ }
+ }
+ rowid += rows_read;
+ }
+ }
+ }
+}
+
}
}
diff --git a/be/test/olap/tablet_schema_helper.h b/be/test/olap/tablet_schema_helper.h
index 01f43bc..8e22b33 100644
--- a/be/test/olap/tablet_schema_helper.h
+++ b/be/test/olap/tablet_schema_helper.h
@@ -38,7 +38,7 @@ TabletColumn create_int_key(int32_t id, bool is_nullable = true) {
TabletColumn create_int_value(
int32_t id,
FieldAggregationMethod agg_method = OLAP_FIELD_AGGREGATION_SUM,
- bool is_nullable = true) {
+ bool is_nullable = true, const std::string default_value = "") {
TabletColumn column;
column._unique_id = id;
column._col_name = std::to_string(id);
@@ -48,6 +48,10 @@ TabletColumn create_int_value(
column._is_nullable = is_nullable;
column._length = 4;
column._index_length = 4;
+ if (default_value != "") {
+ column._has_default_value = true;
+ column._default_value = default_value;
+ }
return column;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org