You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by el...@apache.org on 2024/04/12 02:33:54 UTC
(doris) branch master updated: [Feature](Variant) support aggregation model for Variant type (#33493)
This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ad24d4ac3a1 [Feature](Variant) support aggregation model for Variant type (#33493)
ad24d4ac3a1 is described below
commit ad24d4ac3a1bff25437362b9a3584b608ee3a430
Author: lihangyu <15...@163.com>
AuthorDate: Fri Apr 12 10:33:47 2024 +0800
[Feature](Variant) support aggregation model for Variant type (#33493)
refactor: use `insert_from` to replace `replace_column_data` for variable-length columns
---
be/src/olap/tablet_schema.h | 21 +-
be/src/vec/columns/column_array.h | 16 +-
be/src/vec/columns/column_map.h | 18 +-
be/src/vec/columns/column_object.cpp | 21 +-
be/src/vec/columns/column_object.h | 10 +-
be/src/vec/columns/column_string.h | 29 +--
be/src/vec/columns/column_struct.h | 14 +-
be/src/vec/common/schema_util.cpp | 7 +-
be/src/vec/common/schema_util.h | 4 +-
be/src/vec/exec/scan/new_olap_scanner.cpp | 2 +-
be/src/vec/olap/block_reader.cpp | 4 +-
be/src/vec/olap/vertical_block_reader.cpp | 4 +-
regression-test/data/variant_p0/agg.out | 78 +++++++
.../data/variant_p0/compaction/test_compaction.out | 104 ++++++++++
regression-test/data/variant_p0/desc.out | 230 ++++++++++-----------
regression-test/suites/variant_p0/agg.groovy | 63 ++++++
.../variant_p0/compaction/test_compaction.groovy | 11 +-
17 files changed, 422 insertions(+), 214 deletions(-)
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index b5b8df730b3..61d972dadc6 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -127,7 +127,10 @@ public:
int frac() const { return _frac; }
inline bool visible() const { return _visible; }
- void set_aggregation_method(FieldAggregationMethod agg) { _aggregation = agg; }
+ void set_aggregation_method(FieldAggregationMethod agg) {
+ _aggregation = agg;
+ _aggregation_name = get_string_by_aggregation_type(agg);
+ }
/**
* Add a sub column.
@@ -426,6 +429,22 @@ public:
return str;
}
+ string dump_full_schema() const {
+ string str = "[";
+ for (auto p : _cols) {
+ if (str.size() > 1) {
+ str += ", ";
+ }
+ ColumnPB col_pb;
+ p->to_schema_pb(&col_pb);
+ str += "(";
+ str += col_pb.ShortDebugString();
+ str += ")";
+ }
+ str += "]";
+ return str;
+ }
+
vectorized::Block create_block_by_cids(const std::vector<uint32_t>& cids);
std::shared_ptr<TabletSchema> copy_without_extracted_columns();
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index 3176f7a45c6..118e7ab05c6 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -217,23 +217,11 @@ public:
const uint32_t* indices_end) override;
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
- DCHECK(size() > self_row);
- const auto& r = assert_cast<const ColumnArray&>(rhs);
- const size_t nested_row_size = r.size_at(row);
- const size_t r_nested_start_off = r.offset_at(row);
-
- // we should clear data because we call resize() before replace_column_data()
- if (self_row == 0) {
- data->clear();
- }
- get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size;
- // we make sure call replace_column_data() by order so, here we just insert data for nested
- data->insert_range_from(r.get_data(), r_nested_start_off, nested_row_size);
+ LOG(FATAL) << "Method replace_column_data is not supported for " << get_name();
}
void replace_column_data_default(size_t self_row = 0) override {
- DCHECK(size() > self_row);
- get_offsets()[self_row] = get_offsets()[self_row - 1];
+ LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name();
}
void clear() override {
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index ed5a692defd..2cdfcae8c73 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -135,25 +135,11 @@ public:
}
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
- DCHECK(size() > self_row);
- const auto& r = assert_cast<const ColumnMap&>(rhs);
- const size_t nested_row_size = r.size_at(row);
- const size_t r_key_nested_start_off = r.offset_at(row);
- const size_t r_val_nested_start_off = r.offset_at(row);
-
- if (self_row == 0) {
- keys_column->clear();
- values_column->clear();
- }
- get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size;
- // here we use batch size to avoid many virtual call in nested column
- keys_column->insert_range_from(r.get_keys(), r_key_nested_start_off, nested_row_size);
- values_column->insert_range_from(r.get_values(), r_val_nested_start_off, nested_row_size);
+ LOG(FATAL) << "Method replace_column_data is not supported for " << get_name();
}
void replace_column_data_default(size_t self_row = 0) override {
- DCHECK(size() > self_row);
- get_offsets()[self_row] = get_offsets()[self_row - 1];
+ LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name();
}
ColumnArray::Offsets64& ALWAYS_INLINE get_offsets() {
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index b88887b580f..c596717194f 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -732,18 +732,7 @@ Field ColumnObject::operator[](size_t n) const {
}
void ColumnObject::get(size_t n, Field& res) const {
- if (!is_finalized()) {
- const_cast<ColumnObject*>(this)->finalize();
- }
- auto& map = res.get<VariantMap&>();
- for (const auto& entry : subcolumns) {
- auto it = map.try_emplace(entry->path.get_path()).first;
- if (WhichDataType(remove_nullable(entry->data.data_types.back())).is_json()) {
- // JsonbFiled is special case
- it->second = JsonbField();
- }
- entry->data.data.back()->get(n, it->second);
- }
+ res = (*this)[n];
}
Status ColumnObject::try_insert_indices_from(const IColumn& src, const int* indices_begin,
@@ -1513,4 +1502,12 @@ Status ColumnObject::sanitize() const {
return Status::OK();
}
+void ColumnObject::replace_column_data(const IColumn& col, size_t row, size_t self_row) {
+ LOG(FATAL) << "Method replace_column_data is not supported for " << get_name();
+}
+
+void ColumnObject::replace_column_data_default(size_t self_row) {
+ LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name();
+}
+
} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index a940b4a8811..26b2c66a755 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -456,13 +456,11 @@ public:
LOG(FATAL) << "should not call the method in column object";
}
- void replace_column_data(const IColumn&, size_t row, size_t self_row) override {
- LOG(FATAL) << "should not call the method in column object";
- }
+ bool is_variable_length() const override { return true; }
- void replace_column_data_default(size_t self_row) override {
- LOG(FATAL) << "should not call the method in column object";
- }
+ void replace_column_data(const IColumn&, size_t row, size_t self_row) override;
+
+ void replace_column_data_default(size_t self_row) override;
void get_indices_of_non_default_rows(Offsets64&, size_t, size_t) const override {
LOG(FATAL) << "should not call the method in column object";
diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h
index b858ab86bf3..405ada3b48d 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -545,37 +545,12 @@ public:
}
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
- // we check this column size and self_row because we need to make sure when we call
- // replace_column_data() with a batch column data.
- // and this column data is cleared at the every beginning.
- // next we replace column one by one.
- DCHECK(size() > self_row);
- const auto& r = assert_cast<const ColumnString&>(rhs);
- auto data = r.get_data_at(row);
-
- if (!self_row) {
- // self_row == 0 means we first call replace_column_data() with batch column data. so we
- // should clean last batch column data.
- chars.clear();
- offsets[self_row] = data.size;
- } else {
- offsets[self_row] = offsets[self_row - 1] + data.size;
- check_chars_length(offsets[self_row], self_row);
- }
-
- chars.insert(data.data, data.data + data.size);
+ LOG(FATAL) << "Method replace_column_data is not supported for " << get_name();
}
// should replace according to 0,1,2... ,size,0,1,2...
void replace_column_data_default(size_t self_row = 0) override {
- DCHECK(size() > self_row);
-
- if (!self_row) {
- chars.clear();
- offsets[self_row] = 0;
- } else {
- offsets[self_row] = offsets[self_row - 1];
- }
+ LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name();
}
void compare_internal(size_t rhs_row_id, const IColumn& rhs, int nan_direction_hint,
diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h
index 34d1e3ecf0e..2ca4fdec015 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -88,6 +88,8 @@ public:
MutableColumnPtr clone_resized(size_t size) const override;
size_t size() const override { return columns.at(0)->size(); }
+ bool is_variable_length() const override { return true; }
+
Field operator[](size_t n) const override;
void get(size_t n, Field& res) const override;
@@ -131,19 +133,11 @@ public:
return append_data_by_selector_impl<ColumnStruct>(res, selector);
}
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
- DCHECK(size() > self_row);
- const auto& r = assert_cast<const ColumnStruct&>(rhs);
-
- for (size_t idx = 0; idx < columns.size(); ++idx) {
- columns[idx]->replace_column_data(r.get_column(idx), row, self_row);
- }
+ LOG(FATAL) << "Method replace_column_data is not supported for " << get_name();
}
void replace_column_data_default(size_t self_row = 0) override {
- DCHECK(size() > self_row);
- for (size_t idx = 0; idx < columns.size(); ++idx) {
- columns[idx]->replace_column_data_default(self_row);
- }
+ LOG(FATAL) << "Method replace_column_data_default is not supported for " << get_name();
}
void insert_range_from(const IColumn& src, size_t start, size_t length) override;
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index 71cdece8333..5c7a2f8482a 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -369,7 +369,7 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
update_least_schema_internal(subcolumns_types, common_schema, true, variant_col_unique_id);
}
-void inherit_tablet_index(TabletSchemaSPtr& schema) {
+void inherit_root_attributes(TabletSchemaSPtr& schema) {
std::unordered_map<int32_t, TabletIndex> variants_index_meta;
// Get all variants tablet index metas if exist
for (const auto& col : schema->columns()) {
@@ -392,6 +392,7 @@ void inherit_tablet_index(TabletSchemaSPtr& schema) {
// above types are not supported in bf
col.set_is_bf_column(schema->column(col.parent_unique_id()).is_bf_column());
}
+ col.set_aggregation_method(schema->column(col.parent_unique_id()).aggregation());
auto it = variants_index_meta.find(col.parent_unique_id());
// variant has no index meta, ignore
if (it == variants_index_meta.end()) {
@@ -467,7 +468,7 @@ Status get_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
update_least_sparse_column(schemas, output_schema, unique_id, path_set);
}
- inherit_tablet_index(output_schema);
+ inherit_root_attributes(output_schema);
if (check_schema_size &&
output_schema->columns().size() > config::variant_max_merged_tablet_schema_size) {
return Status::DataQualityError("Reached max column size limit {}",
@@ -710,7 +711,7 @@ void rebuild_schema_and_block(const TabletSchemaSPtr& original,
VLOG_DEBUG << "set root_path : " << full_root_path.get_path();
}
- vectorized::schema_util::inherit_tablet_index(flush_schema);
+ vectorized::schema_util::inherit_root_attributes(flush_schema);
}
// ---------------------------
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index e1bb037f70e..e6ed60480f5 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -116,8 +116,8 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
TabletSchemaSPtr& common_schema, int32_t variant_col_unique_id,
const std::unordered_set<PathInData, PathInData::Hash>& path_set);
-// inherit index info from it's parent column
-void inherit_tablet_index(TabletSchemaSPtr& schema);
+// inherit attributes like index/agg info from its parent column
+void inherit_root_attributes(TabletSchemaSPtr& schema);
// Rebuild schema from original schema by extend dynamic columns generated from ColumnObject.
// Block consists of two parts, dynamic part of columns and static part of columns.
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp
index c4b7f681609..98894e225ea 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -444,7 +444,7 @@ Status NewOlapScanner::_init_variant_columns() {
}
}
}
- schema_util::inherit_tablet_index(tablet_schema);
+ schema_util::inherit_root_attributes(tablet_schema);
return Status::OK();
}
diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp
index e2f37fee010..7a23c9c4456 100644
--- a/be/src/vec/olap/block_reader.cpp
+++ b/be/src/vec/olap/block_reader.cpp
@@ -476,10 +476,10 @@ size_t BlockReader::_copy_agg_data() {
auto& dst_column = _stored_data_columns[idx];
if (_stored_has_variable_length_tag[idx]) {
//variable length type should replace ordered
+ dst_column->clear();
for (size_t i = 0; i < copy_size; i++) {
auto& ref = _stored_row_ref[i];
- dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
- ref.row_pos, i);
+ dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos);
}
} else {
for (auto& it : _temp_ref_map) {
diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp
index 4fa518d58ac..0092a020a2e 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -330,10 +330,10 @@ size_t VerticalBlockReader::_copy_agg_data() {
auto& dst_column = _stored_data_columns[idx];
if (_stored_has_variable_length_tag[idx]) {
//variable length type should replace ordered
+ dst_column->clear();
for (size_t i = 0; i < copy_size; i++) {
auto& ref = _stored_row_ref[i];
- dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
- ref.row_pos, i);
+ dst_column->insert_from(*ref.block->get_by_position(idx).column, ref.row_pos);
}
} else {
for (auto& it : _temp_ref_map) {
diff --git a/regression-test/data/variant_p0/agg.out b/regression-test/data/variant_p0/agg.out
new file mode 100644
index 00000000000..958e3d41a7b
--- /dev/null
+++ b/regression-test/data/variant_p0/agg.out
@@ -0,0 +1,78 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql1 --
+
+-- !sql2 --
+1 \N {"c":[{"a":1}]}
+1022 \N {"f":17034,"g":1.111}
+1029 \N {"c":1}
+1999 \N {"c":1}
+
+-- !sql3 --
+1 {"a":1,"b":{"c":[{"a":1}]}}
+2 [2]
+3 3
+4 "4"
+5 5
+
+-- !sql4 --
+{"c":[{"a":1}]} [{"a":1}] \N
+{"f":17034,"g":1.111} \N \N
+{"c":1} 1 \N
+{"c":1} 1 \N
+
+-- !sql5 --
+
+-- !sql6 --
+{"c":[{"a":1}]}
+{"f":17034,"g":1.111}
+{"c":1}
+{"c":1}
+
+-- !sql7 --
+1 {"a":1,"b":{"c":[{"a":1}]}} 59
+1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
+1029 \N 12
+1999 {"a":1,"b":{"c":1}} 11
+
+-- !sql8 --
+1 {"a":1,"b":{"c":[{"a":1}]}} 59
+2 [2] 2
+3 3 3
+4 "4" 4
+5 5 5
+6 "[6]" 6
+7 7 7
+8 8.11111 8
+9 "9999" 9
+10 1000000 10
+11 [123] 11
+12 [123.2] 12
+1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
+1029 \N 12
+1999 {"a":1,"b":{"c":1}} 11
+19921 {"a":1,"d":10} 11
+
+-- !sql9 --
+0 {"a":11245,"f":["123456"]}
+1 {"a":11245,"f":["123456"]}
+2 {"a":11245,"f":["123456"]}
+3 {"a":11245,"f":["123456"]}
+4 {"a":11245,"f":["123456"]}
+5 {"a":11245,"f":["123456"]}
+6 {"a":11245,"f":["123456"]}
+7 {"a":11245,"f":["123456"]}
+8 {"a":11245,"f":["123456"]}
+9 {"a":11245,"f":["123456"]}
+
+-- !sql9 --
+1025 {"a":11245,"y":11111111}
+1026 {"a":11245,"y":11111111}
+1027 {"a":11245,"y":11111111}
+1028 {"a":11245,"y":11111111}
+1029 {"a":11245,"y":11111111}
+1030 {"a":11245,"y":11111111}
+1031 {"a":11245,"y":11111111}
+1032 {"a":11245,"y":11111111}
+1033 {"a":11245,"y":11111111}
+1034 {"a":11245,"y":11111111}
+
diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out b/regression-test/data/variant_p0/compaction/test_compaction.out
index 50943aef7b6..57a5c142fbb 100644
--- a/regression-test/data/variant_p0/compaction/test_compaction.out
+++ b/regression-test/data/variant_p0/compaction/test_compaction.out
@@ -279,3 +279,107 @@
{"c":[{"a":1}]} [{"a":1}]
{"c":1} 1
+-- !sql_1 --
+1 {"x":[1]}
+2 {"a":"1"}
+3 {"x":[3]}
+4 {"y":1}
+5 {"z":2.0}
+6 {"x":111}
+7 {"m":1}
+8 {"l":2}
+9 {"g":1.11}
+10 {"z":1.1111}
+11 {"sala":0}
+12 {"dddd":0.1}
+13 {"a":1}
+14 {"a":[[[1]]]}
+15 {"a":1}
+16 {"a":"1223"}
+17 {"a":[1]}
+18 {"a":["1",2,1.1]}
+19 {"a":1,"b":{"c":1}}
+20 {"a":1,"b":{"c":[{"a":1}]}}
+21 {"a":1,"b":{"c":[{"a":1}]}}
+22 {"a":1,"b":{"c":[{"a":1}]}}
+1022 {"a":1,"b":10}
+1029 {"a":1,"b":{"c":1}}
+1999 {"a":1,"b":{"c":1}}
+19921 {"a":1,"b":10}
+
+-- !sql_2 --
+14 [null]
+17 [1]
+18 [1, 2, null]
+
+-- !sql_3 --
+19 1 {"c":1}
+20 1 {"c":[{"a":1}]}
+21 1 {"c":[{"a":1}]}
+22 1 {"c":[{"a":1}]}
+1029 1 {"c":1}
+1999 1 {"c":1}
+
+-- !sql_5 --
+10 \N
+{"c":1} 1
+{"c":1} 1
+10 \N
+{"c":[{"a":1}]} [{"a":1}]
+{"c":[{"a":1}]} [{"a":1}]
+{"c":[{"a":1}]} [{"a":1}]
+{"c":1} 1
+{} \N
+{} \N
+
+-- !sql_11 --
+1 {"x":[1]}
+2 {"a":"1"}
+3 {"x":[3]}
+4 {"y":1}
+5 {"z":2.0}
+6 {"x":111}
+7 {"m":1}
+8 {"l":2}
+9 {"g":1.11}
+10 {"z":1.1111}
+11 {"sala":0}
+12 {"dddd":0.1}
+13 {"a":1}
+14 {"a":[[[1]]]}
+15 {"a":1}
+16 {"a":"1223"}
+17 {"a":[1]}
+18 {"a":["1",2,1.1]}
+19 {"a":1,"b":{"c":1}}
+20 {"a":1,"b":{"c":[{"a":1}]}}
+21 {"a":1,"b":{"c":[{"a":1}]}}
+22 {"a":1,"b":{"c":[{"a":1}]}}
+1022 {"a":1,"b":10}
+1029 {"a":1,"b":{"c":1}}
+1999 {"a":1,"b":{"c":1}}
+19921 {"a":1,"b":10}
+
+-- !sql_22 --
+14 [null]
+17 [1]
+18 [1, 2, null]
+
+-- !sql_33 --
+19 1 {"c":1}
+20 1 {"c":[{"a":1}]}
+21 1 {"c":[{"a":1}]}
+22 1 {"c":[{"a":1}]}
+1029 1 {"c":1}
+1999 1 {"c":1}
+
+-- !sql_55 --
+10 \N
+{"c":1} 1
+{"c":1} 1
+10 \N
+{"c":[{"a":1}]} [{"a":1}]
+{"c":[{"a":1}]} [{"a":1}]
+{"c":[{"a":1}]} [{"a":1}]
+{"c":1} 1
+
diff --git a/regression-test/data/variant_p0/desc.out b/regression-test/data/variant_p0/desc.out
index ecf0925a9c9..b46b5f9b4b0 100644
--- a/regression-test/data/variant_p0/desc.out
+++ b/regression-test/data/variant_p0/desc.out
@@ -2,107 +2,107 @@
-- !sql_1 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_2 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.ddd.aaa TINYINT Yes false \N
-v.ddd.mxmxm JSON Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.ddd.aaa TINYINT Yes false \N NONE
+v.ddd.mxmxm JSON Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_3 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_6_1 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.ddd.aaa TINYINT Yes false \N
-v.ddd.mxmxm JSON Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.ddd.aaa TINYINT Yes false \N NONE
+v.ddd.mxmxm JSON Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_6_2 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_6_3 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
-- !sql_6 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.ddd.aaa TINYINT Yes false \N
-v.ddd.mxmxm JSON Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.ddd.aaa TINYINT Yes false \N NONE
+v.ddd.mxmxm JSON Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_7 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_7_1 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_7_2 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
-- !sql_7_3 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.xxxx TEXT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.xxxx TEXT Yes false \N NONE
-- !sql_8 --
k BIGINT Yes true \N
v1 VARIANT Yes false \N NONE
v2 VARIANT Yes false \N NONE
v3 VARIANT Yes false \N NONE
-v1.a SMALLINT Yes false \N
-v1.b JSON Yes false \N
-v1.c.c SMALLINT Yes false \N
-v1.c.e DOUBLE Yes false \N
-v1.oooo.xxxx.xxx TINYINT Yes false \N
-v2.a SMALLINT Yes false \N
-v2.xxxx TEXT Yes false \N
-v3.a SMALLINT Yes false \N
-v3.b JSON Yes false \N
-v3.c.c SMALLINT Yes false \N
-v3.c.e DOUBLE Yes false \N
+v1.a SMALLINT Yes false \N NONE
+v1.b JSON Yes false \N NONE
+v1.c.c SMALLINT Yes false \N NONE
+v1.c.e DOUBLE Yes false \N NONE
+v1.oooo.xxxx.xxx TINYINT Yes false \N NONE
+v2.a SMALLINT Yes false \N NONE
+v2.xxxx TEXT Yes false \N NONE
+v3.a SMALLINT Yes false \N NONE
+v3.b JSON Yes false \N NONE
+v3.c.c SMALLINT Yes false \N NONE
+v3.c.e DOUBLE Yes false \N NONE
-- !sql_9 --
k BIGINT Yes true \N
@@ -111,88 +111,88 @@ v VARIANT Yes false \N NONE
-- !sql_9_1 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.oooo.xxxx.xxx TINYINT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.oooo.xxxx.xxx TINYINT Yes false \N NONE
-- !sql_10 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.k1 TINYINT Yes false \N
-v.k2 TEXT Yes false \N
-v.k3 ARRAY<SMALLINT> Yes false []
-v.k4 DOUBLE Yes false \N
-v.k5 JSON Yes false \N
+v.k1 TINYINT Yes false \N NONE
+v.k2 TEXT Yes false \N NONE
+v.k3 ARRAY<SMALLINT> Yes false [] NONE
+v.k4 DOUBLE Yes false \N NONE
+v.k5 JSON Yes false \N NONE
-- !sql_10_1 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
v2 VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.k1 TINYINT Yes false \N
-v.k2 TEXT Yes false \N
-v.k3 ARRAY<SMALLINT> Yes false []
-v.k4 DOUBLE Yes false \N
-v.k5 JSON Yes false \N
-v.oooo.xxxx.xxx TINYINT Yes false \N
-v2.a SMALLINT Yes false \N
-v2.b JSON Yes false \N
-v2.c.c SMALLINT Yes false \N
-v2.c.e DOUBLE Yes false \N
-v2.oooo.xxxx.xxx TINYINT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.k1 TINYINT Yes false \N NONE
+v.k2 TEXT Yes false \N NONE
+v.k3 ARRAY<SMALLINT> Yes false [] NONE
+v.k4 DOUBLE Yes false \N NONE
+v.k5 JSON Yes false \N NONE
+v.oooo.xxxx.xxx TINYINT Yes false \N NONE
+v2.a SMALLINT Yes false \N NONE
+v2.b JSON Yes false \N NONE
+v2.c.c SMALLINT Yes false \N NONE
+v2.c.e DOUBLE Yes false \N NONE
+v2.oooo.xxxx.xxx TINYINT Yes false \N NONE
-- !sql_10_2 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.k1 TINYINT Yes false \N
-v.k2 TEXT Yes false \N
-v.k3 ARRAY<SMALLINT> Yes false []
-v.k4 DOUBLE Yes false \N
-v.k5 JSON Yes false \N
-v.oooo.xxxx.xxx TINYINT Yes false \N
-v2.a SMALLINT Yes false \N
-v2.b JSON Yes false \N
-v2.c.c SMALLINT Yes false \N
-v2.c.e DOUBLE Yes false \N
-v2.oooo.xxxx.xxx TINYINT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.k1 TINYINT Yes false \N NONE
+v.k2 TEXT Yes false \N NONE
+v.k3 ARRAY<SMALLINT> Yes false [] NONE
+v.k4 DOUBLE Yes false \N NONE
+v.k5 JSON Yes false \N NONE
+v.oooo.xxxx.xxx TINYINT Yes false \N NONE
+v2.a SMALLINT Yes false \N NONE
+v2.b JSON Yes false \N NONE
+v2.c.c SMALLINT Yes false \N NONE
+v2.c.e DOUBLE Yes false \N NONE
+v2.oooo.xxxx.xxx TINYINT Yes false \N NONE
-- !sql_10_3 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
v3 VARIANT Yes false \N NONE
-v.a SMALLINT Yes false \N
-v.b JSON Yes false \N
-v.c.c SMALLINT Yes false \N
-v.c.e DOUBLE Yes false \N
-v.k1 TINYINT Yes false \N
-v.k2 TEXT Yes false \N
-v.k3 ARRAY<SMALLINT> Yes false []
-v.k4 DOUBLE Yes false \N
-v.k5 JSON Yes false \N
-v.oooo.xxxx.xxx TINYINT Yes false \N
-v3.a SMALLINT Yes false \N
-v3.b JSON Yes false \N
-v3.c.c SMALLINT Yes false \N
-v3.c.e DOUBLE Yes false \N
-v3.oooo.xxxx.xxx TINYINT Yes false \N
+v.a SMALLINT Yes false \N NONE
+v.b JSON Yes false \N NONE
+v.c.c SMALLINT Yes false \N NONE
+v.c.e DOUBLE Yes false \N NONE
+v.k1 TINYINT Yes false \N NONE
+v.k2 TEXT Yes false \N NONE
+v.k3 ARRAY<SMALLINT> Yes false [] NONE
+v.k4 DOUBLE Yes false \N NONE
+v.k5 JSON Yes false \N NONE
+v.oooo.xxxx.xxx TINYINT Yes false \N NONE
+v3.a SMALLINT Yes false \N NONE
+v3.b JSON Yes false \N NONE
+v3.c.c SMALLINT Yes false \N NONE
+v3.c.e DOUBLE Yes false \N NONE
+v3.oooo.xxxx.xxx TINYINT Yes false \N NONE
-- !sql_11 --
k BIGINT Yes true \N
v VARIANT Yes false \N NONE
-v.!@#^&*() TEXT Yes false \N
-v.名字 TEXT Yes false \N
-v.画像.丬文 TEXT Yes false \N
-v.画像.地址 TEXT Yes false \N
-v.金额 SMALLINT Yes false \N
+v.!@#^&*() TEXT Yes false \N NONE
+v.名字 TEXT Yes false \N NONE
+v.画像.丬文 TEXT Yes false \N NONE
+v.画像.地址 TEXT Yes false \N NONE
+v.金额 SMALLINT Yes false \N NONE
-- !sql_12 --
k BIGINT Yes true \N
diff --git a/regression-test/suites/variant_p0/agg.groovy b/regression-test/suites/variant_p0/agg.groovy
new file mode 100644
index 00000000000..b5010ee7f72
--- /dev/null
+++ b/regression-test/suites/variant_p0/agg.groovy
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("regression_test_variant_agg"){
+ sql """DROP TABLE IF EXISTS var_agg"""
+ sql """
+ CREATE TABLE IF NOT EXISTS var_agg (
+ k bigint,
+ v variant replace,
+ s bigint sum
+ )
+ AGGREGATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 4
+ properties("replication_num" = "1", "disable_auto_compaction" = "false");
+ """
+ sql """insert into var_agg values (1, '[1]', 1),(1, '{"a" : 1}', 1);"""
+ sql """insert into var_agg values (2, '[2]', 2),(1, '{"a" : [[[1]]]}', 2);"""
+ sql """insert into var_agg values (3, '3', 3),(1, '{"a" : 1}', 3), (1, '{"a" : [1]}', 3);"""
+ sql """insert into var_agg values (4, '"4"', 4),(1, '{"a" : "1223"}', 4);"""
+ sql """insert into var_agg values (5, '5', 5),(1, '{"a" : [1]}', 5);"""
+ sql """insert into var_agg values (6, '"[6]"', 6),(1, '{"a" : ["1", 2, 1.1]}', 6);"""
+ sql """insert into var_agg values (7, '7', 7),(1, '{"a" : 1, "b" : {"c" : 1}}', 7);"""
+ sql """insert into var_agg values (8, '8.11111', 8),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}', 8);"""
+ sql """insert into var_agg values (9, '"9999"', 9),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}', 9);"""
+ sql """insert into var_agg values (10, '1000000', 10),(1, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}', 10);"""
+ sql """insert into var_agg values (11, '[123.0]', 11),(1999, '{"a" : 1, "b" : {"c" : 1}}', 11),(19921, '{"a" : 1, "d" : 10}', 11);"""
+ sql """insert into var_agg values (12, '[123.2]', 12),(1022, '{"a" : 1, "b" : {"f" : 17034, "g" :1.111 }}', 12),(1029, '{"a" : 1, "b" : {"c" : 1}}', 12);"""
+ qt_sql1 "select k, cast(v['a'] as array<int>) from var_agg where size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) asc"
+ qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from var_agg where length(cast(v['b'] as string)) > 4 order by k, cast(v as string), cast(v['b'] as string) "
+ qt_sql3 "select k, v from var_agg order by k, cast(v as string) limit 5"
+ qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
+ qt_sql5 "select v['b'] from var_agg where cast(v['b'] as int) > 0;"
+ qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
+ qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
+ qt_sql8 "select * from var_agg order by 1, cast(2 as string), 3"
+ sql "alter table var_agg drop column s"
+ sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str
+ union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "x" : 42005}' as json_str from numbers("number" = "1024") limit 1024;"""
+ sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str
+ union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "y" : 11111111}' as json_str from numbers("number" = "2048") where number > 1024 limit 1024;"""
+ sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str
+ union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "c" : 1.11}' as json_str from numbers("number" = "1024") limit 1024;"""
+ sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str
+ union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "e" : [123456]}' as json_str from numbers("number" = "1024") limit 1024;"""
+ sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str
+ union all select 5, '{"a": 1123}' as json_str union all select *, '{"a": 11245, "f" : ["123456"]}' as json_str from numbers("number" = "1024") limit 1024;"""
+ qt_sql9 "select * from var_agg order by cast(2 as string), 3, 1 limit 10"
+ qt_sql9 "select * from var_agg where k > 1024 order by cast(2 as string), 3, 1 limit 10"
+}
\ No newline at end of file
diff --git a/regression-test/suites/variant_p0/compaction/test_compaction.groovy b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
index e5359f305fc..48d916e38e3 100644
--- a/regression-test/suites/variant_p0/compaction/test_compaction.groovy
+++ b/regression-test/suites/variant_p0/compaction/test_compaction.groovy
@@ -41,10 +41,14 @@ suite("test_compaction_variant") {
}
def create_table = { tableName, buckets="auto", key_type="DUPLICATE" ->
sql "DROP TABLE IF EXISTS ${tableName}"
+ def var_def = "variant"
+ if (key_type == "AGGREGATE") {
+ var_def = "variant replace"
+ }
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
k bigint,
- v variant
+ v ${var_def}
)
${key_type} KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS ${buckets}
@@ -52,7 +56,8 @@ suite("test_compaction_variant") {
"""
}
- def key_types = ["DUPLICATE", "UNIQUE"]
+ def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"]
+ // def key_types = ["AGGREGATE"]
for (int i = 0; i < key_types.size(); i++) {
def tableName = "simple_variant_${key_types[i]}"
// 1. simple cases
@@ -62,7 +67,7 @@ suite("test_compaction_variant") {
sql """insert into ${tableName} values (2, '{"a" : "1"}'),(14, '{"a" : [[[1]]]}');"""
sql """insert into ${tableName} values (3, '{"x" : [3]}'),(15, '{"a" : 1}')"""
sql """insert into ${tableName} values (4, '{"y": 1}'),(16, '{"a" : "1223"}');"""
- sql """insert into ${tableName} values (5, '{"z" : 2}'),(17, '{"a" : [1]}');"""
+ sql """insert into ${tableName} values (5, '{"z" : 2.0}'),(17, '{"a" : [1]}');"""
sql """insert into ${tableName} values (6, '{"x" : 111}'),(18, '{"a" : ["1", 2, 1.1]}');"""
sql """insert into ${tableName} values (7, '{"m" : 1}'),(19, '{"a" : 1, "b" : {"c" : 1}}');"""
sql """insert into ${tableName} values (8, '{"l" : 2}'),(20, '{"a" : 1, "b" : {"c" : [{"a" : 1}]}}');"""
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org