You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2023/01/13 03:24:15 UTC
[doris] branch struct-type updated: [fix](struct-type) struct create fix and add more struct codes (#15879)
This is an automated email from the ASF dual-hosted git repository.
xuyang pushed a commit to branch struct-type
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/struct-type by this push:
new e1e8ca2a59 [fix](struct-type) struct create fix and add more struct codes (#15879)
e1e8ca2a59 is described below
commit e1e8ca2a59eea169f5ccb6467ec96f6eec1e4043
Author: camby <10...@qq.com>
AuthorDate: Fri Jan 13 11:24:09 2023 +0800
[fix](struct-type) struct create fix and add more struct codes (#15879)
Fix the dead loop (endless self-recursion in ColumnStruct::create) when creating a struct column
Co-authored-by: cambyzju <zh...@baidu.com>
---
be/src/runtime/types.cpp | 29 +++++++++++++++++--
be/src/vec/columns/column_struct.cpp | 38 ++++++++++---------------
be/src/vec/columns/column_struct.h | 18 +++++-------
be/src/vec/data_types/data_type.cpp | 2 ++
be/src/vec/data_types/data_type_factory.cpp | 14 ++++++++++
be/src/vec/data_types/data_type_struct.cpp | 43 +++++++++++++++++++++++++++++
be/src/vec/data_types/data_type_struct.h | 19 ++++---------
7 files changed, 111 insertions(+), 52 deletions(-)
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index f26b1dcbce..45b8f8e5f5 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -150,7 +150,7 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const {
}
void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const {
- DCHECK(!is_complex_type() || type == TYPE_ARRAY)
+ DCHECK(!is_complex_type() || type == TYPE_ARRAY || type == TYPE_STRUCT)
<< "Don't support complex type now, type=" << type;
auto node = ptype->add_types();
node->set_type(TTypeNodeType::SCALAR);
@@ -170,8 +170,18 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const {
for (const TypeDescriptor& child : children) {
child.to_protobuf(ptype);
}
+ } else if (type == TYPE_STRUCT) {
+ node->set_type(TTypeNodeType::STRUCT);
+ DCHECK_EQ(field_names.size(), contains_nulls.size());
+ for (size_t i = 0; i < field_names.size(); ++i) {
+ auto field = node->add_struct_fields();
+ field->set_name(field_names[i]);
+ field->set_contains_null(contains_nulls[i]);
+ }
+ for (const TypeDescriptor& child : children) {
+ child.to_protobuf(ptype);
+ }
}
- // TODO(xy): support struct
}
TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNode>& types, int* idx)
@@ -213,7 +223,20 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNod
children.push_back(TypeDescriptor(types, idx));
break;
}
- // TODO(xy): support struct
+ case TTypeNodeType::STRUCT: {
+ type = TYPE_STRUCT;
+ size_t children_size = node.struct_fields_size();
+ for (size_t i = 0; i < children_size; ++i) {
+ const auto& field = node.struct_fields(i);
+ field_names.push_back(field.name());
+ contains_nulls.push_back(field.contains_null());
+ }
+ for (size_t i = 0; i < children_size; ++i) {
+ ++(*idx);
+ children.push_back(TypeDescriptor(types, idx));
+ }
+ break;
+ }
default:
DCHECK(false) << node.type();
}
diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp
index c7e5c23d2a..d51bebb536 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -54,43 +54,25 @@ ColumnStruct::ColumnStruct(MutableColumns&& mutable_columns) {
}
}
-ColumnStruct::ColumnStruct(Columns&& columns) {
- columns.reserve(columns.size());
- for (auto& column : columns) {
- if (is_column_const(*column)) {
- LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element";
- }
- columns.push_back(std::move(column));
- }
-}
-
-ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) {
- columns.reserve(tuple_columns.size());
- for (auto& column : tuple_columns) {
- if (is_column_const(*column)) {
- LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element";
- }
- columns.push_back(std::move(column));
- }
-}
-
-ColumnStruct::Ptr ColumnStruct::create(Columns& columns) {
+ColumnStruct::Ptr ColumnStruct::create(const Columns& columns) {
for (const auto& column : columns) {
if (is_column_const(*column)) {
LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element";
}
}
- auto column_struct = ColumnStruct::create(columns);
+ auto column_struct = ColumnStruct::create(MutableColumns());
+ column_struct->columns.assign(columns.begin(), columns.end());
return column_struct;
}
-ColumnStruct::Ptr ColumnStruct::create(TupleColumns& tuple_columns) {
+ColumnStruct::Ptr ColumnStruct::create(const TupleColumns& tuple_columns) {
for (const auto& column : tuple_columns) {
if (is_column_const(*column)) {
LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its element";
}
}
- auto column_struct = ColumnStruct::create(tuple_columns);
+ auto column_struct = ColumnStruct::create(MutableColumns());
+ column_struct->columns = tuple_columns;
return column_struct;
}
@@ -221,6 +203,14 @@ void ColumnStruct::update_hash_with_value(size_t n, SipHash& hash) const {
// }
// }
+// Inserts rows from `src` into this struct column, sub-column by sub-column.
+// `src` is cast to ColumnStruct with assert_cast; the same indices_begin/indices_end
+// pair is then forwarded to every sub-column of this column, paired with the
+// sub-column of `src` at the same position.
+// NOTE(review): the loop is bounded by this column's sub-column count only; presumably
+// `src` has the same number of sub-columns -- nothing in this function verifies it.
+void ColumnStruct::insert_indices_from(const IColumn& src, const int* indices_begin,
+ const int* indices_end) {
+ const ColumnStruct& src_concrete = assert_cast<const ColumnStruct&>(src);
+ for (size_t i = 0; i < columns.size(); ++i) {
+ columns[i]->insert_indices_from(src_concrete.get_column(i), indices_begin, indices_end);
+ }
+}
+
// const char * ColumnStruct::skip_serialized_in_arena(const char * pos) const {
// for (const auto & column : columns) {
// pos = column->skip_serialized_in_arena(pos);
diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h
index 75dade874d..a66d91f3e0 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -79,8 +79,6 @@ private:
template <bool positive>
struct Less;
- ColumnStruct(Columns&& columns);
- ColumnStruct(TupleColumns&& tuple_columns);
explicit ColumnStruct(MutableColumns&& mutable_columns);
ColumnStruct(const ColumnStruct&) = default;
@@ -89,14 +87,14 @@ public:
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnStruct>;
- static Ptr create(Columns& columns);
- static Ptr create(MutableColumns& columns);
- static Ptr create(TupleColumns& columns);
+ static Ptr create(const Columns& columns);
+ static Ptr create(const TupleColumns& columns);
static Ptr create(Columns&& arg) { return create(arg); }
- template <typename... Args>
- static MutablePtr create(Args&&... args) {
- return Base::create(std::forward<Args>(args)...);
+ template <typename Arg,
+ typename = typename std::enable_if<std::is_rvalue_reference<Arg&&>::value>::type>
+ static MutablePtr create(Arg&& arg) {
+ return Base::create(std::forward<Arg>(arg));
}
std::string get_name() const override;
@@ -131,9 +129,7 @@ public:
// void update_hash_fast(SipHash & hash) const override;
void insert_indices_from(const IColumn& src, const int* indices_begin,
- const int* indices_end) override {
- LOG(FATAL) << "insert_indices_from not implemented";
- }
+ const int* indices_end) override;
void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
Permutation& res) const override {
diff --git a/be/src/vec/data_types/data_type.cpp b/be/src/vec/data_types/data_type.cpp
index 7a0d67d4a4..360fa48a98 100644
--- a/be/src/vec/data_types/data_type.cpp
+++ b/be/src/vec/data_types/data_type.cpp
@@ -145,6 +145,8 @@ PGenericType_TypeId IDataType::get_pdata_type(const IDataType* data_type) {
return PGenericType::HLL;
case TypeIndex::Array:
return PGenericType::LIST;
+ case TypeIndex::Struct:
+ return PGenericType::STRUCT;
case TypeIndex::FixedLengthObject:
return PGenericType::FIXEDLENGTHOBJECT;
case TypeIndex::JSONB:
diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp
index 36aae47d69..7100a679c0 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -334,6 +334,20 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) {
case PGenericType::FIXEDLENGTHOBJECT:
nested = std::make_shared<DataTypeFixedLengthObject>();
break;
+ case PGenericType::STRUCT: {
+ // Rebuild a struct type from the child metas of `pcolumn`: one element type and
+ // one field name per child, in child order.
+ size_t col_size = pcolumn.children_size();
+ DCHECK(col_size >= 1);
+ DataTypes dataTypes;
+ Strings names;
+ dataTypes.reserve(col_size);
+ names.reserve(col_size);
+ for (size_t i = 0; i < col_size; i++) {
+ const auto& child = pcolumn.children(i);
+ dataTypes.push_back(create_data_type(child));
+ // Take the name from the child meta. Using pcolumn.name() here would give every
+ // field the parent column's name, so all fields would share one name.
+ // NOTE(review): relies on the serializing side filling in each child's name -- confirm.
+ names.push_back(child.name());
+ }
+ nested = std::make_shared<DataTypeStruct>(dataTypes, names);
+ break;
+ }
default: {
LOG(FATAL) << fmt::format("Unknown data type: {}", pcolumn.type());
return nullptr;
diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 8fd2179bfa..4364f5c34f 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -232,6 +232,49 @@ String DataTypeStruct::get_name_by_position(size_t i) const {
return names[i - 1];
}
+// Returns the number of bytes needed to serialize `column` with this struct type.
+// The column is first expanded via convert_to_full_column_if_const() and cast to
+// ColumnStruct; the result is the sum of what each element type reports for its
+// sub-column. No struct-level bytes are added on top of the element sizes.
+// `be_exec_version` is passed through unchanged to every element type.
+int64_t DataTypeStruct::get_uncompressed_serialized_bytes(const IColumn& column,
+ int be_exec_version) const {
+ auto ptr = column.convert_to_full_column_if_const();
+ const auto& struct_column = assert_cast<const ColumnStruct&>(*ptr.get());
+ // Debug-only check: one sub-column per element type.
+ DCHECK(elems.size() == struct_column.tuple_size());
+
+ int64_t bytes = 0;
+ for (size_t i = 0; i < elems.size(); ++i) {
+ bytes += elems[i]->get_uncompressed_serialized_bytes(struct_column.get_column(i),
+ be_exec_version);
+ }
+ return bytes;
+}
+
+// Serializes `column` into `buf` by letting each element type serialize its
+// sub-column in element order. Each call receives the pointer returned by the
+// previous one, and the pointer returned by the last element is the result.
+// The column is expanded via convert_to_full_column_if_const() before the cast
+// to ColumnStruct. `be_exec_version` is passed through to every element type.
+char* DataTypeStruct::serialize(const IColumn& column, char* buf, int be_exec_version) const {
+ auto ptr = column.convert_to_full_column_if_const();
+ const auto& struct_column = assert_cast<const ColumnStruct&>(*ptr.get());
+ // Debug-only check: one sub-column per element type.
+ DCHECK(elems.size() == struct_column.tuple_size());
+
+ for (size_t i = 0; i < elems.size(); ++i) {
+ buf = elems[i]->serialize(struct_column.get_column(i), buf, be_exec_version);
+ }
+ return buf;
+}
+
+// Reads a struct column from `buf` into `column`, which is cast to ColumnStruct*.
+// Each element type deserializes into the sub-column at its position, in element
+// order, starting from the pointer returned by the previous element; the pointer
+// returned by the last element is the result.
+// `be_exec_version` is passed through to every element type.
+const char* DataTypeStruct::deserialize(const char* buf, IColumn* column,
+ int be_exec_version) const {
+ auto* struct_column = assert_cast<ColumnStruct*>(column);
+ // Debug-only check: the target column must already hold one sub-column per element type.
+ DCHECK(elems.size() == struct_column->tuple_size());
+
+ for (size_t i = 0; i < elems.size(); ++i) {
+ buf = elems[i]->deserialize(buf, &struct_column->get_column(i), be_exec_version);
+ }
+ return buf;
+}
+
+// Fills `col_meta` with this struct type's own metadata (through the base-class
+// implementation) and then appends one child PColumnMeta per element, in element order.
+// Each child also carries the field's name; without it the child metas hold only what
+// the element types write, and the field names cannot be recovered from the meta.
+void DataTypeStruct::to_pb_column_meta(PColumnMeta* col_meta) const {
+ IDataType::to_pb_column_meta(col_meta);
+ // Debug-only check: `names` is indexed in parallel with `elems` below.
+ DCHECK(names.size() == elems.size());
+ for (size_t i = 0; i < elems.size(); ++i) {
+ auto* child = col_meta->add_children();
+ elems[i]->to_pb_column_meta(child);
+ // Set the name after the element has filled the child so it cannot be overwritten.
+ child->set_name(names[i]);
+ }
+}
+
bool DataTypeStruct::text_can_contain_only_valid_utf8() const {
return std::all_of(elems.begin(), elems.end(),
[](auto&& elem) { return elem->text_can_contain_only_valid_utf8(); });
diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h
index 9405544b76..88255de38c 100644
--- a/be/src/vec/data_types/data_type_struct.h
+++ b/be/src/vec/data_types/data_type_struct.h
@@ -90,20 +90,11 @@ public:
std::optional<size_t> try_get_position_by_name(const String& name) const;
String get_name_by_position(size_t i) const;
- [[noreturn]] int64_t get_uncompressed_serialized_bytes(const IColumn& column,
- int be_exec_version) const override {
- LOG(FATAL) << "get_uncompressed_serialized_bytes not implemented";
- }
-
- [[noreturn]] char* serialize(const IColumn& column, char* buf,
- int be_exec_version) const override {
- LOG(FATAL) << "serialize not implemented";
- }
-
- [[noreturn]] const char* deserialize(const char* buf, IColumn* column,
- int be_exec_version) const override {
- LOG(FATAL) << "serialize not implemented";
- }
+ int64_t get_uncompressed_serialized_bytes(const IColumn& column,
+ int be_exec_version) const override;
+ char* serialize(const IColumn& column, char* buf, int be_exec_version) const override;
+ const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override;
+ void to_pb_column_meta(PColumnMeta* col_meta) const override;
// bool is_parametric() const { return true; }
// SerializationPtr do_get_default_serialization() const override;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org