You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2023/01/13 13:15:54 UTC
[doris] 07/09: [feature](struct-type) adjust the vexpr for struct type (#15669)
This is an automated email from the ASF dual-hosted git repository.
xuyang pushed a commit to branch struct-type
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0d4da4f5f7797c53c9a2389519d5a81073baf82c
Author: carlvinhust2012 <hu...@126.com>
AuthorDate: Fri Jan 13 17:14:11 2023 +0800
[feature](struct-type) adjust the vexpr for struct type (#15669)
---
be/src/vec/columns/column_struct.cpp | 7 --
be/src/vec/columns/column_struct.h | 1 -
be/src/vec/data_types/data_type_factory.hpp | 8 ++
be/src/vec/data_types/data_type_struct.cpp | 99 +++++++++++++++++++---
be/src/vec/data_types/data_type_struct.h | 3 +
be/src/vec/functions/function_cast.h | 13 ++-
.../java/org/apache/doris/analysis/CastExpr.java | 8 ++
.../java/org/apache/doris/analysis/TypeDef.java | 2 +-
.../main/java/org/apache/doris/catalog/Column.java | 1 +
.../main/java/org/apache/doris/catalog/Type.java | 14 ++-
10 files changed, 134 insertions(+), 22 deletions(-)
diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp
index d51bebb536..afef00c314 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -22,13 +22,6 @@
namespace doris::vectorized {
-namespace ErrorCodes {
-extern const int ILLEGAL_COLUMN;
-extern const int NOT_IMPLEMENTED;
-extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE;
-extern const int LOGICAL_ERROR;
-} // namespace ErrorCodes
-
std::string ColumnStruct::get_name() const {
std::stringstream res;
res << "Struct(";
diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h
index a66d91f3e0..393b2c275f 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -103,7 +103,6 @@ public:
bool can_be_inside_nullable() const override { return true; }
MutableColumnPtr clone_empty() const override;
MutableColumnPtr clone_resized(size_t size) const override;
-
size_t size() const override { return columns.at(0)->size(); }
Field operator[](size_t n) const override;
diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp
index d46dd3435d..9bc5e20d5c 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -112,6 +112,14 @@ public:
return entity.second;
}
}
+ if (type_ptr->get_type_id() == TypeIndex::Struct) {
+ DataTypeFactory::instance().register_data_type(type_ptr->get_name(), type_ptr);
+ for (const auto& entity : _invert_data_type_map) {
+ if (entity.first->equals(*type_ptr)) {
+ return entity.second;
+ }
+ }
+ }
return _empty_string;
}
diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 4364f5c34f..87b5c4f110 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -22,17 +22,6 @@
namespace doris::vectorized {
-namespace ErrorCodes {
-extern const int BAD_ARGUMENTS;
-extern const int DUPLICATE_COLUMN;
-extern const int EMPTY_DATA_PASSED;
-extern const int NOT_FOUND_COLUMN_IN_BLOCK;
-extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
-extern const int ILLEGAL_INDEX;
-extern const int LOGICAL_ERROR;
-} // namespace ErrorCodes
-
DataTypeStruct::DataTypeStruct(const DataTypes& elems_)
: elems(elems_), have_explicit_names(false) {
/// Automatically assigned names in form of '1', '2', ...
@@ -91,6 +80,94 @@ std::string DataTypeStruct::do_get_name() const {
return s.str();
}
+/// Parses the text form of a struct, e.g. {"name":"jack","age":20}, and hands each
+/// field value to the matching element type's from_string().
+///
+/// @param rb     Buffer holding the text; must not be empty. Its position is advanced
+///               while parsing.
+/// @param column Destination column; must be a ColumnStruct.
+/// @return Status::OK() on success; Status::InvalidArgument when the text is not wrapped
+///         in braces, a field has no ':' separator, or the number of fields differs from
+///         the number of struct elements; otherwise the first error reported by an
+///         element type.
+///
+/// Fields are split on every ',' or '}', so values that contain those characters
+/// (including quoted strings and nested structs) are not supported by this parser.
+Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
+    DCHECK(!rb.eof());
+    auto* struct_column = assert_cast<ColumnStruct*>(column);
+
+    // Literal braces are doubled so they are not taken for '{}' placeholders.
+    if (*rb.position() != '{') {
+        return Status::InvalidArgument("Struct does not start with '{{' character, found '{}'",
+                                       *rb.position());
+    }
+    if (rb.count() < 2 || *(rb.end() - 1) != '}') {
+        return Status::InvalidArgument("Struct does not end with '}}' character, found '{}'",
+                                       *(rb.end() - 1));
+    }
+
+    // Empty struct '{}': nothing to parse.
+    // NOTE(review): no value is appended to the sub-columns on this path - confirm that
+    // callers expect this.
+    if (rb.count() == 2) {
+        return Status::OK();
+    }
+
+    ++rb.position();
+    std::vector<ReadBuffer> field_rbs;
+    field_rbs.reserve(elems.size());
+
+    // Collect the value part of every `name:value` field, e.g. "jack" and 20.
+    while (!rb.eof()) {
+        auto* field_begin = rb.position();
+        auto* field_end = field_begin;
+        // Bound the scan by the buffer end instead of relying on finding a terminator.
+        while (field_end != rb.end() && *field_end != ',' && *field_end != '}') {
+            ++field_end;
+        }
+        if (field_end == rb.end()) {
+            return Status::InvalidArgument("Invalid Length");
+        }
+
+        // The value starts right after the first ':' inside this field.
+        auto* value_begin = field_begin;
+        while (value_begin != field_end && *value_begin != ':') {
+            ++value_begin;
+        }
+        if (value_begin == field_end) {
+            return Status::InvalidArgument(
+                    "Struct field is missing the ':' separator between name and value");
+        }
+        ++value_begin;
+
+        const size_t value_len = static_cast<size_t>(field_end - value_begin);
+        // Strip one pair of surrounding double quotes; the closing quote is the last
+        // character of the value, i.e. the one just before field_end.
+        if (value_len >= 2 && *value_begin == '"' && *(field_end - 1) == '"') {
+            field_rbs.emplace_back(value_begin + 1, value_len - 2);
+        } else {
+            field_rbs.emplace_back(value_begin, value_len);
+        }
+
+        // Skip the field and its terminating ',' or '}'.
+        rb.position() = field_end + 1;
+    }
+
+    // Indexing field_rbs by element index is only safe when the counts match.
+    if (field_rbs.size() != elems.size()) {
+        return Status::InvalidArgument("Struct field number mismatch, expected {}, found {}",
+                                       elems.size(), field_rbs.size());
+    }
+
+    for (size_t idx = 0; idx < elems.size(); ++idx) {
+        // NOTE(review): on failure, sub-columns with a smaller index may already have
+        // received their value - confirm how callers recover.
+        Status st = elems[idx]->from_string(field_rbs[idx], &struct_column->get_column(idx));
+        if (!st.ok()) {
+            return st;
+        }
+    }
+
+    return Status::OK();
+}
+
+/// Renders row `row_num` of a struct column as text: the element values, each formatted
+/// by its own element type, joined with ", " and wrapped in '<' and '>'.
+/// NOTE(review): from_string() in this type expects '{' ... '}' while this emits
+/// '<' ... '>' - confirm the asymmetry is intended.
+std::string DataTypeStruct::to_string(const IColumn& column, size_t row_num) const {
+    // Materialize a const column so it can be viewed as a ColumnStruct.
+    auto full_column = column.convert_to_full_column_if_const();
+    const auto& struct_col = assert_cast<const ColumnStruct&>(*full_column.get());
+
+    std::string text = "<";
+    const size_t field_count = elems.size();
+    for (size_t i = 0; i < field_count; ++i) {
+        if (i > 0) {
+            text += ", ";
+        }
+        text += elems[i]->to_string(struct_col.get_column(i), row_num);
+    }
+    text += ">";
+    return text;
+}
+
+/// Streaming variant of to_string(): writes row `row_num` of a struct column into `ostr`
+/// as '<', the element values separated by ", ", then '>'. Each element value is written
+/// by its own element type.
+void DataTypeStruct::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const {
+    // Materialize a const column so it can be viewed as a ColumnStruct.
+    auto full_column = column.convert_to_full_column_if_const();
+    const auto& struct_col = assert_cast<const ColumnStruct&>(*full_column.get());
+
+    ostr.write("<", 1);
+    const size_t field_count = elems.size();
+    for (size_t i = 0; i < field_count; ++i) {
+        if (i > 0) {
+            ostr.write(", ", 2);
+        }
+        elems[i]->to_string(struct_col.get_column(i), row_num, ostr);
+    }
+    ostr.write(">", 1);
+}
+
/// Returns a mutable reference to the sub-column at position `idx` of `column`.
/// `column` is cast to ColumnStruct with assert_cast, so it must be a ColumnStruct.
static inline IColumn& extract_element_column(IColumn& column, size_t idx) {
return assert_cast<ColumnStruct&>(column).get_column(idx);
}
diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h
index 88255de38c..47cfaab1a3 100644
--- a/be/src/vec/data_types/data_type_struct.h
+++ b/be/src/vec/data_types/data_type_struct.h
@@ -96,6 +96,9 @@ public:
const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override;
void to_pb_column_meta(PColumnMeta* col_meta) const override;
+ Status from_string(ReadBuffer& rb, IColumn* column) const override;
+ std::string to_string(const IColumn& column, size_t row_num) const override;
+ void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
// bool is_parametric() const { return true; }
// SerializationPtr do_get_default_serialization() const override;
// SerializationPtr get_serialization(const SerializationInfo& info) const override;
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index d300c09a76..ec4608c115 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1541,6 +1541,16 @@ private:
return &ConvertImplGenericToJsonb::execute;
}
}
+    // Chooses the conversion routine used when the cast target is a struct.
+    // Only TypeIndex::String has an explicit branch; every other source type currently
+    // resolves to the same string-based converter. `to_type` is not inspected yet.
+    // TODO: need handle another type to cast struct
+    WrapperType create_struct_wrapper(const DataTypePtr& from_type,
+                                      const DataTypeStruct& to_type) const {
+        if (from_type->get_type_id() == TypeIndex::String) {
+            return &ConvertImplGenericFromString<ColumnString>::execute;
+        }
+        // Fallback shared by all remaining source types.
+        return &ConvertImplGenericFromString<ColumnString>::execute;
+    }
WrapperType prepare_unpack_dictionaries(FunctionContext* context, const DataTypePtr& from_type,
const DataTypePtr& to_type) const {
@@ -1712,6 +1722,8 @@ private:
case TypeIndex::Array:
return create_array_wrapper(context, from_type,
static_cast<const DataTypeArray&>(*to_type));
+ case TypeIndex::Struct:
+ return create_struct_wrapper(from_type, static_cast<const DataTypeStruct&>(*to_type));
default:
break;
}
@@ -1757,7 +1769,6 @@ protected:
// TODO(xy): support return struct type for factory
auto type = DataTypeFactory::instance().get(type_col->get_value<String>());
DCHECK(type != nullptr);
-
bool need_to_be_nullable = false;
// 1. from_type is nullable
need_to_be_nullable |= arguments[0].type->is_nullable();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
index ab5bc992f8..77975c1eb9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
@@ -318,9 +318,17 @@ public class CastExpr extends Expr {
type, Function.NullableMode.ALWAYS_NULLABLE,
Lists.newArrayList(Type.VARCHAR), false,
"doris::CastFunctions::cast_to_array_val", null, null, true);
+ } else if (type.isStructType()) {
+ fn = ScalarFunction.createBuiltin(getFnName(Type.STRUCT),
+ type, Function.NullableMode.ALWAYS_NULLABLE,
+ Lists.newArrayList(Type.VARCHAR), false,
+ "doris::CastFunctions::cast_to_struct_val", null, null, true);
}
if (fn == null) {
+ if (type.isStructType() && childType.isStringType()) {
+ return;
+ }
if (childType.isNull() && Type.canCastTo(childType, type)) {
return;
} else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
index 5690c115c1..c89c659207 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
@@ -129,7 +129,7 @@ public class TypeDef implements ParseNode {
// check whether the sub-type is supported
if (!parent.supportSubType(child)) {
throw new AnalysisException(
- parent.getPrimitiveType() + "unsupported sub-type: " + child.toSql());
+ parent.getPrimitiveType() + " unsupported sub-type: " + child.toSql());
}
if (child.getPrimitiveType().isStringType() && !child.isLengthSet()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index 9efe901907..7b0de9b7e4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -59,6 +59,7 @@ public class Column implements Writable, GsonPostProcessable {
public static final String DELETE_SIGN = "__DORIS_DELETE_SIGN__";
public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__";
private static final String COLUMN_ARRAY_CHILDREN = "item";
+ private static final String COLUMN_STRUCT_CHILDREN = "field";
public static final int COLUMN_UNIQUE_ID_INIT_VALUE = -1;
@SerializedName(value = "name")
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index 45d88897a9..587266926c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -176,8 +176,20 @@ public abstract class Type {
arraySubTypes.add(STRING);
structSubTypes = Lists.newArrayList();
- structSubTypes.add(INT);
+ structSubTypes.addAll(numericTypes);
+ structSubTypes.add(BOOLEAN);
+ structSubTypes.add(VARCHAR);
structSubTypes.add(STRING);
+ structSubTypes.add(CHAR);
+ structSubTypes.add(DATE);
+ structSubTypes.add(DATETIME);
+ structSubTypes.add(DATEV2);
+ structSubTypes.add(DATETIMEV2);
+ structSubTypes.add(TIME);
+ structSubTypes.add(TIMEV2);
+ structSubTypes.add(DECIMAL32);
+ structSubTypes.add(DECIMAL64);
+ structSubTypes.add(DECIMAL128);
}
public static ArrayList<ScalarType> getIntegerTypes() {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org