You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2023/01/13 13:15:54 UTC

[doris] 07/09: [feature](struct-type) adjust the vexpr for struct type (#15669)

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch struct-type
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0d4da4f5f7797c53c9a2389519d5a81073baf82c
Author: carlvinhust2012 <hu...@126.com>
AuthorDate: Fri Jan 13 17:14:11 2023 +0800

    [feature](struct-type) adjust the vexpr for struct type (#15669)
---
 be/src/vec/columns/column_struct.cpp               |  7 --
 be/src/vec/columns/column_struct.h                 |  1 -
 be/src/vec/data_types/data_type_factory.hpp        |  8 ++
 be/src/vec/data_types/data_type_struct.cpp         | 99 +++++++++++++++++++---
 be/src/vec/data_types/data_type_struct.h           |  3 +
 be/src/vec/functions/function_cast.h               | 13 ++-
 .../java/org/apache/doris/analysis/CastExpr.java   |  8 ++
 .../java/org/apache/doris/analysis/TypeDef.java    |  2 +-
 .../main/java/org/apache/doris/catalog/Column.java |  1 +
 .../main/java/org/apache/doris/catalog/Type.java   | 14 ++-
 10 files changed, 134 insertions(+), 22 deletions(-)

diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp
index d51bebb536..afef00c314 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -22,13 +22,6 @@
 
 namespace doris::vectorized {
 
-namespace ErrorCodes {
-extern const int ILLEGAL_COLUMN;
-extern const int NOT_IMPLEMENTED;
-extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE;
-extern const int LOGICAL_ERROR;
-} // namespace ErrorCodes
-
 std::string ColumnStruct::get_name() const {
     std::stringstream res;
     res << "Struct(";
diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h
index a66d91f3e0..393b2c275f 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -103,7 +103,6 @@ public:
     bool can_be_inside_nullable() const override { return true; }
     MutableColumnPtr clone_empty() const override;
     MutableColumnPtr clone_resized(size_t size) const override;
-
     size_t size() const override { return columns.at(0)->size(); }
 
     Field operator[](size_t n) const override;
diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp
index d46dd3435d..9bc5e20d5c 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -112,6 +112,14 @@ public:
                 return entity.second;
             }
         }
+        if (type_ptr->get_type_id() == TypeIndex::Struct) {
+            DataTypeFactory::instance().register_data_type(type_ptr->get_name(), type_ptr);
+            for (const auto& entity : _invert_data_type_map) {
+                if (entity.first->equals(*type_ptr)) {
+                    return entity.second;
+                }
+            }
+        }
         return _empty_string;
     }
 
diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
index 4364f5c34f..87b5c4f110 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -22,17 +22,6 @@
 
 namespace doris::vectorized {
 
-namespace ErrorCodes {
-extern const int BAD_ARGUMENTS;
-extern const int DUPLICATE_COLUMN;
-extern const int EMPTY_DATA_PASSED;
-extern const int NOT_FOUND_COLUMN_IN_BLOCK;
-extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
-extern const int ILLEGAL_INDEX;
-extern const int LOGICAL_ERROR;
-} // namespace ErrorCodes
-
 DataTypeStruct::DataTypeStruct(const DataTypes& elems_)
         : elems(elems_), have_explicit_names(false) {
     /// Automatically assigned names in form of '1', '2', ...
@@ -91,6 +80,94 @@ std::string DataTypeStruct::do_get_name() const {
     return s.str();
 }
 
+// Parses one struct literal of the form {"name":"jack","age":20} from `rb` and hands the value
+// part of every "key:value" entry, in order of appearance, to the from_string of the element
+// type at the same index, together with the matching sub-column of `column`.
+//
+// The keys are skipped, they are not matched against field names. Entries are split at every
+// ',' and '}' with no awareness of quoting or nesting, so values containing those characters
+// are not supported.
+//
+// Returns InvalidArgument when the braces are missing, when an entry has no ':' separator, or
+// when the number of entries differs from the number of struct elements; otherwise the first
+// error reported by an element parser, or OK.
+Status DataTypeStruct::from_string(ReadBuffer& rb, IColumn* column) const {
+    DCHECK(!rb.eof());
+    auto* struct_column = assert_cast<ColumnStruct*>(column);
+
+    if (*rb.position() != '{') {
+        return Status::InvalidArgument("Struct does not start with '{' character, found '{}'",
+                                       *rb.position());
+    }
+    if (rb.count() < 2 || *(rb.end() - 1) != '}') {
+        return Status::InvalidArgument("Struct does not end with '}' character, found '{}'",
+                                       *(rb.end() - 1));
+    }
+
+    // The empty struct '{}' is accepted as is.
+    // NOTE(review): nothing is appended to the sub-columns on this path - confirm the caller
+    // copes with a row that was not inserted.
+    if (rb.count() == 2) {
+        return Status::OK();
+    }
+
+    ++rb.position();
+    std::vector<ReadBuffer> field_rbs;
+    field_rbs.reserve(elems.size());
+
+    // here get the value "jack" and 20 from {"name":"jack","age":20}
+    while (!rb.eof()) {
+        // The last character of the buffer was verified to be '}', so this scan always stops
+        // inside the buffer.
+        size_t field_len = 0;
+        while (rb.position()[field_len] != ',' && rb.position()[field_len] != '}') {
+            ++field_len;
+        }
+
+        // Locate the first ':' of the entry; the scan is bounded by the entry itself so an
+        // entry without a separator can neither run past its end nor underflow the value length.
+        size_t key_len = 0;
+        while (key_len < field_len && rb.position()[key_len] != ':') {
+            ++key_len;
+        }
+        if (key_len == field_len) {
+            return Status::InvalidArgument("Struct field does not contain ':' character");
+        }
+        ReadBuffer field(rb.position() + key_len + 1, field_len - key_len - 1);
+
+        // Strip the surrounding quotes only when both the first and the last character of the
+        // value are '"'. The last character is at end() - 1; end() itself is the delimiter.
+        if (field.count() >= 2 && *field.position() == '"' && *(field.end() - 1) == '"') {
+            ReadBuffer field_has_quote(field.position() + 1, field.count() - 2);
+            field_rbs.push_back(field_has_quote);
+        } else {
+            field_rbs.push_back(field);
+        }
+
+        // Skip the entry and the delimiter that terminated it.
+        rb.position() += field_len + 1;
+    }
+
+    // field_rbs is indexed by element position below, so the counts have to agree.
+    if (field_rbs.size() != elems.size()) {
+        return Status::InvalidArgument("Struct field number {} does not match expected {}",
+                                       field_rbs.size(), elems.size());
+    }
+
+    for (size_t idx = 0; idx < elems.size(); idx++) {
+        Status st = elems[idx]->from_string(field_rbs[idx], &struct_column->get_column(idx));
+        if (!st.ok()) {
+            // NOTE(review): sub-columns before idx may already hold a value for this row -
+            // confirm how the caller recovers from a partially parsed struct.
+            return st;
+        }
+    }
+
+    return Status::OK();
+}
+
+// Formats row `row_num` of a struct column as "<f0, f1, ...>": each field is rendered by the
+// to_string of the element type at the same index and the fields are joined with ", ".
+std::string DataTypeStruct::to_string(const IColumn& column, size_t row_num) const {
+    // Expand a possibly-const column before the cast to ColumnStruct.
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& fields = assert_cast<const ColumnStruct&>(*full_column.get());
+
+    std::stringstream rendered;
+    rendered << "<";
+    for (size_t pos = 0; pos < elems.size(); ++pos) {
+        // Only fields after the first one are preceded by the separator.
+        rendered << (pos == 0 ? "" : ", ")
+                 << elems[pos]->to_string(fields.get_column(pos), row_num);
+    }
+    rendered << ">";
+    return rendered.str();
+}
+
+// Writes row `row_num` of a struct column into `ostr` as "<f0, f1, ...>", letting the element
+// type at each index write its own field and separating the fields with ", ".
+void DataTypeStruct::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const {
+    // Expand a possibly-const column before the cast to ColumnStruct.
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& fields = assert_cast<const ColumnStruct&>(*full_column.get());
+
+    ostr.write("<", 1);
+    size_t pos = 0;
+    for (const auto& elem_type : elems) {
+        // Only fields after the first one are preceded by the separator.
+        if (pos > 0) {
+            ostr.write(", ", 2);
+        }
+        elem_type->to_string(fields.get_column(pos), row_num, ostr);
+        ++pos;
+    }
+    ostr.write(">", 1);
+}
+
 static inline IColumn& extract_element_column(IColumn& column, size_t idx) {
     return assert_cast<ColumnStruct&>(column).get_column(idx);
 }
diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h
index 88255de38c..47cfaab1a3 100644
--- a/be/src/vec/data_types/data_type_struct.h
+++ b/be/src/vec/data_types/data_type_struct.h
@@ -96,6 +96,9 @@ public:
     const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override;
     void to_pb_column_meta(PColumnMeta* col_meta) const override;
 
+    Status from_string(ReadBuffer& rb, IColumn* column) const override;
+    std::string to_string(const IColumn& column, size_t row_num) const override;
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
     // bool is_parametric() const { return true; }
     // SerializationPtr do_get_default_serialization() const override;
     // SerializationPtr get_serialization(const SerializationInfo& info) const override;
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index d300c09a76..ec4608c115 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1541,6 +1541,16 @@ private:
             return &ConvertImplGenericToJsonb::execute;
         }
     }
+    // Selects the cast wrapper for a struct target by switching on the type id of `from_type`; `to_type` is not read here.
+    // TODO: need handle another type to cast struct
+    WrapperType create_struct_wrapper(const DataTypePtr& from_type,
+                                      const DataTypeStruct& to_type) const {
+        switch (from_type->get_type_id()) {
+        case TypeIndex::String: // no body of its own: shares the default branch below
+        default: // NOTE(review): every other source type also gets the from-string wrapper - confirm
+            return &ConvertImplGenericFromString<ColumnString>::execute;
+        }
+    }
 
     WrapperType prepare_unpack_dictionaries(FunctionContext* context, const DataTypePtr& from_type,
                                             const DataTypePtr& to_type) const {
@@ -1712,6 +1722,8 @@ private:
         case TypeIndex::Array:
             return create_array_wrapper(context, from_type,
                                         static_cast<const DataTypeArray&>(*to_type));
+        case TypeIndex::Struct:
+            return create_struct_wrapper(from_type, static_cast<const DataTypeStruct&>(*to_type));
         default:
             break;
         }
@@ -1757,7 +1769,6 @@ protected:
         // TODO(xy): support return struct type for factory
         auto type = DataTypeFactory::instance().get(type_col->get_value<String>());
         DCHECK(type != nullptr);
-
         bool need_to_be_nullable = false;
         // 1. from_type is nullable
         need_to_be_nullable |= arguments[0].type->is_nullable();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
index ab5bc992f8..77975c1eb9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
@@ -318,9 +318,17 @@ public class CastExpr extends Expr {
                     type, Function.NullableMode.ALWAYS_NULLABLE,
                     Lists.newArrayList(Type.VARCHAR), false,
                     "doris::CastFunctions::cast_to_array_val", null, null, true);
+        } else if (type.isStructType()) {
+            fn = ScalarFunction.createBuiltin(getFnName(Type.STRUCT),
+                    type, Function.NullableMode.ALWAYS_NULLABLE,
+                    Lists.newArrayList(Type.VARCHAR), false,
+                    "doris::CastFunctions::cast_to_struct_val", null, null, true);
         }
 
         if (fn == null) {
+            if (type.isStructType() && childType.isStringType()) {
+                return;
+            }
             if (childType.isNull() && Type.canCastTo(childType, type)) {
                 return;
             } else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
index 5690c115c1..c89c659207 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
@@ -129,7 +129,7 @@ public class TypeDef implements ParseNode {
         // check whether the sub-type is supported
         if (!parent.supportSubType(child)) {
             throw new AnalysisException(
-                    parent.getPrimitiveType() + "unsupported sub-type: " + child.toSql());
+                    parent.getPrimitiveType() + " unsupported sub-type: " + child.toSql());
         }
 
         if (child.getPrimitiveType().isStringType() && !child.isLengthSet()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index 9efe901907..7b0de9b7e4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -59,6 +59,7 @@ public class Column implements Writable, GsonPostProcessable {
     public static final String DELETE_SIGN = "__DORIS_DELETE_SIGN__";
     public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__";
     private static final String COLUMN_ARRAY_CHILDREN = "item";
+    private static final String COLUMN_STRUCT_CHILDREN = "field";
     public static final int COLUMN_UNIQUE_ID_INIT_VALUE = -1;
 
     @SerializedName(value = "name")
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index 45d88897a9..587266926c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -176,8 +176,20 @@ public abstract class Type {
         arraySubTypes.add(STRING);
 
         structSubTypes = Lists.newArrayList();
-        structSubTypes.add(INT);
+        structSubTypes.addAll(numericTypes);
+        structSubTypes.add(BOOLEAN);
+        structSubTypes.add(VARCHAR);
         structSubTypes.add(STRING);
+        structSubTypes.add(CHAR);
+        structSubTypes.add(DATE);
+        structSubTypes.add(DATETIME);
+        structSubTypes.add(DATEV2);
+        structSubTypes.add(DATETIMEV2);
+        structSubTypes.add(TIME);
+        structSubTypes.add(TIMEV2);
+        structSubTypes.add(DECIMAL32);
+        structSubTypes.add(DECIMAL64);
+        structSubTypes.add(DECIMAL128);
     }
 
     public static ArrayList<ScalarType> getIntegerTypes() {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org