You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2023/01/05 21:06:35 UTC

[GitHub] [doris] github-actions[bot] commented on a diff in pull request #15665: [WIP](struct-type) support struct-type in vectorize engine

github-actions[bot] commented on code in PR #15665:
URL: https://github.com/apache/doris/pull/15665#discussion_r1062895384


##########
be/src/olap/rowset/segment_v2/column_writer.cpp:
##########
@@ -542,6 +613,108 @@ Status ScalarColumnWriter::finish_current_page() {
 
 ////////////////////////////////////////////////////////////////////////////////
 
+StructColumnWriter::StructColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<Field> field,
+                                       ScalarColumnWriter* null_writer,
+                                       std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers)
+       : ColumnWriter(std::move(field), opts.meta->is_nullable()),
+         _opts(opts) {
+    for (auto& sub_column_writer : sub_column_writers) {
+        _sub_column_writers.push_back(std::move(sub_column_writer));
+    }
+    _num_sub_column_writers = _sub_column_writers.size();
+    DCHECK(_num_sub_column_writers >= 1);
+    if (is_nullable()) {
+        _null_writer.reset(null_writer);
+    }
+}
+
+Status StructColumnWriter::init() {
+    for (auto& column_writer : _sub_column_writers) {
+        RETURN_IF_ERROR(column_writer->init());
+    }
+    if (is_nullable()) {
+        RETURN_IF_ERROR(_null_writer->init());
+    }
+    return Status::OK();
+}
+
+Status StructColumnWriter::write_inverted_index() {
+    if (_opts.inverted_index) {
+        for (auto& column_writer : _sub_column_writers) {
+            RETURN_IF_ERROR(column_writer->write_inverted_index());
+        }
+    }
+    return Status::OK();
+}
+
+Status StructColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t** ptr,
+                                     size_t num_rows) {
+    RETURN_IF_ERROR(append_data(ptr, num_rows));
+    RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
+    return Status::OK();
+}
+
+Status StructColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
+    auto data_cursor = reinterpret_cast<const void**>(ptr);
+    auto null_map_cursor = data_cursor + _num_sub_column_writers;
+    for (auto& column_writer : _sub_column_writers) {
+        RETURN_IF_ERROR(column_writer->append(
+                reinterpret_cast<const uint8_t*>(*null_map_cursor), *data_cursor, num_rows));
+        data_cursor++;
+        null_map_cursor++;
+    }
+    return Status::OK();
+}
+
+uint64_t StructColumnWriter::estimate_buffer_size() {
+    uint64_t size = 0;

Review Comment:
   warning: variable 'size' set but not used [clang-diagnostic-unused-but-set-variable]
   ```cpp
       uint64_t size = 0;
                ^
   ```
   



##########
be/src/runtime/struct_value.h:
##########
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <type_traits>
+
+#include "runtime/primitive_type.h"
+
+namespace doris_udf {
+    class FunctionContext;
+    struct AnyVal;
+} // namespace doris_udf
+
+namespace doris {
+
+using doris_udf::FunctionContext;
+using doris_udf::AnyVal;

Review Comment:
   warning: using decl 'AnyVal' is unused [misc-unused-using-decls]
   ```cpp
   using doris_udf::AnyVal;
                    ^
   ```
   **be/src/runtime/struct_value.h:31:** remove the using
   ```cpp
   using doris_udf::AnyVal;
                    ^
   ```
   



##########
be/src/olap/types.h:
##########
@@ -431,6 +432,245 @@
     const size_t _item_size;
 };
 
+class StructTypeInfo: public TypeInfo {
+public:
+    explicit StructTypeInfo(std::vector<TypeInfoPtr>& type_infos) {
+        for (TypeInfoPtr& type_info : type_infos) {
+            _type_infos.push_back(std::move(type_info));
+        }
+    }
+    ~StructTypeInfo() override = default;
+
+    bool equal(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        if (l_value->size() != r_value->size()) {
+            return false;
+        }
+        uint32_t size = l_value->size();
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            for (size_t i = 0; i < size; ++i) {
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        } else {
+            for (size_t i = 0; i < size; ++i) {
+                if (l_value->is_null_at(i)) {
+                    if (r_value->is_null_at(i)) { // both are null
+                        continue;
+                    } else { // left is null & right is not null
+                        return false;
+                    }
+                } else if (r_value->is_null_at(i)) { // left is not null & right is null
+                    return false;
+                }
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    int cmp(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        uint32_t l_size = l_value->size();
+        uint32_t r_size = r_value->size();
+        size_t cur = 0;
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            while (cur < l_size && cur < r_size) {
+                int result = _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                if (result != 0) {
+                    return result;
+                }
+                ++cur;
+            }
+        } else {
+            while (cur < l_size && cur < r_size) {
+                if (l_value->is_null_at(cur)) {
+                    if (!r_value->is_null_at(cur)) { // left is null & right is not null
+                        return -1;
+                    }
+                } else if (r_value->is_null_at(cur)) { // left is not null & right is null
+                    return 1;
+                } else { // both are not null
+                    int result =
+                            _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                    if (result != 0) {
+                        return result;
+                    }
+                }
+                ++cur;
+            }
+        }
+
+        if (l_size < r_size) {
+            return -1;
+        } else if (l_size > r_size) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    void shallow_copy(void* dest, const void* src) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+        dest_value->shallow_copy(src_value);
+    }
+
+    void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+
+        if (src_value->size() == 0) {
+            new (dest_value) StructValue(src_value->size());
+            return;
+        }
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+
+        size_t allocate_size = src_value->size() * sizeof(*src_value->values());
+        // allocate memory for children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            allocate_size += _type_infos[i]->size();
+        }
+
+        dest_value->set_values((void**)mem_pool->allocate(allocate_size));
+        auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        ptr += dest_value->size() * sizeof(*dest_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;
+            dest_value->set_child_value(ptr, i);
+            ptr += _type_infos[i]->size();
+        }
+
+        // copy children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (src_value->is_null_at(i)) {
                   continue;
               }
   ```
   



##########
be/src/olap/types.h:
##########
@@ -431,6 +432,245 @@
     const size_t _item_size;
 };
 
+class StructTypeInfo: public TypeInfo {
+public:
+    explicit StructTypeInfo(std::vector<TypeInfoPtr>& type_infos) {
+        for (TypeInfoPtr& type_info : type_infos) {
+            _type_infos.push_back(std::move(type_info));
+        }
+    }
+    ~StructTypeInfo() override = default;
+
+    bool equal(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        if (l_value->size() != r_value->size()) {
+            return false;
+        }
+        uint32_t size = l_value->size();
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            for (size_t i = 0; i < size; ++i) {
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        } else {
+            for (size_t i = 0; i < size; ++i) {
+                if (l_value->is_null_at(i)) {
+                    if (r_value->is_null_at(i)) { // both are null
+                        continue;
+                    } else { // left is null & right is not null
+                        return false;
+                    }
+                } else if (r_value->is_null_at(i)) { // left is not null & right is null
+                    return false;
+                }
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    int cmp(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        uint32_t l_size = l_value->size();
+        uint32_t r_size = r_value->size();
+        size_t cur = 0;
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            while (cur < l_size && cur < r_size) {
+                int result = _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                if (result != 0) {
+                    return result;
+                }
+                ++cur;
+            }
+        } else {
+            while (cur < l_size && cur < r_size) {
+                if (l_value->is_null_at(cur)) {
+                    if (!r_value->is_null_at(cur)) { // left is null & right is not null
+                        return -1;
+                    }
+                } else if (r_value->is_null_at(cur)) { // left is not null & right is null
+                    return 1;
+                } else { // both are not null
+                    int result =
+                            _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                    if (result != 0) {
+                        return result;
+                    }
+                }
+                ++cur;
+            }
+        }
+
+        if (l_size < r_size) {
+            return -1;
+        } else if (l_size > r_size) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    void shallow_copy(void* dest, const void* src) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+        dest_value->shallow_copy(src_value);
+    }
+
+    void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+
+        if (src_value->size() == 0) {
+            new (dest_value) StructValue(src_value->size());
+            return;
+        }
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+
+        size_t allocate_size = src_value->size() * sizeof(*src_value->values());
+        // allocate memory for children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            allocate_size += _type_infos[i]->size();
+        }
+
+        dest_value->set_values((void**)mem_pool->allocate(allocate_size));
+        auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        ptr += dest_value->size() * sizeof(*dest_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;
+            dest_value->set_child_value(ptr, i);
+            ptr += _type_infos[i]->size();
+        }
+
+        // copy children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            _type_infos[i]->deep_copy(
+                    dest_value->mutable_child_value(i), src_value->child_value(i), mem_pool);
+        }
+    }
+
+    void copy_object(void* dest, const void* src, MemPool* mem_pool) const override {
+        deep_copy(dest, src, mem_pool);
+    }
+
+    void direct_copy(void* dest, const void* src) const override {
+        auto dest_value = static_cast<StructValue*>(dest);
+        auto base = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        direct_copy(&base, dest, src);
+    }
+
+    void direct_copy(uint8_t** base, void* dest, const void* src) const {
+        auto dest_value = static_cast<StructValue*>(dest);
+        auto src_value = static_cast<const StructValue*>(src);
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+        *base += src_value->size() * sizeof(*src_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;
+            dest_value->set_child_value(*base, i);
+            *base += _type_infos[i]->size();
+        }
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (dest_value->is_null_at(i)) {
+                continue;
+            }
+            auto dest_address = dest_value->mutable_child_value(i);
+            auto src_address = src_value->child_value(i);
+            if (_type_infos[i]->type() == OLAP_FIELD_TYPE_STRUCT) {
+                dynamic_cast<const StructTypeInfo*>(_type_infos[i].get())
+                ->direct_copy(base, dest_address, src_address);
+            } else if (_type_infos[i]->type() == OLAP_FIELD_TYPE_ARRAY) {
+                dynamic_cast<const ArrayTypeInfo*>(_type_infos[i].get())
+                ->direct_copy(base, dest_address, src_address);
+            } else {
+                if (is_olap_string_type(_type_infos[i]->type())) {
+                    auto dest_slice = reinterpret_cast<Slice*>(dest_address);
+                    auto src_slice = reinterpret_cast<const Slice*>(src_address);
+                    dest_slice->data = reinterpret_cast<char*>(*base);
+                    dest_slice->size = src_slice->size;
+                    *base += src_slice->size;
+                }
+                _type_infos[i]->direct_copy(dest_address, src_address);
+            }
+        }
+    }
+
+    void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); }
+
+    Status convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool,
+                        size_t variable_len = 0) const override {
+        return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
+    }
+
+    Status from_string(void* buf, const std::string& scan_key, const int precision = 0,
+                       const int scale = 0) const override {
+        return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
+    }
+
+    std::string to_string(const void* src) const override {
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+        std::string result = "{";
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            std::string field_value = _type_infos[i]->to_string(src_value->child_value(i));
+            result += field_value;
+            if (i < src_value->size() - 1) {
+                result += ", ";
+            }
+        }
+        result += "}";
+        return result;
+    }
+
+    void set_to_max(void* buf) const override {
+        DCHECK(false) << "set_to_max of list is not implemented.";
+    }
+
+    void set_to_min(void* buf) const override {
+        DCHECK(false) << "set_to_min of list is not implemented.";
+    }
+
+    uint32_t hash_code(const void* data, uint32_t seed) const override {
+        auto struct_value = reinterpret_cast<const StructValue*>(data);
+        auto size = struct_value->size();
+        uint32_t result = HashUtil::hash(&size, sizeof(size), seed);
+        for (size_t i = 0; i < size; ++i) {
+            if (struct_value->is_null_at(i)) {
+                result = seed * result;
+            } else {
+                result = seed * result + _type_infos[i]->hash_code(
+                        struct_value->values()[i], seed);
+            }
+        }
+        return result;
+    }
+
+    const size_t size() const override { return sizeof(StructValue); }
+
+    FieldType type() const override { return OLAP_FIELD_TYPE_STRUCT; }
+
+    inline const std::vector<TypeInfoPtr>* type_infos() const { return &:_type_infos; }

Review Comment:
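   warning: expected expression [clang-diagnostic-error] — the stray `:` after `&` is not valid C++; the accessor was presumably meant to return `&_type_infos`.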
   ```cpp
       inline const std::vector<TypeInfoPtr>* type_infos() const { return &:_type_infos; }
                                                                           ^
   ```
   



##########
be/src/olap/types.h:
##########
@@ -431,6 +432,245 @@
     const size_t _item_size;
 };
 
+class StructTypeInfo: public TypeInfo {
+public:
+    explicit StructTypeInfo(std::vector<TypeInfoPtr>& type_infos) {
+        for (TypeInfoPtr& type_info : type_infos) {
+            _type_infos.push_back(std::move(type_info));
+        }
+    }
+    ~StructTypeInfo() override = default;
+
+    bool equal(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        if (l_value->size() != r_value->size()) {
+            return false;
+        }
+        uint32_t size = l_value->size();
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            for (size_t i = 0; i < size; ++i) {
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        } else {
+            for (size_t i = 0; i < size; ++i) {
+                if (l_value->is_null_at(i)) {
+                    if (r_value->is_null_at(i)) { // both are null
+                        continue;
+                    } else { // left is null & right is not null
+                        return false;
+                    }
+                } else if (r_value->is_null_at(i)) { // left is not null & right is null
+                    return false;
+                }
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    int cmp(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        uint32_t l_size = l_value->size();
+        uint32_t r_size = r_value->size();
+        size_t cur = 0;
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            while (cur < l_size && cur < r_size) {
+                int result = _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                if (result != 0) {
+                    return result;
+                }
+                ++cur;
+            }
+        } else {
+            while (cur < l_size && cur < r_size) {
+                if (l_value->is_null_at(cur)) {
+                    if (!r_value->is_null_at(cur)) { // left is null & right is not null
+                        return -1;
+                    }
+                } else if (r_value->is_null_at(cur)) { // left is not null & right is null
+                    return 1;
+                } else { // both are not null
+                    int result =
+                            _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                    if (result != 0) {
+                        return result;
+                    }
+                }
+                ++cur;
+            }
+        }
+
+        if (l_size < r_size) {
+            return -1;
+        } else if (l_size > r_size) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    void shallow_copy(void* dest, const void* src) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+        dest_value->shallow_copy(src_value);
+    }
+
+    void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+
+        if (src_value->size() == 0) {
+            new (dest_value) StructValue(src_value->size());
+            return;
+        }
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+
+        size_t allocate_size = src_value->size() * sizeof(*src_value->values());
+        // allocate memory for children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            allocate_size += _type_infos[i]->size();
+        }
+
+        dest_value->set_values((void**)mem_pool->allocate(allocate_size));
+        auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        ptr += dest_value->size() * sizeof(*dest_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (src_value->is_null_at(i)) {
                   continue;
               }
   ```
   



##########
be/src/olap/types.h:
##########
@@ -431,6 +432,245 @@
     const size_t _item_size;
 };
 
+class StructTypeInfo: public TypeInfo {
+public:
+    explicit StructTypeInfo(std::vector<TypeInfoPtr>& type_infos) {
+        for (TypeInfoPtr& type_info : type_infos) {
+            _type_infos.push_back(std::move(type_info));
+        }
+    }
+    ~StructTypeInfo() override = default;
+
+    bool equal(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        if (l_value->size() != r_value->size()) {
+            return false;
+        }
+        uint32_t size = l_value->size();
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            for (size_t i = 0; i < size; ++i) {
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        } else {
+            for (size_t i = 0; i < size; ++i) {
+                if (l_value->is_null_at(i)) {
+                    if (r_value->is_null_at(i)) { // both are null
+                        continue;
+                    } else { // left is null & right is not null
+                        return false;
+                    }
+                } else if (r_value->is_null_at(i)) { // left is not null & right is null
+                    return false;
+                }
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    int cmp(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        uint32_t l_size = l_value->size();
+        uint32_t r_size = r_value->size();
+        size_t cur = 0;
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            while (cur < l_size && cur < r_size) {
+                int result = _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                if (result != 0) {
+                    return result;
+                }
+                ++cur;
+            }
+        } else {
+            while (cur < l_size && cur < r_size) {
+                if (l_value->is_null_at(cur)) {
+                    if (!r_value->is_null_at(cur)) { // left is null & right is not null
+                        return -1;
+                    }
+                } else if (r_value->is_null_at(cur)) { // left is not null & right is null
+                    return 1;
+                } else { // both are not null
+                    int result =
+                            _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                    if (result != 0) {
+                        return result;
+                    }
+                }
+                ++cur;
+            }
+        }
+
+        if (l_size < r_size) {
+            return -1;
+        } else if (l_size > r_size) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    void shallow_copy(void* dest, const void* src) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+        dest_value->shallow_copy(src_value);
+    }
+
+    void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+
+        if (src_value->size() == 0) {
+            new (dest_value) StructValue(src_value->size());
+            return;
+        }
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+
+        size_t allocate_size = src_value->size() * sizeof(*src_value->values());
+        // allocate memory for children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            allocate_size += _type_infos[i]->size();
+        }
+
+        dest_value->set_values((void**)mem_pool->allocate(allocate_size));
+        auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        ptr += dest_value->size() * sizeof(*dest_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;
+            dest_value->set_child_value(ptr, i);
+            ptr += _type_infos[i]->size();
+        }
+
+        // copy children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            _type_infos[i]->deep_copy(
+                    dest_value->mutable_child_value(i), src_value->child_value(i), mem_pool);
+        }
+    }
+
+    void copy_object(void* dest, const void* src, MemPool* mem_pool) const override {
+        deep_copy(dest, src, mem_pool);
+    }
+
+    void direct_copy(void* dest, const void* src) const override {
+        auto dest_value = static_cast<StructValue*>(dest);
+        auto base = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        direct_copy(&base, dest, src);
+    }
+
+    void direct_copy(uint8_t** base, void* dest, const void* src) const {
+        auto dest_value = static_cast<StructValue*>(dest);
+        auto src_value = static_cast<const StructValue*>(src);
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+        *base += src_value->size() * sizeof(*src_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (src_value->is_null_at(i)) {
                   continue;
               }
   ```
   



##########
be/src/runtime/struct_value.h:
##########
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <type_traits>
+
+#include "runtime/primitive_type.h"
+
+namespace doris_udf {
+    class FunctionContext;
+    struct AnyVal;
+} // namespace doris_udf
+
+namespace doris {
+
+using doris_udf::FunctionContext;

Review Comment:
   warning: using decl 'FunctionContext' is unused [misc-unused-using-decls]
   ```cpp
   using doris_udf::FunctionContext;
                    ^
   ```
   **be/src/runtime/struct_value.h:30:** remove the using
   ```cpp
   using doris_udf::FunctionContext;
                    ^
   ```
   



##########
be/src/olap/types.h:
##########
@@ -431,6 +432,245 @@ class ArrayTypeInfo : public TypeInfo {
     const size_t _item_size;
 };
 
+class StructTypeInfo: public TypeInfo {
+public:
+    // Builds a struct type descriptor from one type info per field.
+    // Every element is moved out of `type_infos`: the caller's vector keeps
+    // its size but holds moved-from entries once the constructor returns.
+    explicit StructTypeInfo(std::vector<TypeInfoPtr>& type_infos) {
+        // Size the destination once instead of regrowing on each push_back,
+        // matching what the other type-info producers in this patch do.
+        _type_infos.reserve(type_infos.size());
+        for (TypeInfoPtr& type_info : type_infos) {
+            _type_infos.push_back(std::move(type_info));
+        }
+    }
+    ~StructTypeInfo() override = default;
+
+    // Field-wise equality of two StructValue objects.
+    // Returns false when the field counts differ. Otherwise each field pair
+    // must be either both null or equal according to that field's type info.
+    // Null flags are only consulted when at least one side reports has_null().
+    bool equal(const void* left, const void* right) const override {
+        const auto* lhs = reinterpret_cast<const StructValue*>(left);
+        const auto* rhs = reinterpret_cast<const StructValue*>(right);
+        if (lhs->size() != rhs->size()) {
+            return false;
+        }
+        const uint32_t field_count = lhs->size();
+
+        // Same short-circuit as testing "neither side has a null".
+        const bool check_nulls = lhs->has_null() || rhs->has_null();
+        for (size_t idx = 0; idx < field_count; ++idx) {
+            if (check_nulls) {
+                const bool lhs_null = lhs->is_null_at(idx);
+                const bool rhs_null = rhs->is_null_at(idx);
+                if (lhs_null != rhs_null) { // exactly one side is null
+                    return false;
+                }
+                if (lhs_null) { // both are null, nothing to compare
+                    continue;
+                }
+            }
+            if (!_type_infos[idx]->equal(lhs->child_value(idx), rhs->child_value(idx))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Three-way, field-by-field comparison of two StructValue objects.
+    // Fields are compared in order over the common prefix of both structs:
+    //   - a null field orders before a non-null field,
+    //   - two null fields are treated as equal,
+    //   - two non-null fields are ordered by that field's type info.
+    // If the common prefix ties, the struct with fewer fields orders first.
+    // Null flags are only consulted when at least one side reports has_null().
+    int cmp(const void* left, const void* right) const override {
+        const auto* lhs = reinterpret_cast<const StructValue*>(left);
+        const auto* rhs = reinterpret_cast<const StructValue*>(right);
+        const uint32_t lhs_count = lhs->size();
+        const uint32_t rhs_count = rhs->size();
+        const size_t common_count = lhs_count < rhs_count ? lhs_count : rhs_count;
+
+        const bool check_nulls = lhs->has_null() || rhs->has_null();
+        for (size_t idx = 0; idx < common_count; ++idx) {
+            if (check_nulls) {
+                const bool lhs_null = lhs->is_null_at(idx);
+                const bool rhs_null = rhs->is_null_at(idx);
+                if (lhs_null != rhs_null) { // exactly one side is null
+                    return lhs_null ? -1 : 1;
+                }
+                if (lhs_null) { // both are null
+                    continue;
+                }
+            }
+            const int order =
+                    _type_infos[idx]->cmp(lhs->child_value(idx), rhs->child_value(idx));
+            if (order != 0) {
+                return order;
+            }
+        }
+
+        if (lhs_count == rhs_count) {
+            return 0;
+        }
+        return lhs_count < rhs_count ? -1 : 1;
+    }
+
+    // Treats both pointers as StructValue objects and forwards to
+    // StructValue::shallow_copy on the destination.
+    void shallow_copy(void* dest, const void* src) const override {
+        reinterpret_cast<StructValue*>(dest)->shallow_copy(
+                reinterpret_cast<const StructValue*>(src));
+    }
+
+    void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+
+        if (src_value->size() == 0) {
+            new (dest_value) StructValue(src_value->size());
+            return;
+        }
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+
+        size_t allocate_size = src_value->size() * sizeof(*src_value->values());
+        // allocate memory for children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (src_value->is_null_at(i)) {
                   continue;
               }
   ```
   



##########
be/src/olap/types.cpp:
##########
@@ -166,10 +166,34 @@ const TypeInfo* get_array_type_info(FieldType leaf_type, int32_t iterations) {
     return array_type_Info_arr[leaf_type][iterations];
 }
 
+// Produce a struct type info
+// TODO(xy): This factory method needs to be refactored.
+const TypeInfo* get_struct_type_info(std::vector<FieldType> field_types) {
+    std::vector<TypeInfoPtr> type_infos;
+    type_infos.reserve(field_types.size());
+    for(FieldType& type : field_types) {
+        if (is_scalar_type(type)) {
+            type_infos.push_back(std::move(

Review Comment:
   warning: moving a temporary object prevents copy elision [clang-diagnostic-pessimizing-move]
   ```cpp
               type_infos.push_back(std::move(
                                    ^
   ```
   **be/src/olap/types.cpp:175:** remove std::move call here
   ```cpp
               type_infos.push_back(std::move(
                                    ^
   ```
   



##########
be/src/olap/types.cpp:
##########
@@ -219,9 +250,21 @@
     if (is_scalar_type(type_info->type())) {
         return create_static_type_info_ptr(type_info);
     } else {
-        const auto array_type_info = dynamic_cast<const ArrayTypeInfo*>(type_info);
-        return create_dynamic_type_info_ptr(
-                new ArrayTypeInfo(clone_type_info(array_type_info->item_type_info())));
+        auto type = type_info->type();
+        if (type == OLAP_FIELD_TYPE_STRUCT) {
+            const auto struct_type_info = dynamic_cast<const StructTypeInfo*>(type_info);
+            std::vector<TypeInfoPtr> clone_type_infos;
+            const std::vector<TypeInfoPtr>* sub_type_infos = struct_type_info->type_infos();
+            clone_type_infos.reserve(sub_type_infos->size());
+            for (size_t i = 0; i < sub_type_infos->size(); i++) {
+                clone_type_infos.push_back(std::move(clone_type_info((*sub_type_infos)[i].get())));

Review Comment:
   warning: moving a temporary object prevents copy elision [clang-diagnostic-pessimizing-move]
   ```cpp
                   clone_type_infos.push_back(std::move(clone_type_info((*sub_type_infos)[i].get())));
                                              ^
   ```
   **be/src/olap/types.cpp:259:** remove std::move call here
   ```cpp
                   clone_type_infos.push_back(std::move(clone_type_info((*sub_type_infos)[i].get())));
                                              ^
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org