You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2022/12/29 09:57:17 UTC

[doris] 04/04: [feature](struct-type) add the class ColumnStruct and class DataTypeStruct implement (#14545)

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch struct-type
in repository https://gitbox.apache.org/repos/asf/doris.git

commit bebab27bc9c1a1a93193359fd929309c81c2a69b
Author: carlvinhust2012 <hu...@126.com>
AuthorDate: Thu Dec 8 09:26:12 2022 +0800

    [feature](struct-type) add the class ColumnStruct and class DataTypeStruct implement (#14545)
    
    Co-authored-by: hucheng01 <hu...@baidu.com>
---
 be/src/vec/CMakeLists.txt                  |   2 +
 be/src/vec/columns/column_struct.cpp       | 618 +++++++++++++++++++++++++++++
 be/src/vec/columns/column_struct.h         | 232 +++++++++++
 be/src/vec/core/types.h                    |   3 +
 be/src/vec/data_types/data_type_struct.cpp | 361 +++++++++++++++++
 be/src/vec/data_types/data_type_struct.h   | 115 ++++++
 6 files changed, 1331 insertions(+)

diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 4426ba63a7..27d163e8ca 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -49,6 +49,7 @@ set(VEC_FILES
   aggregate_functions/aggregate_function_histogram.cpp
   columns/column.cpp
   columns/column_array.cpp
+  columns/column_struct.cpp
   columns/column_const.cpp
   columns/column_decimal.cpp
   columns/column_nullable.cpp
@@ -72,6 +73,7 @@ set(VEC_FILES
   core/materialize_block.cpp
   data_types/data_type.cpp
   data_types/data_type_array.cpp
+  data_types/data_type_struct.cpp
   data_types/data_type_bitmap.cpp
   data_types/data_type_factory.cpp
   data_types/data_type_fixed_length_object.cpp
diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp
new file mode 100644
index 0000000000..05d5df0fd4
--- /dev/null
+++ b/be/src/vec/columns/column_struct.cpp
@@ -0,0 +1,618 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnTuple.cpp
+// and modified by Doris
+
+#include "vec/columns/column_struct.h"
+
+namespace doris::vectorized {
+
+// Error codes referenced by ColumnStruct. They are only declared here
+// (extern); their definitions are expected to live in another translation unit.
+namespace ErrorCodes {
+extern const int ILLEGAL_COLUMN;
+extern const int NOT_IMPLEMENTED;
+extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE;
+extern const int LOGICAL_ERROR;
+} // namespace ErrorCodes
+
+// Builds the display name "Struct(<child0>, <child1>, ...)" from the names of
+// the element columns, in order.
+std::string ColumnStruct::get_name() const {
+    std::stringstream name;
+    name << "Struct(";
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        // Every element except the first is preceded by a separator.
+        if (idx != 0) {
+            name << ", ";
+        }
+        name << columns[idx]->get_name();
+    }
+    name << ")";
+    return name.str();
+}
+
+// Takes ownership of the given mutable element columns.
+// Throws ILLEGAL_COLUMN if any element is a ColumnConst.
+ColumnStruct::ColumnStruct(MutableColumns&& mutable_columns) {
+    const size_t count = mutable_columns.size();
+    columns.reserve(count);
+    for (size_t idx = 0; idx < count; ++idx) {
+        auto& element = mutable_columns[idx];
+        if (is_column_const(*element)) {
+            throw Exception {"ColumnStruct cannot have ColumnConst as its element",
+                             ErrorCodes::ILLEGAL_COLUMN};
+        }
+        columns.push_back(std::move(element));
+    }
+}
+
+// Takes ownership of the given immutable element columns.
+// Throws ILLEGAL_COLUMN if any element is a ColumnConst.
+//
+// The parameter is named `columns`, which hides the data member of the same
+// name inside this body. The member must therefore be addressed through
+// `this->`; otherwise the loop would append to the very vector it iterates
+// over and the member would stay empty.
+ColumnStruct::ColumnStruct(Columns&& columns) {
+    this->columns.reserve(columns.size());
+    for (auto& column : columns) {
+        if (is_column_const(*column)) {
+            throw Exception {"ColumnStruct cannot have ColumnConst as its element",
+                             ErrorCodes::ILLEGAL_COLUMN};
+        }
+        this->columns.push_back(std::move(column));
+    }
+}
+
+// Takes ownership of already-wrapped element columns.
+// Throws ILLEGAL_COLUMN if any element is a ColumnConst.
+ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) {
+    const size_t count = tuple_columns.size();
+    columns.reserve(count);
+    for (size_t idx = 0; idx < count; ++idx) {
+        auto& element = tuple_columns[idx];
+        if (is_column_const(*element)) {
+            throw Exception {"ColumnStruct cannot have ColumnConst as its element",
+                             ErrorCodes::ILLEGAL_COLUMN};
+        }
+        columns.push_back(std::move(element));
+    }
+}
+
+// Creates a struct column that shares the given immutable element columns.
+// Throws ILLEGAL_COLUMN if any element is a ColumnConst.
+//
+// The object is first built empty through the variadic create() (an rvalue
+// MutableColumns cannot bind to any of the lvalue-reference overloads) and the
+// elements are assigned afterwards. Calling create(columns) here would select
+// this very overload again and recurse without end.
+ColumnStruct::Ptr ColumnStruct::create(Columns& columns) {
+    for (const auto& column : columns) {
+        if (is_column_const(*column)) {
+            throw Exception {"ColumnStruct cannot have ColumnConst as its element",
+                             ErrorCodes::ILLEGAL_COLUMN};
+        }
+    }
+    auto column_struct = ColumnStruct::create(MutableColumns());
+    column_struct->columns.assign(columns.begin(), columns.end());
+    return column_struct;
+}
+
+// Creates a struct column that shares the given wrapped element columns.
+// Throws ILLEGAL_COLUMN if any element is a ColumnConst.
+//
+// The object is first built empty through the variadic create() and the
+// elements are copied in afterwards. Calling create(tuple_columns) here would
+// select this very overload again and recurse without end.
+ColumnStruct::Ptr ColumnStruct::create(TupleColumns& tuple_columns) {
+    for (const auto& column : tuple_columns) {
+        if (is_column_const(*column)) {
+            throw Exception {"ColumnStruct cannot have ColumnConst as its element",
+                             ErrorCodes::ILLEGAL_COLUMN};
+        }
+    }
+    auto column_struct = ColumnStruct::create(MutableColumns());
+    column_struct->columns = tuple_columns;
+    return column_struct;
+}
+
+// Returns a new struct column whose elements are empty clones of this
+// column's elements.
+MutableColumnPtr ColumnStruct::clone_empty() const {
+    MutableColumns empty_children;
+    empty_children.reserve(columns.size());
+    for (const auto& child : columns) {
+        empty_children.push_back(child->clone_empty());
+    }
+    return ColumnStruct::create(std::move(empty_children));
+}
+
+// Returns a new struct column whose elements are clones of this column's
+// elements, each resized to new_size.
+MutableColumnPtr ColumnStruct::clone_resized(size_t new_size) const {
+    MutableColumns resized_children;
+    resized_children.reserve(columns.size());
+    for (const auto& child : columns) {
+        resized_children.push_back(child->clone_resized(new_size));
+    }
+    return ColumnStruct::create(std::move(resized_children));
+}
+
+// Returns row n as a Field; delegates to get().
+Field ColumnStruct::operator[](size_t n) const {
+    Field row;
+    get(n, row);
+    return row;
+}
+
+// Stores row n into res as a Tuple holding one Field per element column.
+void ColumnStruct::get(size_t n, Field& res) const {
+    // Reset res to an empty Tuple, then fill it in place.
+    res = Tuple();
+    Tuple& row = res.get<Tuple&>();
+    row.reserve(columns.size());
+
+    for (const auto& child : columns) {
+        row.push_back((*child)[n]);
+    }
+}
+
+// True only if every element column holds its default value at row n.
+bool ColumnStruct::is_default_at(size_t n) const {
+    for (const auto& child : columns) {
+        const bool child_is_default = child->is_default_at(n);
+        if (!child_is_default) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Not supported for struct columns: always throws NOT_IMPLEMENTED.
+StringRef ColumnStruct::get_data_at(size_t) const {
+    const std::string message = "Method get_data_at is not supported for " + get_name();
+    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
+}
+
+// Not supported for struct columns: always throws NOT_IMPLEMENTED.
+void ColumnStruct::insert_data(const char*, size_t) {
+    const std::string message = "Method insert_data is not supported for " + get_name();
+    throw Exception(message, ErrorCodes::NOT_IMPLEMENTED);
+}
+
+// Appends one row taken from a Tuple field: element i of the tuple goes into
+// element column i. Throws if the tuple size differs from the number of
+// element columns.
+void ColumnStruct::insert(const Field& x) {
+    const auto& row = x.get<const Tuple&>();
+    const size_t count = columns.size();
+    if (row.size() != count) {
+        throw Exception("Cannot insert value of different size into tuple",
+                        ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE);
+    }
+
+    size_t idx = 0;
+    for (auto& child : columns) {
+        child->insert(row[idx]);
+        ++idx;
+    }
+}
+
+// Appends row n of another struct column. src_ is cast to ColumnStruct;
+// throws if it has a different number of element columns.
+void ColumnStruct::insert_from(const IColumn& src_, size_t n) {
+    const auto& other = assert_cast<const ColumnStruct&>(src_);
+
+    const size_t count = columns.size();
+    if (other.columns.size() != count) {
+        throw Exception("Cannot insert value of different size into tuple",
+                        ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE);
+    }
+
+    size_t idx = 0;
+    for (auto& child : columns) {
+        child->insert_from(*other.columns[idx], n);
+        ++idx;
+    }
+}
+
+// Appends one default value to every element column.
+void ColumnStruct::insert_default() {
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        columns[idx]->insert_default();
+    }
+}
+
+// Forwards pop_back(n) to every element column.
+void ColumnStruct::pop_back(size_t n) {
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        columns[idx]->pop_back(n);
+    }
+}
+
+// Serializes row n by asking each element column, in order, to serialize its
+// value into the arena, and returns a StringRef covering the accumulated size.
+StringRef ColumnStruct::serialize_value_into_arena(size_t n, Arena& arena,
+                                                   char const*& begin) const {
+    StringRef res(begin, 0);
+    for (const auto& column : columns) {
+        auto value_ref = column->serialize_value_into_arena(n, arena, begin);
+        // Re-derive the start of the whole record from the latest piece:
+        // step back by the bytes accumulated so far. Presumably this relies on
+        // the pieces being laid out contiguously in the arena - TODO confirm.
+        res.data = value_ref.data - res.size;
+        res.size += value_ref.size;
+    }
+
+    return res;
+}
+
+// Lets each element column, in order, deserialize and insert one value
+// starting at pos; each call returns the position the next one starts from.
+// Returns the position after the last element.
+const char* ColumnStruct::deserialize_and_insert_from_arena(const char* pos) {
+    const char* cursor = pos;
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        cursor = columns[idx]->deserialize_and_insert_from_arena(cursor);
+    }
+    return cursor;
+}
+
+// Feeds row n of every element column, in order, into the hash.
+void ColumnStruct::update_hash_with_value(size_t n, SipHash& hash) const {
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        columns[idx]->update_hash_with_value(n, hash);
+    }
+}
+
+// void ColumnStruct::update_weak_hash32(WeakHash32 & hash) const {
+//     auto s = size();
+//     if (hash.get_data().size() != s) {
+//         throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(s) +
+//                         ", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR);
+//     }
+
+//     for (const auto & column : columns) {
+//         column->update_weak_hash32(hash);
+//     }
+// }
+
+// void ColumnStruct::update_hash_fast(SipHash & hash) const {
+//     for (const auto & column : columns) {
+//         column->update_hash_fast(hash);
+//     }
+// }
+
+// const char * ColumnStruct::skip_serialized_in_arena(const char * pos) const {
+//     for (const auto & column : columns) {
+//         pos = column->skip_serialized_in_arena(pos);
+//     }
+//     return pos;
+// }
+
+// void ColumnStruct::expand(const Filter & mask, bool inverted)
+// {
+//     for (auto & column : columns) {
+//         column->expand(mask, inverted);
+//     }
+// }
+
+// ColumnPtr ColumnStruct::index(const IColumn & indexes, size_t limit) const
+// {
+//     const size_t tuple_size = columns.size();
+//     Columns new_columns(tuple_size);
+
+//     for (size_t i = 0; i < tuple_size; ++i) {
+//         new_columns[i] = columns[i]->index(indexes, limit);
+//     }
+
+//     return ColumnStruct::create(new_columns);
+// }
+
+// Appends rows [start, start + length) of another struct column, element by
+// element. src is cast to ColumnStruct; no check is made that it has the same
+// number of element columns.
+void ColumnStruct::insert_range_from(const IColumn& src, size_t start, size_t length) {
+    for (size_t idx = 0, count = columns.size(); idx < count; ++idx) {
+        const auto& other = assert_cast<const ColumnStruct&>(src);
+        columns[idx]->insert_range_from(*other.columns[idx], start, length);
+    }
+}
+
+// Applies the same filter to every element column and wraps the results in a
+// new struct column.
+ColumnPtr ColumnStruct::filter(const Filter& filt, ssize_t result_size_hint) const {
+    Columns filtered;
+    filtered.reserve(columns.size());
+    for (const auto& child : columns) {
+        filtered.push_back(child->filter(filt, result_size_hint));
+    }
+    return ColumnStruct::create(filtered);
+}
+
+// Applies the same permutation (with the same limit) to every element column
+// and wraps the results in a new struct column.
+ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const {
+    Columns permuted;
+    permuted.reserve(columns.size());
+    for (const auto& child : columns) {
+        permuted.push_back(child->permute(perm, limit));
+    }
+    return ColumnStruct::create(permuted);
+}
+
+// Replicates every element column with the same offsets and wraps the results
+// in a new struct column.
+ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const {
+    Columns replicated;
+    replicated.reserve(columns.size());
+    for (const auto& child : columns) {
+        replicated.push_back(child->replicate(offsets));
+    }
+    return ColumnStruct::create(replicated);
+}
+
+// Scatters each element column with the same selector, then regroups the
+// pieces so that output i is a struct column made of the i-th piece of every
+// element column.
+MutableColumns ColumnStruct::scatter(ColumnIndex num_columns, const Selector& selector) const {
+    const size_t field_count = columns.size();
+
+    // per_field[f][s] is piece s of element column f.
+    std::vector<MutableColumns> per_field(field_count);
+    for (size_t field = 0; field < field_count; ++field) {
+        per_field[field] = columns[field]->scatter(num_columns, selector);
+    }
+
+    MutableColumns result(num_columns);
+    for (size_t part = 0; part < num_columns; ++part) {
+        MutableColumns part_fields(field_count);
+        for (size_t field = 0; field < field_count; ++field) {
+            part_fields[field] = std::move(per_field[field][part]);
+        }
+        result[part] = ColumnStruct::create(std::move(part_fields));
+    }
+
+    return result;
+}
+
+// int ColumnStruct::compare_at_impl(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint,
+//                                   const Collator* collator) const {
+//     const size_t tuple_size = columns.size();
+//     for (size_t i = 0; i < tuple_size; ++i) {
+//         int res = 0;
+//         if (collator && columns[i]->is_collation_supported()) {
+//             res = columns[i]->compare_at_with_collation(
+//                     n, m, *assert_cast<const ColumnStruct&>(rhs).columns[i], nan_direction_hint,
+//                     *collator);
+//         } else {
+//             res = columns[i]->compare_at(n, m, *assert_cast<const ColumnStruct&>(rhs).columns[i],
+//                                          nan_direction_hint);
+//         }
+
+//         if (res) {
+//             return res;
+//         }
+//     }
+//     return 0;
+// }
+
+// int ColumnStruct::compare_at(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint) const {
+//     return compare_at_impl(n, m, rhs, nan_direction_hint);
+// }
+
+// void ColumnStruct::compare_column(const IColumn& rhs, size_t rhs_row_num,
+//                                   PaddedPODArray<UInt64>* row_indexes,
+//                                   PaddedPODArray<Int8>& compare_results, int direction,
+//                                   int nan_direction_hint) const {
+//     return do_compare_column<ColumnStruct>(assert_cast<const ColumnStruct&>(rhs), rhs_row_num,
+//                                            row_indexes, compare_results, direction,
+//                                            nan_direction_hint);
+// }
+
+// int ColumnStruct::compare_at_with_collation(size_t n, size_t m, const IColumn& rhs,
+//                                             int nan_direction_hint,
+//                                             const Collator& collator) const {
+//     return compare_at_impl(n, m, rhs, nan_direction_hint, &collator);
+// }
+
+// bool ColumnStruct::has_equal_values() const {
+//     return has_equal_values_impl<ColumnStruct>();
+// }
+
+// template <bool positive>
+// struct ColumnStruct::Less {
+//     TupleColumns columns;
+//     int nan_direction_hint;
+//     const Collator* collator;
+
+//     Less(const TupleColumns& columns_, int nan_direction_hint_, const Collator* collator_ = nullptr)
+//             : columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_) {}
+
+//     bool operator()(size_t a, size_t b) const {
+//         for (const auto& column : columns) {
+//             int res;
+//             if (collator && column->isCollationSupported()) {
+//                 res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator);
+//             } else {
+//                 res = column->compareAt(a, b, *column, nan_direction_hint);
+//             }
+//             if (res < 0) {
+//                 return positive;
+//             } else if (res > 0) {
+//                 return !positive;
+//             }
+//         }
+//         return false;
+//     }
+// };
+
+// void ColumnStruct::get_permutation_impl(IColumn::PermutationSortDirection direction,
+//                                         IColumn::PermutationSortStability stability, size_t limit,
+//                                         int nan_direction_hint, Permutation& res,
+//                                         const Collator* collator) const {
+//     size_t rows = size();
+//     res.resize(rows);
+//     for (size_t i = 0; i < rows; ++i) {
+//         res[i] = i;
+//     }
+
+//     if (limit >= rows) {
+//         limit = 0;
+//     }
+
+//     EqualRange ranges;
+//     ranges.emplace_back(0, rows);
+//     update_permutation_impl(direction, stability, limit, nan_direction_hint, res, ranges, collator);
+// }
+
+// void ColumnStruct::update_permutation_impl(IColumn::PermutationSortDirection direction,
+//                                            IColumn::PermutationSortStability stability,
+//                                            size_t limit, int nan_direction_hint,
+//                                            IColumn::Permutation& res, EqualRanges& equal_ranges,
+//                                            const Collator* collator) const {
+//     if (equal_ranges.empty()) {
+//         return;
+//     }
+
+//     for (const auto& column : columns) {
+//         while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().first) {
+//             equal_ranges.pop_back();
+//         }
+
+//         if (collator && column->isCollationSupported()) {
+//             column->update_permutation_with_collation(*collator, direction, stability, limit,
+//                                                       nan_direction_hint, res, equal_ranges);
+//         } else {
+//             column->update_permutation(direction, stability, limit, nan_direction_hint, res,
+//                                        equal_ranges);
+//         }
+//         if (equal_ranges.empty()) {
+//             break;
+//         }
+//     }
+// }
+
+// void ColumnStruct::get_permutation(IColumn::PermutationSortDirection direction,
+//                                    IColumn::PermutationSortStability stability, size_t limit,
+//                                    int nan_direction_hint, Permutation& res) const {
+//     get_permutation_impl(direction, stability, limit, nan_direction_hint, res, nullptr);
+// }
+
+// void ColumnStruct::update_permutation(IColumn::PermutationSortDirection direction,
+//                                       IColumn::PermutationSortStability stability, size_t limit,
+//                                       int nan_direction_hint, IColumn::Permutation& res,
+//                                       EqualRanges& equal_ranges) const {
+//     update_permutation_impl(direction, stability, limit, nan_direction_hint, res, equal_ranges);
+// }
+
+// void ColumnStruct::get_permutation_with_collation(const Collator& collator,
+//                                                   IColumn::PermutationSortDirection direction,
+//                                                   IColumn::PermutationSortStability stability,
+//                                                   size_t limit, int nan_direction_hint,
+//                                                   Permutation& res) const {
+//     get_permutation_impl(direction, stability, limit, nan_direction_hint, res, &collator);
+// }
+
+// void ColumnStruct::update_permutation_with_collation(const Collator& collator,
+//                                                      IColumn::PermutationSortDirection direction,
+//                                                      IColumn::PermutationSortStability stability,
+//                                                      size_t limit, int nan_direction_hint,
+//                                                      Permutation& res,
+//                                                      EqualRanges& equal_ranges) const {
+//     update_permutation_impl(direction, stability, limit, nan_direction_hint, res, equal_ranges,
+//                             &collator);
+// }
+
+// void ColumnStruct::gather(ColumnGathererStream& gatherer) {
+//     gatherer.gather(*this);
+// }
+
+// Forwards reserve(n) to every element column.
+void ColumnStruct::reserve(size_t n) {
+    for (size_t idx = 0, count = columns.size(); idx != count; ++idx) {
+        IColumn& child = get_column(idx);
+        child.reserve(n);
+    }
+}
+
+// Sum of byte_size() over all element columns.
+size_t ColumnStruct::byte_size() const {
+    size_t total = 0;
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        total += columns[idx]->byte_size();
+    }
+    return total;
+}
+
+// size_t ColumnStruct::byte_size_at(size_t n) const {
+//     size_t res = 0;
+//     for (const auto& column : columns) {
+//         res += column->byte_size_at(n);
+//     }
+//     return res;
+// }
+
+// void ColumnStruct::ensure_ownership() {
+//     const size_t tuple_size = columns.size();
+//     for (size_t i = 0; i < tuple_size; ++i) {
+//         get_column(i).ensure_ownership();
+//     }
+// }
+
+// Sum of allocated_bytes() over all element columns.
+size_t ColumnStruct::allocated_bytes() const {
+    size_t total = 0;
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        total += columns[idx]->allocated_bytes();
+    }
+    return total;
+}
+
+// Forwards protect() to every element column.
+void ColumnStruct::protect() {
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        columns[idx]->protect();
+    }
+}
+
+// Computes the extremes of every element column independently and returns
+// them as two Tuples: min holds each element's minimum, max each element's
+// maximum.
+void ColumnStruct::get_extremes(Field& min, Field& max) const {
+    const size_t count = columns.size();
+
+    Tuple lows(count);
+    Tuple highs(count);
+
+    size_t idx = 0;
+    for (const auto& child : columns) {
+        child->get_extremes(lows[idx], highs[idx]);
+        ++idx;
+    }
+
+    min = lows;
+    max = highs;
+}
+
+// Invokes callback once per element column, in order.
+void ColumnStruct::for_each_subcolumn(ColumnCallback callback) {
+    const size_t count = columns.size();
+    for (size_t idx = 0; idx < count; ++idx) {
+        callback(columns[idx]);
+    }
+}
+
+// True if rhs is also a ColumnStruct with the same number of element columns
+// and every pair of corresponding elements is structurally equal.
+bool ColumnStruct::structure_equals(const IColumn& rhs) const {
+    const auto* other = typeid_cast<const ColumnStruct*>(&rhs);
+    if (!other) {
+        return false;
+    }
+
+    const size_t count = columns.size();
+    if (other->columns.size() != count) {
+        return false;
+    }
+
+    for (size_t idx = 0; idx < count; ++idx) {
+        const bool same = columns[idx]->structure_equals(*other->columns[idx]);
+        if (!same) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// void ColumnStruct::for_each_subcolumn_recursively(ColumnCallback callback) {
+//     for (auto& column : columns) {
+//         callback(column);
+//         column->for_each_subcolumn_recursively(callback);
+//     }
+// }
+
+// bool ColumnStruct::is_collation_supported() const {
+//     for (const auto& column : columns) {
+//         if (column->is_collation_supported()) {
+//             return true;
+//         }
+//     }
+//     return false;
+// }
+
+// ColumnPtr ColumnStruct::compress() const {
+//     size_t byte_size = 0;
+//     Columns compressed;
+//     compressed.reserve(columns.size());
+//     for (const auto& column : columns) {
+//         auto compressed_column = column->compress();
+//         byte_size += compressed_column->byteSize();
+//         compressed.emplace_back(std::move(compressed_column));
+//     }
+
+//     return ColumnCompressed::create(size(), byte_size,
+//                                     [compressed = std::move(compressed)]() mutable {
+//                                         for (auto& column : compressed) {
+//                                             column = column->decompress();
+//                                         }
+//                                         return ColumnStruct::create(compressed);
+//                                     });
+// }
+
+// double ColumnStruct::get_ratio_of_default_rows(double sample_ratio) const {
+//     return get_ratio_of_default_rows_impl<ColumnStruct>(sample_ratio);
+// }
+
+// void ColumnStruct::get_indices_of_nondefault_rows(Offsets& indices, size_t from,
+//                                                   size_t limit) const {
+//     return get_indices_of_nondefault_rows_impl<ColumnStruct>(indices, from, limit);
+// }
+
+// void ColumnStruct::finalize() {
+//     for (auto& column : columns) {
+//         column->finalize();
+//     }
+// }
+
+// bool ColumnStruct::is_finalized() const {
+//     return std::all_of(columns.begin(), columns.end(),
+//                        [](const auto& column) { return column->is_finalized(); });
+// }
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h
new file mode 100644
index 0000000000..895a2796b7
--- /dev/null
+++ b/be/src/vec/columns/column_struct.h
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnTuple.h
+// and modified by Doris
+
+/********************************************************************************
+// doris/core/be/src/vec/core/field.h
+class Field;
+using FieldVector = std::vector<Field>;
+
+/// Array and Tuple use the same storage type -- FieldVector, but we declare
+/// distinct types for them, so that the caller can choose whether it wants to
+/// construct a Field of Array or a Tuple type. An alternative approach would be
+/// to construct both of these types from FieldVector, and have the caller
+/// specify the desired Field type explicitly.
+
+#define DEFINE_FIELD_VECTOR(X)          \
+    struct X : public FieldVector {     \
+        using FieldVector::FieldVector; \
+    }
+
+DEFINE_FIELD_VECTOR(Array);
+DEFINE_FIELD_VECTOR(Tuple);
+
+#undef DEFINE_FIELD_VECTOR
+
+// definition of some pointer types
+using WrappedPtr = chameleon_ptr<Derived>;
+
+using Ptr = immutable_ptr<Derived>;
+using ColumnPtr = IColumn::Ptr;
+using Columns = std::vector<ColumnPtr>;
+using MutablePtr = mutable_ptr<Derived>;
+using MutableColumnPtr = IColumn::MutablePtr;
+using MutableColumns = std::vector<MutableColumnPtr>;
+****************************************************************************/
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_impl.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+/** A column that is just a group of several other columns.
+  *
+  * For constant Tuples, see ColumnConst.
+  * Mixing constant and non-constant columns in a tuple is prohibited
+  *  for implementation simplicity.
+  */
+// Struct column: stores one nested column per struct field; row i of the
+// struct consists of row i of every nested column.
+class ColumnStruct final : public COWHelper<IColumn, ColumnStruct> {
+private:
+    friend class COWHelper<IColumn, ColumnStruct>;
+
+    using TupleColumns = std::vector<WrappedPtr>;
+    // Nested columns, one per struct field, in field order.
+    TupleColumns columns;
+
+    template <bool positive>
+    struct Less;
+
+    // Constructors are private; instances are obtained through create().
+    ColumnStruct(Columns&& columns);
+    ColumnStruct(TupleColumns&& tuple_columns);
+    explicit ColumnStruct(MutableColumns&& mutable_columns);
+    ColumnStruct(const ColumnStruct&) = default;
+
+public:
+    /** Create immutable column using immutable arguments. These arguments may be shared with other columns.
+      * Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
+      */
+    using Base = COWHelper<IColumn, ColumnStruct>;
+    static Ptr create(Columns& columns);
+    // NOTE(review): no out-of-line definition of this overload is visible in
+    // column_struct.cpp - confirm one exists before calling it.
+    static Ptr create(MutableColumns& columns);
+    static Ptr create(TupleColumns& columns);
+    static Ptr create(Columns&& arg) { return create(arg); }
+
+    // Generic factory: forwards its arguments to COWHelper::create.
+    template <typename... Args>
+    static MutablePtr create(Args&&... args) {
+        return Base::create(std::forward<Args>(args)...);
+    }
+
+    std::string get_name() const override;
+    const char* get_family_name() const override { return "Struct"; }
+    TypeIndex get_data_type() const { return TypeIndex::Struct; }
+
+    MutableColumnPtr clone_empty() const override;
+    MutableColumnPtr clone_resized(size_t size) const override;
+
+    // Row count is taken from the first nested column; at(0) throws
+    // std::out_of_range when there are no nested columns.
+    size_t size() const override { return columns.at(0)->size(); }
+
+    Field operator[](size_t n) const override;
+    void get(size_t n, Field& res) const override;
+
+    bool is_default_at(size_t n) const override;
+    StringRef get_data_at(size_t n) const override;
+    void insert_data(const char* pos, size_t length) override;
+    void insert(const Field& x) override;
+    void insert_from(const IColumn& src_, size_t n) override;
+    void insert_default() override;
+    void pop_back(size_t n) override;
+    StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
+    const char* deserialize_and_insert_from_arena(const char* pos) override;
+    void update_hash_with_value(size_t n, SipHash& hash) const override;
+
+    // const char * skip_serialized_in_arena(const char * pos) const override;
+    // void update_weak_hash32(WeakHash32 & hash) const override;
+    // void update_hash_fast(SipHash & hash) const override;
+
+    // Not implemented: logs a fatal error.
+    void insert_indices_from(const IColumn& src, const int* indices_begin,
+                             const int* indices_end) override {
+        LOG(FATAL) << "insert_indices_from not implemented";
+    }
+
+    // Not implemented: logs a fatal error.
+    void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                         Permutation& res) const override {
+        LOG(FATAL) << "get_permutation not implemented";
+    }
+    void append_data_by_selector(MutableColumnPtr& res, const Selector& selector) const override {
+        return append_data_by_selector_impl<ColumnStruct>(res, selector);
+    }
+    // Not implemented: logs a fatal error.
+    void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
+        LOG(FATAL) << "replace_column_data not implemented";
+    }
+    // Not implemented: logs a fatal error.
+    void replace_column_data_default(size_t self_row = 0) override {
+        LOG(FATAL) << "replace_column_data_default not implemented";
+    }
+
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override;
+    ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override;
+    ColumnPtr replicate(const Offsets& offsets) const override;
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override;
+
+    // ColumnPtr index(const IColumn & indexes, size_t limit) const override;
+    // void expand(const Filter & mask, bool inverted) override;
+    // void gather(ColumnGathererStream & gatherer_stream) override;
+    // bool has_equal_values() const override;
+
+    // void compare_column(const IColumn& rhs, size_t rhs_row_num, PaddedPODArray<UInt64>* row_indexes,
+    //                     PaddedPODArray<Int8>& compare_results, int direction,
+    //                     int nan_direction_hint) const override;
+    // int compare_at_with_collation(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint,
+    //                               const Collator& collator) const override;
+
+    // Compares row n of this column with row m of rhs, field by field in
+    // order; returns the first non-zero per-field result, or 0 if all fields
+    // compare equal. Defined inline because column_struct.cpp provides no
+    // definition, which would otherwise leave this virtual function undefined
+    // at link time.
+    int compare_at(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint) const override {
+        return compare_at_impl(n, m, rhs, nan_direction_hint);
+    }
+    void get_extremes(Field& min, Field& max) const override;
+
+    // void get_permutation(IColumn::PermutationSortDirection direction,
+    //                      IColumn::PermutationSortStability stability, size_t limit,
+    //                      int nan_direction_hint, IColumn::Permutation& res) const override;
+    // void update_permutation(IColumn::PermutationSortDirection direction,
+    //                         IColumn::PermutationSortStability stability, size_t limit,
+    //                         int nan_direction_hint, IColumn::Permutation& res,
+    //                         EqualRanges& equal_ranges) const override;
+    // void get_permutation_with_collation(const Collator& collator,
+    //                                     IColumn::PermutationSortDirection direction,
+    //                                     IColumn::PermutationSortStability stability, size_t limit,
+    //                                     int nan_direction_hint,
+    //                                     IColumn::Permutation& res) const override;
+    // void update_permutation_with_collation(const Collator& collator,
+    //                                        IColumn::PermutationSortDirection direction,
+    //                                        IColumn::PermutationSortStability stability,
+    //                                        size_t limit, int nan_direction_hint,
+    //                                        IColumn::Permutation& res,
+    //                                        EqualRanges& equal_ranges) const override;
+
+    void reserve(size_t n) override;
+    size_t byte_size() const override;
+
+    // size_t byte_size_at(size_t n) const override;
+    // void ensure_ownership() override;
+
+    size_t allocated_bytes() const override;
+    void protect() override;
+    void for_each_subcolumn(ColumnCallback callback) override;
+    bool structure_equals(const IColumn& rhs) const override;
+
+    // void for_each_subcolumn_recursively(ColumnCallback callback) override;
+    // bool is_collation_supported() const override;
+    // ColumnPtr compress() const override;
+    // double get_ratio_of_default_rows(double sample_ratio) const override;
+    // void get_indices_of_nondefault_rows(Offsets & indices, size_t from, size_t limit) const override;
+    // void finalize() override;
+    // bool is_finalized() const override;
+
+    // Number of struct fields (nested columns).
+    size_t tuple_size() const { return columns.size(); }
+
+    // Unchecked access to the idx-th nested column.
+    const IColumn& get_column(size_t idx) const { return *columns[idx]; }
+    IColumn& get_column(size_t idx) { return *columns[idx]; }
+
+    const TupleColumns& get_columns() const { return columns; }
+    Columns get_columns_copy() const { return {columns.begin(), columns.end()}; }
+
+    const ColumnPtr& get_column_ptr(size_t idx) const { return columns[idx]; }
+    ColumnPtr& get_column_ptr(size_t idx) { return columns[idx]; }
+
+private:
+    // Field-by-field comparison backing compare_at(). rhs is cast to
+    // ColumnStruct and is indexed with this column's field count, so it is
+    // assumed to have at least as many nested columns - no check is made.
+    int compare_at_impl(size_t n, size_t m, const IColumn& rhs, int nan_direction_hint) const {
+        const auto& rhs_struct = assert_cast<const ColumnStruct&>(rhs);
+        const size_t field_count = columns.size();
+        for (size_t i = 0; i < field_count; ++i) {
+            const int res =
+                    columns[i]->compare_at(n, m, *rhs_struct.columns[i], nan_direction_hint);
+            if (res != 0) {
+                return res;
+            }
+        }
+        return 0;
+    }
+
+    // void get_permutation_impl(IColumn::PermutationSortDirection direction,
+    //                           IColumn::PermutationSortStability stability, size_t limit,
+    //                           int nan_direction_hint, Permutation& res,
+    //                           const Collator* collator) const;
+
+    // void update_permutation_impl(IColumn::PermutationSortDirection direction,
+    //                              IColumn::PermutationSortStability stability, size_t limit,
+    //                              int nan_direction_hint, IColumn::Permutation& res,
+    //                              EqualRanges& equal_ranges,
+    //                              const Collator* collator = nullptr) const;
+};
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h
index 7636d714b3..95947ca7eb 100644
--- a/be/src/vec/core/types.h
+++ b/be/src/vec/core/types.h
@@ -80,6 +80,7 @@ enum class TypeIndex {
     FixedLengthObject,
     JSONB,
     Decimal128I,
+    Struct,
 };
 
 struct Consted {
@@ -525,6 +526,8 @@ inline const char* getTypeName(TypeIndex idx) {
         return "FixedLengthObject";
     case TypeIndex::JSONB:
         return "JSONB";
+    case TypeIndex::Struct:
+        return "Struct";
     }
 
     __builtin_unreachable();
diff --git a/be/src/vec/data_types/data_type_struct.cpp b/be/src/vec/data_types/data_type_struct.cpp
new file mode 100644
index 0000000000..91aff67a40
--- /dev/null
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -0,0 +1,361 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeTuple.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_struct.h"
+
+namespace doris::vectorized {
+
+namespace ErrorCodes {
+extern const int BAD_ARGUMENTS;
+extern const int DUPLICATE_COLUMN;
+extern const int EMPTY_DATA_PASSED;
+extern const int NOT_FOUND_COLUMN_IN_BLOCK;
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
+extern const int ILLEGAL_INDEX;
+extern const int LOGICAL_ERROR;
+} // namespace ErrorCodes
+
+/// Creates a struct type without explicit element names.
+/// Each element receives its 1-based position as a name: "1", "2", ...
+DataTypeStruct::DataTypeStruct(const DataTypes& elems_)
+        : elems(elems_), have_explicit_names(false) {
+    const size_t count = elems.size();
+    names.reserve(count);
+    for (size_t position = 1; position <= count; ++position) {
+        names.push_back(std::to_string(position));
+    }
+}
+
+/// Validates explicitly supplied element names.
+/// Returns an Exception describing the first problem found (an empty name or a
+/// repeated name), or an empty optional when all names are acceptable.
+/// The exception is returned rather than thrown so the caller decides how to raise it.
+static std::optional<Exception> check_tuple_names(const Strings& names) {
+    std::unordered_set<String> seen;
+    for (const auto& candidate : names) {
+        if (candidate.empty()) {
+            return Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS);
+        }
+
+        // insert() reports through `.second` whether the name was new.
+        const bool is_new = seen.insert(candidate).second;
+        if (!is_new) {
+            return Exception("Names of tuple elements must be unique",
+                             ErrorCodes::DUPLICATE_COLUMN);
+        }
+    }
+
+    return std::nullopt;
+}
+
+/// Creates a struct type with explicit element names.
+/// Throws if the number of names differs from the number of element types, or if
+/// check_tuple_names() rejects the names (empty or duplicated name).
+DataTypeStruct::DataTypeStruct(const DataTypes& elems_, const Strings& names_)
+        : elems(elems_), names(names_), have_explicit_names(true) {
+    if (names.size() != elems.size()) {
+        throw Exception("Wrong number of names passed to constructor of DataTypeStruct",
+                        ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+    }
+
+    auto name_error = check_tuple_names(names);
+    if (name_error.has_value()) {
+        throw std::move(*name_error);
+    }
+}
+
+/// Builds the textual type name: "Struct(" + element type names joined by ", " + ")".
+/// Element names are not included in the output (the code that would print them
+/// for explicitly named structs is disabled).
+std::string DataTypeStruct::do_get_name() const {
+    std::string result = "Struct(";
+
+    bool is_first = true;
+    for (const auto& elem : elems) {
+        if (!is_first) {
+            result += ", ";
+        }
+        is_first = false;
+
+        result += elem->get_name();
+    }
+
+    result += ")";
+    return result;
+}
+
+/// Treats `column` as a ColumnStruct (via assert_cast) and returns its element
+/// column at position `idx`.
+static inline IColumn& extract_element_column(IColumn& column, size_t idx) {
+    auto& struct_column = assert_cast<ColumnStruct&>(column);
+    return struct_column.get_column(idx);
+}
+
+/// Runs `impl`, which is expected to append one row to every element column of the
+/// struct column `column`, and keeps the element columns consistent if that fails.
+///
+/// After `impl` returns, every element column must have the same size as `column`;
+/// otherwise an Exception (SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH) is raised.
+/// On any exception, each element column that grew is truncated back to the size
+/// `column` reported before `impl` ran, and the exception is rethrown.
+///
+/// @param elems   element types; only the element count is used here.
+/// @param column  must be a ColumnStruct (extract_element_column() assert_casts it).
+/// @param impl    callable taking no arguments that performs the insertion.
+template <typename F>
+static void add_element_safe(const DataTypes& elems, IColumn& column, F&& impl) {
+    /// We use the assumption that tuples of zero size do not exist.
+    const size_t old_size = column.size();
+    // size_t index avoids the signed/unsigned comparison against elems.size().
+    const size_t num_elems = elems.size();
+
+    try {
+        impl();
+
+        // Check that all columns now have the same size.
+        const size_t new_size = column.size();
+
+        for (size_t i = 0; i < num_elems; ++i) {
+            const auto& element_column = extract_element_column(column, i);
+            if (element_column.size() != new_size) {
+                // This is not a logical error because it may work with
+                // user-supplied data.
+                throw Exception("Cannot read a tuple because not all elements are present",
+                                ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH);
+            }
+        }
+    } catch (...) {
+        for (size_t i = 0; i < num_elems; ++i) {
+            auto& element_column = extract_element_column(column, i);
+
+            // Drop everything added since the start, not just a single row, so the
+            // element column is restored to old_size however far it grew.
+            const size_t current_size = element_column.size();
+            if (current_size > old_size) {
+                element_column.pop_back(current_size - old_size);
+            }
+        }
+
+        throw;
+    }
+}
+
+/// Creates a ColumnStruct made of one freshly created column per element type,
+/// in element order.
+MutableColumnPtr DataTypeStruct::create_column() const {
+    MutableColumns element_columns;
+    element_columns.reserve(elems.size());
+    for (const auto& elem : elems) {
+        element_columns.push_back(elem->create_column());
+    }
+    return ColumnStruct::create(std::move(element_columns));
+}
+
+// MutableColumnPtr DataTypeStruct::create_column(const ISerialization& serialization) const {
+//     /// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed
+//     /// several times to allow to reconstruct the substream path name.
+//     /// Here we don't need substream path name, so we drop first several wrapper serializations.
+
+//     const auto* current_serialization = &serialization;
+//     while (const auto* serialization_named =
+//                    typeid_cast<const SerializationNamed*>(current_serialization))
+//         current_serialization = serialization_named->get_nested().get();
+
+//     const auto* serialization_tuple = typeid_cast<const SerializationTuple*>(current_serialization);
+//     if (!serialization_tuple)
+//         throw Exception(ErrorCodes::LOGICAL_ERROR,
+//                         "Unexpected serialization to create column of type Tuple");
+
+//     const auto& element_serializations = serialization_tuple->getElementsSerializations();
+
+//     size_t size = elems.size();
+//     assert(element_serializations.size() == size);
+//     MutableColumns tuple_columns(size);
+//     for (size_t i = 0; i < size; ++i) {
+//         tuple_columns[i] = elems[i]->create_column(*element_serializations[i]->get_nested());
+//     }
+
+//     return ColumnStruct::create(std::move(tuple_columns));
+// }
+
+// Field DataTypeStruct::get_default() const {
+//     return Tuple(collections::map<Tuple>(
+//             elems, [](const DataTypePtr& elem) { return elem->get_default(); }));
+// }
+
+/// Appends one default row to `column` by asking every element type to insert its
+/// default into the corresponding element column. The work is wrapped in
+/// add_element_safe() so a failure part-way through does not leave the element
+/// columns with different sizes.
+///
+/// @param column  must be a ColumnStruct (extract_element_column() assert_casts it).
+void DataTypeStruct::insert_default_into(IColumn& column) const {
+    add_element_safe(elems, column, [&] {
+        // size_t index avoids the signed/unsigned comparison against elems.size().
+        for (size_t i = 0; i < elems.size(); ++i) {
+            elems[i]->insert_default_into(extract_element_column(column, i));
+        }
+    });
+}
+
+/// Two struct types are equal when `rhs` has the same dynamic type, the same number
+/// of elements, and every position has an equal element type and an identical name.
+bool DataTypeStruct::equals(const IDataType& rhs) const {
+    if (typeid(rhs) != typeid(*this)) {
+        return false;
+    }
+
+    // Safe after the typeid check above.
+    const auto& other = static_cast<const DataTypeStruct&>(rhs);
+
+    const size_t count = elems.size();
+    if (other.elems.size() != count) {
+        return false;
+    }
+
+    for (size_t pos = 0; pos < count; ++pos) {
+        if (!elems[pos]->equals(*other.elems[pos])) {
+            return false;
+        }
+        if (names[pos] != other.names[pos]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/// Returns the 0-based position of the element called `name`.
+/// Throws an Exception (NOT_FOUND_COLUMN_IN_BLOCK) when no element has that name.
+size_t DataTypeStruct::get_position_by_name(const String& name) const {
+    // Reuse the non-throwing lookup and turn "not found" into an exception.
+    if (const auto position = try_get_position_by_name(name)) {
+        return *position;
+    }
+    throw Exception("Struct doesn't have element with name '" + name + "'",
+                    ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+}
+
+/// Returns the 0-based position of the first element called `name`, or an empty
+/// optional when there is none.
+std::optional<size_t> DataTypeStruct::try_get_position_by_name(const String& name) const {
+    // Both constructors keep `names` the same length as `elems`, so walking
+    // `names` visits exactly one entry per element.
+    size_t position = 0;
+    for (const auto& candidate : names) {
+        if (candidate == name) {
+            return std::optional<size_t>(position);
+        }
+        ++position;
+    }
+    return std::nullopt;
+}
+
+/// Returns the name of the element at 1-based position `i`.
+///
+/// Note the asymmetry with get_position_by_name(), which returns 0-based positions.
+/// Throws an Exception (ILLEGAL_INDEX) when `i` is 0 or greater than the number of
+/// elements.
+String DataTypeStruct::get_name_by_position(size_t i) const {
+    if (i == 0 || i > names.size()) {
+        throw Exception(fmt::format("Index of tuple element ({}) is out of range ([1, {}])", i,
+                                    names.size()),
+                        ErrorCodes::ILLEGAL_INDEX);
+    }
+
+    // Convert the 1-based position to a 0-based index.
+    return names[i - 1];
+}
+
+/// True only when every element type reports text_can_contain_only_valid_utf8()
+/// (vacuously true for a struct with no elements).
+bool DataTypeStruct::text_can_contain_only_valid_utf8() const {
+    for (const auto& elem : elems) {
+        if (!elem->text_can_contain_only_valid_utf8()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/// True only when every element type reports have_maximum_size_of_value()
+/// (vacuously true for a struct with no elements).
+bool DataTypeStruct::have_maximum_size_of_value() const {
+    for (const auto& elem : elems) {
+        if (!elem->have_maximum_size_of_value()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/// True only when every element type is comparable
+/// (vacuously true for a struct with no elements).
+bool DataTypeStruct::is_comparable() const {
+    for (const auto& elem : elems) {
+        if (!elem->is_comparable()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/// Sum of get_maximum_size_of_value_in_memory() over all element types.
+size_t DataTypeStruct::get_maximum_size_of_value_in_memory() const {
+    size_t total = 0;
+    for (size_t pos = 0; pos < elems.size(); ++pos) {
+        total += elems[pos]->get_maximum_size_of_value_in_memory();
+    }
+    return total;
+}
+
+/// Sum of get_size_of_value_in_memory() over all element types.
+size_t DataTypeStruct::get_size_of_value_in_memory() const {
+    size_t total = 0;
+    for (size_t pos = 0; pos < elems.size(); ++pos) {
+        total += elems[pos]->get_size_of_value_in_memory();
+    }
+    return total;
+}
+
+// bool DataTypeStruct::has_dynamic_subcolumns() const {
+//     return std::any_of(elems.begin(), elems.end(),
+//                        [](auto&& elem) { return elem->has_dynamic_subcolumns(); });
+// }
+
+// SerializationPtr DataTypeStruct::do_get_default_serialization() const {
+//     SerializationTuple::ElementSerializations serializations(elems.size());
+
+//     for (size_t i = 0; i < elems.size(); ++i) {
+//         String elem_name = have_explicit_names ? names[i] : toString(i + 1);
+//         auto serialization = elems[i]->get_default_serialization();
+//         serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name);
+//     }
+
+//     return std::make_shared<SerializationTuple>(std::move(serializations), have_explicit_names);
+// }
+
+// SerializationPtr DataTypeStruct::get_serialization(const SerializationInfo& info) const {
+//     SerializationTuple::ElementSerializations serializations(elems.size());
+//     const auto& info_tuple = assert_cast<const SerializationInfoTuple&>(info);
+
+//     for (size_t i = 0; i < elems.size(); ++i) {
+//         String elem_name = have_explicit_names ? names[i] : toString(i + 1);
+//         auto serialization = elems[i]->get_serialization(*info_tuple.get_element_info(i));
+//         serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name);
+//     }
+
+//     return std::make_shared<SerializationTuple>(std::move(serializations), have_explicit_names);
+// }
+
+// MutableSerializationInfoPtr DataTypeStruct::create_serialization_info(
+//         const SerializationInfo::Settings& settings) const {
+//     MutableSerializationInfos infos;
+//     infos.reserve(elems.size());
+//     for (const auto& elem : elems) {
+//         infos.push_back(elem->create_serializationInfo(settings));
+//     }
+
+//     return std::make_shared<SerializationInfoTuple>(std::move(infos), names, settings);
+// }
+
+// SerializationInfoPtr DataTypeStruct::get_serialization_info(const IColumn& column) const {
+//     if (const auto* column_const = check_and_get_column<ColumnConst>(&column)) {
+//         return get_serialization_info(column_const->get_data_column());
+//     }
+
+//     MutableSerializationInfos infos;
+//     infos.reserve(elems.size());
+
+//     const auto& column_tuple = assert_cast<const ColumnStruct&>(column);
+//     assert(elems.size() == column_tuple.get_columns().size());
+
+//     for (size_t i = 0; i < elems.size(); ++i) {
+//         auto element_info = elems[i]->get_serialization_info(column_tuple.getColumn(i));
+//         infos.push_back(const_pointer_cast<SerializationInfo>(element_info));
+//     }
+
+//     return std::make_shared<SerializationInfoTuple>(std::move(infos), names,
+//                                                     SerializationInfo::Settings {});
+// }
+
+// static DataTypePtr create(const ASTPtr& arguments) {
+//     if (!arguments || arguments->children.empty())
+//         throw Exception("Struct cannot be empty", ErrorCodes::EMPTY_DATA_PASSED);
+
+//     DataTypes nested_types;
+//     nested_types.reserve(arguments->children.size());
+
+//     Strings names;
+//     names.reserve(arguments->children.size());
+
+//     for (const ASTPtr& child : arguments->children) {
+//         if (const auto* name_and_type_pair = child->as<ASTNameTypePair>()) {
+//             nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type));
+//             names.emplace_back(name_and_type_pair->name);
+//         } else
+//             nested_types.emplace_back(DataTypeFactory::instance().get(child));
+//     }
+
+//     if (names.empty())
+//         return std::make_shared<DataTypeStruct>(nested_types);
+//     else if (names.size() != nested_types.size())
+//         throw Exception("Names are specified not for all elements of Struct type",
+//                         ErrorCodes::BAD_ARGUMENTS);
+//     else
+//         return std::make_shared<DataTypeStruct>(nested_types, names);
+// }
+
+// void registerDataTypeStruct(DataTypeFactory& factory) {
+//     factory.registerDataType("Struct", create);
+// }
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_struct.h b/be/src/vec/data_types/data_type_struct.h
new file mode 100644
index 0000000000..4201583ef4
--- /dev/null
+++ b/be/src/vec/data_types/data_type_struct.h
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeTuple.h
+// and modified by Doris
+
+#pragma once
+
+#include <exception>
+
+#include "gen_cpp/data.pb.h"
+#include "util/stack_util.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_struct.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+/** Struct data type.
+  * Used as an intermediate result when evaluating expressions.
+  * Also can be used as a column - the result of the query execution.
+  *
+  * Struct elements can have names.
+  * If an element is unnamed, it will have an automatically assigned name like '1', '2', '3'
+  * corresponding to its position.
+  * Explicitly assigned names must be non-empty and unique; this is checked by the
+  * two-argument constructor. They should also not begin with a digit, but that is not
+  * enforced by the constructor.
+  *
+  * NOTE: equals() (see data_type_struct.cpp) compares element names as well as element
+  * types, so structs with the same element types but different names are NOT equal.
+  */
+class DataTypeStruct final : public IDataType {
+private:
+    // using DataTypePtr = std::shared_ptr<const IDataType>;
+    // using DataTypes = std::vector<DataTypePtr>;
+    // using Strings = std::vector<std::string>;
+
+    /// Element types, in declaration order.
+    DataTypes elems;
+    /// Element names; always the same length as `elems`.
+    Strings names;
+    /// True when the names were supplied by the caller rather than generated.
+    bool have_explicit_names;
+
+public:
+    // static constexpr bool is_parametric = true;
+
+    /// Creates a struct whose elements are named "1", "2", ... by position.
+    explicit DataTypeStruct(const DataTypes& elems);
+    /// Creates a struct with explicit names; throws if the number of names does not
+    /// match the number of elements or if a name is empty or duplicated.
+    DataTypeStruct(const DataTypes& elems, const Strings& names);
+
+    TypeIndex get_type_id() const override { return TypeIndex::Struct; }
+    std::string do_get_name() const override;
+    const char* get_family_name() const override { return "Struct"; }
+
+    bool can_be_inside_nullable() const override { return false; }
+    bool supports_sparse_serialization() const { return true; }
+
+    MutableColumnPtr create_column() const override;
+    // MutableColumnPtr create_column(const ISerialization& serialization) const override;
+
+    /// Not implemented yet. The out-of-line definition in data_type_struct.cpp is
+    /// commented out, so a bare declaration here would leave this virtual function
+    /// undefined; it is stubbed the same way as the serialization methods below.
+    Field get_default() const override { LOG(FATAL) << "get_default not implemented"; }
+    void insert_default_into(IColumn& column) const override;
+
+    bool equals(const IDataType& rhs) const override;
+
+    bool get_is_parametric() const override { return true; }
+    bool have_subtypes() const override { return !elems.empty(); }
+    bool is_comparable() const override;
+    bool text_can_contain_only_valid_utf8() const override;
+    bool have_maximum_size_of_value() const override;
+    // NOTE(review): declared only; the definition in data_type_struct.cpp is commented
+    // out, so calling this will fail to link until it is implemented.
+    bool has_dynamic_subcolumns() const;
+    size_t get_maximum_size_of_value_in_memory() const override;
+    size_t get_size_of_value_in_memory() const override;
+
+    /// Element type at 0-based position `i` (no bounds check).
+    const DataTypePtr& get_element(size_t i) const { return elems[i]; }
+    const DataTypes& get_elements() const { return elems; }
+    const Strings& get_element_names() const { return names; }
+
+    /// 0-based position of the element called `name`; throws if it does not exist.
+    size_t get_position_by_name(const String& name) const;
+    /// 0-based position of the element called `name`, or an empty optional.
+    std::optional<size_t> try_get_position_by_name(const String& name) const;
+    /// Name of the element at 1-based position `i`; throws if `i` is out of range.
+    String get_name_by_position(size_t i) const;
+
+    // Serialization is not implemented yet; each of the following aborts via LOG(FATAL).
+    int64_t get_uncompressed_serialized_bytes(const IColumn& column,
+                                              int be_exec_version) const override {
+        LOG(FATAL) << "get_uncompressed_serialized_bytes not implemented";
+    }
+
+    char* serialize(const IColumn& column, char* buf, int be_exec_version) const override {
+        LOG(FATAL) << "serialize not implemented";
+    }
+
+    const char* deserialize(const char* buf, IColumn* column, int be_exec_version) const override {
+        LOG(FATAL) << "deserialize not implemented";
+    }
+
+    // bool is_parametric() const { return true; }
+    // SerializationPtr do_get_default_serialization() const override;
+    // SerializationPtr get_serialization(const SerializationInfo& info) const override;
+    // MutableSerializationInfoPtr create_serialization_info(
+    //         const SerializationInfo::Settings& settings) const override;
+    // SerializationInfoPtr get_serialization_info(const IColumn& column) const override;
+    // bool have_explicit_names() const { return have_explicit_names; }
+};
+
+} // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org