You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/04/13 01:48:00 UTC

[incubator-doris] branch master updated: permute impl for column array; and codes format (#8949)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 52d18aa83c permute impl for column array; and codes format (#8949)
52d18aa83c is described below

commit 52d18aa83c1180f419b5fba6ab908f320b262346
Author: camby <10...@qq.com>
AuthorDate: Wed Apr 13 09:47:54 2022 +0800

    permute impl for column array; and codes format (#8949)
    
    Co-authored-by: cambyzju <zh...@baidu.com>
---
 be/src/vec/columns/column_array.cpp    | 405 ++++++++++++++++++---------------
 be/src/vec/columns/column_array.h      | 111 ++++-----
 be/test/vec/core/column_array_test.cpp | 101 ++++++++
 3 files changed, 386 insertions(+), 231 deletions(-)

diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp
index 4754ca3b70..cc4f380f7e 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -18,27 +18,28 @@
 // https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnArray.cpp
 // and modified by Doris
 
+#include "vec/columns/column_array.h"
+
 #include <string.h> // memcpy
 
-#include "vec/common/assert_cast.h"
 #include "vec/columns/collator.h"
-#include "vec/columns/column_array.h"
 #include "vec/columns/column_const.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
 #include "vec/columns/columns_common.h"
 #include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
 
 namespace doris::vectorized {
 
 namespace ErrorCodes {
-    extern const int NOT_IMPLEMENTED;
-    extern const int BAD_ARGUMENTS;
-    extern const int PARAMETER_OUT_OF_BOUND;
-    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
-    extern const int LOGICAL_ERROR;
-    extern const int TOO_LARGE_ARRAY_SIZE;
-}
+extern const int NOT_IMPLEMENTED;
+extern const int BAD_ARGUMENTS;
+extern const int PARAMETER_OUT_OF_BOUND;
+extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
+extern const int LOGICAL_ERROR;
+extern const int TOO_LARGE_ARRAY_SIZE;
+} // namespace ErrorCodes
 
 /** Obtaining array as Field can be slow for large arrays and consume vast amount of memory.
   * Just don't allow to do it.
@@ -48,9 +49,9 @@ namespace ErrorCodes {
   */
 static constexpr size_t max_array_size_as_field = 1000000;
 
-ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column)
-    : data(std::move(nested_column)), offsets(std::move(offsets_column)) {
-    const ColumnOffsets * offsets_concrete = typeid_cast<const ColumnOffsets *>(offsets.get());
+ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& offsets_column)
+        : data(std::move(nested_column)), offsets(std::move(offsets_column)) {
+    const ColumnOffsets* offsets_concrete = typeid_cast<const ColumnOffsets*>(offsets.get());
 
     if (!offsets_concrete) {
         LOG(FATAL) << "offsets_column must be a ColumnUInt64";
@@ -71,8 +72,7 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
       */
 }
 
-ColumnArray::ColumnArray(MutableColumnPtr && nested_column)
-    : data(std::move(nested_column)) {
+ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nested_column)) {
     if (!data->empty()) {
         LOG(FATAL) << "Not empty data passed to ColumnArray, but no offsets passed";
     }
@@ -80,13 +80,14 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column)
     offsets = ColumnOffsets::create();
 }
 
-std::string ColumnArray::get_name() const { return "Array(" + get_data().get_name() + ")"; }
+std::string ColumnArray::get_name() const {
+    return "Array(" + get_data().get_name() + ")";
+}
 
 MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const {
     auto res = ColumnArray::create(get_data().clone_empty());
 
-    if (to_size == 0)
-        return res;
+    if (to_size == 0) return res;
     size_t from_size = size();
 
     if (to_size <= from_size) {
@@ -103,8 +104,7 @@ MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const {
         }
 
         res->get_offsets().resize(to_size);
-        for (size_t i = from_size; i < to_size; ++i)
-            res->get_offsets()[i] = offset;
+        for (size_t i = from_size; i < to_size; ++i) res->get_offsets()[i] = offset;
     }
 
     return res;
@@ -124,13 +124,12 @@ Field ColumnArray::operator[](size_t n) const {
 
     Array res(size);
 
-    for (size_t i = 0; i < size; ++i)
-        res[i] = get_data()[offset + i];
+    for (size_t i = 0; i < size; ++i) res[i] = get_data()[offset + i];
 
     return res;
 }
 
-void ColumnArray::get(size_t n, Field & res) const {
+void ColumnArray::get(size_t n, Field& res) const {
     size_t offset = offset_at(n);
     size_t size = size_at(n);
 
@@ -139,10 +138,9 @@ void ColumnArray::get(size_t n, Field & res) const {
                    << " maximum size " << max_array_size_as_field;
 
     res = Array(size);
-    Array & res_arr = doris::vectorized::get<Array &>(res);
+    Array& res_arr = doris::vectorized::get<Array&>(res);
 
-    for (size_t i = 0; i < size; ++i)
-        get_data().get(offset + i, res_arr[i]);
+    for (size_t i = 0; i < size; ++i) get_data().get(offset + i, res_arr[i]);
 }
 
 StringRef ColumnArray::get_data_at(size_t n) const {
@@ -156,8 +154,7 @@ StringRef ColumnArray::get_data_at(size_t n) const {
     StringRef first = get_data().get_data_at_with_terminating_zero(offset_of_first_elem);
 
     size_t array_size = size_at(n);
-    if (array_size == 0)
-        return StringRef(first.data, 0);
+    if (array_size == 0) return StringRef(first.data, 0);
 
     size_t offset_of_last_elem = get_offsets()[n] - 1;
     StringRef last = get_data().get_data_at_with_terminating_zero(offset_of_last_elem);
@@ -166,11 +163,11 @@ StringRef ColumnArray::get_data_at(size_t n) const {
 }
 
 bool ColumnArray::is_default_at(size_t n) const {
-    const auto & offsets_data = get_offsets();
+    const auto& offsets_data = get_offsets();
     return offsets_data[n] == offsets_data[static_cast<ssize_t>(n) - 1];
 }
 
-void ColumnArray::insert_data(const char * pos, size_t length) {
+void ColumnArray::insert_data(const char* pos, size_t length) {
     /** Similarly - only for arrays of fixed length values.
       */
     if (!data->is_fixed_and_contiguous())
@@ -180,9 +177,8 @@ void ColumnArray::insert_data(const char * pos, size_t length) {
 
     size_t elems = 0;
 
-    if (length)
-    {
-        const char * end = pos + length;
+    if (length) {
+        const char* end = pos + length;
         for (; pos + field_size <= end; pos += field_size, ++elems)
             data->insert_data(pos, field_size);
 
@@ -193,11 +189,12 @@ void ColumnArray::insert_data(const char * pos, size_t length) {
     get_offsets().push_back(get_offsets().back() + elems);
 }
 
-StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const {
+StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena& arena,
+                                                  char const*& begin) const {
     size_t array_size = size_at(n);
     size_t offset = offset_at(n);
 
-    char * pos = arena.alloc_continue(sizeof(array_size), begin);
+    char* pos = arena.alloc_continue(sizeof(array_size), begin);
     memcpy(pos, &array_size, sizeof(array_size));
 
     StringRef res(pos, sizeof(array_size));
@@ -211,36 +208,33 @@ StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena & arena, char
     return res;
 }
 
-const char * ColumnArray::deserialize_and_insert_from_arena(const char * pos) {
+const char* ColumnArray::deserialize_and_insert_from_arena(const char* pos) {
     size_t array_size = unaligned_load<size_t>(pos);
     pos += sizeof(array_size);
 
-    for (size_t i = 0; i < array_size; ++i)
-        pos = get_data().deserialize_and_insert_from_arena(pos);
+    for (size_t i = 0; i < array_size; ++i) pos = get_data().deserialize_and_insert_from_arena(pos);
 
     get_offsets().push_back(get_offsets().back() + array_size);
     return pos;
 }
 
-void ColumnArray::update_hash_with_value(size_t n, SipHash & hash) const {
+void ColumnArray::update_hash_with_value(size_t n, SipHash& hash) const {
     size_t array_size = size_at(n);
     size_t offset = offset_at(n);
 
     hash.update(array_size);
-    for (size_t i = 0; i < array_size; ++i)
-        get_data().update_hash_with_value(offset + i, hash);
+    for (size_t i = 0; i < array_size; ++i) get_data().update_hash_with_value(offset + i, hash);
 }
 
-void ColumnArray::insert(const Field & x) {
-    const Array & array = doris::vectorized::get<const Array &>(x);
+void ColumnArray::insert(const Field& x) {
+    const Array& array = doris::vectorized::get<const Array&>(x);
     size_t size = array.size();
-    for (size_t i = 0; i < size; ++i)
-        get_data().insert(array[i]);
+    for (size_t i = 0; i < size; ++i) get_data().insert(array[i]);
     get_offsets().push_back(get_offsets().back() + size);
 }
 
-void ColumnArray::insert_from(const IColumn & src_, size_t n) {
-    const ColumnArray & src = assert_cast<const ColumnArray &>(src_);
+void ColumnArray::insert_from(const IColumn& src_, size_t n) {
+    const ColumnArray& src = assert_cast<const ColumnArray&>(src_);
     size_t size = src.size_at(n);
     size_t offset = src.offset_at(n);
 
@@ -256,17 +250,17 @@ void ColumnArray::insert_default() {
 }
 
 void ColumnArray::pop_back(size_t n) {
-    auto & offsets_data = get_offsets();
+    auto& offsets_data = get_offsets();
     DCHECK(n <= offsets_data.size());
     size_t nested_n = offsets_data.back() - offset_at(offsets_data.size() - n);
-    if (nested_n)
-        get_data().pop_back(nested_n);
+    if (nested_n) get_data().pop_back(nested_n);
     offsets_data.resize_assume_reserved(offsets_data.size() - n);
 }
 
 void ColumnArray::reserve(size_t n) {
     get_offsets().reserve(n);
-    get_data().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
+    get_data().reserve(
+            n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
 }
 
 size_t ColumnArray::byte_size() const {
@@ -288,24 +282,24 @@ ColumnPtr ColumnArray::convert_to_full_column_if_const() const {
     return ColumnArray::create(data->convert_to_full_column_if_const(), offsets);
 }
 
-void ColumnArray::insert_range_from(const IColumn & src, size_t start, size_t length) {
-    if (length == 0)
-        return;
+void ColumnArray::insert_range_from(const IColumn& src, size_t start, size_t length) {
+    if (length == 0) return;
 
-    const ColumnArray & src_concrete = assert_cast<const ColumnArray &>(src);
+    const ColumnArray& src_concrete = assert_cast<const ColumnArray&>(src);
 
     if (start + length > src_concrete.get_offsets().size())
         LOG(FATAL) << "Parameter out of bound in ColumnArray::insert_range_from method. [start("
                    << std::to_string(start) << ") + length(" << std::to_string(length)
-                   << ") > offsets.size(" << std::to_string(src_concrete.get_offsets().size()) << ")]";
+                   << ") > offsets.size(" << std::to_string(src_concrete.get_offsets().size())
+                   << ")]";
 
     size_t nested_offset = src_concrete.offset_at(start);
     size_t nested_length = src_concrete.get_offsets()[start + length - 1] - nested_offset;
 
     get_data().insert_range_from(src_concrete.get_data(), nested_offset, nested_length);
 
-    Offsets & cur_offsets = get_offsets();
-    const Offsets & src_offsets = src_concrete.get_offsets();
+    Offsets& cur_offsets = get_offsets();
+    const Offsets& src_offsets = src_concrete.get_offsets();
 
     if (start == 0 && cur_offsets.empty()) {
         cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
@@ -320,55 +314,64 @@ void ColumnArray::insert_range_from(const IColumn & src, size_t start, size_t le
     }
 }
 
-ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const {
-    if (typeid_cast<const ColumnUInt8 *>(data.get()))      return filter_number<UInt8>(filt, result_size_hint);
-    if (typeid_cast<const ColumnUInt16 *>(data.get()))     return filter_number<UInt16>(filt, result_size_hint);
-    if (typeid_cast<const ColumnUInt32 *>(data.get()))     return filter_number<UInt32>(filt, result_size_hint);
-    if (typeid_cast<const ColumnUInt64 *>(data.get()))     return filter_number<UInt64>(filt, result_size_hint);
-    if (typeid_cast<const ColumnInt8 *>(data.get()))       return filter_number<Int8>(filt, result_size_hint);
-    if (typeid_cast<const ColumnInt16 *>(data.get()))      return filter_number<Int16>(filt, result_size_hint);
-    if (typeid_cast<const ColumnInt32 *>(data.get()))      return filter_number<Int32>(filt, result_size_hint);
-    if (typeid_cast<const ColumnInt64 *>(data.get()))      return filter_number<Int64>(filt, result_size_hint);
-    if (typeid_cast<const ColumnFloat32 *>(data.get()))    return filter_number<Float32>(filt, result_size_hint);
-    if (typeid_cast<const ColumnFloat64 *>(data.get()))    return filter_number<Float64>(filt, result_size_hint);
-    if (typeid_cast<const ColumnString *>(data.get()))     return filter_string(filt, result_size_hint);
+ColumnPtr ColumnArray::filter(const Filter& filt, ssize_t result_size_hint) const {
+    if (typeid_cast<const ColumnUInt8*>(data.get()))
+        return filter_number<UInt8>(filt, result_size_hint);
+    if (typeid_cast<const ColumnUInt16*>(data.get()))
+        return filter_number<UInt16>(filt, result_size_hint);
+    if (typeid_cast<const ColumnUInt32*>(data.get()))
+        return filter_number<UInt32>(filt, result_size_hint);
+    if (typeid_cast<const ColumnUInt64*>(data.get()))
+        return filter_number<UInt64>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt8*>(data.get()))
+        return filter_number<Int8>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt16*>(data.get()))
+        return filter_number<Int16>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt32*>(data.get()))
+        return filter_number<Int32>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt64*>(data.get()))
+        return filter_number<Int64>(filt, result_size_hint);
+    if (typeid_cast<const ColumnFloat32*>(data.get()))
+        return filter_number<Float32>(filt, result_size_hint);
+    if (typeid_cast<const ColumnFloat64*>(data.get()))
+        return filter_number<Float64>(filt, result_size_hint);
+    if (typeid_cast<const ColumnString*>(data.get())) return filter_string(filt, result_size_hint);
     //if (typeid_cast<const ColumnTuple *>(data.get()))      return filterTuple(filt, result_size_hint);
-    if (typeid_cast<const ColumnNullable *>(data.get()))   return filter_nullable(filt, result_size_hint);
+    if (typeid_cast<const ColumnNullable*>(data.get()))
+        return filter_nullable(filt, result_size_hint);
     return filter_generic(filt, result_size_hint);
 }
 
 template <typename T>
-ColumnPtr ColumnArray::filter_number(const Filter & filt, ssize_t result_size_hint) const {
-    if (get_offsets().empty())
-        return ColumnArray::create(data);
+ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hint) const {
+    if (get_offsets().empty()) return ColumnArray::create(data);
 
     auto res = ColumnArray::create(data->clone_empty());
 
-    auto & res_elems = assert_cast<ColumnVector<T> &>(res->get_data()).get_data();
-    Offsets & res_offsets = res->get_offsets();
+    auto& res_elems = assert_cast<ColumnVector<T>&>(res->get_data()).get_data();
+    Offsets& res_offsets = res->get_offsets();
 
-    filter_arrays_impl<T>(assert_cast<const ColumnVector<T> &>(*data).get_data(), get_offsets(), res_elems, res_offsets, filt, result_size_hint);
+    filter_arrays_impl<T>(assert_cast<const ColumnVector<T>&>(*data).get_data(), get_offsets(),
+                          res_elems, res_offsets, filt, result_size_hint);
     return res;
 }
 
-ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hint) const {
+ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hint) const {
     size_t col_size = get_offsets().size();
-    if (col_size != filt.size())
-        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    if (col_size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column.";
 
-    if (0 == col_size)
-        return ColumnArray::create(data);
+    if (0 == col_size) return ColumnArray::create(data);
 
     auto res = ColumnArray::create(data->clone_empty());
 
-    const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
-    const ColumnString::Chars & src_chars = src_string.get_chars();
-    const Offsets & src_string_offsets = src_string.get_offsets();
-    const Offsets & src_offsets = get_offsets();
+    const ColumnString& src_string = typeid_cast<const ColumnString&>(*data);
+    const ColumnString::Chars& src_chars = src_string.get_chars();
+    const Offsets& src_string_offsets = src_string.get_offsets();
+    const Offsets& src_offsets = get_offsets();
 
-    ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res->get_data()).get_chars();
-    Offsets & res_string_offsets = typeid_cast<ColumnString &>(res->get_data()).get_offsets();
-    Offsets & res_offsets = res->get_offsets();
+    ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res->get_data()).get_chars();
+    Offsets& res_string_offsets = typeid_cast<ColumnString&>(res->get_data()).get_offsets();
+    Offsets& res_offsets = res->get_offsets();
 
     if (result_size_hint < 0) {
         res_chars.reserve(src_chars.size());
@@ -389,13 +392,16 @@ ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hi
         if (filt[i]) {
             /// If the array is not empty - copy content.
             if (array_size) {
-                size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] - prev_src_string_offset;
+                size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] -
+                                       prev_src_string_offset;
                 size_t res_chars_prev_size = res_chars.size();
                 res_chars.resize(res_chars_prev_size + chars_to_copy);
-                memcpy(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset], chars_to_copy);
+                memcpy(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset],
+                       chars_to_copy);
 
                 for (size_t j = 0; j < array_size; ++j)
-                    res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] + prev_res_string_offset - prev_src_string_offset);
+                    res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] +
+                                                 prev_res_string_offset - prev_src_string_offset);
 
                 prev_res_string_offset = res_string_offsets.back();
             }
@@ -413,13 +419,11 @@ ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hi
     return res;
 }
 
-ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_hint) const {
+ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t result_size_hint) const {
     size_t size = get_offsets().size();
-    if (size != filt.size())
-        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    if (size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column.";
 
-    if (size == 0)
-        return ColumnArray::create(data);
+    if (size == 0) return ColumnArray::create(data);
 
     Filter nested_filt(get_offsets().back());
     for (size_t i = 0; i < size; ++i) {
@@ -434,19 +438,18 @@ ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_h
     ssize_t nested_result_size_hint = 0;
     if (result_size_hint < 0)
         nested_result_size_hint = result_size_hint;
-    else if (result_size_hint && result_size_hint < 1000000000 && data->size() < 1000000000)    /// Avoid overflow.
-         nested_result_size_hint = result_size_hint * data->size() / size;
+    else if (result_size_hint && result_size_hint < 1000000000 &&
+             data->size() < 1000000000) /// Avoid overflow.
+        nested_result_size_hint = result_size_hint * data->size() / size;
 
     res->data = data->filter(nested_filt, nested_result_size_hint);
 
-    Offsets & res_offsets = res->get_offsets();
-    if (result_size_hint)
-        res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);
+    Offsets& res_offsets = res->get_offsets();
+    if (result_size_hint) res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);
 
     size_t current_offset = 0;
     for (size_t i = 0; i < size; ++i) {
-        if (filt[i])
-        {
+        if (filt[i]) {
             current_offset += size_at(i);
             res_offsets.push_back(current_offset);
         }
@@ -455,29 +458,29 @@ ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_h
     return res;
 }
 
-ColumnPtr ColumnArray::filter_nullable(const Filter & filt, ssize_t result_size_hint) const {
-    if (get_offsets().empty())
-        return ColumnArray::create(data);
+ColumnPtr ColumnArray::filter_nullable(const Filter& filt, ssize_t result_size_hint) const {
+    if (get_offsets().empty()) return ColumnArray::create(data);
 
-    const ColumnNullable & nullable_elems = assert_cast<const ColumnNullable &>(*data);
+    const ColumnNullable& nullable_elems = assert_cast<const ColumnNullable&>(*data);
 
     auto array_of_nested = ColumnArray::create(nullable_elems.get_nested_column_ptr(), offsets);
     auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint);
-    const auto & filtered_array_of_nested = assert_cast<const ColumnArray &>(*filtered_array_of_nested_owner);
-    const auto & filtered_offsets = filtered_array_of_nested.get_offsets_ptr();
+    const auto& filtered_array_of_nested =
+            assert_cast<const ColumnArray&>(*filtered_array_of_nested_owner);
+    const auto& filtered_offsets = filtered_array_of_nested.get_offsets_ptr();
 
     auto res_null_map = ColumnUInt8::create();
 
-    filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(), res_null_map->get_data(), filt, result_size_hint);
+    filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(),
+                                 res_null_map->get_data(), filt, result_size_hint);
 
-    return ColumnArray::create(
-        ColumnNullable::create(
-            filtered_array_of_nested.get_data_ptr(),
-            std::move(res_null_map)),
-        filtered_offsets);
+    return ColumnArray::create(ColumnNullable::create(filtered_array_of_nested.get_data_ptr(),
+                                                      std::move(res_null_map)),
+                               filtered_offsets);
 }
 
-void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) {
+void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_begin,
+                                      const int* indices_end) {
     for (auto x = indices_begin; x != indices_end; ++x) {
         if (*x == -1) {
             ColumnArray::insert_default();
@@ -487,45 +490,56 @@ void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_beg
     }
 }
 
-ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const {
-    if (replicate_offsets.empty())
-        return clone_empty();
-
-    if (typeid_cast<const ColumnUInt8 *>(data.get()))    return replicate_number<UInt8>(replicate_offsets);
-    if (typeid_cast<const ColumnUInt16 *>(data.get()))   return replicate_number<UInt16>(replicate_offsets);
-    if (typeid_cast<const ColumnUInt32 *>(data.get()))   return replicate_number<UInt32>(replicate_offsets);
-    if (typeid_cast<const ColumnUInt64 *>(data.get()))   return replicate_number<UInt64>(replicate_offsets);
-    if (typeid_cast<const ColumnInt8 *>(data.get()))     return replicate_number<Int8>(replicate_offsets);
-    if (typeid_cast<const ColumnInt16 *>(data.get()))    return replicate_number<Int16>(replicate_offsets);
-    if (typeid_cast<const ColumnInt32 *>(data.get()))    return replicate_number<Int32>(replicate_offsets);
-    if (typeid_cast<const ColumnInt64 *>(data.get()))    return replicate_number<Int64>(replicate_offsets);
-    if (typeid_cast<const ColumnFloat32 *>(data.get()))  return replicate_number<Float32>(replicate_offsets);
-    if (typeid_cast<const ColumnFloat64 *>(data.get()))  return replicate_number<Float64>(replicate_offsets);
-    if (typeid_cast<const ColumnString *>(data.get()))   return replicate_string(replicate_offsets);
-    if (typeid_cast<const ColumnConst *>(data.get()))    return replicate_const(replicate_offsets);
-    if (typeid_cast<const ColumnNullable *>(data.get())) return replicate_nullable(replicate_offsets);
+ColumnPtr ColumnArray::replicate(const Offsets& replicate_offsets) const {
+    if (replicate_offsets.empty()) return clone_empty();
+
+    if (typeid_cast<const ColumnUInt8*>(data.get()))
+        return replicate_number<UInt8>(replicate_offsets);
+    if (typeid_cast<const ColumnUInt16*>(data.get()))
+        return replicate_number<UInt16>(replicate_offsets);
+    if (typeid_cast<const ColumnUInt32*>(data.get()))
+        return replicate_number<UInt32>(replicate_offsets);
+    if (typeid_cast<const ColumnUInt64*>(data.get()))
+        return replicate_number<UInt64>(replicate_offsets);
+    if (typeid_cast<const ColumnInt8*>(data.get()))
+        return replicate_number<Int8>(replicate_offsets);
+    if (typeid_cast<const ColumnInt16*>(data.get()))
+        return replicate_number<Int16>(replicate_offsets);
+    if (typeid_cast<const ColumnInt32*>(data.get()))
+        return replicate_number<Int32>(replicate_offsets);
+    if (typeid_cast<const ColumnInt64*>(data.get()))
+        return replicate_number<Int64>(replicate_offsets);
+    if (typeid_cast<const ColumnFloat32*>(data.get()))
+        return replicate_number<Float32>(replicate_offsets);
+    if (typeid_cast<const ColumnFloat64*>(data.get()))
+        return replicate_number<Float64>(replicate_offsets);
+    if (typeid_cast<const ColumnString*>(data.get())) return replicate_string(replicate_offsets);
+    if (typeid_cast<const ColumnConst*>(data.get())) return replicate_const(replicate_offsets);
+    if (typeid_cast<const ColumnNullable*>(data.get()))
+        return replicate_nullable(replicate_offsets);
     //if (typeid_cast<const ColumnTuple *>(data.get()))    return replicateTuple(replicate_offsets);
     return replicate_generic(replicate_offsets);
 }
 
 template <typename T>
-ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_number(const Offsets& replicate_offsets) const {
     size_t col_size = size();
     if (col_size != replicate_offsets.size())
         LOG(FATAL) << "Size of offsets doesn't match size of column.";
 
     MutableColumnPtr res = clone_empty();
 
-    if (0 == col_size)
-        return res;
+    if (0 == col_size) return res;
 
-    ColumnArray & res_arr = typeid_cast<ColumnArray &>(*res);
+    ColumnArray& res_arr = typeid_cast<ColumnArray&>(*res);
 
-    const typename ColumnVector<T>::Container & src_data = typeid_cast<const ColumnVector<T> &>(*data).get_data();
-    const Offsets & src_offsets = get_offsets();
+    const typename ColumnVector<T>::Container& src_data =
+            typeid_cast<const ColumnVector<T>&>(*data).get_data();
+    const Offsets& src_offsets = get_offsets();
 
-    typename ColumnVector<T>::Container & res_data = typeid_cast<ColumnVector<T> &>(res_arr.get_data()).get_data();
-    Offsets & res_offsets = res_arr.get_offsets();
+    typename ColumnVector<T>::Container& res_data =
+            typeid_cast<ColumnVector<T>&>(res_arr.get_data()).get_data();
+    Offsets& res_offsets = res_arr.get_offsets();
 
     res_data.reserve(data->size() / col_size * replicate_offsets.back());
     res_offsets.reserve(replicate_offsets.back());
@@ -544,7 +558,8 @@ ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const
 
             if (value_size) {
                 res_data.resize(res_data.size() + value_size);
-                memcpy(&res_data[res_data.size() - value_size], &src_data[prev_data_offset], value_size * sizeof(T));
+                memcpy(&res_data[res_data.size() - value_size], &src_data[prev_data_offset],
+                       value_size * sizeof(T));
             }
         }
 
@@ -555,26 +570,25 @@ ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const
     return res;
 }
 
-ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_string(const Offsets& replicate_offsets) const {
     size_t col_size = size();
     if (col_size != replicate_offsets.size())
         LOG(FATAL) << "Size of offsets doesn't match size of column.";
 
     MutableColumnPtr res = clone_empty();
 
-    if (0 == col_size)
-        return res;
+    if (0 == col_size) return res;
 
-    ColumnArray & res_arr = assert_cast<ColumnArray &>(*res);
+    ColumnArray& res_arr = assert_cast<ColumnArray&>(*res);
 
-    const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
-    const ColumnString::Chars & src_chars = src_string.get_chars();
-    const Offsets & src_string_offsets = src_string.get_offsets();
-    const Offsets & src_offsets = get_offsets();
+    const ColumnString& src_string = typeid_cast<const ColumnString&>(*data);
+    const ColumnString::Chars& src_chars = src_string.get_chars();
+    const Offsets& src_string_offsets = src_string.get_offsets();
+    const Offsets& src_offsets = get_offsets();
 
-    ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res_arr.get_data()).get_chars();
-    Offsets & res_string_offsets = typeid_cast<ColumnString &>(res_arr.get_data()).get_offsets();
-    Offsets & res_offsets = res_arr.get_offsets();
+    ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res_arr.get_data()).get_chars();
+    Offsets& res_string_offsets = typeid_cast<ColumnString&>(res_arr.get_data()).get_offsets();
+    Offsets& res_offsets = res_arr.get_offsets();
 
     res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back());
     res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back());
@@ -594,7 +608,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
         /// The number of strings in the array.
         size_t value_size = src_offsets[i] - prev_src_offset;
         /// Number of characters in strings of the array, including zero bytes.
-        size_t sum_chars_size = src_string_offsets[prev_src_offset + value_size - 1] - prev_src_string_offset;  /// -1th index is Ok, see PaddedPODArray.
+        size_t sum_chars_size = src_string_offsets[prev_src_offset + value_size - 1] -
+                                prev_src_string_offset; /// -1th index is Ok, see PaddedPODArray.
 
         for (size_t j = 0; j < size_to_replicate; ++j) {
             current_res_offset += value_size;
@@ -603,7 +618,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
             size_t prev_src_string_offset_local = prev_src_string_offset;
             for (size_t k = 0; k < value_size; ++k) {
                 /// Size of single string.
-                size_t chars_size = src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local;
+                size_t chars_size =
+                        src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local;
 
                 current_res_string_offset += chars_size;
                 res_string_offsets.push_back(current_res_string_offset);
@@ -615,7 +631,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
                 /// Copies the characters of the array of strings.
                 res_chars.resize(res_chars.size() + sum_chars_size);
                 memcpy_small_allow_read_write_overflow15(
-                    &res_chars[res_chars.size() - sum_chars_size], &src_chars[prev_src_string_offset], sum_chars_size);
+                        &res_chars[res_chars.size() - sum_chars_size],
+                        &src_chars[prev_src_string_offset], sum_chars_size);
             }
         }
 
@@ -627,18 +644,17 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
     return res;
 }
 
-ColumnPtr ColumnArray::replicate_const(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_const(const Offsets& replicate_offsets) const {
     size_t col_size = size();
     if (col_size != replicate_offsets.size())
         LOG(FATAL) << "Size of offsets doesn't match size of column.";
 
-    if (0 == col_size)
-        return clone_empty();
+    if (0 == col_size) return clone_empty();
 
-    const Offsets & src_offsets = get_offsets();
+    const Offsets& src_offsets = get_offsets();
 
     auto res_column_offsets = ColumnOffsets::create();
-    Offsets & res_offsets = res_column_offsets->get_data();
+    Offsets& res_offsets = res_column_offsets->get_data();
     res_offsets.reserve(replicate_offsets.back());
 
     Offset prev_replicate_offset = 0;
@@ -658,48 +674,81 @@ ColumnPtr ColumnArray::replicate_const(const Offsets & replicate_offsets) const
         prev_data_offset = src_offsets[i];
     }
 
-    return ColumnArray::create(get_data().clone_resized(current_new_offset), std::move(res_column_offsets));
+    return ColumnArray::create(get_data().clone_resized(current_new_offset),
+                               std::move(res_column_offsets));
 }
 
-ColumnPtr ColumnArray::replicate_generic(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_generic(const Offsets& replicate_offsets) const {
     size_t col_size = size();
     if (col_size != replicate_offsets.size())
         LOG(FATAL) << "Size of offsets doesn't match size of column.";
 
     MutableColumnPtr res = clone_empty();
-    ColumnArray & res_concrete = assert_cast<ColumnArray &>(*res);
+    ColumnArray& res_concrete = assert_cast<ColumnArray&>(*res);
 
-    if (0 == col_size)
-        return res;
+    if (0 == col_size) return res;
 
     IColumn::Offset prev_offset = 0;
     for (size_t i = 0; i < col_size; ++i) {
         size_t size_to_replicate = replicate_offsets[i] - prev_offset;
         prev_offset = replicate_offsets[i];
 
-        for (size_t j = 0; j < size_to_replicate; ++j)
-            res_concrete.insert_from(*this, i);
+        for (size_t j = 0; j < size_to_replicate; ++j) res_concrete.insert_from(*this, i);
     }
 
     return res;
 }
 
-ColumnPtr ColumnArray::replicate_nullable(const Offsets & replicate_offsets) const {
-    const ColumnNullable & nullable = assert_cast<const ColumnNullable &>(*data);
+ColumnPtr ColumnArray::replicate_nullable(const Offsets& replicate_offsets) const {
+    const ColumnNullable& nullable = assert_cast<const ColumnNullable&>(*data);
 
     /// Make temporary arrays for each components of Nullable. Then replicate them independently and collect back to result.
     /// NOTE Offsets are calculated twice and it is redundant.
 
-    auto array_of_nested = ColumnArray(nullable.get_nested_column_ptr()->assume_mutable(), get_offsets_ptr()->assume_mutable())
-            .replicate(replicate_offsets);
-    auto array_of_null_map = ColumnArray(nullable.get_null_map_column_ptr()->assume_mutable(), get_offsets_ptr()->assume_mutable())
-            .replicate(replicate_offsets);
+    auto array_of_nested = ColumnArray(nullable.get_nested_column_ptr()->assume_mutable(),
+                                       get_offsets_ptr()->assume_mutable())
+                                   .replicate(replicate_offsets);
+    auto array_of_null_map = ColumnArray(nullable.get_null_map_column_ptr()->assume_mutable(),
+                                         get_offsets_ptr()->assume_mutable())
+                                     .replicate(replicate_offsets);
 
     return ColumnArray::create(
-        ColumnNullable::create(
-            assert_cast<const ColumnArray &>(*array_of_nested).get_data_ptr(),
-            assert_cast<const ColumnArray &>(*array_of_null_map).get_data_ptr()),
-        assert_cast<const ColumnArray &>(*array_of_nested).get_offsets_ptr());
+            ColumnNullable::create(
+                    assert_cast<const ColumnArray&>(*array_of_nested).get_data_ptr(),
+                    assert_cast<const ColumnArray&>(*array_of_null_map).get_data_ptr()),
+            assert_cast<const ColumnArray&>(*array_of_nested).get_offsets_ptr());
+}
+
+ColumnPtr ColumnArray::permute(const Permutation& perm, size_t limit) const {
+    // Returns a new array column whose row i is row perm[i] of this column. Only the first
+    // `limit` rows are produced; limit == 0 means all rows.
+    const size_t size = offsets->size();
+    limit = (limit == 0) ? size : std::min(size, limit);
+    if (perm.size() < limit) {
+        LOG(FATAL) << "Size of permutation is less than required.";
+    }
+    if (limit == 0) return ColumnArray::create(data);
+
+    auto res = ColumnArray::create(data->clone_empty());
+    auto& res_offsets = res->get_offsets();
+    res_offsets.resize(limit);
+
+    // Expand the row permutation into a permutation over the nested elements.
+    Permutation nested_perm;
+    nested_perm.reserve(data->size());
+    size_t current_offset = 0; // running end offset, so index -1 of res_offsets is never read
+    for (size_t i = 0; i < limit; ++i) {
+        const size_t row_begin = offset_at(perm[i]);
+        const size_t row_size = size_at(perm[i]);
+        current_offset += row_size;
+        res_offsets[i] = current_offset;
+        for (size_t j = 0; j < row_size; ++j) nested_perm.push_back(row_begin + j);
+    }
+
+    // With an empty nested_perm the limit argument would be 0, which this function treats as
+    // "all rows"; presumably the nested permute does too and would fail, so skip the call.
+    if (!nested_perm.empty()) res->data = data->permute(nested_perm, nested_perm.size());
+    return res;
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index e6567d1c97..74a0805336 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -20,11 +20,11 @@
 
 #pragma once
 
-#include "vec/common/arena.h"
-#include "vec/common/assert_cast.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_impl.h"
 #include "vec/columns/column_vector.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
 #include "vec/core/types.h"
 
 namespace doris::vectorized {
@@ -38,12 +38,12 @@ private:
     friend class COWHelper<IColumn, ColumnArray>;
 
     /** Create an array column with specified values and offsets. */
-    ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column);
+    ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& offsets_column);
 
     /** Create an empty column of arrays with the type of values as in the column `nested_column` */
-    explicit ColumnArray(MutableColumnPtr && nested_column);
+    explicit ColumnArray(MutableColumnPtr&& nested_column);
 
-    ColumnArray(const ColumnArray &) = default;
+    ColumnArray(const ColumnArray&) = default;
 
 public:
     /** Create immutable column using immutable arguments. This arguments may be shared with other columns.
@@ -51,83 +51,87 @@ public:
       */
     using Base = COWHelper<IColumn, ColumnArray>;
 
-    static Ptr create(const ColumnPtr & nested_column, const ColumnPtr & offsets_column) {
-        return ColumnArray::create(nested_column->assume_mutable(), offsets_column->assume_mutable());
+    static Ptr create(const ColumnPtr& nested_column, const ColumnPtr& offsets_column) {
+        return ColumnArray::create(nested_column->assume_mutable(),
+                                   offsets_column->assume_mutable());
     }
 
-    static Ptr create(const ColumnPtr & nested_column) {
+    static Ptr create(const ColumnPtr& nested_column) {
         return ColumnArray::create(nested_column->assume_mutable());
     }
 
-    template <typename ... Args, typename = typename std::enable_if<IsMutableColumns<Args ...>::value>::type>
-    static MutablePtr create(Args &&... args) { return Base::create(std::forward<Args>(args)...); }
+    template <typename... Args,
+              typename = typename std::enable_if<IsMutableColumns<Args...>::value>::type>
+    static MutablePtr create(Args&&... args) {
+        return Base::create(std::forward<Args>(args)...);
+    }
 
     /** On the index i there is an offset to the beginning of the i + 1 -th element. */
     using ColumnOffsets = ColumnVector<Offset>;
 
     std::string get_name() const override;
-    const char * get_family_name() const override { return "Array"; }
+    const char* get_family_name() const override { return "Array"; }
     bool can_be_inside_nullable() const override { return true; }
     TypeIndex get_data_type() const { return TypeIndex::Array; }
     MutableColumnPtr clone_resized(size_t size) const override;
     size_t size() const override;
     Field operator[](size_t n) const override;
-    void get(size_t n, Field & res) const override;
+    void get(size_t n, Field& res) const override;
     StringRef get_data_at(size_t n) const override;
     bool is_default_at(size_t n) const override;
-    void insert_data(const char * pos, size_t length) override;
-    StringRef serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const override;
-    const char * deserialize_and_insert_from_arena(const char * pos) override;
-    void update_hash_with_value(size_t n, SipHash & hash) const override;
-    void insert_range_from(const IColumn & src, size_t start, size_t length) override;
-    void insert(const Field & x) override;
-    void insert_from(const IColumn & src_, size_t n) override;
+    void insert_data(const char* pos, size_t length) override;
+    StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
+    const char* deserialize_and_insert_from_arena(const char* pos) override;
+    void update_hash_with_value(size_t n, SipHash& hash) const override;
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override;
+    void insert(const Field& x) override;
+    void insert_from(const IColumn& src_, size_t n) override;
     void insert_default() override;
     void pop_back(size_t n) override;
-    ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
-    [[noreturn]] ColumnPtr permute(const Permutation & perm, size_t limit) const override {
-        LOG(FATAL) << "permute not implemented";
-    }
+    ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override;
     //ColumnPtr index(const IColumn & indexes, size_t limit) const;
     //template <typename Type> ColumnPtr index_impl(const PaddedPODArray<Type> & indexes, size_t limit) const;
-    [[noreturn]] int compare_at(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override {
+    [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_,
+                                int nan_direction_hint) const override {
         LOG(FATAL) << "compare_at not implemented";
     }
-    [[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override {
+    [[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                                      Permutation& res) const override {
         LOG(FATAL) << "get_permutation not implemented";
     }
     void reserve(size_t n) override;
     size_t byte_size() const override;
     size_t allocated_bytes() const override;
     void protect() override;
-    ColumnPtr replicate(const Offsets & replicate_offsets) const override;
+    ColumnPtr replicate(const Offsets& replicate_offsets) const override;
     ColumnPtr convert_to_full_column_if_const() const override;
-    void get_extremes(Field & min, Field & max) const override {
+    void get_extremes(Field& min, Field& max) const override {
         LOG(FATAL) << "get_extremes not implemented";
     }
 
     /** More efficient methods of manipulation */
-    IColumn & get_data() { return *data; }
-    const IColumn & get_data() const { return *data; }
+    IColumn& get_data() { return *data; }
+    const IColumn& get_data() const { return *data; }
 
-    IColumn & get_offsets_column() { return *offsets; }
-    const IColumn & get_offsets_column() const { return *offsets; }
+    IColumn& get_offsets_column() { return *offsets; }
+    const IColumn& get_offsets_column() const { return *offsets; }
 
-    Offsets & ALWAYS_INLINE get_offsets() {
-        return assert_cast<ColumnOffsets &>(*offsets).get_data();
+    Offsets& ALWAYS_INLINE get_offsets() {
+        return assert_cast<ColumnOffsets&>(*offsets).get_data();
     }
 
-    const Offsets & ALWAYS_INLINE get_offsets() const {
-        return assert_cast<const ColumnOffsets &>(*offsets).get_data();
+    const Offsets& ALWAYS_INLINE get_offsets() const {
+        return assert_cast<const ColumnOffsets&>(*offsets).get_data();
     }
 
-    const ColumnPtr & get_data_ptr() const { return data; }
-    ColumnPtr & get_data_ptr() { return data; }
+    const ColumnPtr& get_data_ptr() const { return data; }
+    ColumnPtr& get_data_ptr() { return data; }
 
-    const ColumnPtr & get_offsets_ptr() const { return offsets; }
-    ColumnPtr & get_offsets_ptr() { return offsets; }
+    const ColumnPtr& get_offsets_ptr() const { return offsets; }
+    ColumnPtr& get_offsets_ptr() { return offsets; }
 
-    MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override {
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
         return scatter_impl<ColumnArray>(num_columns, selector);
     }
 
@@ -136,7 +140,8 @@ public:
         callback(data);
     }
 
-    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override;
+    void insert_indices_from(const IColumn& src, const int* indices_begin,
+                             const int* indices_end) override;
 
     void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
         LOG(FATAL) << "replace_column_data not implemented";
@@ -154,36 +159,36 @@ private:
     WrappedPtr offsets;
 
     size_t ALWAYS_INLINE offset_at(ssize_t i) const { return get_offsets()[i - 1]; }
-    size_t ALWAYS_INLINE size_at(ssize_t i) const { return get_offsets()[i] - get_offsets()[i - 1]; }
-
+    size_t ALWAYS_INLINE size_at(ssize_t i) const {
+        return get_offsets()[i] - get_offsets()[i - 1];
+    }
 
     /// Multiply values if the nested column is ColumnVector<T>.
     template <typename T>
-    ColumnPtr replicate_number(const Offsets & replicate_offsets) const;
+    ColumnPtr replicate_number(const Offsets& replicate_offsets) const;
 
     /// Multiply the values if the nested column is ColumnString. The code is too complicated.
-    ColumnPtr replicate_string(const Offsets & replicate_offsets) const;
+    ColumnPtr replicate_string(const Offsets& replicate_offsets) const;
 
     /** Non-constant arrays of constant values are quite rare.
       * Most functions can not work with them, and does not create such columns as a result.
       * An exception is the function `replicate` (see FunctionsMiscellaneous.h), which has service meaning for the implementation of lambda functions.
       * Only for its sake is the implementation of the `replicate` method for ColumnArray(ColumnConst).
       */
-    ColumnPtr replicate_const(const Offsets & replicate_offsets) const;
+    ColumnPtr replicate_const(const Offsets& replicate_offsets) const;
 
     /** The following is done by simply replicating of nested columns.
       */
-    ColumnPtr replicate_nullable(const Offsets & replicate_offsets) const;
-    ColumnPtr replicate_generic(const Offsets & replicate_offsets) const;
-
+    ColumnPtr replicate_nullable(const Offsets& replicate_offsets) const;
+    ColumnPtr replicate_generic(const Offsets& replicate_offsets) const;
 
     /// Specializations for the filter function.
     template <typename T>
-    ColumnPtr filter_number(const Filter & filt, ssize_t result_size_hint) const;
+    ColumnPtr filter_number(const Filter& filt, ssize_t result_size_hint) const;
 
-    ColumnPtr filter_string(const Filter & filt, ssize_t result_size_hint) const;
-    ColumnPtr filter_nullable(const Filter & filt, ssize_t result_size_hint) const;
-    ColumnPtr filter_generic(const Filter & filt, ssize_t result_size_hint) const;
+    ColumnPtr filter_string(const Filter& filt, ssize_t result_size_hint) const;
+    ColumnPtr filter_nullable(const Filter& filt, ssize_t result_size_hint) const;
+    ColumnPtr filter_generic(const Filter& filt, ssize_t result_size_hint) const;
 };
 
 } // namespace doris::vectorized
diff --git a/be/test/vec/core/column_array_test.cpp b/be/test/vec/core/column_array_test.cpp
index a65c7d8c0b..b497b14451 100644
--- a/be/test/vec/core/column_array_test.cpp
+++ b/be/test/vec/core/column_array_test.cpp
@@ -28,6 +28,34 @@
 
 namespace doris::vectorized {
 
+// Asserts that the offsets of array column `arr` equal `offs`, element by element.
+void check_array_offsets(ColumnPtr arr, const std::vector<IColumn::Offset>& offs) {
+    const auto* array = check_and_get_column<ColumnArray>(*arr);
+    const auto& actual = array->get_offsets();
+    ASSERT_EQ(array->size(), offs.size());
+    for (size_t row = 0; row < array->size(); ++row) ASSERT_EQ(actual[row], offs[row]);
+}
+// Asserts that the nested data of array column `arr` matches `data`, reading each cell as a T.
+template <typename T>
+void check_array_data(ColumnPtr arr, const std::vector<T>& data) {
+    const auto nested = check_and_get_column<ColumnArray>(*arr)->get_data_ptr();
+    ASSERT_EQ(nested->size(), data.size());
+    for (size_t idx = 0; idx < nested->size(); ++idx) {
+        const auto cell = nested->get_data_at(idx);
+        ASSERT_EQ(*reinterpret_cast<const T*>(cell.data), data[idx]);
+    }
+}
+// String specialization: compares each nested string, built from its explicit length, to data[i].
+template <>
+void check_array_data(ColumnPtr arr, const std::vector<std::string>& data) {
+    auto data_col = check_and_get_column<ColumnArray>(*arr)->get_data_ptr();
+    ASSERT_EQ(data_col->size(), data.size());
+    for (size_t i = 0; i < data_col->size(); ++i) {
+        auto element = data_col->get_data_at(i);
+        ASSERT_EQ(std::string(element.data, element.size), data[i]);
+    }
+}
+
 TEST(ColumnArrayTest, IntArrayTest) {
     auto off_column = ColumnVector<IColumn::Offset>::create();
     auto data_column = ColumnVector<int32_t>::create();
@@ -78,4 +106,77 @@ TEST(ColumnArrayTest, StringArrayTest) {
     }
 }
 
+TEST(ColumnArrayTest, IntArrayPermuteTest) {
+    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto data_column = ColumnVector<int32_t>::create();
+    // Build [[1,2,3],[],[4],[5,6]]; offs[0] is a leading 0 that is not inserted (loop starts at 1).
+    std::vector<IColumn::Offset> offs = {0, 3, 3, 4, 6};
+    std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6};
+    for (size_t i = 1; i < offs.size(); ++i) {
+        off_column->insert_data((const char*)(&offs[i]), 0);
+    }
+    for (auto& v : vals) {
+        data_column->insert_data((const char*)(&v), 0);
+    }
+    ColumnArray array_column(std::move(data_column), std::move(off_column));
+
+    IColumn::Permutation perm = {3, 2, 1, 0};
+    // limit = 2 keeps only the first two permuted rows: [[5,6],[4]]
+    auto res1 = array_column.permute(perm, 2);
+    check_array_offsets(res1, {2, 3});
+    check_array_data<int32_t>(res1, {5, 6, 4});
+
+    // limit = 0 means no limit, so all rows come back in reversed order: [[5,6],[4],[],[1,2,3]]
+    auto res2 = array_column.permute(perm, 0);
+    check_array_offsets(res2, {2, 3, 3, 6});
+    check_array_data<int32_t>(res2, {5, 6, 4, 1, 2, 3});
+}
+
+TEST(ColumnArrayTest, StringArrayPermuteTest) {
+    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto data_column = ColumnString::create();
+    // Build [["abc","d"],["ef"],[],[""]]; offs[0] is a leading 0 that is not inserted.
+    std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4};
+    std::vector<std::string> vals = {"abc", "d", "ef", ""};
+    for (size_t i = 1; i < offs.size(); ++i) {
+        off_column->insert_data((const char*)(&offs[i]), 0);
+    }
+    for (auto& v : vals) {
+        data_column->insert_data(v.data(), v.size());
+    }
+    ColumnArray array_column(std::move(data_column), std::move(off_column));
+
+    IColumn::Permutation perm = {3, 2, 1, 0};
+    // limit = 2 keeps only the first two permuted rows: [[""],[]]
+    auto res1 = array_column.permute(perm, 2);
+    check_array_offsets(res1, {1, 1});
+    check_array_data<std::string>(res1, {""});
+}
+
+TEST(ColumnArrayTest, EmptyArrayPermuteTest) {
+    auto off_column = ColumnVector<IColumn::Offset>::create();
+    auto data_column = ColumnVector<int32_t>::create();
+    // Build four empty arrays [[],[],[],[]]; the nested data column stays empty.
+    std::vector<IColumn::Offset> offs = {0, 0, 0, 0, 0};
+    std::vector<int32_t> vals = {};
+    for (size_t i = 1; i < offs.size(); ++i) {
+        off_column->insert_data((const char*)(&offs[i]), 0);
+    }
+    for (auto& v : vals) {
+        data_column->insert_data((const char*)(&v), 0);
+    }
+    ColumnArray array_column(std::move(data_column), std::move(off_column));
+
+    IColumn::Permutation perm = {3, 2, 1, 0};
+    // limit = 2 keeps only the first two permuted rows: [[],[]]
+    auto res1 = array_column.permute(perm, 2);
+    check_array_offsets(res1, {0, 0});
+    check_array_data<int32_t>(res1, {});
+
+    // limit = 0 means no limit, so all four (empty) rows are returned: [[],[],[],[]]
+    auto res2 = array_column.permute(perm, 0);
+    check_array_offsets(res2, {0, 0, 0, 0});
+    check_array_data<int32_t>(res2, {});
+}
+
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org