You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/04/13 01:48:00 UTC
[incubator-doris] branch master updated: permute impl for column array; and codes format (#8949)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 52d18aa83c permute impl for column array; and codes format (#8949)
52d18aa83c is described below
commit 52d18aa83c1180f419b5fba6ab908f320b262346
Author: camby <10...@qq.com>
AuthorDate: Wed Apr 13 09:47:54 2022 +0800
permute impl for column array; and codes format (#8949)
Co-authored-by: cambyzju <zh...@baidu.com>
---
be/src/vec/columns/column_array.cpp | 405 ++++++++++++++++++---------------
be/src/vec/columns/column_array.h | 111 ++++-----
be/test/vec/core/column_array_test.cpp | 101 ++++++++
3 files changed, 386 insertions(+), 231 deletions(-)
diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp
index 4754ca3b70..cc4f380f7e 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -18,27 +18,28 @@
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnArray.cpp
// and modified by Doris
+#include "vec/columns/column_array.h"
+
#include <string.h> // memcpy
-#include "vec/common/assert_cast.h"
#include "vec/columns/collator.h"
-#include "vec/columns/column_array.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_common.h"
#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
namespace doris::vectorized {
namespace ErrorCodes {
- extern const int NOT_IMPLEMENTED;
- extern const int BAD_ARGUMENTS;
- extern const int PARAMETER_OUT_OF_BOUND;
- extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
- extern const int LOGICAL_ERROR;
- extern const int TOO_LARGE_ARRAY_SIZE;
-}
+extern const int NOT_IMPLEMENTED;
+extern const int BAD_ARGUMENTS;
+extern const int PARAMETER_OUT_OF_BOUND;
+extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
+extern const int LOGICAL_ERROR;
+extern const int TOO_LARGE_ARRAY_SIZE;
+} // namespace ErrorCodes
/** Obtaining an array as a Field can be slow for large arrays and can consume a vast amount of memory.
* So it is simply not allowed.
@@ -48,9 +49,9 @@ namespace ErrorCodes {
*/
static constexpr size_t max_array_size_as_field = 1000000;
-ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column)
- : data(std::move(nested_column)), offsets(std::move(offsets_column)) {
- const ColumnOffsets * offsets_concrete = typeid_cast<const ColumnOffsets *>(offsets.get());
+ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& offsets_column)
+ : data(std::move(nested_column)), offsets(std::move(offsets_column)) {
+ const ColumnOffsets* offsets_concrete = typeid_cast<const ColumnOffsets*>(offsets.get());
if (!offsets_concrete) {
LOG(FATAL) << "offsets_column must be a ColumnUInt64";
@@ -71,8 +72,7 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
*/
}
-ColumnArray::ColumnArray(MutableColumnPtr && nested_column)
- : data(std::move(nested_column)) {
+ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nested_column)) {
if (!data->empty()) {
LOG(FATAL) << "Not empty data passed to ColumnArray, but no offsets passed";
}
@@ -80,13 +80,14 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column)
offsets = ColumnOffsets::create();
}
-std::string ColumnArray::get_name() const { return "Array(" + get_data().get_name() + ")"; }
+std::string ColumnArray::get_name() const {
+ return "Array(" + get_data().get_name() + ")";
+}
MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const {
auto res = ColumnArray::create(get_data().clone_empty());
- if (to_size == 0)
- return res;
+ if (to_size == 0) return res;
size_t from_size = size();
if (to_size <= from_size) {
@@ -103,8 +104,7 @@ MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const {
}
res->get_offsets().resize(to_size);
- for (size_t i = from_size; i < to_size; ++i)
- res->get_offsets()[i] = offset;
+ for (size_t i = from_size; i < to_size; ++i) res->get_offsets()[i] = offset;
}
return res;
@@ -124,13 +124,12 @@ Field ColumnArray::operator[](size_t n) const {
Array res(size);
- for (size_t i = 0; i < size; ++i)
- res[i] = get_data()[offset + i];
+ for (size_t i = 0; i < size; ++i) res[i] = get_data()[offset + i];
return res;
}
-void ColumnArray::get(size_t n, Field & res) const {
+void ColumnArray::get(size_t n, Field& res) const {
size_t offset = offset_at(n);
size_t size = size_at(n);
@@ -139,10 +138,9 @@ void ColumnArray::get(size_t n, Field & res) const {
<< " maximum size " << max_array_size_as_field;
res = Array(size);
- Array & res_arr = doris::vectorized::get<Array &>(res);
+ Array& res_arr = doris::vectorized::get<Array&>(res);
- for (size_t i = 0; i < size; ++i)
- get_data().get(offset + i, res_arr[i]);
+ for (size_t i = 0; i < size; ++i) get_data().get(offset + i, res_arr[i]);
}
StringRef ColumnArray::get_data_at(size_t n) const {
@@ -156,8 +154,7 @@ StringRef ColumnArray::get_data_at(size_t n) const {
StringRef first = get_data().get_data_at_with_terminating_zero(offset_of_first_elem);
size_t array_size = size_at(n);
- if (array_size == 0)
- return StringRef(first.data, 0);
+ if (array_size == 0) return StringRef(first.data, 0);
size_t offset_of_last_elem = get_offsets()[n] - 1;
StringRef last = get_data().get_data_at_with_terminating_zero(offset_of_last_elem);
@@ -166,11 +163,11 @@ StringRef ColumnArray::get_data_at(size_t n) const {
}
bool ColumnArray::is_default_at(size_t n) const {
- const auto & offsets_data = get_offsets();
+ const auto& offsets_data = get_offsets();
return offsets_data[n] == offsets_data[static_cast<ssize_t>(n) - 1];
}
-void ColumnArray::insert_data(const char * pos, size_t length) {
+void ColumnArray::insert_data(const char* pos, size_t length) {
/** Similarly - only for arrays of fixed length values.
*/
if (!data->is_fixed_and_contiguous())
@@ -180,9 +177,8 @@ void ColumnArray::insert_data(const char * pos, size_t length) {
size_t elems = 0;
- if (length)
- {
- const char * end = pos + length;
+ if (length) {
+ const char* end = pos + length;
for (; pos + field_size <= end; pos += field_size, ++elems)
data->insert_data(pos, field_size);
@@ -193,11 +189,12 @@ void ColumnArray::insert_data(const char * pos, size_t length) {
get_offsets().push_back(get_offsets().back() + elems);
}
-StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const {
+StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena& arena,
+ char const*& begin) const {
size_t array_size = size_at(n);
size_t offset = offset_at(n);
- char * pos = arena.alloc_continue(sizeof(array_size), begin);
+ char* pos = arena.alloc_continue(sizeof(array_size), begin);
memcpy(pos, &array_size, sizeof(array_size));
StringRef res(pos, sizeof(array_size));
@@ -211,36 +208,33 @@ StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena & arena, char
return res;
}
-const char * ColumnArray::deserialize_and_insert_from_arena(const char * pos) {
+const char* ColumnArray::deserialize_and_insert_from_arena(const char* pos) {
size_t array_size = unaligned_load<size_t>(pos);
pos += sizeof(array_size);
- for (size_t i = 0; i < array_size; ++i)
- pos = get_data().deserialize_and_insert_from_arena(pos);
+ for (size_t i = 0; i < array_size; ++i) pos = get_data().deserialize_and_insert_from_arena(pos);
get_offsets().push_back(get_offsets().back() + array_size);
return pos;
}
-void ColumnArray::update_hash_with_value(size_t n, SipHash & hash) const {
+void ColumnArray::update_hash_with_value(size_t n, SipHash& hash) const {
size_t array_size = size_at(n);
size_t offset = offset_at(n);
hash.update(array_size);
- for (size_t i = 0; i < array_size; ++i)
- get_data().update_hash_with_value(offset + i, hash);
+ for (size_t i = 0; i < array_size; ++i) get_data().update_hash_with_value(offset + i, hash);
}
-void ColumnArray::insert(const Field & x) {
- const Array & array = doris::vectorized::get<const Array &>(x);
+void ColumnArray::insert(const Field& x) {
+ const Array& array = doris::vectorized::get<const Array&>(x);
size_t size = array.size();
- for (size_t i = 0; i < size; ++i)
- get_data().insert(array[i]);
+ for (size_t i = 0; i < size; ++i) get_data().insert(array[i]);
get_offsets().push_back(get_offsets().back() + size);
}
-void ColumnArray::insert_from(const IColumn & src_, size_t n) {
- const ColumnArray & src = assert_cast<const ColumnArray &>(src_);
+void ColumnArray::insert_from(const IColumn& src_, size_t n) {
+ const ColumnArray& src = assert_cast<const ColumnArray&>(src_);
size_t size = src.size_at(n);
size_t offset = src.offset_at(n);
@@ -256,17 +250,17 @@ void ColumnArray::insert_default() {
}
void ColumnArray::pop_back(size_t n) {
- auto & offsets_data = get_offsets();
+ auto& offsets_data = get_offsets();
DCHECK(n <= offsets_data.size());
size_t nested_n = offsets_data.back() - offset_at(offsets_data.size() - n);
- if (nested_n)
- get_data().pop_back(nested_n);
+ if (nested_n) get_data().pop_back(nested_n);
offsets_data.resize_assume_reserved(offsets_data.size() - n);
}
void ColumnArray::reserve(size_t n) {
get_offsets().reserve(n);
- get_data().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
+ get_data().reserve(
+ n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1.
}
size_t ColumnArray::byte_size() const {
@@ -288,24 +282,24 @@ ColumnPtr ColumnArray::convert_to_full_column_if_const() const {
return ColumnArray::create(data->convert_to_full_column_if_const(), offsets);
}
-void ColumnArray::insert_range_from(const IColumn & src, size_t start, size_t length) {
- if (length == 0)
- return;
+void ColumnArray::insert_range_from(const IColumn& src, size_t start, size_t length) {
+ if (length == 0) return;
- const ColumnArray & src_concrete = assert_cast<const ColumnArray &>(src);
+ const ColumnArray& src_concrete = assert_cast<const ColumnArray&>(src);
if (start + length > src_concrete.get_offsets().size())
LOG(FATAL) << "Parameter out of bound in ColumnArray::insert_range_from method. [start("
<< std::to_string(start) << ") + length(" << std::to_string(length)
- << ") > offsets.size(" << std::to_string(src_concrete.get_offsets().size()) << ")]";
+ << ") > offsets.size(" << std::to_string(src_concrete.get_offsets().size())
+ << ")]";
size_t nested_offset = src_concrete.offset_at(start);
size_t nested_length = src_concrete.get_offsets()[start + length - 1] - nested_offset;
get_data().insert_range_from(src_concrete.get_data(), nested_offset, nested_length);
- Offsets & cur_offsets = get_offsets();
- const Offsets & src_offsets = src_concrete.get_offsets();
+ Offsets& cur_offsets = get_offsets();
+ const Offsets& src_offsets = src_concrete.get_offsets();
if (start == 0 && cur_offsets.empty()) {
cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
@@ -320,55 +314,64 @@ void ColumnArray::insert_range_from(const IColumn & src, size_t start, size_t le
}
}
-ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const {
- if (typeid_cast<const ColumnUInt8 *>(data.get())) return filter_number<UInt8>(filt, result_size_hint);
- if (typeid_cast<const ColumnUInt16 *>(data.get())) return filter_number<UInt16>(filt, result_size_hint);
- if (typeid_cast<const ColumnUInt32 *>(data.get())) return filter_number<UInt32>(filt, result_size_hint);
- if (typeid_cast<const ColumnUInt64 *>(data.get())) return filter_number<UInt64>(filt, result_size_hint);
- if (typeid_cast<const ColumnInt8 *>(data.get())) return filter_number<Int8>(filt, result_size_hint);
- if (typeid_cast<const ColumnInt16 *>(data.get())) return filter_number<Int16>(filt, result_size_hint);
- if (typeid_cast<const ColumnInt32 *>(data.get())) return filter_number<Int32>(filt, result_size_hint);
- if (typeid_cast<const ColumnInt64 *>(data.get())) return filter_number<Int64>(filt, result_size_hint);
- if (typeid_cast<const ColumnFloat32 *>(data.get())) return filter_number<Float32>(filt, result_size_hint);
- if (typeid_cast<const ColumnFloat64 *>(data.get())) return filter_number<Float64>(filt, result_size_hint);
- if (typeid_cast<const ColumnString *>(data.get())) return filter_string(filt, result_size_hint);
+ColumnPtr ColumnArray::filter(const Filter& filt, ssize_t result_size_hint) const {
+ if (typeid_cast<const ColumnUInt8*>(data.get()))
+ return filter_number<UInt8>(filt, result_size_hint);
+ if (typeid_cast<const ColumnUInt16*>(data.get()))
+ return filter_number<UInt16>(filt, result_size_hint);
+ if (typeid_cast<const ColumnUInt32*>(data.get()))
+ return filter_number<UInt32>(filt, result_size_hint);
+ if (typeid_cast<const ColumnUInt64*>(data.get()))
+ return filter_number<UInt64>(filt, result_size_hint);
+ if (typeid_cast<const ColumnInt8*>(data.get()))
+ return filter_number<Int8>(filt, result_size_hint);
+ if (typeid_cast<const ColumnInt16*>(data.get()))
+ return filter_number<Int16>(filt, result_size_hint);
+ if (typeid_cast<const ColumnInt32*>(data.get()))
+ return filter_number<Int32>(filt, result_size_hint);
+ if (typeid_cast<const ColumnInt64*>(data.get()))
+ return filter_number<Int64>(filt, result_size_hint);
+ if (typeid_cast<const ColumnFloat32*>(data.get()))
+ return filter_number<Float32>(filt, result_size_hint);
+ if (typeid_cast<const ColumnFloat64*>(data.get()))
+ return filter_number<Float64>(filt, result_size_hint);
+ if (typeid_cast<const ColumnString*>(data.get())) return filter_string(filt, result_size_hint);
//if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint);
- if (typeid_cast<const ColumnNullable *>(data.get())) return filter_nullable(filt, result_size_hint);
+ if (typeid_cast<const ColumnNullable*>(data.get()))
+ return filter_nullable(filt, result_size_hint);
return filter_generic(filt, result_size_hint);
}
template <typename T>
-ColumnPtr ColumnArray::filter_number(const Filter & filt, ssize_t result_size_hint) const {
- if (get_offsets().empty())
- return ColumnArray::create(data);
+ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hint) const {
+ if (get_offsets().empty()) return ColumnArray::create(data);
auto res = ColumnArray::create(data->clone_empty());
- auto & res_elems = assert_cast<ColumnVector<T> &>(res->get_data()).get_data();
- Offsets & res_offsets = res->get_offsets();
+ auto& res_elems = assert_cast<ColumnVector<T>&>(res->get_data()).get_data();
+ Offsets& res_offsets = res->get_offsets();
- filter_arrays_impl<T>(assert_cast<const ColumnVector<T> &>(*data).get_data(), get_offsets(), res_elems, res_offsets, filt, result_size_hint);
+ filter_arrays_impl<T>(assert_cast<const ColumnVector<T>&>(*data).get_data(), get_offsets(),
+ res_elems, res_offsets, filt, result_size_hint);
return res;
}
-ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hint) const {
+ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hint) const {
size_t col_size = get_offsets().size();
- if (col_size != filt.size())
- LOG(FATAL) << "Size of filter doesn't match size of column.";
+ if (col_size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column.";
- if (0 == col_size)
- return ColumnArray::create(data);
+ if (0 == col_size) return ColumnArray::create(data);
auto res = ColumnArray::create(data->clone_empty());
- const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
- const ColumnString::Chars & src_chars = src_string.get_chars();
- const Offsets & src_string_offsets = src_string.get_offsets();
- const Offsets & src_offsets = get_offsets();
+ const ColumnString& src_string = typeid_cast<const ColumnString&>(*data);
+ const ColumnString::Chars& src_chars = src_string.get_chars();
+ const Offsets& src_string_offsets = src_string.get_offsets();
+ const Offsets& src_offsets = get_offsets();
- ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res->get_data()).get_chars();
- Offsets & res_string_offsets = typeid_cast<ColumnString &>(res->get_data()).get_offsets();
- Offsets & res_offsets = res->get_offsets();
+ ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res->get_data()).get_chars();
+ Offsets& res_string_offsets = typeid_cast<ColumnString&>(res->get_data()).get_offsets();
+ Offsets& res_offsets = res->get_offsets();
if (result_size_hint < 0) {
res_chars.reserve(src_chars.size());
@@ -389,13 +392,16 @@ ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hi
if (filt[i]) {
/// If the array is not empty - copy content.
if (array_size) {
- size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] - prev_src_string_offset;
+ size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] -
+ prev_src_string_offset;
size_t res_chars_prev_size = res_chars.size();
res_chars.resize(res_chars_prev_size + chars_to_copy);
- memcpy(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset], chars_to_copy);
+ memcpy(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset],
+ chars_to_copy);
for (size_t j = 0; j < array_size; ++j)
- res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] + prev_res_string_offset - prev_src_string_offset);
+ res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] +
+ prev_res_string_offset - prev_src_string_offset);
prev_res_string_offset = res_string_offsets.back();
}
@@ -413,13 +419,11 @@ ColumnPtr ColumnArray::filter_string(const Filter & filt, ssize_t result_size_hi
return res;
}
-ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_hint) const {
+ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t result_size_hint) const {
size_t size = get_offsets().size();
- if (size != filt.size())
- LOG(FATAL) << "Size of filter doesn't match size of column.";
+ if (size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column.";
- if (size == 0)
- return ColumnArray::create(data);
+ if (size == 0) return ColumnArray::create(data);
Filter nested_filt(get_offsets().back());
for (size_t i = 0; i < size; ++i) {
@@ -434,19 +438,18 @@ ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_h
ssize_t nested_result_size_hint = 0;
if (result_size_hint < 0)
nested_result_size_hint = result_size_hint;
- else if (result_size_hint && result_size_hint < 1000000000 && data->size() < 1000000000) /// Avoid overflow.
- nested_result_size_hint = result_size_hint * data->size() / size;
+ else if (result_size_hint && result_size_hint < 1000000000 &&
+ data->size() < 1000000000) /// Avoid overflow.
+ nested_result_size_hint = result_size_hint * data->size() / size;
res->data = data->filter(nested_filt, nested_result_size_hint);
- Offsets & res_offsets = res->get_offsets();
- if (result_size_hint)
- res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);
+ Offsets& res_offsets = res->get_offsets();
+ if (result_size_hint) res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);
size_t current_offset = 0;
for (size_t i = 0; i < size; ++i) {
- if (filt[i])
- {
+ if (filt[i]) {
current_offset += size_at(i);
res_offsets.push_back(current_offset);
}
@@ -455,29 +458,29 @@ ColumnPtr ColumnArray::filter_generic(const Filter & filt, ssize_t result_size_h
return res;
}
-ColumnPtr ColumnArray::filter_nullable(const Filter & filt, ssize_t result_size_hint) const {
- if (get_offsets().empty())
- return ColumnArray::create(data);
+ColumnPtr ColumnArray::filter_nullable(const Filter& filt, ssize_t result_size_hint) const {
+ if (get_offsets().empty()) return ColumnArray::create(data);
- const ColumnNullable & nullable_elems = assert_cast<const ColumnNullable &>(*data);
+ const ColumnNullable& nullable_elems = assert_cast<const ColumnNullable&>(*data);
auto array_of_nested = ColumnArray::create(nullable_elems.get_nested_column_ptr(), offsets);
auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint);
- const auto & filtered_array_of_nested = assert_cast<const ColumnArray &>(*filtered_array_of_nested_owner);
- const auto & filtered_offsets = filtered_array_of_nested.get_offsets_ptr();
+ const auto& filtered_array_of_nested =
+ assert_cast<const ColumnArray&>(*filtered_array_of_nested_owner);
+ const auto& filtered_offsets = filtered_array_of_nested.get_offsets_ptr();
auto res_null_map = ColumnUInt8::create();
- filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(), res_null_map->get_data(), filt, result_size_hint);
+ filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(),
+ res_null_map->get_data(), filt, result_size_hint);
- return ColumnArray::create(
- ColumnNullable::create(
- filtered_array_of_nested.get_data_ptr(),
- std::move(res_null_map)),
- filtered_offsets);
+ return ColumnArray::create(ColumnNullable::create(filtered_array_of_nested.get_data_ptr(),
+ std::move(res_null_map)),
+ filtered_offsets);
}
-void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) {
+void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_begin,
+ const int* indices_end) {
for (auto x = indices_begin; x != indices_end; ++x) {
if (*x == -1) {
ColumnArray::insert_default();
@@ -487,45 +490,56 @@ void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_beg
}
}
-ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const {
- if (replicate_offsets.empty())
- return clone_empty();
-
- if (typeid_cast<const ColumnUInt8 *>(data.get())) return replicate_number<UInt8>(replicate_offsets);
- if (typeid_cast<const ColumnUInt16 *>(data.get())) return replicate_number<UInt16>(replicate_offsets);
- if (typeid_cast<const ColumnUInt32 *>(data.get())) return replicate_number<UInt32>(replicate_offsets);
- if (typeid_cast<const ColumnUInt64 *>(data.get())) return replicate_number<UInt64>(replicate_offsets);
- if (typeid_cast<const ColumnInt8 *>(data.get())) return replicate_number<Int8>(replicate_offsets);
- if (typeid_cast<const ColumnInt16 *>(data.get())) return replicate_number<Int16>(replicate_offsets);
- if (typeid_cast<const ColumnInt32 *>(data.get())) return replicate_number<Int32>(replicate_offsets);
- if (typeid_cast<const ColumnInt64 *>(data.get())) return replicate_number<Int64>(replicate_offsets);
- if (typeid_cast<const ColumnFloat32 *>(data.get())) return replicate_number<Float32>(replicate_offsets);
- if (typeid_cast<const ColumnFloat64 *>(data.get())) return replicate_number<Float64>(replicate_offsets);
- if (typeid_cast<const ColumnString *>(data.get())) return replicate_string(replicate_offsets);
- if (typeid_cast<const ColumnConst *>(data.get())) return replicate_const(replicate_offsets);
- if (typeid_cast<const ColumnNullable *>(data.get())) return replicate_nullable(replicate_offsets);
+ColumnPtr ColumnArray::replicate(const Offsets& replicate_offsets) const {
+ if (replicate_offsets.empty()) return clone_empty();
+
+ if (typeid_cast<const ColumnUInt8*>(data.get()))
+ return replicate_number<UInt8>(replicate_offsets);
+ if (typeid_cast<const ColumnUInt16*>(data.get()))
+ return replicate_number<UInt16>(replicate_offsets);
+ if (typeid_cast<const ColumnUInt32*>(data.get()))
+ return replicate_number<UInt32>(replicate_offsets);
+ if (typeid_cast<const ColumnUInt64*>(data.get()))
+ return replicate_number<UInt64>(replicate_offsets);
+ if (typeid_cast<const ColumnInt8*>(data.get()))
+ return replicate_number<Int8>(replicate_offsets);
+ if (typeid_cast<const ColumnInt16*>(data.get()))
+ return replicate_number<Int16>(replicate_offsets);
+ if (typeid_cast<const ColumnInt32*>(data.get()))
+ return replicate_number<Int32>(replicate_offsets);
+ if (typeid_cast<const ColumnInt64*>(data.get()))
+ return replicate_number<Int64>(replicate_offsets);
+ if (typeid_cast<const ColumnFloat32*>(data.get()))
+ return replicate_number<Float32>(replicate_offsets);
+ if (typeid_cast<const ColumnFloat64*>(data.get()))
+ return replicate_number<Float64>(replicate_offsets);
+ if (typeid_cast<const ColumnString*>(data.get())) return replicate_string(replicate_offsets);
+ if (typeid_cast<const ColumnConst*>(data.get())) return replicate_const(replicate_offsets);
+ if (typeid_cast<const ColumnNullable*>(data.get()))
+ return replicate_nullable(replicate_offsets);
//if (typeid_cast<const ColumnTuple *>(data.get())) return replicateTuple(replicate_offsets);
return replicate_generic(replicate_offsets);
}
template <typename T>
-ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_number(const Offsets& replicate_offsets) const {
size_t col_size = size();
if (col_size != replicate_offsets.size())
LOG(FATAL) << "Size of offsets doesn't match size of column.";
MutableColumnPtr res = clone_empty();
- if (0 == col_size)
- return res;
+ if (0 == col_size) return res;
- ColumnArray & res_arr = typeid_cast<ColumnArray &>(*res);
+ ColumnArray& res_arr = typeid_cast<ColumnArray&>(*res);
- const typename ColumnVector<T>::Container & src_data = typeid_cast<const ColumnVector<T> &>(*data).get_data();
- const Offsets & src_offsets = get_offsets();
+ const typename ColumnVector<T>::Container& src_data =
+ typeid_cast<const ColumnVector<T>&>(*data).get_data();
+ const Offsets& src_offsets = get_offsets();
- typename ColumnVector<T>::Container & res_data = typeid_cast<ColumnVector<T> &>(res_arr.get_data()).get_data();
- Offsets & res_offsets = res_arr.get_offsets();
+ typename ColumnVector<T>::Container& res_data =
+ typeid_cast<ColumnVector<T>&>(res_arr.get_data()).get_data();
+ Offsets& res_offsets = res_arr.get_offsets();
res_data.reserve(data->size() / col_size * replicate_offsets.back());
res_offsets.reserve(replicate_offsets.back());
@@ -544,7 +558,8 @@ ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const
if (value_size) {
res_data.resize(res_data.size() + value_size);
- memcpy(&res_data[res_data.size() - value_size], &src_data[prev_data_offset], value_size * sizeof(T));
+ memcpy(&res_data[res_data.size() - value_size], &src_data[prev_data_offset],
+ value_size * sizeof(T));
}
}
@@ -555,26 +570,25 @@ ColumnPtr ColumnArray::replicate_number(const Offsets & replicate_offsets) const
return res;
}
-ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_string(const Offsets& replicate_offsets) const {
size_t col_size = size();
if (col_size != replicate_offsets.size())
LOG(FATAL) << "Size of offsets doesn't match size of column.";
MutableColumnPtr res = clone_empty();
- if (0 == col_size)
- return res;
+ if (0 == col_size) return res;
- ColumnArray & res_arr = assert_cast<ColumnArray &>(*res);
+ ColumnArray& res_arr = assert_cast<ColumnArray&>(*res);
- const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
- const ColumnString::Chars & src_chars = src_string.get_chars();
- const Offsets & src_string_offsets = src_string.get_offsets();
- const Offsets & src_offsets = get_offsets();
+ const ColumnString& src_string = typeid_cast<const ColumnString&>(*data);
+ const ColumnString::Chars& src_chars = src_string.get_chars();
+ const Offsets& src_string_offsets = src_string.get_offsets();
+ const Offsets& src_offsets = get_offsets();
- ColumnString::Chars & res_chars = typeid_cast<ColumnString &>(res_arr.get_data()).get_chars();
- Offsets & res_string_offsets = typeid_cast<ColumnString &>(res_arr.get_data()).get_offsets();
- Offsets & res_offsets = res_arr.get_offsets();
+ ColumnString::Chars& res_chars = typeid_cast<ColumnString&>(res_arr.get_data()).get_chars();
+ Offsets& res_string_offsets = typeid_cast<ColumnString&>(res_arr.get_data()).get_offsets();
+ Offsets& res_offsets = res_arr.get_offsets();
res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back());
res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back());
@@ -594,7 +608,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
/// The number of strings in the array.
size_t value_size = src_offsets[i] - prev_src_offset;
/// Number of characters in strings of the array, including zero bytes.
- size_t sum_chars_size = src_string_offsets[prev_src_offset + value_size - 1] - prev_src_string_offset; /// -1th index is Ok, see PaddedPODArray.
+ size_t sum_chars_size = src_string_offsets[prev_src_offset + value_size - 1] -
+ prev_src_string_offset; /// -1th index is Ok, see PaddedPODArray.
for (size_t j = 0; j < size_to_replicate; ++j) {
current_res_offset += value_size;
@@ -603,7 +618,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
size_t prev_src_string_offset_local = prev_src_string_offset;
for (size_t k = 0; k < value_size; ++k) {
/// Size of single string.
- size_t chars_size = src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local;
+ size_t chars_size =
+ src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local;
current_res_string_offset += chars_size;
res_string_offsets.push_back(current_res_string_offset);
@@ -615,7 +631,8 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
/// Copies the characters of the array of strings.
res_chars.resize(res_chars.size() + sum_chars_size);
memcpy_small_allow_read_write_overflow15(
- &res_chars[res_chars.size() - sum_chars_size], &src_chars[prev_src_string_offset], sum_chars_size);
+ &res_chars[res_chars.size() - sum_chars_size],
+ &src_chars[prev_src_string_offset], sum_chars_size);
}
}
@@ -627,18 +644,17 @@ ColumnPtr ColumnArray::replicate_string(const Offsets & replicate_offsets) const
return res;
}
-ColumnPtr ColumnArray::replicate_const(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_const(const Offsets& replicate_offsets) const {
size_t col_size = size();
if (col_size != replicate_offsets.size())
LOG(FATAL) << "Size of offsets doesn't match size of column.";
- if (0 == col_size)
- return clone_empty();
+ if (0 == col_size) return clone_empty();
- const Offsets & src_offsets = get_offsets();
+ const Offsets& src_offsets = get_offsets();
auto res_column_offsets = ColumnOffsets::create();
- Offsets & res_offsets = res_column_offsets->get_data();
+ Offsets& res_offsets = res_column_offsets->get_data();
res_offsets.reserve(replicate_offsets.back());
Offset prev_replicate_offset = 0;
@@ -658,48 +674,81 @@ ColumnPtr ColumnArray::replicate_const(const Offsets & replicate_offsets) const
prev_data_offset = src_offsets[i];
}
- return ColumnArray::create(get_data().clone_resized(current_new_offset), std::move(res_column_offsets));
+ return ColumnArray::create(get_data().clone_resized(current_new_offset),
+ std::move(res_column_offsets));
}
-ColumnPtr ColumnArray::replicate_generic(const Offsets & replicate_offsets) const {
+ColumnPtr ColumnArray::replicate_generic(const Offsets& replicate_offsets) const {
size_t col_size = size();
if (col_size != replicate_offsets.size())
LOG(FATAL) << "Size of offsets doesn't match size of column.";
MutableColumnPtr res = clone_empty();
- ColumnArray & res_concrete = assert_cast<ColumnArray &>(*res);
+ ColumnArray& res_concrete = assert_cast<ColumnArray&>(*res);
- if (0 == col_size)
- return res;
+ if (0 == col_size) return res;
IColumn::Offset prev_offset = 0;
for (size_t i = 0; i < col_size; ++i) {
size_t size_to_replicate = replicate_offsets[i] - prev_offset;
prev_offset = replicate_offsets[i];
- for (size_t j = 0; j < size_to_replicate; ++j)
- res_concrete.insert_from(*this, i);
+ for (size_t j = 0; j < size_to_replicate; ++j) res_concrete.insert_from(*this, i);
}
return res;
}
-ColumnPtr ColumnArray::replicate_nullable(const Offsets & replicate_offsets) const {
- const ColumnNullable & nullable = assert_cast<const ColumnNullable &>(*data);
+ColumnPtr ColumnArray::replicate_nullable(const Offsets& replicate_offsets) const { // Replicates an array whose nested column is ColumnNullable by replicating the values and the null map separately.
+ const ColumnNullable& nullable = assert_cast<const ColumnNullable&>(*data); // the nested column is cast to ColumnNullable unconditionally
/// Make temporary arrays for each components of Nullable. Then replicate them independently and collect back to result.
/// NOTE Offsets are calculated twice and it is redundant.
- auto array_of_nested = ColumnArray(nullable.get_nested_column_ptr()->assume_mutable(), get_offsets_ptr()->assume_mutable())
- .replicate(replicate_offsets);
- auto array_of_null_map = ColumnArray(nullable.get_null_map_column_ptr()->assume_mutable(), get_offsets_ptr()->assume_mutable())
- .replicate(replicate_offsets);
+ auto array_of_nested = ColumnArray(nullable.get_nested_column_ptr()->assume_mutable(),
+ get_offsets_ptr()->assume_mutable())
+ .replicate(replicate_offsets); // temporary array over the nested values, using this column's offsets
+ auto array_of_null_map = ColumnArray(nullable.get_null_map_column_ptr()->assume_mutable(),
+ get_offsets_ptr()->assume_mutable())
+ .replicate(replicate_offsets); // temporary array over the null map, using the same offsets
return ColumnArray::create(
- ColumnNullable::create(
- assert_cast<const ColumnArray &>(*array_of_nested).get_data_ptr(),
- assert_cast<const ColumnArray &>(*array_of_null_map).get_data_ptr()),
- assert_cast<const ColumnArray &>(*array_of_nested).get_offsets_ptr());
+ ColumnNullable::create(
+ assert_cast<const ColumnArray&>(*array_of_nested).get_data_ptr(),
+ assert_cast<const ColumnArray&>(*array_of_null_map).get_data_ptr()),
+ assert_cast<const ColumnArray&>(*array_of_nested).get_offsets_ptr()); // result offsets come from the replicated values array
+}
+
+/// Builds a new array column whose i-th row is row `perm[i]` of this column.
+///
+/// @param perm  Source row indices in output order. Must contain at least as many
+///              entries as rows are produced, otherwise LOG(FATAL) is raised.
+/// @param limit Number of rows to produce; 0 means "all rows", larger values are
+///              clamped to the number of rows in this column.
+/// @return A ColumnArray with min(limit, size()) rows (all rows when limit == 0).
+ColumnPtr ColumnArray::permute(const Permutation& perm, size_t limit) const {
+    const size_t size = offsets->size();
+    if (limit == 0) {
+        limit = size;
+    } else {
+        limit = std::min(size, limit);
+    }
+    if (perm.size() < limit) {
+        LOG(FATAL) << "Size of permutation is less than required.";
+    }
+    if (limit == 0) {
+        // Only reachable when this column has no rows at all.
+        return ColumnArray::create(data);
+    }
+
+    auto res = ColumnArray::create(data->clone_empty());
+    auto& res_offsets = res->get_offsets();
+    res_offsets.resize(limit);
+
+    // Flattened permutation of the nested elements, in output order.
+    Permutation nested_perm;
+    nested_perm.reserve(data->size());
+
+    // Running end offset of the output rows. Tracking it explicitly avoids reading
+    // res_offsets[i - 1] at i == 0, which wraps around to index -1.
+    size_t current_offset = 0;
+    for (size_t i = 0; i < limit; ++i) {
+        const size_t row = perm[i];
+        const size_t row_begin = offset_at(row);
+        const size_t row_size = size_at(row);
+        for (size_t j = 0; j < row_size; ++j) {
+            nested_perm.push_back(row_begin + j);
+        }
+        current_offset += row_size;
+        res_offsets[i] = current_offset;
+    }
+
+    // permute() treats limit == 0 as "all rows" (see above). If every selected row is an
+    // empty array, passing an empty nested_perm with limit 0 to a nested column that follows
+    // the same convention would request all of its elements and fail its size check.
+    // In that case keep the empty nested column created by clone_empty().
+    if (current_offset != 0) {
+        res->data = data->permute(nested_perm, current_offset);
+    }
+    return res;
}
} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index e6567d1c97..74a0805336 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -20,11 +20,11 @@
#pragma once
-#include "vec/common/arena.h"
-#include "vec/common/assert_cast.h"
#include "vec/columns/column.h"
#include "vec/columns/column_impl.h"
#include "vec/columns/column_vector.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
#include "vec/core/types.h"
namespace doris::vectorized {
@@ -38,12 +38,12 @@ private:
friend class COWHelper<IColumn, ColumnArray>;
/** Create an array column with specified values and offsets. */
- ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column);
+ ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& offsets_column);
/** Create an empty column of arrays with the type of values as in the column `nested_column` */
- explicit ColumnArray(MutableColumnPtr && nested_column);
+ explicit ColumnArray(MutableColumnPtr&& nested_column);
- ColumnArray(const ColumnArray &) = default;
+ ColumnArray(const ColumnArray&) = default;
public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
@@ -51,83 +51,87 @@ public:
*/
using Base = COWHelper<IColumn, ColumnArray>;
- static Ptr create(const ColumnPtr & nested_column, const ColumnPtr & offsets_column) {
- return ColumnArray::create(nested_column->assume_mutable(), offsets_column->assume_mutable());
+ static Ptr create(const ColumnPtr& nested_column, const ColumnPtr& offsets_column) {
+ return ColumnArray::create(nested_column->assume_mutable(),
+ offsets_column->assume_mutable());
}
- static Ptr create(const ColumnPtr & nested_column) {
+ static Ptr create(const ColumnPtr& nested_column) {
return ColumnArray::create(nested_column->assume_mutable());
}
- template <typename ... Args, typename = typename std::enable_if<IsMutableColumns<Args ...>::value>::type>
- static MutablePtr create(Args &&... args) { return Base::create(std::forward<Args>(args)...); }
+ template <typename... Args,
+ typename = typename std::enable_if<IsMutableColumns<Args...>::value>::type>
+ static MutablePtr create(Args&&... args) {
+ return Base::create(std::forward<Args>(args)...);
+ }
/** On the index i there is an offset to the beginning of the i + 1 -th element. */
using ColumnOffsets = ColumnVector<Offset>;
std::string get_name() const override;
- const char * get_family_name() const override { return "Array"; }
+ const char* get_family_name() const override { return "Array"; }
bool can_be_inside_nullable() const override { return true; }
TypeIndex get_data_type() const { return TypeIndex::Array; }
MutableColumnPtr clone_resized(size_t size) const override;
size_t size() const override;
Field operator[](size_t n) const override;
- void get(size_t n, Field & res) const override;
+ void get(size_t n, Field& res) const override;
StringRef get_data_at(size_t n) const override;
bool is_default_at(size_t n) const override;
- void insert_data(const char * pos, size_t length) override;
- StringRef serialize_value_into_arena(size_t n, Arena & arena, char const *& begin) const override;
- const char * deserialize_and_insert_from_arena(const char * pos) override;
- void update_hash_with_value(size_t n, SipHash & hash) const override;
- void insert_range_from(const IColumn & src, size_t start, size_t length) override;
- void insert(const Field & x) override;
- void insert_from(const IColumn & src_, size_t n) override;
+ void insert_data(const char* pos, size_t length) override;
+ StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
+ const char* deserialize_and_insert_from_arena(const char* pos) override;
+ void update_hash_with_value(size_t n, SipHash& hash) const override;
+ void insert_range_from(const IColumn& src, size_t start, size_t length) override;
+ void insert(const Field& x) override;
+ void insert_from(const IColumn& src_, size_t n) override;
void insert_default() override;
void pop_back(size_t n) override;
- ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
- [[noreturn]] ColumnPtr permute(const Permutation & perm, size_t limit) const override {
- LOG(FATAL) << "permute not implemented";
- }
+ ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
+ ColumnPtr permute(const Permutation& perm, size_t limit) const override;
//ColumnPtr index(const IColumn & indexes, size_t limit) const;
//template <typename Type> ColumnPtr index_impl(const PaddedPODArray<Type> & indexes, size_t limit) const;
- [[noreturn]] int compare_at(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override {
+ [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_,
+ int nan_direction_hint) const override {
LOG(FATAL) << "compare_at not implemented";
}
- [[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override {
+ [[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+ Permutation& res) const override {
LOG(FATAL) << "get_permutation not implemented";
}
void reserve(size_t n) override;
size_t byte_size() const override;
size_t allocated_bytes() const override;
void protect() override;
- ColumnPtr replicate(const Offsets & replicate_offsets) const override;
+ ColumnPtr replicate(const Offsets& replicate_offsets) const override;
ColumnPtr convert_to_full_column_if_const() const override;
- void get_extremes(Field & min, Field & max) const override {
+ void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "get_extremes not implemented";
}
/** More efficient methods of manipulation */
- IColumn & get_data() { return *data; }
- const IColumn & get_data() const { return *data; }
+ IColumn& get_data() { return *data; }
+ const IColumn& get_data() const { return *data; }
- IColumn & get_offsets_column() { return *offsets; }
- const IColumn & get_offsets_column() const { return *offsets; }
+ IColumn& get_offsets_column() { return *offsets; }
+ const IColumn& get_offsets_column() const { return *offsets; }
- Offsets & ALWAYS_INLINE get_offsets() {
- return assert_cast<ColumnOffsets &>(*offsets).get_data();
+ Offsets& ALWAYS_INLINE get_offsets() {
+ return assert_cast<ColumnOffsets&>(*offsets).get_data();
}
- const Offsets & ALWAYS_INLINE get_offsets() const {
- return assert_cast<const ColumnOffsets &>(*offsets).get_data();
+ const Offsets& ALWAYS_INLINE get_offsets() const {
+ return assert_cast<const ColumnOffsets&>(*offsets).get_data();
}
- const ColumnPtr & get_data_ptr() const { return data; }
- ColumnPtr & get_data_ptr() { return data; }
+ const ColumnPtr& get_data_ptr() const { return data; }
+ ColumnPtr& get_data_ptr() { return data; }
- const ColumnPtr & get_offsets_ptr() const { return offsets; }
- ColumnPtr & get_offsets_ptr() { return offsets; }
+ const ColumnPtr& get_offsets_ptr() const { return offsets; }
+ ColumnPtr& get_offsets_ptr() { return offsets; }
- MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override {
+ MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
return scatter_impl<ColumnArray>(num_columns, selector);
}
@@ -136,7 +140,8 @@ public:
callback(data);
}
- void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override;
+ void insert_indices_from(const IColumn& src, const int* indices_begin,
+ const int* indices_end) override;
void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
LOG(FATAL) << "replace_column_data not implemented";
@@ -154,36 +159,36 @@ private:
WrappedPtr offsets;
size_t ALWAYS_INLINE offset_at(ssize_t i) const { return get_offsets()[i - 1]; }
- size_t ALWAYS_INLINE size_at(ssize_t i) const { return get_offsets()[i] - get_offsets()[i - 1]; }
-
+ size_t ALWAYS_INLINE size_at(ssize_t i) const { // number of nested elements in row i
+ return get_offsets()[i] - get_offsets()[i - 1]; // end offset of row i minus end offset of row i - 1; reads index -1 when i == 0 — NOTE(review): presumably Offsets tolerates that index and yields 0, confirm
+ }
/// Multiply values if the nested column is ColumnVector<T>.
template <typename T>
- ColumnPtr replicate_number(const Offsets & replicate_offsets) const;
+ ColumnPtr replicate_number(const Offsets& replicate_offsets) const;
/// Multiply the values if the nested column is ColumnString. The code is too complicated.
- ColumnPtr replicate_string(const Offsets & replicate_offsets) const;
+ ColumnPtr replicate_string(const Offsets& replicate_offsets) const;
/** Non-constant arrays of constant values are quite rare.
* Most functions can not work with them, and does not create such columns as a result.
* An exception is the function `replicate` (see FunctionsMiscellaneous.h), which has service meaning for the implementation of lambda functions.
* Only for its sake is the implementation of the `replicate` method for ColumnArray(ColumnConst).
*/
- ColumnPtr replicate_const(const Offsets & replicate_offsets) const;
+ ColumnPtr replicate_const(const Offsets& replicate_offsets) const;
/** The following is done by simply replicating of nested columns.
*/
- ColumnPtr replicate_nullable(const Offsets & replicate_offsets) const;
- ColumnPtr replicate_generic(const Offsets & replicate_offsets) const;
-
+ ColumnPtr replicate_nullable(const Offsets& replicate_offsets) const;
+ ColumnPtr replicate_generic(const Offsets& replicate_offsets) const;
/// Specializations for the filter function.
template <typename T>
- ColumnPtr filter_number(const Filter & filt, ssize_t result_size_hint) const;
+ ColumnPtr filter_number(const Filter& filt, ssize_t result_size_hint) const;
- ColumnPtr filter_string(const Filter & filt, ssize_t result_size_hint) const;
- ColumnPtr filter_nullable(const Filter & filt, ssize_t result_size_hint) const;
- ColumnPtr filter_generic(const Filter & filt, ssize_t result_size_hint) const;
+ ColumnPtr filter_string(const Filter& filt, ssize_t result_size_hint) const;
+ ColumnPtr filter_nullable(const Filter& filt, ssize_t result_size_hint) const;
+ ColumnPtr filter_generic(const Filter& filt, ssize_t result_size_hint) const;
};
} // namespace doris::vectorized
diff --git a/be/test/vec/core/column_array_test.cpp b/be/test/vec/core/column_array_test.cpp
index a65c7d8c0b..b497b14451 100644
--- a/be/test/vec/core/column_array_test.cpp
+++ b/be/test/vec/core/column_array_test.cpp
@@ -28,6 +28,34 @@
namespace doris::vectorized {
+// Asserts that `arr` is a ColumnArray whose stored offsets equal `offs`, element by element.
+void check_array_offsets(ColumnPtr arr, const std::vector<IColumn::Offset>& offs) {
+    const auto* array_col = check_and_get_column<ColumnArray>(*arr);
+    const size_t row_count = array_col->size();
+    ASSERT_EQ(row_count, offs.size());
+    for (size_t row = 0; row != row_count; ++row) {
+        ASSERT_EQ(array_col->get_offsets()[row], offs[row]);
+    }
+}
+// Asserts that the nested data column of array column `arr` holds exactly the values in
+// `data`, reading the bytes of each nested element as a T.
+template <typename T>
+void check_array_data(ColumnPtr arr, const std::vector<T>& data) {
+    const auto* array_col = check_and_get_column<ColumnArray>(*arr);
+    const auto nested = array_col->get_data_ptr();
+    const size_t value_count = nested->size();
+    ASSERT_EQ(value_count, data.size());
+    for (size_t idx = 0; idx != value_count; ++idx) {
+        const auto raw = nested->get_data_at(idx);
+        const T& actual = *reinterpret_cast<const T*>(raw.data);
+        ASSERT_EQ(actual, data[idx]);
+    }
+}
+// String specialization: asserts that the nested column of array column `arr` holds exactly
+// the strings in `data`.
+template <>
+void check_array_data(ColumnPtr arr, const std::vector<std::string>& data) {
+    auto arr_col = check_and_get_column<ColumnArray>(*arr);
+    auto data_col = arr_col->get_data_ptr();
+    ASSERT_EQ(data_col->size(), data.size());
+    for (size_t i = 0; i < data_col->size(); ++i) {
+        auto element = data_col->get_data_at(i);
+        // Build the string from the explicit length instead of scanning for a NUL byte,
+        // so embedded NULs are preserved and no terminator is required in the storage.
+        ASSERT_EQ(std::string(element.data, element.size), data[i]);
+    }
+}
+
TEST(ColumnArrayTest, IntArrayTest) {
auto off_column = ColumnVector<IColumn::Offset>::create();
auto data_column = ColumnVector<int32_t>::create();
@@ -78,4 +106,77 @@ TEST(ColumnArrayTest, StringArrayTest) {
}
}
+// permute() on an int array column, once with an explicit limit and once with limit 0.
+TEST(ColumnArrayTest, IntArrayPermuteTest) {
+    // Source column: [[1,2,3],[],[4],[5,6]]. offs[0] is a leading 0 that is never inserted.
+    const std::vector<IColumn::Offset> offs = {0, 3, 3, 4, 6};
+    const std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6};
+
+    auto off_column = ColumnVector<IColumn::Offset>::create();
+    for (size_t row = 1; row != offs.size(); ++row) {
+        off_column->insert_data(reinterpret_cast<const char*>(&offs[row]), 0);
+    }
+    auto data_column = ColumnVector<int32_t>::create();
+    for (const auto& value : vals) {
+        data_column->insert_data(reinterpret_cast<const char*>(&value), 0);
+    }
+    ColumnArray array_column(std::move(data_column), std::move(off_column));
+
+    const IColumn::Permutation perm = {3, 2, 1, 0};
+
+    // limit 2 -> first two permuted rows only: [[5,6],[4]]
+    auto limited = array_column.permute(perm, 2);
+    check_array_offsets(limited, {2, 3});
+    check_array_data<int32_t>(limited, {5, 6, 4});
+
+    // limit 0 -> every row, reversed: [[5,6],[4],[],[1,2,3]]
+    auto full = array_column.permute(perm, 0);
+    check_array_offsets(full, {2, 3, 3, 6});
+    check_array_data<int32_t>(full, {5, 6, 4, 1, 2, 3});
+}
+
+// permute() on a string array column with an explicit limit.
+TEST(ColumnArrayTest, StringArrayPermuteTest) {
+    // Source column: [["abc","d"],["ef"],[],[""]]. offs[0] is a leading 0 that is never inserted.
+    const std::vector<IColumn::Offset> offs = {0, 2, 3, 3, 4};
+    const std::vector<std::string> vals = {"abc", "d", "ef", ""};
+
+    auto off_column = ColumnVector<IColumn::Offset>::create();
+    for (size_t row = 1; row != offs.size(); ++row) {
+        off_column->insert_data(reinterpret_cast<const char*>(&offs[row]), 0);
+    }
+    auto data_column = ColumnString::create();
+    for (const auto& text : vals) {
+        data_column->insert_data(text.data(), text.size());
+    }
+    ColumnArray array_column(std::move(data_column), std::move(off_column));
+
+    const IColumn::Permutation perm = {3, 2, 1, 0};
+
+    // limit 2 -> first two permuted rows only: [[""],[]]
+    auto limited = array_column.permute(perm, 2);
+    check_array_offsets(limited, {1, 1});
+    check_array_data<std::string>(limited, {""});
+}
+
+// permute() on a column whose rows are all empty arrays (the nested column has no elements).
+TEST(ColumnArrayTest, EmptyArrayPermuteTest) {
+    // Source column: [[],[],[],[]]. offs[0] is a leading 0 that is never inserted.
+    const std::vector<IColumn::Offset> offs = {0, 0, 0, 0, 0};
+    const std::vector<int32_t> vals = {};
+
+    auto off_column = ColumnVector<IColumn::Offset>::create();
+    for (size_t row = 1; row != offs.size(); ++row) {
+        off_column->insert_data(reinterpret_cast<const char*>(&offs[row]), 0);
+    }
+    auto data_column = ColumnVector<int32_t>::create();
+    for (const auto& value : vals) {
+        data_column->insert_data(reinterpret_cast<const char*>(&value), 0);
+    }
+    ColumnArray array_column(std::move(data_column), std::move(off_column));
+
+    const IColumn::Permutation perm = {3, 2, 1, 0};
+
+    // limit 2 -> two empty rows: [[],[]]
+    auto limited = array_column.permute(perm, 2);
+    check_array_offsets(limited, {0, 0});
+    check_array_data<int32_t>(limited, {});
+
+    // limit 0 -> all four empty rows: [[],[],[],[]]
+    auto full = array_column.permute(perm, 0);
+    check_array_offsets(full, {0, 0, 0, 0});
+    check_array_data<int32_t>(full, {});
+}
+
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org