You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bk...@apache.org on 2022/11/18 22:05:22 UTC
[arrow] branch feature/format-string-view updated: Adding comparison and concatenation
This is an automated email from the ASF dual-hosted git repository.
bkietz pushed a commit to branch feature/format-string-view
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/feature/format-string-view by this push:
new 7435aa0821 Adding comparison and concatenation
7435aa0821 is described below
commit 7435aa0821706a0f2613b87433b1b91c94669cbc
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Fri Nov 18 17:04:47 2022 -0500
Adding comparison and concatenation
---
cpp/src/arrow/array/concatenate.cc | 24 +++++++++++++++++++++++-
cpp/src/arrow/array/concatenate_test.cc | 8 ++++++++
cpp/src/arrow/compare.cc | 8 +++++++-
cpp/src/arrow/testing/random.cc | 10 ++++++++--
cpp/src/arrow/testing/random.h | 13 +++++++++++++
5 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc
index 3dd0ccea93..6f7d61283e 100644
--- a/cpp/src/arrow/array/concatenate.cc
+++ b/cpp/src/arrow/array/concatenate.cc
@@ -228,7 +228,29 @@ class ConcatenateImpl {
}
Status Visit(const BinaryViewType&) {
- return Status::NotImplemented("binary / string view");
+ bool any_opted_out_of_view_validation = false;  // true once any input reports opt-out
+ out_->buffers.resize(2);  // slots 0 and 1; slot 1 is filled with the concatenated headers below
+
+ for (const auto& in_data : in_) {
+ auto begin = in_data->buffers.begin() + 2;  // skip each input's first two buffers
+ auto end = in_data->buffers.end();
+
+ if (BinaryViewArray::OptedOutOfViewValidation(*in_data)) {
+ any_opted_out_of_view_validation = true;
+ --end;  // leave out this input's last buffer; NOTE(review): presumably the opt-out marker - confirm
+ }
+
+ for (auto it = begin; it != end; ++it) {
+ out_->buffers.push_back(*it);  // carry the remaining buffers over unchanged
+ }
+ }
+
+ if (any_opted_out_of_view_validation) {
+ out_->buffers = BinaryViewArray::DoNotValidateViews(std::move(out_->buffers));  // re-apply opt-out to the output
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto header_buffers, Buffers(1, sizeof(StringHeader)));  // NOTE(review): presumably buffer 1 of every input, in StringHeader-sized units - confirm
+ return ConcatenateBuffers(header_buffers, pool_).Value(&out_->buffers[1]);  // store the concatenation in slot 1
}
Status Visit(const ListType&) {
diff --git a/cpp/src/arrow/array/concatenate_test.cc b/cpp/src/arrow/array/concatenate_test.cc
index bff5d7eec1..1bc0c65bec 100644
--- a/cpp/src/arrow/array/concatenate_test.cc
+++ b/cpp/src/arrow/array/concatenate_test.cc
@@ -91,6 +91,7 @@ class ConcatenateTest : public ::testing::Test {
for (auto null_probability : this->null_probabilities_) {
std::shared_ptr<Array> array;
factory(size, null_probability, &array);
+ ASSERT_OK(array->ValidateFull());
auto expected = array->Slice(offsets.front(), offsets.back() - offsets.front());
auto slices = this->Slices(array, offsets);
ASSERT_OK_AND_ASSIGN(auto actual, Concatenate(slices));
@@ -154,6 +155,13 @@ TEST_F(ConcatenateTest, StringType) {
});
}
+TEST_F(ConcatenateTest, StringViewType) {
+ Check([this](int32_t size, double null_probability, std::shared_ptr<Array>* out) {  // factory passed to Check
+ *out = rng_.StringView(size, /*min_length =*/0, /*max_length =*/15, null_probability);
+ ASSERT_OK((**out).ValidateFull());  // the generated input must itself pass full validation
+ });
+}
+
TEST_F(ConcatenateTest, LargeStringType) {
Check([this](int32_t size, double null_probability, std::shared_ptr<Array>* out) {
*out =
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 68250f0288..5d1c3294c0 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -261,7 +261,13 @@ class RangeDataEqualsImpl {
// Also matches StringViewType
Status Visit(const BinaryViewType& type) {
- return Status::NotImplemented("Binary / string view");
+ auto* left_values = left_.GetValues<StringHeader>(1) + left_start_idx_;  // buffer 1 as StringHeader, advanced to the left start index
+ auto* right_values = right_.GetValues<StringHeader>(1) + right_start_idx_;  // same for the right-hand side
+ VisitValidRuns([&](int64_t i, int64_t length) {
+ return std::equal(left_values + i, left_values + i + length,  // compares headers in [i, i + length) with operator==
+ right_values + i, right_values + i + length);
+ });
+ return Status::OK();  // NOTE(review): the lambda's bool is not used here; presumably VisitValidRuns records it - confirm
}
// Also matches LargeStringType
diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
index f42909a8e2..e0b83fbdbc 100644
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
@@ -340,11 +340,10 @@ std::shared_ptr<Array> RandomArrayGenerator::Decimal256(std::shared_ptr<DataType
return gen.MakeRandomArray(size, null_probability);
}
-template <typename TypeClass>
+template <typename TypeClass, typename offset_type = typename TypeClass::offset_type>
static std::shared_ptr<Array> GenerateBinaryArray(RandomArrayGenerator* gen, int64_t size,
int32_t min_length, int32_t max_length,
double null_probability) {
- using offset_type = typename TypeClass::offset_type;
using BuilderType = typename TypeTraits<TypeClass>::BuilderType;
using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
using OffsetArrayType = typename TypeTraits<OffsetArrowType>::ArrayType;
@@ -392,6 +391,13 @@ std::shared_ptr<Array> RandomArrayGenerator::LargeString(int64_t size, int32_t m
null_probability);
}
+std::shared_ptr<Array> RandomArrayGenerator::StringView(int64_t size, int32_t min_length,
+ int32_t max_length,
+ double null_probability) {
+ return GenerateBinaryArray<StringViewType, uint32_t>(this, size, min_length, max_length,
+ null_probability);  // offset_type is given explicitly as uint32_t instead of the TypeClass::offset_type default
+}
+
std::shared_ptr<Array> RandomArrayGenerator::BinaryWithRepeats(int64_t size,
int64_t unique,
int32_t min_length,
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index 00d50f9bc0..20774f309a 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -292,6 +292,19 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator {
std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length,
double null_probability = 0);
+ /// \brief Generate a random StringViewArray
+ ///
+ /// \param[in] size the size of the array to generate
+ /// \param[in] min_length the lower bound of the string length
+ /// determined by the uniform distribution
+ /// \param[in] max_length the upper bound of the string length
+ /// determined by the uniform distribution
+ /// \param[in] null_probability the probability of a value being null (defaults to 0)
+ ///
+ /// \return a generated Array
+ std::shared_ptr<Array> StringView(int64_t size, int32_t min_length, int32_t max_length,
+ double null_probability = 0);
+
/// \brief Generate a random LargeStringArray
///
/// \param[in] size the size of the array to generate