You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/05/03 22:55:26 UTC
[arrow] branch master updated: ARROW-3767: [C++] Add cast from null to any other type
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 982f341 ARROW-3767: [C++] Add cast from null to any other type
982f341 is described below
commit 982f341bc81e1e22d4b25f8cf00ef882a34766b6
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Fri May 3 17:55:11 2019 -0500
ARROW-3767: [C++] Add cast from null to any other type
Author: Antoine Pitrou <an...@python.org>
Closes #4196 from pitrou/ARROW-3767-cast-null-to-any and squashes the following commits:
f4d269d0c <Antoine Pitrou> Fix list and struct cases
0327b6033 <Antoine Pitrou> ARROW-3767: Add cast from null to any other type
---
cpp/src/arrow/array-binary-test.cc | 140 ++++++---------
cpp/src/arrow/array-list-test.cc | 23 +++
cpp/src/arrow/array.cc | 46 ++---
cpp/src/arrow/array/builder_adaptive.h | 4 +-
cpp/src/arrow/array/builder_base.cc | 6 +
cpp/src/arrow/array/builder_base.h | 26 ++-
cpp/src/arrow/array/builder_binary.cc | 7 +
cpp/src/arrow/array/builder_binary.h | 20 ++-
cpp/src/arrow/array/builder_dict.cc | 15 ++
cpp/src/arrow/array/builder_dict.h | 8 +-
cpp/src/arrow/array/builder_nested.cc | 25 ++-
cpp/src/arrow/array/builder_nested.h | 10 +-
cpp/src/arrow/array/builder_primitive.h | 12 +-
cpp/src/arrow/array/builder_union.h | 13 +-
cpp/src/arrow/compute/kernels/cast-test.cc | 196 +++++++++++----------
cpp/src/arrow/compute/kernels/cast.cc | 87 +++++++--
.../kernels/generated/cast-codegen-internal.h | 18 --
cpp/src/arrow/compute/kernels/generated/codegen.py | 6 +-
python/pyarrow/tests/test_array.py | 48 ++++-
19 files changed, 444 insertions(+), 266 deletions(-)
diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc
index daf859e..227f74b 100644
--- a/cpp/src/arrow/array-binary-test.cc
+++ b/cpp/src/arrow/array-binary-test.cc
@@ -34,6 +34,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/checked_cast.h"
+#include "arrow/util/string_view.h"
namespace arrow {
@@ -42,6 +43,30 @@ using internal::checked_cast;
// ----------------------------------------------------------------------
// String / Binary tests
+template <typename ArrayType>
+void CheckStringArray(const ArrayType& array, const std::vector<std::string>& strings,
+ const std::vector<uint8_t>& is_valid, int repeats = 1) {
+ int64_t length = array.length();
+ int64_t base_length = static_cast<int64_t>(strings.size());
+ ASSERT_EQ(base_length, static_cast<int64_t>(is_valid.size()));
+ ASSERT_EQ(base_length * repeats, length);
+
+ int32_t value_pos = 0;
+ for (int i = 0; i < length; ++i) {
+ auto j = i % base_length;
+ if (is_valid[j]) {
+ ASSERT_FALSE(array.IsNull(i));
+ auto view = array.GetView(i);
+ ASSERT_EQ(value_pos, array.value_offset(i));
+ ASSERT_EQ(strings[j].size(), view.size());
+ ASSERT_EQ(util::string_view(strings[j]), view);
+ value_pos += static_cast<int32_t>(view.size());
+ } else {
+ ASSERT_TRUE(array.IsNull(i));
+ }
+ }
+}
+
class TestStringArray : public ::testing::Test {
public:
void SetUp() {
@@ -210,14 +235,14 @@ class TestStringBuilder : public TestBuilder {
TEST_F(TestStringBuilder, TestScalarAppend) {
std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
- std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+ std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
int N = static_cast<int>(strings.size());
int reps = 1000;
for (int j = 0; j < reps; ++j) {
for (int i = 0; i < N; ++i) {
- if (is_null[i]) {
+ if (!is_valid[i]) {
ASSERT_OK(builder_->AppendNull());
} else {
ASSERT_OK(builder_->Append(strings[i]));
@@ -230,21 +255,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
ASSERT_EQ(reps, result_->null_count());
ASSERT_EQ(reps * 6, result_->value_data()->size());
- int32_t length;
- int32_t pos = 0;
- for (int i = 0; i < N * reps; ++i) {
- if (is_null[i % N]) {
- ASSERT_TRUE(result_->IsNull(i));
- } else {
- ASSERT_FALSE(result_->IsNull(i));
- result_->GetValue(i, &length);
- ASSERT_EQ(pos, result_->value_offset(i));
- ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
- ASSERT_EQ(strings[i % N], result_->GetString(i));
-
- pos += length;
- }
- }
+ CheckStringArray(*result_, strings, is_valid, reps);
}
TEST_F(TestStringBuilder, TestAppendVector) {
@@ -263,21 +274,7 @@ TEST_F(TestStringBuilder, TestAppendVector) {
ASSERT_EQ(reps, result_->null_count());
ASSERT_EQ(reps * 6, result_->value_data()->size());
- int32_t length;
- int32_t pos = 0;
- for (int i = 0; i < N * reps; ++i) {
- if (valid_bytes[i % N]) {
- ASSERT_FALSE(result_->IsNull(i));
- result_->GetValue(i, &length);
- ASSERT_EQ(pos, result_->value_offset(i));
- ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
- ASSERT_EQ(strings[i % N], result_->GetString(i));
-
- pos += length;
- } else {
- ASSERT_TRUE(result_->IsNull(i));
- }
- }
+ CheckStringArray(*result_, strings, valid_bytes, reps);
}
TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) {
@@ -296,22 +293,7 @@ TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) {
ASSERT_EQ(reps * 3, result_->null_count());
ASSERT_EQ(reps * 3, result_->value_data()->size());
- int32_t length;
- int32_t pos = 0;
- for (int i = 0; i < N * reps; ++i) {
- auto string = strings[i % N];
- if (string && valid_bytes[i % N]) {
- ASSERT_FALSE(result_->IsNull(i));
- result_->GetValue(i, &length);
- ASSERT_EQ(pos, result_->value_offset(i));
- ASSERT_EQ(static_cast<int32_t>(strlen(string)), length);
- ASSERT_EQ(strings[i % N], result_->GetString(i));
-
- pos += length;
- } else {
- ASSERT_TRUE(result_->IsNull(i));
- }
- }
+ CheckStringArray(*result_, {"", "aaa", "", "", ""}, {0, 1, 0, 0, 1}, reps);
}
TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
@@ -329,21 +311,7 @@ TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
ASSERT_EQ(reps, result_->null_count());
ASSERT_EQ(reps * 6, result_->value_data()->size());
- int32_t length;
- int32_t pos = 0;
- for (int i = 0; i < N * reps; ++i) {
- if (strings[i % N]) {
- ASSERT_FALSE(result_->IsNull(i));
- result_->GetValue(i, &length);
- ASSERT_EQ(pos, result_->value_offset(i));
- ASSERT_EQ(static_cast<int32_t>(strlen(strings[i % N])), length);
- ASSERT_EQ(strings[i % N], result_->GetString(i));
-
- pos += length;
- } else {
- ASSERT_TRUE(result_->IsNull(i));
- }
- }
+ CheckStringArray(*result_, {"", "bb", "a", "", "ccc"}, {1, 1, 1, 0, 1}, reps);
}
TEST_F(TestStringBuilder, TestZeroLength) {
@@ -499,14 +467,14 @@ class TestBinaryBuilder : public TestBuilder {
TEST_F(TestBinaryBuilder, TestScalarAppend) {
std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
- std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+ std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
int N = static_cast<int>(strings.size());
int reps = 10;
for (int j = 0; j < reps; ++j) {
for (int i = 0; i < N; ++i) {
- if (is_null[i]) {
+ if (!is_valid[i]) {
ASSERT_OK(builder_->AppendNull());
} else {
ASSERT_OK(builder_->Append(strings[i]));
@@ -519,22 +487,26 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) {
ASSERT_EQ(reps, result_->null_count());
ASSERT_EQ(reps * 6, result_->value_data()->size());
- int32_t length;
- for (int i = 0; i < N * reps; ++i) {
- if (is_null[i % N]) {
- ASSERT_TRUE(result_->IsNull(i));
- } else {
- ASSERT_FALSE(result_->IsNull(i));
- const uint8_t* vals = result_->GetValue(i, &length);
- ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
- ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
- }
- }
+ CheckStringArray(*result_, strings, is_valid, reps);
+}
+
+TEST_F(TestBinaryBuilder, TestAppendNulls) {
+ ASSERT_OK(builder_->Append("bow"));
+ ASSERT_OK(builder_->AppendNulls(3));
+ ASSERT_OK(builder_->Append("arrow"));
+ Done();
+ ASSERT_OK(ValidateArray(*result_));
+
+ ASSERT_EQ(5, result_->length());
+ ASSERT_EQ(3, result_->null_count());
+ ASSERT_EQ(8, result_->value_data()->size());
+
+ CheckStringArray(*result_, {"bow", "", "", "", "arrow"}, {1, 0, 0, 0, 1});
}
TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
- std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+ std::vector<uint8_t> is_valid = {1, 1, 1, 0, 1};
int N = static_cast<int>(strings.size());
int reps = 13;
@@ -546,7 +518,7 @@ TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
for (int j = 0; j < reps; ++j) {
for (int i = 0; i < N; ++i) {
- if (is_null[i]) {
+ if (!is_valid[i]) {
builder_->UnsafeAppendNull();
} else {
builder_->UnsafeAppend(strings[i]);
@@ -560,17 +532,7 @@ TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) {
ASSERT_EQ(reps, result_->null_count());
ASSERT_EQ(reps * total_length, result_->value_data()->size());
- int32_t length;
- for (int i = 0; i < N * reps; ++i) {
- if (is_null[i % N]) {
- ASSERT_TRUE(result_->IsNull(i));
- } else {
- ASSERT_FALSE(result_->IsNull(i));
- const uint8_t* vals = result_->GetValue(i, &length);
- ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
- ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
- }
- }
+ CheckStringArray(*result_, strings, is_valid, reps);
}
TEST_F(TestBinaryBuilder, TestCapacityReserve) {
diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc
index 16dc16a..0fb86ce 100644
--- a/cpp/src/arrow/array-list-test.cc
+++ b/cpp/src/arrow/array-list-test.cc
@@ -203,6 +203,29 @@ TEST_F(TestListArray, TestAppendNull) {
ASSERT_NE(nullptr, values->data()->buffers[1]);
}
+TEST_F(TestListArray, TestAppendNulls) {
+ ASSERT_OK(builder_->AppendNulls(3));
+
+ Done();
+
+ ASSERT_OK(ValidateArray(*result_));
+ ASSERT_EQ(result_->length(), 3);
+ ASSERT_EQ(result_->null_count(), 3);
+ ASSERT_TRUE(result_->IsNull(0));
+ ASSERT_TRUE(result_->IsNull(1));
+ ASSERT_TRUE(result_->IsNull(2));
+
+ ASSERT_EQ(0, result_->raw_value_offsets()[0]);
+ ASSERT_EQ(0, result_->value_offset(1));
+ ASSERT_EQ(0, result_->value_offset(2));
+ ASSERT_EQ(0, result_->value_offset(3));
+
+ auto values = result_->values();
+ ASSERT_EQ(0, values->length());
+ // Values buffer should be non-null
+ ASSERT_NE(nullptr, values->data()->buffers[1]);
+}
+
void ValidateBasicListArray(const ListArray* result, const std::vector<int32_t>& values,
const std::vector<uint8_t>& is_valid) {
ASSERT_OK(ValidateArray(*result));
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 5956dd2..2346908 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -804,7 +804,7 @@ struct ValidateVisitor {
if (array.data()->buffers.size() != 3) {
return Status::Invalid("number of buffers was != 3");
}
- return Status::OK();
+ return ValidateOffsets(array);
}
Status Visit(const ListArray& array) {
@@ -836,25 +836,7 @@ struct ValidateVisitor {
return Status::Invalid("Child array invalid: ", child_valid.ToString());
}
- int32_t prev_offset = array.value_offset(0);
- if (prev_offset != 0) {
- return Status::Invalid("The first offset wasn't zero");
- }
- for (int64_t i = 1; i <= array.length(); ++i) {
- int32_t current_offset = array.value_offset(i);
- if (array.IsNull(i - 1) && current_offset != prev_offset) {
- return Status::Invalid("Offset invariant failure at: ", i,
- " inconsistent value_offsets for null slot",
- current_offset, "!=", prev_offset);
- }
- if (current_offset < prev_offset) {
- return Status::Invalid("Offset invariant failure: ", i,
- " inconsistent offset for non-null slot: ", current_offset,
- "<", prev_offset);
- }
- prev_offset = current_offset;
- }
- return Status::OK();
+ return ValidateOffsets(array);
}
Status Visit(const StructArray& array) {
@@ -912,6 +894,30 @@ struct ValidateVisitor {
}
Status Visit(const ExtensionArray& array) { return ValidateArray(*array.storage()); }
+
+ protected:
+ template <typename ArrayType>
+ Status ValidateOffsets(ArrayType& array) {
+ int32_t prev_offset = array.value_offset(0);
+ if (array.offset() == 0 && prev_offset != 0) {
+ return Status::Invalid("The first offset wasn't zero");
+ }
+ for (int64_t i = 1; i <= array.length(); ++i) {
+ int32_t current_offset = array.value_offset(i);
+ if (array.IsNull(i - 1) && current_offset != prev_offset) {
+ return Status::Invalid("Offset invariant failure at: ", i,
+ " inconsistent value_offsets for null slot",
+ current_offset, "!=", prev_offset);
+ }
+ if (current_offset < prev_offset) {
+ return Status::Invalid("Offset invariant failure: ", i,
+ " inconsistent offset for non-null slot: ", current_offset,
+ "<", prev_offset);
+ }
+ prev_offset = current_offset;
+ }
+ return Status::OK();
+ }
};
} // namespace internal
diff --git a/cpp/src/arrow/array/builder_adaptive.h b/cpp/src/arrow/array/builder_adaptive.h
index afbfca2..7f24109 100644
--- a/cpp/src/arrow/array/builder_adaptive.h
+++ b/cpp/src/arrow/array/builder_adaptive.h
@@ -31,7 +31,7 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
/// \brief Append multiple nulls
/// \param[in] length the number of nulls to append
- Status AppendNulls(int64_t length) {
+ Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
@@ -39,7 +39,7 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
return Status::OK();
}
- Status AppendNull() {
+ Status AppendNull() final {
pending_data_[pending_pos_] = 0;
pending_valid_[pending_pos_] = 0;
pending_has_nulls_ = true;
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index 75baedd..fb13a88 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -63,6 +63,12 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int64_t length)
return Status::OK();
}
+Status ArrayBuilder::AppendToBitmap(int64_t num_bits, bool value) {
+ RETURN_NOT_OK(Reserve(num_bits));
+ UnsafeAppendToBitmap(num_bits, value);
+ return Status::OK();
+}
+
Status ArrayBuilder::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
capacity_ = capacity;
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index 21503ee..4f04866 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -18,25 +18,18 @@
#pragma once
#include <algorithm> // IWYU pragma: keep
-#include <array>
-#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <iterator>
#include <limits>
#include <memory>
-#include <string>
#include <type_traits>
#include <vector>
#include "arrow/buffer-builder.h"
-#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
-#include "arrow/util/bit-util.h"
#include "arrow/util/macros.h"
-#include "arrow/util/string_view.h"
#include "arrow/util/type_traits.h"
#include "arrow/util/visibility.h"
@@ -44,6 +37,7 @@ namespace arrow {
class Array;
struct ArrayData;
+class MemoryPool;
constexpr int64_t kMinBuilderCapacity = 1 << 5;
constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
@@ -103,6 +97,9 @@ class ARROW_EXPORT ArrayBuilder {
/// Reset the builder.
virtual void Reset();
+ virtual Status AppendNull() = 0;
+ virtual Status AppendNulls(int64_t length) = 0;
+
/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
/// this function responsibly.
@@ -133,6 +130,9 @@ class ARROW_EXPORT ArrayBuilder {
/// assume all of length bits are valid.
Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
+ /// Uniform append. Append N times the same validity bit.
+ Status AppendToBitmap(int64_t num_bits, bool value);
+
/// Set the next length bits to not null (i.e. valid).
Status SetNotNull(int64_t length);
@@ -158,11 +158,21 @@ class ARROW_EXPORT ArrayBuilder {
null_count_ = null_bitmap_builder_.false_count();
}
+ // Append the same validity value a given number of times.
+ void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
+ if (value) {
+ UnsafeSetNotNull(num_bits);
+ } else {
+ UnsafeSetNull(num_bits);
+ }
+ }
+
void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
- // Set the next length bits to not null (i.e. valid).
+ // Set the next validity bits to not null (i.e. valid).
void UnsafeSetNotNull(int64_t length);
+ // Set the next validity bits to null (i.e. invalid).
void UnsafeSetNull(int64_t length);
static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
index 26c6cb4..4a8ea40 100644
--- a/cpp/src/arrow/array/builder_binary.cc
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -236,6 +236,13 @@ Status FixedSizeBinaryBuilder::AppendNull() {
return Status::OK();
}
+Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, false);
+ byte_builder_.UnsafeAdvance(length * byte_width_);
+ return Status::OK();
+}
+
void FixedSizeBinaryBuilder::Reset() {
ArrayBuilder::Reset();
byte_builder_.Reset();
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index 954f58e..c849572 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -55,7 +55,20 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
return Status::OK();
}
- Status AppendNull() {
+ Status AppendNulls(int64_t length) final {
+ const int64_t num_bytes = value_data_builder_.length();
+ if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
+ return AppendOverflow(num_bytes);
+ }
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ for (int64_t i = 0; i < length; ++i) {
+ offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+ }
+ UnsafeAppendToBitmap(length, false);
+ return Status::OK();
+ }
+
+ Status AppendNull() final {
ARROW_RETURN_NOT_OK(AppendNextOffset());
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(false);
@@ -215,7 +228,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
Status AppendValues(const uint8_t* data, int64_t length,
const uint8_t* valid_bytes = NULLPTR);
- Status AppendNull();
+
+ Status AppendNull() final;
+
+ Status AppendNulls(int64_t length) final;
void UnsafeAppend(const uint8_t* value) {
UnsafeAppendToBitmap(true);
diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc
index b5fa0d6..2e43234 100644
--- a/cpp/src/arrow/array/builder_dict.cc
+++ b/cpp/src/arrow/array/builder_dict.cc
@@ -258,6 +258,14 @@ Status DictionaryBuilder<T>::AppendNull() {
return values_builder_.AppendNull();
}
+template <typename T>
+Status DictionaryBuilder<T>::AppendNulls(int64_t length) {
+ length_ += length;
+ null_count_ += length;
+
+ return values_builder_.AppendNulls(length);
+}
+
Status DictionaryBuilder<NullType>::AppendNull() {
length_ += 1;
null_count_ += 1;
@@ -265,6 +273,13 @@ Status DictionaryBuilder<NullType>::AppendNull() {
return values_builder_.AppendNull();
}
+Status DictionaryBuilder<NullType>::AppendNulls(int64_t length) {
+ length_ += length;
+ null_count_ += length;
+
+ return values_builder_.AppendNulls(length);
+}
+
template <typename T>
Status DictionaryBuilder<T>::AppendArray(const Array& array) {
using ArrayType = typename TypeTraits<T>::ArrayType;
diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
index 204d609..4d31cdb 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -95,7 +95,9 @@ class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder {
}
/// \brief Append a scalar null value
- Status AppendNull();
+ Status AppendNull() final;
+
+ Status AppendNulls(int64_t length) final;
/// \brief Append a whole dense array to the builder
Status AppendArray(const Array& array);
@@ -127,7 +129,9 @@ class ARROW_EXPORT DictionaryBuilder<NullType> : public ArrayBuilder {
DictionaryBuilder(const std::shared_ptr<Array>& dictionary, MemoryPool* pool);
/// \brief Append a scalar null value
- Status AppendNull();
+ Status AppendNull() final;
+
+ Status AppendNulls(int64_t length) final;
/// \brief Append a whole dense array to the builder
Status AppendArray(const Array& array);
diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc
index 4663771..b65394f 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -57,12 +57,18 @@ Status ListBuilder::AppendValues(const int32_t* offsets, int64_t length,
return Status::OK();
}
-Status ListBuilder::AppendNextOffset() {
+Status ListBuilder::CheckNextOffset() const {
const int64_t num_values = value_builder_->length();
ARROW_RETURN_IF(
num_values > kListMaximumElements,
Status::CapacityError("ListArray cannot contain more then 2^31 - 1 child elements,",
" have ", num_values));
+ return Status::OK();
+}
+
+Status ListBuilder::AppendNextOffset() {
+ RETURN_NOT_OK(CheckNextOffset());
+ const int64_t num_values = value_builder_->length();
return offsets_builder_.Append(static_cast<int32_t>(num_values));
}
@@ -72,6 +78,17 @@ Status ListBuilder::Append(bool is_valid) {
return AppendNextOffset();
}
+Status ListBuilder::AppendNulls(int64_t length) {
+ RETURN_NOT_OK(Reserve(length));
+ RETURN_NOT_OK(CheckNextOffset());
+ UnsafeAppendToBitmap(length, false);
+ const int64_t num_values = value_builder_->length();
+ for (int64_t i = 0; i < length; ++i) {
+ offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_values));
+ }
+ return Status::OK();
+}
+
Status ListBuilder::Resize(int64_t capacity) {
DCHECK_LE(capacity, kListMaximumElements);
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
@@ -141,6 +158,12 @@ void StructBuilder::Reset() {
}
}
+Status StructBuilder::AppendNulls(int64_t length) {
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ UnsafeAppendToBitmap(length, false);
+ return Status::OK();
+}
+
Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
std::shared_ptr<Buffer> null_bitmap;
RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
index 19b0ad8..79e8c1b 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -65,7 +65,9 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
/// value builder
Status Append(bool is_valid = true);
- Status AppendNull() { return Append(false); }
+ Status AppendNull() final { return Append(false); }
+
+ Status AppendNulls(int64_t length) final;
ArrayBuilder* value_builder() const;
@@ -74,7 +76,9 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
std::shared_ptr<ArrayBuilder> value_builder_;
std::shared_ptr<Array> values_;
+ Status CheckNextOffset() const;
Status AppendNextOffset();
+ Status AppendNextOffset(int64_t num_repeats);
};
// ----------------------------------------------------------------------
@@ -110,7 +114,9 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}
- Status AppendNull() { return Append(false); }
+ Status AppendNull() final { return Append(false); }
+
+ Status AppendNulls(int64_t length) final;
void Reset() override;
diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h
index 21f87b2..d8b2a5f 100644
--- a/cpp/src/arrow/array/builder_primitive.h
+++ b/cpp/src/arrow/array/builder_primitive.h
@@ -33,7 +33,7 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {
: ArrayBuilder(null(), pool) {}
/// \brief Append the specified number of null elements
- Status AppendNulls(int64_t length) {
+ Status AppendNulls(int64_t length) final {
if (length < 0) return Status::Invalid("length must be positive");
null_count_ += length;
length_ += length;
@@ -41,7 +41,7 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {
}
/// \brief Append a single null element
- Status AppendNull() { return AppendNulls(1); }
+ Status AppendNull() final { return AppendNulls(1); }
Status Append(std::nullptr_t) { return AppendNull(); }
@@ -71,7 +71,7 @@ class NumericBuilder : public ArrayBuilder {
/// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
/// The memory at the corresponding data slot is set to 0 to prevent
/// uninitialized memory access
- Status AppendNulls(int64_t length) {
+ Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, static_cast<value_type>(0));
UnsafeSetNull(length);
@@ -79,7 +79,7 @@ class NumericBuilder : public ArrayBuilder {
}
/// \brief Append a single null element
- Status AppendNull() {
+ Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(static_cast<value_type>(0));
UnsafeAppendToBitmap(false);
@@ -263,14 +263,14 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
explicit BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
/// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
- Status AppendNulls(int64_t length) {
+ Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, false);
UnsafeSetNull(length);
return Status::OK();
}
- Status AppendNull() {
+ Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendNull();
return Status::OK();
diff --git a/cpp/src/arrow/array/builder_union.h b/cpp/src/arrow/array/builder_union.h
index 2ababc7..5764d5b 100644
--- a/cpp/src/arrow/array/builder_union.h
+++ b/cpp/src/arrow/array/builder_union.h
@@ -47,12 +47,23 @@ class ARROW_EXPORT DenseUnionBuilder : public ArrayBuilder {
explicit DenseUnionBuilder(MemoryPool* pool,
const std::shared_ptr<DataType>& type = NULLPTR);
- Status AppendNull() {
+ Status AppendNull() final {
ARROW_RETURN_NOT_OK(types_builder_.Append(0));
ARROW_RETURN_NOT_OK(offsets_builder_.Append(0));
return AppendToBitmap(false);
}
+ Status AppendNulls(int64_t length) final {
+ ARROW_RETURN_NOT_OK(types_builder_.Reserve(length));
+ ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(length));
+ ARROW_RETURN_NOT_OK(Reserve(length));
+ for (int64_t i = 0; i < length; ++i) {
+ types_builder_.UnsafeAppend(0);
+ offsets_builder_.UnsafeAppend(0);
+ }
+ return AppendToBitmap(length, false);
+ }
+
/// \brief Append an element to the UnionArray. This must be followed
/// by an append to the appropriate child builder.
/// \param[in] type index of the child the value will be appended
diff --git a/cpp/src/arrow/compute/kernels/cast-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc
index 4bbdfaa..aa5815b 100644
--- a/cpp/src/arrow/compute/kernels/cast-test.cc
+++ b/cpp/src/arrow/compute/kernels/cast-test.cc
@@ -37,6 +37,7 @@
#include "arrow/type.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/compute/context.h"
@@ -49,6 +50,8 @@
namespace arrow {
namespace compute {
+using internal::checked_cast;
+
static std::vector<std::shared_ptr<DataType>> kNumericTypes = {
uint8(), int8(), uint16(), int16(), uint32(),
int32(), uint64(), int64(), float32(), float64()};
@@ -64,6 +67,7 @@ class TestCast : public ComputeFixture, public TestBase {
const std::shared_ptr<DataType>& out_type, const CastOptions& options) {
std::shared_ptr<Array> result;
ASSERT_OK(Cast(&ctx_, input, out_type, options, &result));
+ ASSERT_OK(ValidateArray(*result));
ASSERT_ARRAYS_EQUAL(expected, *result);
}
@@ -83,6 +87,7 @@ class TestCast : public ComputeFixture, public TestBase {
void CheckZeroCopy(const Array& input, const std::shared_ptr<DataType>& out_type) {
std::shared_ptr<Array> result;
ASSERT_OK(Cast(&ctx_, input, out_type, {}, &result));
+ ASSERT_OK(ValidateArray(*result));
ASSERT_EQ(input.data()->buffers.size(), result->data()->buffers.size());
for (size_t i = 0; i < input.data()->buffers.size(); ++i) {
AssertBufferSame(input, *result, static_cast<int>(i));
@@ -806,22 +811,6 @@ TEST_F(TestCast, DateTimeZeroCopy) {
CheckZeroCopy(*arr, timestamp(TimeUnit::NANO));
}
-TEST_F(TestCast, FromNull) {
- // Null casts to everything
- const int length = 10;
-
- NullArray arr(length);
-
- std::shared_ptr<Array> result;
- ASSERT_OK(Cast(&ctx_, arr, int32(), {}, &result));
-
- ASSERT_EQ(length, result->length());
- ASSERT_EQ(length, result->null_count());
-
- // OK to look at bitmaps
- ASSERT_ARRAYS_EQUAL(*result, *result);
-}
-
TEST_F(TestCast, PreallocatedMemory) {
CastOptions options;
options.allow_int_overflow = false;
@@ -1094,86 +1083,6 @@ TEST_F(TestCast, BinaryToString) {
utf8(), strings, options);
}
-template <typename TestType>
-class TestDictionaryCast : public TestCast {};
-
-typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
- UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
- Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
- TestTypes;
-
-TYPED_TEST_CASE(TestDictionaryCast, TestTypes);
-
-TYPED_TEST(TestDictionaryCast, Basic) {
- CastOptions options;
- std::shared_ptr<Array> plain_array =
- TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
- Datum out;
- ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &out));
-
- this->CheckPass(*MakeArray(out.array()), *plain_array, plain_array->type(), options);
-}
-
-TEST_F(TestCast, DictToNumericNoNulls) {
- // ARROW-3208
- CastOptions options;
-
- // Convoluted way to create an array with nullptr bitmap buffer
- auto array_ = _MakeArray<Int32Type, int32_t>(int32(), {1, 2, 3, 4, 5, 6}, {});
- auto data = array_->data();
- data->buffers[0] = nullptr;
- auto array = MakeArray(data);
-
- Datum encoded;
- ASSERT_OK(DictionaryEncode(&this->ctx_, array->data(), &encoded));
-
- this->CheckPass(*MakeArray(encoded.array()), *array, array->type(), options);
-}
-
-TEST_F(TestCast, DictToNonDictNoNulls) {
- std::vector<std::string> dict_values = {"foo", "bar", "baz"};
- auto ex_dict = _MakeArray<StringType, std::string>(utf8(), dict_values, {});
- auto dict_type = dictionary(int32(), ex_dict);
-
- // Explicitly construct with nullptr for the null_bitmap_data
- std::vector<int32_t> i1 = {1, 0, 1};
- std::vector<int32_t> i2 = {2, 1, 0, 1};
- auto c1 = std::make_shared<NumericArray<Int32Type>>(3, Buffer::Wrap(i1));
- auto c2 = std::make_shared<NumericArray<Int32Type>>(4, Buffer::Wrap(i2));
-
- ArrayVector dict_arrays = {std::make_shared<DictionaryArray>(dict_type, c1),
- std::make_shared<DictionaryArray>(dict_type, c2)};
- auto dict_carr = std::make_shared<ChunkedArray>(dict_arrays);
-
- Datum cast_input(dict_carr);
- Datum cast_output;
- // Ensure that casting works even when the null_bitmap_data array is a nullptr
- ASSERT_OK(Cast(&this->ctx_, cast_input,
- static_cast<DictionaryType&>(*dict_type).dictionary()->type(),
- CastOptions(), &cast_output));
- ASSERT_EQ(Datum::CHUNKED_ARRAY, cast_output.kind());
-
- auto e1 = _MakeArray<StringType, std::string>(utf8(), {"bar", "foo", "bar"}, {});
- auto e2 = _MakeArray<StringType, std::string>(utf8(), {"baz", "bar", "foo", "bar"}, {});
-
- auto chunks = cast_output.chunked_array()->chunks();
- ASSERT_EQ(chunks.size(), 2);
- ASSERT_ARRAYS_EQUAL(*e1, *chunks[0]);
- ASSERT_ARRAYS_EQUAL(*e2, *chunks[1]);
-}
-
-/*TYPED_TEST(TestDictionaryCast, Reverse) {
- CastOptions options;
- std::shared_ptr<Array> plain_array =
- TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
- std::shared_ptr<Array> dict_array;
- ASSERT_OK(EncodeArrayToDictionary(*plain_array, this->pool_, &dict_array));
-
- this->CheckPass(*plain_array, *dict_array, dict_array->type(), options);
-}*/
-
TEST_F(TestCast, ListToList) {
CastOptions options;
std::shared_ptr<Array> offsets;
@@ -1264,5 +1173,100 @@ TEST_F(TestCast, EmptyCasts) {
}
}
+// ----------------------------------------------------------------------
+// Test casting from NullType
+
+template <typename TestType>
+class TestNullCast : public TestCast {};
+
+typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
+ UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
+ Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
+ TestTypes;
+
+TYPED_TEST_CASE(TestNullCast, TestTypes);
+
+TYPED_TEST(TestNullCast, FromNull) {
+ // Null casts to everything
+ const int length = 10;
+
+ // Hack to get a DataType instance, including for parametric types
+ std::shared_ptr<DataType> out_type =
+ TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(0, 0)->type();
+
+ NullArray arr(length);
+
+ std::shared_ptr<Array> result;
+ ASSERT_OK(Cast(&this->ctx_, arr, out_type, {}, &result));
+ ASSERT_OK(ValidateArray(*result));
+
+ ASSERT_TRUE(result->type()->Equals(*out_type));
+ ASSERT_EQ(length, result->length());
+ ASSERT_EQ(length, result->null_count());
+}
+
+// ----------------------------------------------------------------------
+// Test casting to DictionaryType
+
+template <typename TestType>
+class TestDictionaryCast : public TestCast {};
+
+typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, Int32Type,
+ UInt32Type, UInt64Type, Int64Type, FloatType, DoubleType,
+ Date32Type, Date64Type, FixedSizeBinaryType, BinaryType>
+ TestTypes;
+
+TYPED_TEST_CASE(TestDictionaryCast, TestTypes);
+
+TYPED_TEST(TestDictionaryCast, Basic) {
+ CastOptions options;
+ std::shared_ptr<Array> plain_array =
+ TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
+
+ Datum encoded;
+ ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &encoded));
+ ASSERT_EQ(encoded.array()->type->id(), Type::DICTIONARY);
+
+ this->CheckPass(*MakeArray(encoded.array()), *plain_array, plain_array->type(),
+ options);
+}
+
+TYPED_TEST(TestDictionaryCast, NoNulls) {
+ // Test with a nullptr bitmap buffer (ARROW-3208)
+ if (TypeParam::type_id == Type::NA) {
+ // Skip, but gtest doesn't support skipping :-/
+ return;
+ }
+
+ CastOptions options;
+ std::shared_ptr<Array> plain_array =
+ TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 0);
+ ASSERT_EQ(plain_array->null_count(), 0);
+
+ // Dict-encode the plain array
+ Datum encoded;
+ ASSERT_OK(DictionaryEncode(&this->ctx_, plain_array->data(), &encoded));
+
+ // Make a new dict array with nullptr bitmap buffer
+ auto data = encoded.array()->Copy();
+ data->buffers[0] = nullptr;
+ data->null_count = 0;
+ std::shared_ptr<Array> dict_array = std::make_shared<DictionaryArray>(data);
+ ASSERT_OK(ValidateArray(*dict_array));
+
+ this->CheckPass(*dict_array, *plain_array, plain_array->type(), options);
+}
+
+/*TYPED_TEST(TestDictionaryCast, Reverse) {
+ CastOptions options;
+ std::shared_ptr<Array> plain_array =
+ TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
+
+ std::shared_ptr<Array> dict_array;
+ ASSERT_OK(EncodeArrayToDictionary(*plain_array, this->pool_, &dict_array));
+
+ this->CheckPass(*plain_array, *dict_array, dict_array->type(), options);
+}*/
+
} // namespace compute
} // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc
index 2d3e1a8..749e200 100644
--- a/cpp/src/arrow/compute/kernels/cast.cc
+++ b/cpp/src/arrow/compute/kernels/cast.cc
@@ -38,6 +38,7 @@
#include "arrow/util/macros.h"
#include "arrow/util/parsing.h" // IWYU pragma: keep
#include "arrow/util/utf8.h"
+#include "arrow/visitor_inline.h"
#include "arrow/compute/context.h"
#include "arrow/compute/kernel.h"
@@ -78,19 +79,16 @@ namespace compute {
constexpr int64_t kMillisecondsInDay = 86400000;
+Status CastNotImplemented(const DataType& in_type, const DataType& out_type) {
+ return Status::NotImplemented("No cast implemented from ", in_type.ToString(), " to ",
+ out_type.ToString());
+}
+
template <typename OutType, typename InType, typename Enable = void>
struct CastFunctor {};
// ----------------------------------------------------------------------
-// Null to other things
-
-template <typename T>
-struct CastFunctor<
- T, NullType,
- typename std::enable_if<std::is_base_of<FixedWidthType, T>::value>::type> {
- void operator()(FunctionContext* ctx, const CastOptions& options,
- const ArrayData& input, ArrayData* output) {}
-};
+// Dictionary to null
template <>
struct CastFunctor<NullType, DictionaryType> {
@@ -689,6 +687,63 @@ class ListCastKernel : public CastKernelBase {
};
// ----------------------------------------------------------------------
+// Null to other things
+
+class FromNullCastKernel : public CastKernelBase {
+ public:
+ explicit FromNullCastKernel(std::shared_ptr<DataType> out_type)
+ : CastKernelBase(std::move(out_type)) {}
+
+ Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override {
+ DCHECK_EQ(Datum::ARRAY, input.kind());
+
+ const ArrayData& in_data = *input.array();
+ DCHECK_EQ(Type::NA, in_data.type->id());
+ auto length = in_data.length;
+
+ // An ArrayData may be preallocated for the output (see InvokeUnaryArrayKernel),
+ // however, it doesn't have any actual data, so throw it away and start anew.
+ std::unique_ptr<ArrayBuilder> builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out_type_, &builder));
+ NullBuilderVisitor visitor = {length, builder.get()};
+ RETURN_NOT_OK(VisitTypeInline(*out_type_, &visitor));
+
+ std::shared_ptr<Array> out_array;
+ RETURN_NOT_OK(visitor.builder_->Finish(&out_array));
+ out->value = out_array->data();
+ return Status::OK();
+ }
+
+ struct NullBuilderVisitor {
+ // Generic implementation
+ Status Visit(const DataType& type) { return builder_->AppendNulls(length_); }
+
+ Status Visit(const StructType& type) {
+ RETURN_NOT_OK(builder_->AppendNulls(length_));
+ auto& struct_builder = checked_cast<StructBuilder&>(*builder_);
+ // Append nulls to all child builders too
+ for (int i = 0; i < struct_builder.num_fields(); ++i) {
+ NullBuilderVisitor visitor = {length_, struct_builder.field_builder(i)};
+ RETURN_NOT_OK(VisitTypeInline(*type.child(i)->type(), &visitor));
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const DictionaryType& type) {
+ // XXX (ARROW-5215): Cannot implement this easily, as DictionaryBuilder
+ // disregards the index type given in the dictionary type, and instead
+ // chooses the smallest possible index type.
+ return CastNotImplemented(*null(), type);
+ }
+
+ Status Visit(const UnionType& type) { return CastNotImplemented(*null(), type); }
+
+ int64_t length_;
+ ArrayBuilder* builder_;
+ };
+};
+
+// ----------------------------------------------------------------------
// Dictionary to other things
template <typename IndexType>
@@ -1125,7 +1180,6 @@ class CastKernel : public CastKernelBase {
#include "generated/cast-codegen-internal.h" // NOLINT
-GET_CAST_FUNCTION(NULL_CASES, NullType)
GET_CAST_FUNCTION(BOOLEAN_CASES, BooleanType)
GET_CAST_FUNCTION(UINT8_CASES, UInt8Type)
GET_CAST_FUNCTION(INT8_CASES, Int8Type)
@@ -1194,17 +1248,21 @@ inline bool IsZeroCopyCast(Type::type in_type, Type::type out_type) {
Status GetCastFunction(const DataType& in_type, std::shared_ptr<DataType> out_type,
const CastOptions& options, std::unique_ptr<UnaryKernel>* kernel) {
if (in_type.Equals(out_type)) {
- *kernel = std::unique_ptr<UnaryKernel>(new IdentityCast(std::move(out_type)));
+ kernel->reset(new IdentityCast(std::move(out_type)));
return Status::OK();
}
if (IsZeroCopyCast(in_type.id(), out_type->id())) {
- *kernel = std::unique_ptr<UnaryKernel>(new ZeroCopyCast(std::move(out_type)));
+ kernel->reset(new ZeroCopyCast(std::move(out_type)));
+ return Status::OK();
+ }
+
+ if (in_type.id() == Type::NA) {
+ kernel->reset(new FromNullCastKernel(std::move(out_type)));
return Status::OK();
}
switch (in_type.id()) {
- CAST_FUNCTION_CASE(NullType);
CAST_FUNCTION_CASE(BooleanType);
CAST_FUNCTION_CASE(UInt8Type);
CAST_FUNCTION_CASE(Int8Type);
@@ -1231,8 +1289,7 @@ Status GetCastFunction(const DataType& in_type, std::shared_ptr<DataType> out_ty
break;
}
if (*kernel == nullptr) {
- return Status::NotImplemented("No cast implemented from ", in_type.ToString(), " to ",
- out_type->ToString());
+ return CastNotImplemented(in_type, *out_type);
}
return Status::OK();
}
diff --git a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
index cf2c036..77334af 100644
--- a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
+++ b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h
@@ -17,24 +17,6 @@
// THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT
// Generated by codegen.py script
-#define NULL_CASES(TEMPLATE) \
- TEMPLATE(NullType, BooleanType) \
- TEMPLATE(NullType, UInt8Type) \
- TEMPLATE(NullType, Int8Type) \
- TEMPLATE(NullType, UInt16Type) \
- TEMPLATE(NullType, Int16Type) \
- TEMPLATE(NullType, UInt32Type) \
- TEMPLATE(NullType, Int32Type) \
- TEMPLATE(NullType, UInt64Type) \
- TEMPLATE(NullType, Int64Type) \
- TEMPLATE(NullType, FloatType) \
- TEMPLATE(NullType, DoubleType) \
- TEMPLATE(NullType, Date32Type) \
- TEMPLATE(NullType, Date64Type) \
- TEMPLATE(NullType, Time32Type) \
- TEMPLATE(NullType, Time64Type) \
- TEMPLATE(NullType, TimestampType)
-
#define BOOLEAN_CASES(TEMPLATE) \
TEMPLATE(BooleanType, UInt8Type) \
TEMPLATE(BooleanType, Int8Type) \
diff --git a/cpp/src/arrow/compute/kernels/generated/codegen.py b/cpp/src/arrow/compute/kernels/generated/codegen.py
index 397ba66..04fc386 100644
--- a/cpp/src/arrow/compute/kernels/generated/codegen.py
+++ b/cpp/src/arrow/compute/kernels/generated/codegen.py
@@ -21,6 +21,7 @@
import io
+import os
INTEGER_TYPES = ['UInt8', 'Int8', 'UInt16', 'Int16',
@@ -64,7 +65,6 @@ class CastCodeGenerator(object):
CAST_GENERATORS = [
- CastCodeGenerator('Null', NUMERIC_TYPES + DATE_TIME_TYPES),
CastCodeGenerator('Boolean', NUMERIC_TYPES),
CastCodeGenerator('UInt8', NUMERIC_TYPES),
CastCodeGenerator('Int8', NUMERIC_TYPES),
@@ -126,8 +126,10 @@ def write_file_with_preamble(path, code):
def write_files():
+ here = os.path.abspath(os.path.dirname(__file__))
cast_code = generate_cast_code()
- write_file_with_preamble('cast-codegen-internal.h', cast_code)
+ write_file_with_preamble(os.path.join(here, 'cast-codegen-internal.h'),
+ cast_code)
if __name__ == '__main__':
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 476740d..b70dbca 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -531,11 +531,20 @@ def test_string_from_buffers():
def _check_cast_case(case, safe=True):
in_data, in_type, out_data, out_type = case
- expected = pa.array(out_data, type=out_type)
+ if isinstance(out_data, pa.Array):
+ assert out_data.type == out_type
+ expected = out_data
+ else:
+ expected = pa.array(out_data, type=out_type)
# check casting an already created array
- in_arr = pa.array(in_data, type=in_type)
+ if isinstance(in_data, pa.Array):
+ assert in_data.type == in_type
+ in_arr = in_data
+ else:
+ in_arr = pa.array(in_data, type=in_type)
casted = in_arr.cast(out_type, safe=safe)
+ casted.validate()
assert casted.equals(expected)
# constructing an array with out type which optionally involves casting
@@ -665,6 +674,41 @@ def test_cast_signed_to_unsigned():
_check_cast_case(case)
+def test_cast_from_null():
+ in_data = [None] * 3
+ in_type = pa.null()
+ out_types = [
+ pa.null(),
+ pa.uint8(),
+ pa.float16(),
+ pa.utf8(),
+ pa.binary(),
+ pa.binary(10),
+ pa.list_(pa.int16()),
+ pa.decimal128(19, 4),
+ pa.timestamp('us'),
+ pa.timestamp('us', tz='UTC'),
+ pa.timestamp('us', tz='Europe/Paris'),
+ pa.struct([pa.field('a', pa.int32()),
+ pa.field('b', pa.list_(pa.int8())),
+ pa.field('c', pa.string())]),
+ ]
+ for out_type in out_types:
+ _check_cast_case((in_data, in_type, in_data, out_type))
+
+ out_types = [
+ pa.dictionary(pa.int32(), pa.array(['a', 'b', 'c'])),
+ pa.union([pa.field('a', pa.binary(10)),
+ pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
+ pa.union([pa.field('a', pa.binary(10)),
+ pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
+ ]
+ in_arr = pa.array(in_data, type=pa.null())
+ for out_type in out_types:
+ with pytest.raises(NotImplementedError):
+ in_arr.cast(out_type)
+
+
def test_unique_simple():
cases = [
(pa.array([1, 2, 3, 1, 2, 3]), pa.array([1, 2, 3])),