You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/15 15:10:43 UTC
arrow git commit: ARROW-491: [Format / C++] Add FixedWidthBinary type
to format, C++ implementation
Repository: arrow
Updated Branches:
refs/heads/master dd8204ce7 -> c8d15d467
ARROW-491: [Format / C++] Add FixedWidthBinary type to format, C++ implementation
I have a bunch more work to do on the C++ implementation:
- [x] Test builder class
- [x] Test array API (slice, etc.)
- [x] Implement/test ArrayEquals/ArrayRangeEquals
- [x] Implement `PrettyPrint` (may want to encode to hexadecimal; I don't think that BinaryArray currently prints non-ASCII/UTF-8 data properly)
- [x] Add IPC roundtrip tests
In the meantime, @julienledem, @nongli, or others: could you look at the changes to the Flatbuffers format definition and let me know if they look right to you? Thanks.
Author: Wes McKinney <we...@twosigma.com>
Closes #379 from wesm/ARROW-491 and squashes the following commits:
f948835 [Wes McKinney] Move hex encode/decode to a separate header since including io-util on Windows causes a compilation failure
949fbc8 [Wes McKinney] Hex encode values in binary and fixedwidthbinary. Test PrettyPrint for FW binary
a97c11a [Wes McKinney] Complete IPC implementation for date/time types. Implement IPC for FixedWidthBinary
b679264 [Wes McKinney] Fix bug with fast bitsetting when length is a power of 2
8e76225 [Wes McKinney] Do not needlessly create 0-length buffers
832b363 [Wes McKinney] Implement TypeEquals, ArrayRangeEquals, clang fixes
bf9ecd0 [Wes McKinney] cpplint
ec50654 [Wes McKinney] Add some basic tests for the fixed width binary builder
caa0314 [Wes McKinney] Draft FixedWidthBinaryBuilder. No tests yet
c183639 [Wes McKinney] Consolidate some type tests. Draft FixedWidthBinaryArray class
9143c53 [Wes McKinney] Draft FixedWidthBinaryType
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c8d15d46
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c8d15d46
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c8d15d46
Branch: refs/heads/master
Commit: c8d15d467f7a1950cf08bfcc1ead2e7ab828be00
Parents: dd8204c
Author: Wes McKinney <we...@twosigma.com>
Authored: Wed Mar 15 11:10:36 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Mar 15 11:10:36 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/array-list-test.cc | 20 ---
cpp/src/arrow/array-string-test.cc | 194 +++++++++++++++++++++++++---
cpp/src/arrow/array.cc | 139 ++++++++++++--------
cpp/src/arrow/array.h | 34 +++++
cpp/src/arrow/buffer.h | 10 ++
cpp/src/arrow/builder.cc | 93 +++++++++++--
cpp/src/arrow/builder.h | 50 ++++---
cpp/src/arrow/compare.cc | 32 +++++
cpp/src/arrow/ipc/adapter.cc | 11 ++
cpp/src/arrow/ipc/ipc-adapter-test.cc | 4 +-
cpp/src/arrow/ipc/ipc-file-test.cc | 8 +-
cpp/src/arrow/ipc/json-internal.cc | 26 +---
cpp/src/arrow/ipc/metadata-internal.cc | 74 ++++++++++-
cpp/src/arrow/ipc/test-common.h | 65 ++++++++--
cpp/src/arrow/loader.cc | 12 ++
cpp/src/arrow/pretty_print-test.cc | 26 +++-
cpp/src/arrow/pretty_print.cc | 82 ++++++++----
cpp/src/arrow/type-test.cc | 52 ++++++++
cpp/src/arrow/type.cc | 20 +++
cpp/src/arrow/type.h | 30 ++++-
cpp/src/arrow/type_fwd.h | 4 +
cpp/src/arrow/type_traits.h | 7 +
cpp/src/arrow/util/io-util.h | 5 +-
cpp/src/arrow/util/string.h | 57 ++++++++
format/Message.fbs | 8 +-
25 files changed, 870 insertions(+), 193 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/array-list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc
index a144fd9..87dfdaa 100644
--- a/cpp/src/arrow/array-list-test.cc
+++ b/cpp/src/arrow/array-list-test.cc
@@ -36,26 +36,6 @@ using std::vector;
namespace arrow {
-TEST(TypesTest, TestListType) {
- std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
-
- ListType list_type(vt);
- ASSERT_EQ(list_type.type, Type::LIST);
-
- ASSERT_EQ(list_type.name(), string("list"));
- ASSERT_EQ(list_type.ToString(), string("list<item: uint8>"));
-
- ASSERT_EQ(list_type.value_type()->type, vt->type);
- ASSERT_EQ(list_type.value_type()->type, vt->type);
-
- std::shared_ptr<DataType> st = std::make_shared<StringType>();
- std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
- ASSERT_EQ(lt->ToString(), string("list<item: string>"));
-
- ListType lt2(lt);
- ASSERT_EQ(lt2.ToString(), string("list<item: list<item: string>>"));
-}
-
// ----------------------------------------------------------------------
// List tests
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/array-string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-string-test.cc b/cpp/src/arrow/array-string-test.cc
index 3fdeb3c..cf2ff41 100644
--- a/cpp/src/arrow/array-string-test.cc
+++ b/cpp/src/arrow/array-string-test.cc
@@ -33,22 +33,6 @@ namespace arrow {
class Buffer;
-TEST(TypesTest, BinaryType) {
- BinaryType t1;
- BinaryType e1;
- StringType t2;
- EXPECT_TRUE(t1.Equals(e1));
- EXPECT_FALSE(t1.Equals(t2));
- ASSERT_EQ(t1.type, Type::BINARY);
- ASSERT_EQ(t1.ToString(), std::string("binary"));
-}
-
-TEST(TypesTest, TestStringType) {
- StringType str;
- ASSERT_EQ(str.type, Type::STRING);
- ASSERT_EQ(str.ToString(), std::string("string"));
-}
-
// ----------------------------------------------------------------------
// String container
@@ -474,4 +458,182 @@ TEST_F(TestBinaryArray, LengthZeroCtor) {
BinaryArray array(0, nullptr, nullptr);
}
+// ----------------------------------------------------------------------
+// FixedWidthBinary tests
+
+// Test fixture for the fixed-width binary tests. Holds a
+// FixedWidthBinaryBuilder that a test creates (or recreates) via InitBuilder().
+class TestFWBinaryArray : public ::testing::Test {
+ public:
+  void SetUp() {}
+
+  // Replace builder_ with a fresh builder for fixed_width_binary(byte_width),
+  // allocating from the default memory pool.
+  void InitBuilder(int byte_width) {
+    auto type = fixed_width_binary(byte_width);
+    builder_.reset(new FixedWidthBinaryBuilder(default_memory_pool(), type));
+  }
+
+ protected:
+  std::unique_ptr<FixedWidthBinaryBuilder> builder_;
+};
+
+// Build the same randomly generated values and validity bytes through each of
+// the builder's Append APIs (one value at a time, batched, and std::string)
+// and verify that every resulting array matches the inputs.
+TEST_F(TestFWBinaryArray, Builder) {
+  const int32_t byte_width = 10;
+  int64_t length = 4096;
+
+  int64_t nbytes = length * byte_width;
+
+  std::vector<uint8_t> data(nbytes);
+  test::random_bytes(nbytes, 0, data.data());
+
+  std::vector<uint8_t> is_valid(length);
+  test::random_null_bytes(length, 0.1, is_valid.data());
+
+  const uint8_t* raw_data = data.data();
+
+  std::shared_ptr<Array> result;
+
+  // Shared verification: valid slots must hold the original bytes, all other
+  // slots must be reported as null.
+  auto CheckResult = [this, &length, &is_valid, &raw_data, &byte_width](
+      const Array& result) {
+    // Verify output
+    const auto& fw_result = static_cast<const FixedWidthBinaryArray&>(result);
+
+    ASSERT_EQ(length, result.length());
+
+    for (int64_t i = 0; i < result.length(); ++i) {
+      if (is_valid[i]) {
+        ASSERT_EQ(
+            0, memcmp(raw_data + byte_width * i, fw_result.GetValue(i), byte_width));
+      } else {
+        ASSERT_TRUE(fw_result.IsNull(i));
+      }
+    }
+  };
+
+  // Build using iterative API. Every append returns a Status; check it so an
+  // allocation failure is reported here instead of as a confusing mismatch
+  // later in CheckResult.
+  InitBuilder(byte_width);
+  for (int64_t i = 0; i < length; ++i) {
+    if (is_valid[i]) {
+      ASSERT_OK(builder_->Append(raw_data + byte_width * i));
+    } else {
+      ASSERT_OK(builder_->AppendNull());
+    }
+  }
+
+  ASSERT_OK(builder_->Finish(&result));
+  CheckResult(*result);
+
+  // Build using batch API
+  InitBuilder(byte_width);
+
+  const uint8_t* raw_is_valid = is_valid.data();
+
+  // Split the input into two batches so that the second append starts at a
+  // non-zero position in both the data and the validity bitmap.
+  ASSERT_OK(builder_->Append(raw_data, 50, raw_is_valid));
+  ASSERT_OK(builder_->Append(raw_data + 50 * byte_width, length - 50, raw_is_valid + 50));
+  ASSERT_OK(builder_->Finish(&result));
+  CheckResult(*result);
+
+  // Build from std::string
+  InitBuilder(byte_width);
+  for (int64_t i = 0; i < length; ++i) {
+    if (is_valid[i]) {
+      ASSERT_OK(builder_->Append(std::string(
+          reinterpret_cast<const char*>(raw_data + byte_width * i), byte_width)));
+    } else {
+      ASSERT_OK(builder_->AppendNull());
+    }
+  }
+
+  ASSERT_OK(builder_->Finish(&result));
+  CheckResult(*result);
+}
+
+// Arrays that agree on validity and on every non-null value must compare equal
+// even when the bytes stored behind their null slots differ.
+TEST_F(TestFWBinaryArray, EqualsRangeEquals) {
+  // Check that we don't compare data in null slots
+
+  auto type = fixed_width_binary(4);
+  FixedWidthBinaryBuilder builder1(default_memory_pool(), type);
+  FixedWidthBinaryBuilder builder2(default_memory_pool(), type);
+
+  // builder1 holds ["foo1", null]
+  ASSERT_OK(builder1.Append("foo1"));
+  ASSERT_OK(builder1.AppendNull());
+
+  // builder2 holds ["foo1", "foo2"]
+  ASSERT_OK(builder2.Append("foo1"));
+  ASSERT_OK(builder2.Append("foo2"));
+
+  std::shared_ptr<Array> array1, array2;
+  ASSERT_OK(builder1.Finish(&array1));
+  ASSERT_OK(builder2.Finish(&array2));
+
+  const auto& a1 = static_cast<const FixedWidthBinaryArray&>(*array1);
+  const auto& a2 = static_cast<const FixedWidthBinaryArray&>(*array2);
+
+  // Both arrays reuse a1's validity bitmap (null count 1) but keep their own
+  // data buffers, so they differ only in the bytes behind the null second slot.
+  FixedWidthBinaryArray equal1(type, 2, a1.data(), a1.null_bitmap(), 1);
+  FixedWidthBinaryArray equal2(type, 2, a2.data(), a1.null_bitmap(), 1);
+
+  ASSERT_TRUE(equal1.Equals(equal2));
+  ASSERT_TRUE(equal1.RangeEquals(equal2, 0, 2, 0));
+}
+
+// A byte width of zero is accepted: values and nulls are still counted, but
+// the finished array carries no data buffer.
+TEST_F(TestFWBinaryArray, ZeroSize) {
+  auto type = fixed_width_binary(0);
+  FixedWidthBinaryBuilder builder(default_memory_pool(), type);
+
+  // Three (empty) non-null values followed by three nulls
+  ASSERT_OK(builder.Append(nullptr));
+  ASSERT_OK(builder.Append(nullptr));
+  ASSERT_OK(builder.Append(nullptr));
+  ASSERT_OK(builder.AppendNull());
+  ASSERT_OK(builder.AppendNull());
+  ASSERT_OK(builder.AppendNull());
+
+  std::shared_ptr<Array> array;
+  ASSERT_OK(builder.Finish(&array));
+
+  const auto& fw_array = static_cast<const FixedWidthBinaryArray&>(*array);
+
+  // data is never allocated
+  ASSERT_TRUE(fw_array.data() == nullptr);
+  ASSERT_EQ(0, fw_array.byte_width());
+
+  ASSERT_EQ(6, array->length());
+  ASSERT_EQ(3, array->null_count());
+}
+
+// Slicing a fixed-width binary array: slices taken with the same parameters
+// compare equal, chained slices compose, and a slice matches the corresponding
+// range of its parent array.
+TEST_F(TestFWBinaryArray, Slice) {
+  auto type = fixed_width_binary(4);
+  FixedWidthBinaryBuilder builder(default_memory_pool(), type);
+
+  std::vector<std::string> strings = {"foo1", "foo2", "foo3", "foo4", "foo5"};
+  std::vector<uint8_t> is_null = {0, 1, 0, 0, 0};
+
+  // Check every append's Status so a failed append is reported at its source
+  // rather than surfacing as a length or equality failure further down.
+  for (int i = 0; i < 5; ++i) {
+    if (is_null[i]) {
+      ASSERT_OK(builder.AppendNull());
+    } else {
+      ASSERT_OK(builder.Append(strings[i]));
+    }
+  }
+
+  std::shared_ptr<Array> array;
+  ASSERT_OK(builder.Finish(&array));
+
+  std::shared_ptr<Array> slice, slice2;
+
+  // Open-ended slice: everything from index 1 onwards (starts on the null slot)
+  slice = array->Slice(1);
+  slice2 = array->Slice(1);
+  ASSERT_EQ(4, slice->length());
+
+  ASSERT_TRUE(slice->Equals(slice2));
+  ASSERT_TRUE(array->RangeEquals(1, slice->length(), 0, slice));
+
+  // Chained slices
+  slice = array->Slice(2);
+  slice2 = array->Slice(1)->Slice(1);
+  ASSERT_TRUE(slice->Equals(slice2));
+
+  // Slice with an explicit length
+  slice = array->Slice(1, 3);
+  ASSERT_EQ(3, slice->length());
+
+  slice2 = array->Slice(1, 3);
+  ASSERT_TRUE(slice->Equals(slice2));
+  ASSERT_TRUE(array->RangeEquals(1, 3, 0, slice));
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 49da6bb..36b3fcc 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -128,10 +128,6 @@ std::shared_ptr<Array> NullArray::Slice(int64_t offset, int64_t length) const {
return std::make_shared<NullArray>(length);
}
-Status NullArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
// ----------------------------------------------------------------------
// Primitive array base
@@ -144,33 +140,12 @@ PrimitiveArray::PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t le
}
template <typename T>
-Status NumericArray<T>::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
-template <typename T>
std::shared_ptr<Array> NumericArray<T>::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
return std::make_shared<NumericArray<T>>(
type_, length, data_, null_bitmap_, kUnknownNullCount, offset);
}
-template class NumericArray<UInt8Type>;
-template class NumericArray<UInt16Type>;
-template class NumericArray<UInt32Type>;
-template class NumericArray<UInt64Type>;
-template class NumericArray<Int8Type>;
-template class NumericArray<Int16Type>;
-template class NumericArray<Int32Type>;
-template class NumericArray<Int64Type>;
-template class NumericArray<TimestampType>;
-template class NumericArray<DateType>;
-template class NumericArray<Date32Type>;
-template class NumericArray<TimeType>;
-template class NumericArray<HalfFloatType>;
-template class NumericArray<FloatType>;
-template class NumericArray<DoubleType>;
-
// ----------------------------------------------------------------------
// BooleanArray
@@ -179,10 +154,6 @@ BooleanArray::BooleanArray(int64_t length, const std::shared_ptr<Buffer>& data,
: PrimitiveArray(std::make_shared<BooleanType>(), length, data, null_bitmap,
null_count, offset) {}
-Status BooleanArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
std::shared_ptr<Array> BooleanArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
return std::make_shared<BooleanArray>(
@@ -244,10 +215,6 @@ Status ListArray::Validate() const {
return Status::OK();
}
-Status ListArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
std::shared_ptr<Array> ListArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
return std::make_shared<ListArray>(
@@ -285,10 +252,6 @@ Status BinaryArray::Validate() const {
return Status::OK();
}
-Status BinaryArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
std::shared_ptr<Array> BinaryArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
return std::make_shared<BinaryArray>(
@@ -306,10 +269,6 @@ Status StringArray::Validate() const {
return BinaryArray::Validate();
}
-Status StringArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
std::shared_ptr<Array> StringArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
return std::make_shared<StringArray>(
@@ -317,6 +276,27 @@ std::shared_ptr<Array> StringArray::Slice(int64_t offset, int64_t length) const
}
// ----------------------------------------------------------------------
+// Fixed width binary
+
+// Construct a fixed-width binary array over `data`. The per-value byte width
+// is read from `type`, which must be a FixedWidthBinaryType (checked in debug
+// builds only). `data` may be null, in which case raw_data_ stays nullptr.
+FixedWidthBinaryArray::FixedWidthBinaryArray(const std::shared_ptr<DataType>& type,
+    int64_t length, const std::shared_ptr<Buffer>& data,
+    const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count, int64_t offset)
+    : Array(type, length, null_bitmap, null_count, offset),
+      data_(data),
+      raw_data_(nullptr) {
+  DCHECK(type->type == Type::FIXED_WIDTH_BINARY);
+  byte_width_ = static_cast<const FixedWidthBinaryType&>(*type).byte_width();
+  // Cache the raw pointer so value access does not go through the Buffer
+  if (data) { raw_data_ = data->data(); }
+}
+
+// Return a new array over a sub-range of this one. The data and null bitmap
+// buffers are shared with this array rather than copied, and the null count of
+// the result is passed as kUnknownNullCount.
+std::shared_ptr<Array> FixedWidthBinaryArray::Slice(
+    int64_t offset, int64_t length) const {
+  // NOTE(review): ConformSliceParams is defined outside this hunk; presumably
+  // it makes offset absolute and clamps length to this array's bounds.
+  ConformSliceParams(offset_, length_, &offset, &length);
+  return std::make_shared<FixedWidthBinaryArray>(
+      type_, length, data_, null_bitmap_, kUnknownNullCount, offset);
+}
+
+// ----------------------------------------------------------------------
// Struct
StructArray::StructArray(const std::shared_ptr<DataType>& type, int64_t length,
@@ -368,10 +348,6 @@ Status StructArray::Validate() const {
return Status::OK();
}
-Status StructArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
std::shared_ptr<Array> StructArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
return std::make_shared<StructArray>(
@@ -413,10 +389,6 @@ Status UnionArray::Validate() const {
return Status::OK();
}
-Status UnionArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
std::shared_ptr<Array> UnionArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
return std::make_shared<UnionArray>(type_, length, children_, type_ids_, value_offsets_,
@@ -447,17 +419,54 @@ std::shared_ptr<Array> DictionaryArray::dictionary() const {
return dict_type_->dictionary();
}
-Status DictionaryArray::Accept(ArrayVisitor* visitor) const {
- return visitor->Visit(*this);
-}
-
std::shared_ptr<Array> DictionaryArray::Slice(int64_t offset, int64_t length) const {
std::shared_ptr<Array> sliced_indices = indices_->Slice(offset, length);
return std::make_shared<DictionaryArray>(type_, sliced_indices);
}
// ----------------------------------------------------------------------
-// Default implementations of ArrayVisitor methods
+// Implement ArrayVisitor methods
+
+// Each concrete array type forwards to the ArrayVisitor::Visit overload for
+// its own static type, and returns whatever Status the visitor produces.
+Status NullArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status BooleanArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+template <typename T>
+Status NumericArray<T>::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status BinaryArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status StringArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status FixedWidthBinaryArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status ListArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status StructArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status UnionArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
+Status DictionaryArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
#define ARRAY_VISITOR_DEFAULT(ARRAY_CLASS) \
Status ArrayVisitor::Visit(const ARRAY_CLASS& array) { \
@@ -477,8 +486,9 @@ ARRAY_VISITOR_DEFAULT(UInt64Array);
ARRAY_VISITOR_DEFAULT(HalfFloatArray);
ARRAY_VISITOR_DEFAULT(FloatArray);
ARRAY_VISITOR_DEFAULT(DoubleArray);
-ARRAY_VISITOR_DEFAULT(StringArray);
ARRAY_VISITOR_DEFAULT(BinaryArray);
+ARRAY_VISITOR_DEFAULT(StringArray);
+ARRAY_VISITOR_DEFAULT(FixedWidthBinaryArray);
ARRAY_VISITOR_DEFAULT(DateArray);
ARRAY_VISITOR_DEFAULT(Date32Array);
ARRAY_VISITOR_DEFAULT(TimeArray);
@@ -493,4 +503,23 @@ Status ArrayVisitor::Visit(const DecimalArray& array) {
return Status::NotImplemented("decimal");
}
+// ----------------------------------------------------------------------
+// Instantiate templates
+
+template class NumericArray<UInt8Type>;
+template class NumericArray<UInt16Type>;
+template class NumericArray<UInt32Type>;
+template class NumericArray<UInt64Type>;
+template class NumericArray<Int8Type>;
+template class NumericArray<Int16Type>;
+template class NumericArray<Int32Type>;
+template class NumericArray<Int64Type>;
+template class NumericArray<TimestampType>;
+template class NumericArray<DateType>;
+template class NumericArray<Date32Type>;
+template class NumericArray<TimeType>;
+template class NumericArray<HalfFloatType>;
+template class NumericArray<FloatType>;
+template class NumericArray<DoubleType>;
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index f111609..ecc8ce5 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -57,6 +57,7 @@ class ARROW_EXPORT ArrayVisitor {
virtual Status Visit(const DoubleArray& array);
virtual Status Visit(const StringArray& array);
virtual Status Visit(const BinaryArray& array);
+ virtual Status Visit(const FixedWidthBinaryArray& array);
virtual Status Visit(const DateArray& array);
virtual Status Visit(const Date32Array& array);
virtual Status Visit(const TimeArray& array);
@@ -387,6 +388,39 @@ class ARROW_EXPORT StringArray : public BinaryArray {
};
// ----------------------------------------------------------------------
+// Fixed width binary
+
+/// Array of binary values that all have the same byte width, taken from the
+/// array's FixedWidthBinaryType. Value i is located at position
+/// (i + offset) * byte_width in the data buffer.
+class ARROW_EXPORT FixedWidthBinaryArray : public Array {
+ public:
+  using TypeClass = FixedWidthBinaryType;
+
+  FixedWidthBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
+      const std::shared_ptr<Buffer>& data,
+      const std::shared_ptr<Buffer>& null_bitmap = nullptr, int64_t null_count = 0,
+      int64_t offset = 0);
+
+  /// Pointer to the first byte of the value in slot i, relative to this
+  /// array's slice offset. No bounds or null check is performed.
+  const uint8_t* GetValue(int64_t i) const {
+    return raw_data_ + (i + offset_) * byte_width_;
+  }
+
+  /// Note that this buffer does not account for any slice offset
+  std::shared_ptr<Buffer> data() const { return data_; }
+
+  /// Number of bytes occupied by each value
+  int32_t byte_width() const { return byte_width_; }
+
+  /// Start of the data buffer (slice offset not applied)
+  const uint8_t* raw_data() const { return raw_data_; }
+
+  Status Accept(ArrayVisitor* visitor) const override;
+
+  std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const override;
+
+ protected:
+  int32_t byte_width_;
+  std::shared_ptr<Buffer> data_;
+  const uint8_t* raw_data_;
+};
+
+// ----------------------------------------------------------------------
// Struct
class ARROW_EXPORT StructArray : public Array {
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 0724385..26c8ea6 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -157,6 +157,8 @@ class ARROW_EXPORT BufferBuilder {
/// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md
Status Resize(int64_t elements) {
+ // Resize(0) is a no-op
+ if (elements == 0) { return Status::OK(); }
if (capacity_ == 0) { buffer_ = std::make_shared<PoolBuffer>(pool_); }
RETURN_NOT_OK(buffer_->Resize(elements));
capacity_ = buffer_->capacity();
@@ -170,6 +172,14 @@ class ARROW_EXPORT BufferBuilder {
return Status::OK();
}
+  // Advance the write position by `length` bytes and zero out the bytes that
+  // were skipped, growing the buffer first when capacity is insufficient.
+  // Returns the Status of the resize if one was needed and failed.
+  Status Advance(int64_t length) {
+    if (capacity_ < length + size_) { RETURN_NOT_OK(Resize(length + size_)); }
+    // A zero-length advance can happen before anything has been allocated
+    // (Resize(0) is a no-op), and memset must not be handed a pointer that does
+    // not refer to valid storage even when the byte count is zero.
+    if (length > 0) { memset(data_ + size_, 0, static_cast<size_t>(length)); }
+    size_ += length;
+    return Status::OK();
+  }
+
template <typename T>
Status Append(T arithmetic_value) {
static_assert(std::is_arithmetic<T>::value,
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 4372925..b65a492 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -121,6 +121,14 @@ void ArrayBuilder::UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t leng
uint8_t bitset = null_bitmap_data_[byte_offset];
for (int64_t i = 0; i < length; ++i) {
+ if (bit_offset == 8) {
+ bit_offset = 0;
+ null_bitmap_data_[byte_offset] = bitset;
+ byte_offset++;
+ // TODO: Except for the last byte, this shouldn't be needed
+ bitset = null_bitmap_data_[byte_offset];
+ }
+
if (valid_bytes[i]) {
bitset |= BitUtil::kBitmask[bit_offset];
} else {
@@ -129,13 +137,6 @@ void ArrayBuilder::UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t leng
}
bit_offset++;
- if (bit_offset == 8) {
- bit_offset = 0;
- null_bitmap_data_[byte_offset] = bitset;
- byte_offset++;
- // TODO: Except for the last byte, this shouldn't be needed
- bitset = null_bitmap_data_[byte_offset];
- }
}
if (bit_offset != 0) { null_bitmap_data_[byte_offset] = bitset; }
length_ += length;
@@ -324,21 +325,37 @@ Status BooleanBuilder::Append(
// ----------------------------------------------------------------------
// ListBuilder
-ListBuilder::ListBuilder(
- MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder, const TypePtr& type)
+ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder,
+ const std::shared_ptr<DataType>& type)
: ArrayBuilder(
pool, type ? type : std::static_pointer_cast<DataType>(
std::make_shared<ListType>(value_builder->type()))),
offset_builder_(pool),
value_builder_(value_builder) {}
-ListBuilder::ListBuilder(
- MemoryPool* pool, std::shared_ptr<Array> values, const TypePtr& type)
+ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<Array> values,
+ const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool, type ? type : std::static_pointer_cast<DataType>(
std::make_shared<ListType>(values->type()))),
offset_builder_(pool),
values_(values) {}
+// Bulk-append `length` list slots. Validity is taken from valid_bytes and the
+// int32 offsets are copied in as given, without validation.
+Status ListBuilder::Append(
+    const int32_t* offsets, int64_t length, const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(Reserve(length));
+  UnsafeAppendToBitmap(valid_bytes, length);
+  // NOTE(review): relies on Reserve() above having made room in
+  // offset_builder_ as well -- confirm against ListBuilder::Resize.
+  offset_builder_.UnsafeAppend<int32_t>(offsets, length);
+  return Status::OK();
+}
+
+// Start a new list slot (null when is_valid is false). The slot's offset is
+// the number of elements currently held by the value builder.
+Status ListBuilder::Append(bool is_valid) {
+  RETURN_NOT_OK(Reserve(1));
+  UnsafeAppendToBitmap(is_valid);
+  RETURN_NOT_OK(
+      offset_builder_.Append<int32_t>(static_cast<int32_t>(value_builder_->length())));
+  return Status::OK();
+}
+
Status ListBuilder::Init(int64_t elements) {
DCHECK_LT(elements, std::numeric_limits<int64_t>::max());
RETURN_NOT_OK(ArrayBuilder::Init(elements));
@@ -386,7 +403,7 @@ BinaryBuilder::BinaryBuilder(MemoryPool* pool)
byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
}
-BinaryBuilder::BinaryBuilder(MemoryPool* pool, const TypePtr& type)
+BinaryBuilder::BinaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type)
: ListBuilder(pool, std::make_shared<UInt8Builder>(pool, uint8()), type) {
byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
}
@@ -418,6 +435,58 @@ Status StringBuilder::Finish(std::shared_ptr<Array>* out) {
}
// ----------------------------------------------------------------------
+// Fixed width binary
+
+// Create a builder for `type`, which must be a FixedWidthBinaryType (checked
+// in debug builds only). The byte width of every value is read from the type.
+FixedWidthBinaryBuilder::FixedWidthBinaryBuilder(
+    MemoryPool* pool, const std::shared_ptr<DataType>& type)
+    : ArrayBuilder(pool, type), byte_builder_(pool) {
+  DCHECK(type->type == Type::FIXED_WIDTH_BINARY);
+  byte_width_ = static_cast<const FixedWidthBinaryType&>(*type).byte_width();
+}
+
+// Append one non-null value by copying byte_width_ bytes starting at `value`.
+// The caller must make that many bytes readable.
+Status FixedWidthBinaryBuilder::Append(const uint8_t* value) {
+  RETURN_NOT_OK(Reserve(1));
+  UnsafeAppendToBitmap(true);
+  return byte_builder_.Append(value, byte_width_);
+}
+
+// Append `length` values stored contiguously in `data` (length * byte_width_
+// bytes in total), with per-slot validity taken from valid_bytes. The bytes of
+// every slot, including null ones, are copied as given.
+Status FixedWidthBinaryBuilder::Append(
+    const uint8_t* data, int64_t length, const uint8_t* valid_bytes) {
+  RETURN_NOT_OK(Reserve(length));
+  UnsafeAppendToBitmap(valid_bytes, length);
+  return byte_builder_.Append(data, length * byte_width_);
+}
+
+// Append one non-null value taken from the bytes of `value`, which must hold
+// exactly byte_width_ bytes.
+Status FixedWidthBinaryBuilder::Append(const std::string& value) {
+  // The pointer overload copies byte_width_ bytes unconditionally: a shorter
+  // string would be read past its end and a longer one silently truncated, so
+  // catch a size mismatch in debug builds.
+  DCHECK(static_cast<int64_t>(value.size()) == static_cast<int64_t>(byte_width_));
+  return Append(reinterpret_cast<const uint8_t*>(value.c_str()));
+}
+
+// Append one null slot. The slot still occupies byte_width_ bytes in the data
+// buffer so that later values stay aligned to their index.
+Status FixedWidthBinaryBuilder::AppendNull() {
+  RETURN_NOT_OK(Reserve(1));
+  UnsafeAppendToBitmap(false);
+  return byte_builder_.Advance(byte_width_);
+}
+
+// Initialize the builder for `elements` values: run the base-class Init, then
+// size the data buffer for elements * byte_width_ bytes.
+Status FixedWidthBinaryBuilder::Init(int64_t elements) {
+  DCHECK_LT(elements, std::numeric_limits<int64_t>::max());
+  RETURN_NOT_OK(ArrayBuilder::Init(elements));
+  return byte_builder_.Resize(elements * byte_width_);
+}
+
+// Resize to hold `capacity` values: the data buffer is resized to
+// capacity * byte_width_ bytes first, then the base class is resized.
+Status FixedWidthBinaryBuilder::Resize(int64_t capacity) {
+  DCHECK_LT(capacity, std::numeric_limits<int64_t>::max());
+  RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
+  return ArrayBuilder::Resize(capacity);
+}
+
+// Produce a FixedWidthBinaryArray from the accumulated values and validity
+// bitmap and store it in *out. Always returns OK.
+Status FixedWidthBinaryBuilder::Finish(std::shared_ptr<Array>* out) {
+  std::shared_ptr<Buffer> data = byte_builder_.Finish();
+  // NOTE(review): length_, null_count_ and null_bitmap_ are not reset here;
+  // confirm whether the builder is meant to be reusable after Finish().
+  *out = std::make_shared<FixedWidthBinaryArray>(
+      type_, length_, data, null_bitmap_, null_count_);
+  return Status::OK();
+}
+
+// ----------------------------------------------------------------------
// Struct
Status StructBuilder::Finish(std::shared_ptr<Array>* out) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index ebc683a..07b7cfc 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -46,7 +46,7 @@ static constexpr int64_t kMinBuilderCapacity = 1 << 5;
/// the null count.
class ARROW_EXPORT ArrayBuilder {
public:
- explicit ArrayBuilder(MemoryPool* pool, const TypePtr& type)
+ explicit ArrayBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type)
: pool_(pool),
type_(type),
null_bitmap_(nullptr),
@@ -140,7 +140,7 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
public:
using value_type = typename Type::c_type;
- explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type)
+ explicit PrimitiveBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool, type), data_(nullptr), raw_data_(nullptr) {}
using ArrayBuilder::Advance;
@@ -313,11 +313,11 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
/// Use this constructor to incrementally build the value array along with offsets and
/// null bitmap.
ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder,
- const TypePtr& type = nullptr);
+ const std::shared_ptr<DataType>& type = nullptr);
/// Use this constructor to build the list with a pre-existing values array
- ListBuilder(
- MemoryPool* pool, std::shared_ptr<Array> values, const TypePtr& type = nullptr);
+ ListBuilder(MemoryPool* pool, std::shared_ptr<Array> values,
+ const std::shared_ptr<DataType>& type = nullptr);
Status Init(int64_t elements) override;
Status Resize(int64_t capacity) override;
@@ -328,24 +328,13 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
/// If passed, valid_bytes is of equal length to values, and any zero byte
/// will be considered as a null for that slot
Status Append(
- const int32_t* offsets, int64_t length, const uint8_t* valid_bytes = nullptr) {
- RETURN_NOT_OK(Reserve(length));
- UnsafeAppendToBitmap(valid_bytes, length);
- offset_builder_.UnsafeAppend<int32_t>(offsets, length);
- return Status::OK();
- }
+ const int32_t* offsets, int64_t length, const uint8_t* valid_bytes = nullptr);
/// Start a new variable-length list slot
///
/// This function should be called before beginning to append elements to the
/// value builder
- Status Append(bool is_valid = true) {
- RETURN_NOT_OK(Reserve(1));
- UnsafeAppendToBitmap(is_valid);
- RETURN_NOT_OK(
- offset_builder_.Append<int32_t>(static_cast<int32_t>(value_builder_->length())));
- return Status::OK();
- }
+ Status Append(bool is_valid = true);
Status AppendNull() { return Append(false); }
@@ -362,11 +351,10 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder {
// ----------------------------------------------------------------------
// Binary and String
-// BinaryBuilder : public ListBuilder
class ARROW_EXPORT BinaryBuilder : public ListBuilder {
public:
explicit BinaryBuilder(MemoryPool* pool);
- explicit BinaryBuilder(MemoryPool* pool, const TypePtr& type);
+ explicit BinaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type);
Status Append(const uint8_t* value, int32_t length) {
RETURN_NOT_OK(ListBuilder::Append());
@@ -400,6 +388,28 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
};
// ----------------------------------------------------------------------
+// FixedWidthBinaryBuilder
+
+/// Builder for FixedWidthBinaryArray. Every appended value occupies the byte
+/// width declared by the builder's FixedWidthBinaryType.
+class ARROW_EXPORT FixedWidthBinaryBuilder : public ArrayBuilder {
+ public:
+  FixedWidthBinaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type);
+
+  /// Append a single value read from a raw pointer
+  Status Append(const uint8_t* value);
+  /// Append `length` contiguous values, with optional per-slot validity bytes
+  Status Append(
+      const uint8_t* data, int64_t length, const uint8_t* valid_bytes = nullptr);
+  /// Append a single value read from the bytes of a std::string
+  Status Append(const std::string& value);
+  /// Append a single null slot
+  Status AppendNull();
+
+  Status Init(int64_t elements) override;
+  Status Resize(int64_t capacity) override;
+  Status Finish(std::shared_ptr<Array>* out) override;
+
+ protected:
+  int32_t byte_width_;
+  BufferBuilder byte_builder_;
+};
+
+// ----------------------------------------------------------------------
// Struct
// ---------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/compare.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 17b8833..86ed8cc 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -143,6 +143,32 @@ class RangeEqualsVisitor : public ArrayVisitor {
return Status::OK();
}
+  // Compares slots [left_start_idx_, left_end_idx_) of `left` against the
+  // slots of right_ beginning at right_start_idx_. Two slots match when both
+  // are null, or when neither is null and their bytes are identical.
+  Status Visit(const FixedWidthBinaryArray& left) override {
+    const auto& right = static_cast<const FixedWidthBinaryArray&>(right_);
+    const int32_t byte_width = left.byte_width();
+
+    const uint8_t* lhs_base = left.raw_data() + left.offset() * byte_width;
+    const uint8_t* rhs_base = right.raw_data() + right.offset() * byte_width;
+
+    int64_t rhs_idx = right_start_idx_;
+    for (int64_t lhs_idx = left_start_idx_; lhs_idx < left_end_idx_;
+         ++lhs_idx, ++rhs_idx) {
+      const bool lhs_null = left.IsNull(lhs_idx);
+      const bool rhs_null = right.IsNull(rhs_idx);
+
+      bool slots_match = (lhs_null == rhs_null);
+      if (slots_match && !lhs_null) {
+        // Both slots hold a value: compare the raw bytes.
+        slots_match = std::memcmp(lhs_base + byte_width * lhs_idx,
+                          rhs_base + byte_width * rhs_idx, byte_width) == 0;
+      }
+
+      if (!slots_match) {
+        result_ = false;
+        return Status::OK();
+      }
+    }
+
+    result_ = true;
+    return Status::OK();
+  }
+
Status Visit(const DateArray& left) override { return CompareValues<DateArray>(left); }
Status Visit(const Date32Array& left) override {
@@ -632,6 +658,12 @@ class TypeEqualsVisitor : public TypeVisitor {
return Status::OK();
}
+  // Fixed-width binary types are equal exactly when their byte widths agree.
+  Status Visit(const FixedWidthBinaryType& left) override {
+    const int32_t right_width =
+        static_cast<const FixedWidthBinaryType&>(right_).byte_width();
+    result_ = (right_width == left.byte_width());
+    return Status::OK();
+  }
+
Status Visit(const ListType& left) override { return VisitChildren(left); }
Status Visit(const StructType& left) override { return VisitChildren(left); }
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index a4eff72..406ce24 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -304,6 +304,17 @@ class RecordBatchWriter : public ArrayVisitor {
return Status::OK();
}
+  // Queues the value buffer of a fixed-width binary array for writing. When
+  // the array has a non-zero offset, only the bytes of its
+  // [offset, offset + length) slots are queued.
+  Status Visit(const FixedWidthBinaryArray& array) override {
+    if (array.offset() == 0) {
+      buffers_.push_back(array.data());
+      return Status::OK();
+    }
+
+    const int32_t byte_width = array.byte_width();
+    buffers_.push_back(SliceBuffer(
+        array.data(), array.offset() * byte_width, byte_width * array.length()));
+    return Status::OK();
+  }
+
Status Visit(const BooleanArray& array) override {
buffers_.push_back(array.data());
return Status::OK();
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/ipc/ipc-adapter-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc
index b60b8a9..36a675f 100644
--- a/cpp/src/arrow/ipc/ipc-adapter-test.cc
+++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc
@@ -175,8 +175,8 @@ INSTANTIATE_TEST_CASE_P(
RoundTripTests, TestRecordBatchParam,
::testing::Values(&MakeIntRecordBatch, &MakeStringTypesRecordBatch,
&MakeNonNullRecordBatch, &MakeZeroLengthRecordBatch, &MakeListRecordBatch,
- &MakeDeeplyNestedList, &MakeStruct, &MakeUnion, &MakeDictionary, &MakeDates,
- &MakeTimestamps, &MakeTimes));
+ &MakeDeeplyNestedList, &MakeStruct, &MakeUnion, &MakeDictionary, &MakeDate,
+ &MakeTimestamps, &MakeTimes, &MakeFWBinary));
void TestGetRecordBatchSize(std::shared_ptr<RecordBatch> batch) {
ipc::MockOutputStream mock;
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/ipc/ipc-file-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc
index 0c95c8e..b457822 100644
--- a/cpp/src/arrow/ipc/ipc-file-test.cc
+++ b/cpp/src/arrow/ipc/ipc-file-test.cc
@@ -43,7 +43,10 @@ namespace arrow {
namespace ipc {
void CompareBatch(const RecordBatch& left, const RecordBatch& right) {
- ASSERT_TRUE(left.schema()->Equals(right.schema()));
+ if (!left.schema()->Equals(right.schema())) {
+ FAIL() << "Left schema: " << left.schema()->ToString()
+ << "\nRight schema: " << right.schema()->ToString();
+ }
ASSERT_EQ(left.num_columns(), right.num_columns())
<< left.schema()->ToString() << " result: " << right.schema()->ToString();
EXPECT_EQ(left.num_rows(), right.num_rows());
@@ -180,7 +183,8 @@ TEST_P(TestStreamFormat, RoundTrip) {
#define BATCH_CASES() \
::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, \
&MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, &MakeStringTypesRecordBatch, \
- &MakeStruct, &MakeUnion, &MakeDictionary);
+ &MakeStruct, &MakeUnion, &MakeDictionary, &MakeDate, &MakeTimestamps, &MakeTimes, \
+ &MakeFWBinary);
INSTANTIATE_TEST_CASE_P(FileRoundTripTests, TestFileFormat, BATCH_CASES());
INSTANTIATE_TEST_CASE_P(StreamRoundTripTests, TestStreamFormat, BATCH_CASES());
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/ipc/json-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc
index 0458b85..549b26b 100644
--- a/cpp/src/arrow/ipc/json-internal.cc
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -39,12 +39,11 @@
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/logging.h"
+#include "arrow/util/string.h"
namespace arrow {
namespace ipc {
-static const char* kAsciiTable = "0123456789ABCDEF";
-
using RjArray = rj::Value::ConstArray;
using RjObject = rj::Value::ConstObject;
@@ -401,14 +400,7 @@ class JsonArrayWriter : public ArrayVisitor {
if (std::is_base_of<StringArray, T>::value) {
writer_->String(buf, length);
} else {
- std::string hex_string;
- hex_string.reserve(length * 2);
- for (int32_t j = 0; j < length; ++j) {
- // Convert to 2 base16 digits
- hex_string.push_back(kAsciiTable[buf[j] >> 4]);
- hex_string.push_back(kAsciiTable[buf[j] & 15]);
- }
- writer_->String(hex_string);
+ writer_->String(HexEncode(buf, length));
}
}
}
@@ -760,20 +752,6 @@ class JsonSchemaReader {
const rj::Value& json_schema_;
};
-static inline Status ParseHexValue(const char* data, uint8_t* out) {
- char c1 = data[0];
- char c2 = data[1];
-
- const char* pos1 = std::lower_bound(kAsciiTable, kAsciiTable + 16, c1);
- const char* pos2 = std::lower_bound(kAsciiTable, kAsciiTable + 16, c2);
-
- // Error checking
- if (*pos1 != c1 || *pos2 != c2) { return Status::Invalid("Encountered non-hex digit"); }
-
- *out = static_cast<uint8_t>((pos1 - kAsciiTable) << 4 | (pos2 - kAsciiTable));
- return Status::OK();
-}
-
template <typename T>
inline typename std::enable_if<IsSignedInt<T>::value, typename T::c_type>::type
UnboxValue(const rj::Value& val) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/ipc/metadata-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 17a3a5f..be0d282 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -170,6 +170,39 @@ static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
*offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \
break;
+// Converts an in-memory TimeUnit to its Flatbuffer counterpart. A unit that is
+// not one of the four handled below yields flatbuf::TimeUnit_MIN.
+static inline flatbuf::TimeUnit ToFlatbufferUnit(TimeUnit unit) {
+  flatbuf::TimeUnit fb_unit = flatbuf::TimeUnit_MIN;
+  switch (unit) {
+    case TimeUnit::SECOND:
+      fb_unit = flatbuf::TimeUnit_SECOND;
+      break;
+    case TimeUnit::MILLI:
+      fb_unit = flatbuf::TimeUnit_MILLISECOND;
+      break;
+    case TimeUnit::MICRO:
+      fb_unit = flatbuf::TimeUnit_MICROSECOND;
+      break;
+    case TimeUnit::NANO:
+      fb_unit = flatbuf::TimeUnit_NANOSECOND;
+      break;
+    default:
+      break;
+  }
+  return fb_unit;
+}
+
+// Converts a Flatbuffer time unit to the in-memory TimeUnit. Any value other
+// than the four known units falls back to TimeUnit::SECOND.
+static inline TimeUnit FromFlatbufferUnit(flatbuf::TimeUnit unit) {
+  if (unit == flatbuf::TimeUnit_MILLISECOND) { return TimeUnit::MILLI; }
+  if (unit == flatbuf::TimeUnit_MICROSECOND) { return TimeUnit::MICRO; }
+  if (unit == flatbuf::TimeUnit_NANOSECOND) { return TimeUnit::NANO; }
+  // flatbuf::TimeUnit_SECOND, and the fallback for unexpected values
+  return TimeUnit::SECOND;
+}
+
static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) {
switch (type) {
@@ -183,6 +216,11 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
case flatbuf::Type_Binary:
*out = binary();
return Status::OK();
+ case flatbuf::Type_FixedWidthBinary: {
+ auto fw_binary = static_cast<const flatbuf::FixedWidthBinary*>(type_data);
+ *out = fixed_width_binary(fw_binary->byteWidth());
+ return Status::OK();
+ }
case flatbuf::Type_Utf8:
*out = utf8();
return Status::OK();
@@ -190,7 +228,22 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
*out = boolean();
return Status::OK();
case flatbuf::Type_Decimal:
- case flatbuf::Type_Timestamp:
+ return Status::NotImplemented("Decimal");
+ case flatbuf::Type_Date:
+ *out = date();
+ return Status::OK();
+ case flatbuf::Type_Time: {
+ auto time_type = static_cast<const flatbuf::Time*>(type_data);
+ *out = time(FromFlatbufferUnit(time_type->unit()));
+ return Status::OK();
+ }
+ case flatbuf::Type_Timestamp: {
+ auto ts_type = static_cast<const flatbuf::Timestamp*>(type_data);
+ *out = timestamp(FromFlatbufferUnit(ts_type->unit()));
+ return Status::OK();
+ }
+ case flatbuf::Type_Interval:
+ return Status::NotImplemented("Interval");
case flatbuf::Type_List:
if (children.size() != 1) {
return Status::Invalid("List must have exactly 1 child field");
@@ -275,6 +328,11 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
*out_type = flatbuf::Type_FloatingPoint;
*offset = FloatToFlatbuffer(fbb, flatbuf::Precision_DOUBLE);
break;
+ case Type::FIXED_WIDTH_BINARY: {
+ const auto& fw_type = static_cast<const FixedWidthBinaryType&>(*type);
+ *out_type = flatbuf::Type_FixedWidthBinary;
+ *offset = flatbuf::CreateFixedWidthBinary(fbb, fw_type.byte_width()).Union();
+ } break;
case Type::BINARY:
*out_type = flatbuf::Type_Binary;
*offset = flatbuf::CreateBinary(fbb).Union();
@@ -283,6 +341,20 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
*out_type = flatbuf::Type_Utf8;
*offset = flatbuf::CreateUtf8(fbb).Union();
break;
+ case Type::DATE:
+ *out_type = flatbuf::Type_Date;
+ *offset = flatbuf::CreateDate(fbb).Union();
+ break;
+ case Type::TIME: {
+ const auto& time_type = static_cast<const TimeType&>(*type);
+ *out_type = flatbuf::Type_Time;
+ *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit)).Union();
+ } break;
+ case Type::TIMESTAMP: {
+ const auto& ts_type = static_cast<const TimestampType&>(*type);
+ *out_type = flatbuf::Type_Timestamp;
+ *offset = flatbuf::CreateTimestamp(fbb, ToFlatbufferUnit(ts_type.unit)).Union();
+ } break;
case Type::LIST:
*out_type = flatbuf::Type_List;
return ListToFlatbuffer(fbb, type, children, dictionary_memo, offset);
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index 7f33aba..66a5e09 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -463,30 +463,42 @@ Status MakeDictionaryFlat(std::shared_ptr<RecordBatch>* out) {
return Status::OK();
}
-Status MakeDates(std::shared_ptr<RecordBatch>* out) {
+Status MakeDate(std::shared_ptr<RecordBatch>* out) {
std::vector<bool> is_valid = {true, true, true, false, true, true, true};
- auto f0 = field("f0", date32());
auto f1 = field("f1", date());
- std::shared_ptr<Schema> schema(new Schema({f0, f1}));
+ std::shared_ptr<Schema> schema(new Schema({f1}));
std::vector<int64_t> date_values = {1489269000000, 1489270000000, 1489271000000,
1489272000000, 1489272000000, 1489273000000};
- std::vector<int32_t> date32_values = {0, 1, 2, 3, 4, 5, 6};
- std::shared_ptr<Array> date_array, date32_array;
+ std::shared_ptr<Array> date_array;
ArrayFromVector<DateType, int64_t>(is_valid, date_values, &date_array);
- ArrayFromVector<Date32Type, int32_t>(is_valid, date32_values, &date32_array);
- std::vector<std::shared_ptr<Array>> arrays = {date32_array, date_array};
+ std::vector<std::shared_ptr<Array>> arrays = {date_array};
*out = std::make_shared<RecordBatch>(schema, date_array->length(), arrays);
return Status::OK();
}
+// Produces a record batch with a single date32 column "f0" holding seven
+// slots, the fourth of which is null.
+Status MakeDate32(std::shared_ptr<RecordBatch>* out) {
+  std::vector<bool> validity = {true, true, true, false, true, true, true};
+  std::vector<int32_t> day_values = {0, 1, 2, 3, 4, 5, 6};
+
+  std::shared_ptr<Array> column;
+  ArrayFromVector<Date32Type, int32_t>(validity, day_values, &column);
+
+  auto date32_field = field("f0", date32());
+  std::shared_ptr<Schema> schema(new Schema({date32_field}));
+
+  std::vector<std::shared_ptr<Array>> columns = {column};
+  *out = std::make_shared<RecordBatch>(schema, column->length(), columns);
+  return Status::OK();
+}
+
Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) {
std::vector<bool> is_valid = {true, true, true, false, true, true, true};
auto f0 = field("f0", timestamp(TimeUnit::MILLI));
auto f1 = field("f1", timestamp(TimeUnit::NANO));
- auto f2 = field("f2", timestamp("US/Los_Angeles", TimeUnit::SECOND));
+ auto f2 = field("f2", timestamp(TimeUnit::SECOND));
std::shared_ptr<Schema> schema(new Schema({f0, f1, f2}));
std::vector<int64_t> ts_values = {1489269000000, 1489270000000, 1489271000000,
@@ -522,6 +534,43 @@ Status MakeTimes(std::shared_ptr<RecordBatch>* out) {
return Status::OK();
}
+// Feeds `values` into `builder` one slot at a time: slot i is appended as
+// values[i] when is_valid[i] is true and as a null otherwise. `is_valid` must
+// have at least as many entries as `values`.
+template <typename BuilderType, typename T>
+void AppendValues(const std::vector<bool>& is_valid, const std::vector<T>& values,
+    BuilderType* builder) {
+  size_t slot = 0;
+  for (const T& value : values) {
+    const bool slot_is_valid = is_valid[slot];
+    ++slot;
+    if (!slot_is_valid) {
+      builder->AppendNull();
+      continue;
+    }
+    builder->Append(value);
+  }
+}
+
+// Produces a record batch with two fixed-width binary columns of four slots
+// each (the last slot is null): "f0" stores 4-byte values and "f1" stores
+// zero-width values.
+Status MakeFWBinary(std::shared_ptr<RecordBatch>* out) {
+  std::vector<bool> is_valid = {true, true, true, false};
+  auto f0 = field("f0", fixed_width_binary(4));
+  auto f1 = field("f1", fixed_width_binary(0));
+  std::shared_ptr<Schema> schema(new Schema({f0, f1}));
+
+  std::shared_ptr<Array> a1, a2;
+
+  // Each builder takes the type of the field it populates. Building "f1" with
+  // f0's 4-byte type would make the array disagree with the schema and would
+  // ask the builder for 4 bytes out of each empty string below.
+  FixedWidthBinaryBuilder b1(default_memory_pool(), f0->type);
+  FixedWidthBinaryBuilder b2(default_memory_pool(), f1->type);
+
+  std::vector<std::string> values1 = {"foo1", "foo2", "foo3", "foo4"};
+  AppendValues(is_valid, values1, &b1);
+
+  std::vector<std::string> values2 = {"", "", "", ""};
+  AppendValues(is_valid, values2, &b2);
+
+  RETURN_NOT_OK(b1.Finish(&a1));
+  RETURN_NOT_OK(b2.Finish(&a2));
+
+  ArrayVector arrays = {a1, a2};
+  *out = std::make_shared<RecordBatch>(schema, a1->length(), arrays);
+  return Status::OK();
+}
+
} // namespace ipc
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/loader.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/loader.cc b/cpp/src/arrow/loader.cc
index 0b3ee1c..fc37371 100644
--- a/cpp/src/arrow/loader.cc
+++ b/cpp/src/arrow/loader.cc
@@ -157,6 +157,18 @@ class ArrayLoader : public TypeVisitor {
Status Visit(const BinaryType& type) override { return LoadBinary<BinaryArray>(); }
+  // Loads a fixed-width binary array: the common field metadata and validity
+  // bitmap first, then the next buffer in the context as the value data.
+  Status Visit(const FixedWidthBinaryType& type) override {
+    FieldMetadata meta;
+    std::shared_ptr<Buffer> validity;
+    RETURN_NOT_OK(LoadCommon(&meta, &validity));
+
+    std::shared_ptr<Buffer> values;
+    const auto values_index = context_->buffer_index++;
+    RETURN_NOT_OK(GetBuffer(values_index, &values));
+
+    result_ = std::make_shared<FixedWidthBinaryArray>(
+        type_, meta.length, values, validity, meta.null_count);
+    return Status::OK();
+  }
+
Status Visit(const ListType& type) override {
FieldMetadata field_meta;
std::shared_ptr<Buffer> null_bitmap, offsets;
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/pretty_print-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc
index aca650f..f21383f 100644
--- a/cpp/src/arrow/pretty_print-test.cc
+++ b/cpp/src/arrow/pretty_print-test.cc
@@ -56,7 +56,7 @@ void CheckPrimitive(int indent, const std::vector<bool>& is_valid,
const std::vector<C_TYPE>& values, const char* expected) {
std::shared_ptr<Array> array;
ArrayFromVector<TYPE, C_TYPE>(is_valid, values, &array);
- CheckArray(*array.get(), indent, expected);
+ CheckArray(*array, indent, expected);
}
TEST_F(TestPrettyPrint, PrimitiveType) {
@@ -71,6 +71,30 @@ TEST_F(TestPrettyPrint, PrimitiveType) {
CheckPrimitive<StringType, std::string>(0, is_valid, values2, ex2);
}
+// Binary values are expected to print as uppercase hex digits, with null
+// slots printed as "null".
+TEST_F(TestPrettyPrint, BinaryType) {
+ std::vector<bool> is_valid = {true, true, false, true, false};
+ std::vector<std::string> values = {"foo", "bar", "", "baz", ""};
+ static const char* ex = R"expected([666F6F, 626172, null, 62617A, null])expected";
+ CheckPrimitive<BinaryType, std::string>(0, is_valid, values, ex);
+}
+
+// Three 3-byte values (no nulls) are expected to print as uppercase hex.
+TEST_F(TestPrettyPrint, FixedWidthBinaryType) {
+  std::vector<std::string> values = {"foo", "bar", "baz"};
+  static const char* ex = R"expected([666F6F, 626172, 62617A])expected";
+
+  std::shared_ptr<Array> array;
+  auto type = fixed_width_binary(3);
+  FixedWidthBinaryBuilder builder(default_memory_pool(), type);
+
+  // Fail the test right away if the builder reports an error instead of
+  // silently printing a partially built array.
+  for (const auto& value : values) {
+    ASSERT_TRUE(builder.Append(value).ok());
+  }
+  ASSERT_TRUE(builder.Finish(&array).ok());
+
+  CheckArray(*array, 0, ex);
+}
+
TEST_F(TestPrettyPrint, DictionaryType) {
std::vector<bool> is_valid = {true, true, false, true, true, true};
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/pretty_print.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 2508fa5..87c1a1c 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -26,6 +26,7 @@
#include "arrow/table.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
+#include "arrow/util/string.h"
namespace arrow {
@@ -66,9 +67,9 @@ class ArrayPrinter : public ArrayVisitor {
}
}
- // String (Utf8), Binary
+ // String (Utf8)
template <typename T>
- typename std::enable_if<std::is_base_of<BinaryArray, T>::value, void>::type
+ typename std::enable_if<std::is_same<StringArray, T>::value, void>::type
WriteDataValues(const T& array) {
int32_t length;
for (int i = 0; i < array.length(); ++i) {
@@ -82,6 +83,37 @@ class ArrayPrinter : public ArrayVisitor {
}
}
+ // Binary
+  template <typename T>
+  typename std::enable_if<std::is_same<BinaryArray, T>::value, void>::type
+  WriteDataValues(const T& array) {
+    // Comma-separated slots; a non-null slot is written as the hex encoding
+    // of its bytes.
+    int32_t num_bytes;
+    for (int i = 0; i < array.length(); ++i) {
+      if (i != 0) { (*sink_) << ", "; }
+      if (array.IsNull(i)) {
+        Write("null");
+        continue;
+      }
+      // Fetch the value in its own statement so num_bytes is set before it is
+      // read by HexEncode.
+      const char* bytes = reinterpret_cast<const char*>(array.GetValue(i, &num_bytes));
+      (*sink_) << HexEncode(bytes, num_bytes);
+    }
+  }
+
+  // Fixed-width binary: comma-separated slots; every non-null slot is written
+  // as the hex encoding of byte_width() bytes.
+  template <typename T>
+  typename std::enable_if<std::is_same<FixedWidthBinaryArray, T>::value, void>::type
+  WriteDataValues(const T& array) {
+    const int32_t byte_width = array.byte_width();
+    for (int i = 0; i < array.length(); ++i) {
+      if (i != 0) { (*sink_) << ", "; }
+      if (array.IsNull(i)) {
+        Write("null");
+        continue;
+      }
+      const char* bytes = reinterpret_cast<const char*>(array.GetValue(i));
+      (*sink_) << HexEncode(bytes, byte_width);
+    }
+  }
+
template <typename T>
typename std::enable_if<std::is_base_of<BooleanArray, T>::value, void>::type
WriteDataValues(const T& array) {
@@ -100,15 +132,7 @@ class ArrayPrinter : public ArrayVisitor {
void CloseArray() { (*sink_) << "]"; }
template <typename T>
- Status WritePrimitive(const T& array) {
- OpenArray();
- WriteDataValues(array);
- CloseArray();
- return Status::OK();
- }
-
- template <typename T>
- Status WriteVarBytes(const T& array) {
+ Status WriteArray(const T& array) {
OpenArray();
WriteDataValues(array);
CloseArray();
@@ -117,39 +141,41 @@ class ArrayPrinter : public ArrayVisitor {
Status Visit(const NullArray& array) override { return Status::OK(); }
- Status Visit(const BooleanArray& array) override { return WritePrimitive(array); }
+ Status Visit(const BooleanArray& array) override { return WriteArray(array); }
+
+ Status Visit(const Int8Array& array) override { return WriteArray(array); }
- Status Visit(const Int8Array& array) override { return WritePrimitive(array); }
+ Status Visit(const Int16Array& array) override { return WriteArray(array); }
- Status Visit(const Int16Array& array) override { return WritePrimitive(array); }
+ Status Visit(const Int32Array& array) override { return WriteArray(array); }
- Status Visit(const Int32Array& array) override { return WritePrimitive(array); }
+ Status Visit(const Int64Array& array) override { return WriteArray(array); }
- Status Visit(const Int64Array& array) override { return WritePrimitive(array); }
+ Status Visit(const UInt8Array& array) override { return WriteArray(array); }
- Status Visit(const UInt8Array& array) override { return WritePrimitive(array); }
+ Status Visit(const UInt16Array& array) override { return WriteArray(array); }
- Status Visit(const UInt16Array& array) override { return WritePrimitive(array); }
+ Status Visit(const UInt32Array& array) override { return WriteArray(array); }
- Status Visit(const UInt32Array& array) override { return WritePrimitive(array); }
+ Status Visit(const UInt64Array& array) override { return WriteArray(array); }
- Status Visit(const UInt64Array& array) override { return WritePrimitive(array); }
+ Status Visit(const HalfFloatArray& array) override { return WriteArray(array); }
- Status Visit(const HalfFloatArray& array) override { return WritePrimitive(array); }
+ Status Visit(const FloatArray& array) override { return WriteArray(array); }
- Status Visit(const FloatArray& array) override { return WritePrimitive(array); }
+ Status Visit(const DoubleArray& array) override { return WriteArray(array); }
- Status Visit(const DoubleArray& array) override { return WritePrimitive(array); }
+ Status Visit(const StringArray& array) override { return WriteArray(array); }
- Status Visit(const StringArray& array) override { return WriteVarBytes(array); }
+ Status Visit(const BinaryArray& array) override { return WriteArray(array); }
- Status Visit(const BinaryArray& array) override { return WriteVarBytes(array); }
+ Status Visit(const FixedWidthBinaryArray& array) override { return WriteArray(array); }
- Status Visit(const DateArray& array) override { return WritePrimitive(array); }
+ Status Visit(const DateArray& array) override { return WriteArray(array); }
- Status Visit(const Date32Array& array) override { return WritePrimitive(array); }
+ Status Visit(const Date32Array& array) override { return WriteArray(array); }
- Status Visit(const TimeArray& array) override { return WritePrimitive(array); }
+ Status Visit(const TimeArray& array) override { return WriteArray(array); }
Status Visit(const TimestampArray& array) override {
return Status::NotImplemented("timestamp");
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/type-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index 3adc4d8..ddfff87 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -121,6 +121,58 @@ TEST_F(TestSchema, GetFieldByName) {
ASSERT_TRUE(result == nullptr);
}
+// BinaryType equals another BinaryType but not a StringType; it reports
+// Type::BINARY and prints as "binary".
+TEST(TestBinaryType, ToString) {
+ BinaryType t1;
+ BinaryType e1;
+ StringType t2;
+ EXPECT_TRUE(t1.Equals(e1));
+ EXPECT_FALSE(t1.Equals(t2));
+ ASSERT_EQ(t1.type, Type::BINARY);
+ ASSERT_EQ(t1.ToString(), std::string("binary"));
+}
+
+// StringType reports Type::STRING and prints as "string".
+TEST(TestStringType, ToString) {
+ StringType str;
+ ASSERT_EQ(str.type, Type::STRING);
+ ASSERT_EQ(str.ToString(), std::string("string"));
+}
+
+// The printed form of a fixed-width binary type includes its byte width.
+TEST(TestFixedWidthBinaryType, ToString) {
+ auto t = fixed_width_binary(10);
+ ASSERT_EQ(t->type, Type::FIXED_WIDTH_BINARY);
+ ASSERT_EQ("fixed_width_binary[10]", t->ToString());
+}
+
+// Types with the same byte width compare equal; different widths do not.
+TEST(TestFixedWidthBinaryType, Equals) {
+ auto t1 = fixed_width_binary(10);
+ auto t2 = fixed_width_binary(10);
+ auto t3 = fixed_width_binary(3);
+
+ ASSERT_TRUE(t1->Equals(t1));
+ ASSERT_TRUE(t1->Equals(t2));
+ ASSERT_FALSE(t1->Equals(t3));
+}
+
+// Checks the type id, name, string form and value type of ListType,
+// including a list nested inside another list.
+TEST(TestListType, Basics) {
+  std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
+
+  ListType list_type(vt);
+  ASSERT_EQ(list_type.type, Type::LIST);
+
+  ASSERT_EQ("list", list_type.name());
+  ASSERT_EQ("list<item: uint8>", list_type.ToString());
+
+  // Asserted once; the original repeated this identical check on two
+  // consecutive lines.
+  ASSERT_EQ(list_type.value_type()->type, vt->type);
+
+  std::shared_ptr<DataType> st = std::make_shared<StringType>();
+  std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
+  ASSERT_EQ("list<item: string>", lt->ToString());
+
+  ListType lt2(lt);
+  ASSERT_EQ("list<item: list<item: string>>", lt2.ToString());
+}
+
TEST(TestTimeType, Equals) {
TimeType t1;
TimeType t2;
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index d41b363..ee0a89a 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -88,6 +88,16 @@ std::string BinaryType::ToString() const {
return std::string("binary");
}
+// Width of one value in bits: eight times the byte width.
+int FixedWidthBinaryType::bit_width() const {
+  const int bits_per_byte = 8;
+  return bits_per_byte * byte_width();
+}
+
+// Renders the type as "fixed_width_binary[<byte width>]".
+std::string FixedWidthBinaryType::ToString() const {
+  std::stringstream out;
+  out << "fixed_width_binary[";
+  out << byte_width_;
+  out << "]";
+  return out.str();
+}
+
std::string StructType::ToString() const {
std::stringstream s;
s << "struct<";
@@ -189,6 +199,7 @@ std::string NullType::ToString() const {
ACCEPT_VISITOR(NullType);
ACCEPT_VISITOR(BooleanType);
ACCEPT_VISITOR(BinaryType);
+ACCEPT_VISITOR(FixedWidthBinaryType);
ACCEPT_VISITOR(StringType);
ACCEPT_VISITOR(ListType);
ACCEPT_VISITOR(StructType);
@@ -225,6 +236,10 @@ TYPE_FACTORY(binary, BinaryType);
TYPE_FACTORY(date, DateType);
TYPE_FACTORY(date32, Date32Type);
+// Factory: creates a FixedWidthBinaryType whose values are byte_width bytes.
+std::shared_ptr<DataType> fixed_width_binary(int32_t byte_width) {
+  std::shared_ptr<DataType> fw_type =
+      std::make_shared<FixedWidthBinaryType>(byte_width);
+  return fw_type;
+}
+
std::shared_ptr<DataType> timestamp(TimeUnit unit) {
return std::make_shared<TimestampType>(unit);
}
@@ -285,6 +300,10 @@ std::vector<BufferDescr> BinaryType::GetBufferLayout() const {
return {kValidityBuffer, kOffsetBuffer, kValues8};
}
+// Layout: a validity buffer followed by one data buffer whose bit width is
+// eight times the byte width.
+std::vector<BufferDescr> FixedWidthBinaryType::GetBufferLayout() const {
+  const BufferDescr values_descr(BufferType::DATA, byte_width_ * 8);
+  return {kValidityBuffer, values_descr};
+}
+
std::vector<BufferDescr> ListType::GetBufferLayout() const {
return {kValidityBuffer, kOffsetBuffer};
}
@@ -335,6 +354,7 @@ TYPE_VISITOR_DEFAULT(FloatType);
TYPE_VISITOR_DEFAULT(DoubleType);
TYPE_VISITOR_DEFAULT(StringType);
TYPE_VISITOR_DEFAULT(BinaryType);
+TYPE_VISITOR_DEFAULT(FixedWidthBinaryType);
TYPE_VISITOR_DEFAULT(DateType);
TYPE_VISITOR_DEFAULT(Date32Type);
TYPE_VISITOR_DEFAULT(TimeType);
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 9f28875..a143d79 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -68,6 +68,9 @@ struct Type {
// Variable-length bytes (no guarantee of UTF8-ness)
BINARY,
+ // Fixed-width binary. Each value occupies the same number of bytes
+ FIXED_WIDTH_BINARY,
+
// int64_t milliseconds since the UNIX epoch
DATE,
@@ -135,6 +138,7 @@ class ARROW_EXPORT TypeVisitor {
virtual Status Visit(const DoubleType& type);
virtual Status Visit(const StringType& type);
virtual Status Visit(const BinaryType& type);
+ virtual Status Visit(const FixedWidthBinaryType& type);
virtual Status Visit(const DateType& type);
virtual Status Visit(const Date32Type& type);
virtual Status Visit(const TimeType& type);
@@ -347,7 +351,7 @@ struct ARROW_EXPORT ListType : public DataType, public NoExtraMeta {
std::vector<BufferDescr> GetBufferLayout() const override;
};
-// BinaryType type is reprsents lists of 1-byte values.
+// BinaryType represents lists of 1-byte values.
struct ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
static constexpr Type::type type_id = Type::BINARY;
@@ -364,7 +368,27 @@ struct ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
};
-// UTF encoded strings
+// FixedWidthBinaryType represents binary values that each occupy the same fixed number of bytes (byte_width).
+// Data type carrying a single parameter: the number of bytes per value.
+class ARROW_EXPORT FixedWidthBinaryType : public FixedWidthType {
+ public:
+ static constexpr Type::type type_id = Type::FIXED_WIDTH_BINARY;
+
+ // byte_width: number of bytes occupied by each value.
+ explicit FixedWidthBinaryType(int32_t byte_width)
+ : FixedWidthType(Type::FIXED_WIDTH_BINARY), byte_width_(byte_width) {}
+
+ Status Accept(TypeVisitor* visitor) const override;
+ std::string ToString() const override;
+
+ std::vector<BufferDescr> GetBufferLayout() const override;
+
+ // Returns the width passed to the constructor.
+ int32_t byte_width() const { return byte_width_; }
+ int bit_width() const override;
+
+ protected:
+ // Bytes per value; set once in the constructor.
+ int32_t byte_width_;
+};
+
+// UTF-8 encoded strings
struct ARROW_EXPORT StringType : public BinaryType {
static constexpr Type::type type_id = Type::STRING;
@@ -571,6 +595,8 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType {
// ----------------------------------------------------------------------
// Factory functions
+std::shared_ptr<DataType> ARROW_EXPORT fixed_width_binary(int32_t byte_width);
+
std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<Field>& value_type);
std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& value_type);
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/type_fwd.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index e53afe1..7fc36c4 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -48,6 +48,10 @@ struct BinaryType;
class BinaryArray;
class BinaryBuilder;
+class FixedWidthBinaryType;
+class FixedWidthBinaryArray;
+class FixedWidthBinaryBuilder;
+
struct StringType;
class StringArray;
class StringBuilder;
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/type_traits.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 91461da..242e59d 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -228,6 +228,13 @@ struct TypeTraits<BinaryType> {
static inline std::shared_ptr<DataType> type_singleton() { return binary(); }
};
+// Traits for FixedWidthBinaryType. The type is marked as not parameter-free,
+// and no type_singleton() is declared here.
+template <>
+struct TypeTraits<FixedWidthBinaryType> {
+ using ArrayType = FixedWidthBinaryArray;
+ using BuilderType = FixedWidthBinaryBuilder;
+ constexpr static bool is_parameter_free = false;
+};
+
// Not all type classes have a c_type
template <typename T>
struct as_void {
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/util/io-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/io-util.h b/cpp/src/arrow/util/io-util.h
index 9f26456..34bee18 100644
--- a/cpp/src/arrow/util/io-util.h
+++ b/cpp/src/arrow/util/io-util.h
@@ -18,9 +18,12 @@
#ifndef ARROW_UTIL_IO_UTIL_H
#define ARROW_UTIL_IO_UTIL_H
-#include "arrow/buffer.h"
#include <iostream>
+#include "arrow/buffer.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/status.h"
+
namespace arrow {
namespace io {
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/cpp/src/arrow/util/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
new file mode 100644
index 0000000..5d9fdc8
--- /dev/null
+++ b/cpp/src/arrow/util/string.h
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_STRING_UTIL_H
+#define ARROW_UTIL_STRING_UTIL_H
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+#include "arrow/status.h"
+
+namespace arrow {
+
+// Uppercase base16 digits in ascending ASCII order. ParseHexValue binary-searches
+// this table, so the ordering must be preserved.
+static const char* kAsciiTable = "0123456789ABCDEF";
+
+// Encode `length` bytes starting at `data` as an uppercase hexadecimal string
+// with two digits per input byte. A non-positive length yields an empty string.
+static inline std::string HexEncode(const char* data, int32_t length) {
+  std::string hex_string;
+  if (length <= 0) { return hex_string; }
+  // Widen before doubling so that large lengths cannot overflow int32_t
+  hex_string.reserve(static_cast<size_t>(length) * 2);
+  for (int32_t j = 0; j < length; ++j) {
+    // Plain char may be signed: bytes >= 0x80 would be negative and index
+    // outside of the table, so reinterpret each byte as unsigned first
+    const uint8_t byte = static_cast<uint8_t>(data[j]);
+    // Convert to 2 base16 digits
+    hex_string.push_back(kAsciiTable[byte >> 4]);
+    hex_string.push_back(kAsciiTable[byte & 15]);
+  }
+  return hex_string;
+}
+
+// Decode the two hexadecimal digits data[0] and data[1] into one byte written to
+// *out. Only the digits present in kAsciiTable (0-9, A-F) are accepted;
+// anything else returns Status::Invalid and leaves *out untouched.
+static inline Status ParseHexValue(const char* data, uint8_t* out) {
+  const char c1 = data[0];
+  const char c2 = data[1];
+
+  const char* pos1 = std::lower_bound(kAsciiTable, kAsciiTable + 16, c1);
+  const char* pos2 = std::lower_bound(kAsciiTable, kAsciiTable + 16, c2);
+
+  // Error checking. For characters above 'F' lower_bound returns the end of the
+  // range, which is the literal's NUL terminator and therefore never matches
+  if (*pos1 != c1 || *pos2 != c2) { return Status::Invalid("Encountered non-hex digit"); }
+
+  *out = static_cast<uint8_t>((pos1 - kAsciiTable) << 4 | (pos2 - kAsciiTable));
+  return Status::OK();
+}
+
+} // namespace arrow
+
+#endif // ARROW_UTIL_STRING_UTIL_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/c8d15d46/format/Message.fbs
----------------------------------------------------------------------
diff --git a/format/Message.fbs b/format/Message.fbs
index 86dfa87..fb3478d 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -68,6 +68,11 @@ table Utf8 {
table Binary {
}
+table FixedWidthBinary {  // listed as a member of the Type union in this schema
+ /// Number of bytes per value (FlatBuffers `int`, i.e. a 32-bit signed integer)
+ byteWidth: int;
+}
+
table Bool {
}
@@ -113,7 +118,8 @@ union Type {
Interval,
List,
Struct_,
- Union
+ Union,
+ FixedWidthBinary
}
/// ----------------------------------------------------------------------