You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/26 00:52:21 UTC
arrow git commit: ARROW-37: [C++ / Python] Implement BooleanArray and
BooleanBuilder. Handle Python built-in bool
Repository: arrow
Updated Branches:
refs/heads/master c06b7654b -> 0a8979d3a
ARROW-37: [C++ / Python] Implement BooleanArray and BooleanBuilder. Handle Python built-in bool
This patch implements `arrow::Type::BOOL` in bit-packed form.
ARROW-59 (Support for boolean in Python lists) is also addressed here.
Author: Wes McKinney <we...@apache.org>
Closes #40 from wesm/ARROW-37 and squashes the following commits:
f6b60f1 [Wes McKinney] Clean up template instantiations to fix clang
2d3b855 [Wes McKinney] Use gcc-4.9 to build thirdparty, too
74f704b [Wes McKinney] Try to fix clang issues, make some PrimitiveBuilder methods protected
dc8d0a4 [Wes McKinney] Fix up pyarrow per C++ API changes. Add boolean builtin conversion / API support
2299a6c [Wes McKinney] Install logging.h
2dae07e [Wes McKinney] cpplint
0f55344 [Wes McKinney] Initialize memory to 0 after PoolBuffer::Resize to avoid boolean bit setting issues
83527a9 [Wes McKinney] Draft BooleanBuilder and arrange to share common code between normal numeric builders and boolean builder
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0a8979d3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0a8979d3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0a8979d3
Branch: refs/heads/master
Commit: 0a8979d3ab80374eb4f84d08e060bcec70174c73
Parents: c06b765
Author: Wes McKinney <we...@apache.org>
Authored: Fri Mar 25 16:52:11 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Fri Mar 25 16:52:11 2016 -0700
----------------------------------------------------------------------
.travis.yml | 4 +-
cpp/src/arrow/api.h | 1 -
cpp/src/arrow/array-test.cc | 5 +-
cpp/src/arrow/builder.cc | 7 +
cpp/src/arrow/builder.h | 2 +
cpp/src/arrow/test-util.h | 31 ++-
cpp/src/arrow/type.h | 15 +-
cpp/src/arrow/types/CMakeLists.txt | 1 -
cpp/src/arrow/types/boolean.h | 32 ---
cpp/src/arrow/types/construct.cc | 3 +-
cpp/src/arrow/types/list-test.cc | 5 +-
cpp/src/arrow/types/list.h | 11 +-
cpp/src/arrow/types/primitive-test.cc | 214 ++++++++++-----
cpp/src/arrow/types/primitive.cc | 179 ++++++++++++-
cpp/src/arrow/types/primitive.h | 302 ++++++++++++++--------
cpp/src/arrow/types/string-test.cc | 2 +-
cpp/src/arrow/util/CMakeLists.txt | 3 +-
cpp/src/arrow/util/bit-util.cc | 14 +-
cpp/src/arrow/util/bit-util.h | 13 +-
python/pyarrow/includes/libarrow.pxd | 3 +
python/pyarrow/scalar.pyx | 6 +-
python/pyarrow/tests/test_convert_builtin.py | 5 +-
python/pyarrow/tests/test_scalars.py | 39 +--
python/src/pyarrow/adapters/builtin.cc | 26 +-
24 files changed, 643 insertions(+), 280 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 49a956e..d89a200 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,10 +20,10 @@ matrix:
language: cpp
os: linux
before_script:
- - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
- script:
- export CC="gcc-4.9"
- export CXX="g++-4.9"
+ - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
+ script:
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh
- compiler: clang
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 7be7f88..2ae80f6 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -27,7 +27,6 @@
#include "arrow/table.h"
#include "arrow/type.h"
-#include "arrow/types/boolean.h"
#include "arrow/types/construct.h"
#include "arrow/types/list.h"
#include "arrow/types/primitive.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 7c6eaf5..121b802 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -71,8 +71,7 @@ TEST_F(TestArray, TestIsNull) {
if (x == 0) ++null_count;
}
- std::shared_ptr<Buffer> null_buf = test::bytes_to_null_buffer(null_bitmap.data(),
- null_bitmap.size());
+ std::shared_ptr<Buffer> null_buf = test::bytes_to_null_buffer(null_bitmap);
std::unique_ptr<Array> arr;
arr.reset(new Int32Array(null_bitmap.size(), nullptr, null_count, null_buf));
@@ -82,7 +81,7 @@ TEST_F(TestArray, TestIsNull) {
ASSERT_TRUE(arr->null_bitmap()->Equals(*null_buf.get()));
for (size_t i = 0; i < null_bitmap.size(); ++i) {
- EXPECT_EQ(static_cast<bool>(null_bitmap[i]), !arr->IsNull(i)) << i;
+ EXPECT_EQ(null_bitmap[i], !arr->IsNull(i)) << i;
}
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 6a62dc3..4061f35 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -54,5 +54,12 @@ Status ArrayBuilder::Advance(int32_t elements) {
return Status::OK();
}
+Status ArrayBuilder::Reserve(int32_t elements) {
+ if (length_ + elements > capacity_) {
+ int32_t new_capacity = util::next_power2(length_ + elements);
+ return Resize(new_capacity);
+ }
+ return Status::OK();
+}
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 308e54c..d1a49dc 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -69,6 +69,8 @@ class ArrayBuilder {
// Resizes the null_bitmap array
Status Resize(int32_t new_bits);
+ Status Reserve(int32_t extra_bits);
+
// For cases where raw data was memcpy'd into the internal buffers, allows us
// to advance the length of the builder. It is your responsibility to use
// this function responsibly.
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index ea3ce5f..b2bce26 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -99,27 +99,26 @@ void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
template <typename T>
+void random_real(int n, uint32_t seed, T min_value, T max_value,
+ std::vector<T>* out) {
+ std::mt19937 gen(seed);
+ std::uniform_real_distribution<T> d(min_value, max_value);
+ for (int i = 0; i < n; ++i) {
+ out->push_back(d(gen));
+ }
+}
+
+
+template <typename T>
std::shared_ptr<Buffer> to_buffer(const std::vector<T>& values) {
return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(values.data()),
values.size() * sizeof(T));
}
-void random_null_bitmap(int64_t n, double pct_null, std::vector<uint8_t>* null_bitmap) {
- Random rng(random_seed());
- for (int i = 0; i < n; ++i) {
- if (rng.NextDoubleFraction() > pct_null) {
- null_bitmap->push_back(1);
- } else {
- // null
- null_bitmap->push_back(0);
- }
- }
-}
-
-void random_null_bitmap(int64_t n, double pct_null, std::vector<bool>* null_bitmap) {
+void random_null_bitmap(int64_t n, double pct_null, uint8_t* null_bitmap) {
Random rng(random_seed());
for (int i = 0; i < n; ++i) {
- null_bitmap->push_back(rng.NextDoubleFraction() > pct_null);
+ null_bitmap[i] = rng.NextDoubleFraction() > pct_null;
}
}
@@ -160,11 +159,11 @@ static inline int null_count(const std::vector<uint8_t>& valid_bytes) {
return result;
}
-std::shared_ptr<Buffer> bytes_to_null_buffer(uint8_t* bytes, int length) {
+std::shared_ptr<Buffer> bytes_to_null_buffer(const std::vector<uint8_t>& bytes) {
std::shared_ptr<Buffer> out;
// TODO(wesm): error checking
- util::bytes_to_bits(bytes, length, &out);
+ util::bytes_to_bits(bytes, &out);
return out;
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 5984b67..86e4779 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -132,6 +132,10 @@ struct DataType {
return children_.size();
}
+ virtual int value_size() const {
+ return -1;
+ }
+
virtual std::string ToString() const = 0;
};
@@ -191,11 +195,14 @@ inline std::string PrimitiveType<Derived>::ToString() const {
#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
typedef C_TYPE c_type; \
static constexpr Type::type type_enum = Type::ENUM; \
- static constexpr int size = SIZE; \
\
TYPENAME() \
: PrimitiveType<TYPENAME>() {} \
\
+ virtual int value_size() const { \
+ return SIZE; \
+ } \
+ \
static const char* name() { \
return NAME; \
}
@@ -295,6 +302,12 @@ struct StructType : public DataType {
std::string ToString() const override;
};
+// These will be defined elsewhere
+template <typename T>
+struct type_traits {
+};
+
+
} // namespace arrow
#endif // ARROW_TYPE_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 595b3be..f3e4128 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -21,7 +21,6 @@
# Headers: top level
install(FILES
- boolean.h
collection.h
construct.h
datetime.h
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
deleted file mode 100644
index 1cb91f9..0000000
--- a/cpp/src/arrow/types/boolean.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_BOOLEAN_H
-#define ARROW_TYPES_BOOLEAN_H
-
-#include "arrow/types/primitive.h"
-
-namespace arrow {
-
-// typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
-
-class BooleanBuilder : public ArrayBuilder {
-};
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_BOOLEAN_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index df2317c..34647a5 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -51,7 +51,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(UINT64, UInt64Builder);
BUILDER_CASE(INT64, Int64Builder);
- // BUILDER_CASE(BOOL, BooleanBuilder);
+ BUILDER_CASE(BOOL, BooleanBuilder);
BUILDER_CASE(FLOAT, FloatBuilder);
BUILDER_CASE(DOUBLE, DoubleBuilder);
@@ -83,6 +83,7 @@ Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap,
std::shared_ptr<Array>* out) {
switch (type->type) {
+ MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray);
MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index eb55ca8..4eb560e 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -116,11 +116,14 @@ TEST_F(TestListBuilder, TestBasics) {
Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+ EXPECT_OK(builder_->Reserve(lengths.size()));
+ EXPECT_OK(vb->Reserve(values.size()));
+
int pos = 0;
for (size_t i = 0; i < lengths.size(); ++i) {
ASSERT_OK(builder_->Append(is_null[i] > 0));
for (int j = 0; j < lengths[i]; ++j) {
- ASSERT_OK(vb->Append(values[pos++]));
+ vb->Append(values[pos++]);
}
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 72e20e9..8073b51 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -116,7 +116,8 @@ class ListBuilder : public Int32Builder {
int32_t new_capacity = util::next_power2(length_ + length);
RETURN_NOT_OK(Resize(new_capacity));
}
- memcpy(raw_buffer() + length_, values, length * elsize_);
+ memcpy(raw_data_ + length_, values,
+ type_traits<Int32Type>::bytes_required(length));
if (valid_bytes != nullptr) {
AppendNulls(valid_bytes, length);
@@ -132,13 +133,13 @@ class ListBuilder : public Int32Builder {
// Add final offset if the length is non-zero
if (length_) {
- raw_buffer()[length_] = items->length();
+ raw_data_[length_] = items->length();
}
- auto result = std::make_shared<Container>(type_, length_, values_, items,
+ auto result = std::make_shared<Container>(type_, length_, data_, items,
null_count_, null_bitmap_);
- values_ = null_bitmap_ = nullptr;
+ data_ = null_bitmap_ = nullptr;
capacity_ = length_ = null_count_ = 0;
return result;
@@ -162,7 +163,7 @@ class ListBuilder : public Int32Builder {
} else {
util::set_bit(null_bitmap_data_, length_);
}
- raw_buffer()[length_++] = value_builder_->length();
+ raw_data_[length_++] = value_builder_->length();
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 10ba113..761845d 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -69,11 +69,11 @@ PRIMITIVE_TEST(BooleanType, BOOL, "bool");
// ----------------------------------------------------------------------
// Primitive type tests
-TEST_F(TestBuilder, TestResize) {
+TEST_F(TestBuilder, TestReserve) {
builder_->Init(10);
ASSERT_EQ(2, builder_->null_bitmap()->size());
- builder_->Resize(30);
+ builder_->Reserve(30);
ASSERT_EQ(4, builder_->null_bitmap()->size());
}
@@ -83,8 +83,9 @@ class TestPrimitiveBuilder : public TestBuilder {
typedef typename Attrs::ArrayType ArrayType;
typedef typename Attrs::BuilderType BuilderType;
typedef typename Attrs::T T;
+ typedef typename Attrs::Type Type;
- void SetUp() {
+ virtual void SetUp() {
TestBuilder::SetUp();
type_ = Attrs::type();
@@ -99,58 +100,44 @@ class TestPrimitiveBuilder : public TestBuilder {
void RandomData(int N, double pct_null = 0.1) {
Attrs::draw(N, &draws_);
- test::random_null_bitmap(N, pct_null, &valid_bytes_);
+
+ valid_bytes_.resize(N);
+ test::random_null_bitmap(N, pct_null, valid_bytes_.data());
}
- void CheckNullable() {
- int size = builder_->length();
+ void Check(const std::shared_ptr<BuilderType>& builder, bool nullable) {
+ int size = builder->length();
- auto ex_data = std::make_shared<Buffer>(
- reinterpret_cast<uint8_t*>(draws_.data()),
+ auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
size * sizeof(T));
- auto ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_.data(), size);
- int32_t ex_null_count = test::null_count(valid_bytes_);
+ std::shared_ptr<Buffer> ex_null_bitmap;
+ int32_t ex_null_count = 0;
+
+ if (nullable) {
+ ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+ ex_null_count = test::null_count(valid_bytes_);
+ } else {
+ ex_null_bitmap = nullptr;
+ }
auto expected = std::make_shared<ArrayType>(size, ex_data, ex_null_count,
ex_null_bitmap);
-
std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
- builder_->Finish());
+ builder->Finish());
// Builder is now reset
- ASSERT_EQ(0, builder_->length());
- ASSERT_EQ(0, builder_->capacity());
- ASSERT_EQ(0, builder_->null_count());
- ASSERT_EQ(nullptr, builder_->buffer());
+ ASSERT_EQ(0, builder->length());
+ ASSERT_EQ(0, builder->capacity());
+ ASSERT_EQ(0, builder->null_count());
+ ASSERT_EQ(nullptr, builder->data());
ASSERT_EQ(ex_null_count, result->null_count());
ASSERT_TRUE(result->EqualsExact(*expected.get()));
}
- void CheckNonNullable() {
- int size = builder_nn_->length();
-
- auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
- size * sizeof(T));
-
- auto expected = std::make_shared<ArrayType>(size, ex_data);
-
- std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
- builder_nn_->Finish());
-
- // Builder is now reset
- ASSERT_EQ(0, builder_nn_->length());
- ASSERT_EQ(0, builder_nn_->capacity());
- ASSERT_EQ(nullptr, builder_nn_->buffer());
-
- ASSERT_TRUE(result->EqualsExact(*expected.get()));
- ASSERT_EQ(0, result->null_count());
- }
-
protected:
- TypePtr type_;
- TypePtr type_nn_;
+ std::shared_ptr<DataType> type_;
shared_ptr<BuilderType> builder_;
shared_ptr<BuilderType> builder_nn_;
@@ -158,14 +145,14 @@ class TestPrimitiveBuilder : public TestBuilder {
vector<uint8_t> valid_bytes_;
};
-#define PTYPE_DECL(CapType, c_type) \
- typedef CapType##Array ArrayType; \
- typedef CapType##Builder BuilderType; \
- typedef CapType##Type Type; \
- typedef c_type T; \
- \
- static TypePtr type() { \
- return TypePtr(new Type()); \
+#define PTYPE_DECL(CapType, c_type) \
+ typedef CapType##Array ArrayType; \
+ typedef CapType##Builder BuilderType; \
+ typedef CapType##Type Type; \
+ typedef c_type T; \
+ \
+ static std::shared_ptr<DataType> type() { \
+ return std::shared_ptr<DataType>(new Type()); \
}
#define PINT_DECL(CapType, c_type, LOWER, UPPER) \
@@ -176,6 +163,14 @@ class TestPrimitiveBuilder : public TestBuilder {
} \
}
+#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \
+ struct P##CapType { \
+ PTYPE_DECL(CapType, c_type); \
+ static void draw(int N, vector<T>* draws) { \
+ test::random_real<T>(N, 0, LOWER, UPPER, draws); \
+ } \
+ }
+
PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX);
PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX);
PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX);
@@ -186,25 +181,89 @@ PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX);
PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX);
PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX);
-typedef ::testing::Types<PUInt8, PUInt16, PUInt32, PUInt64,
- PInt8, PInt16, PInt32, PInt64> Primitives;
+PFLOAT_DECL(Float, float, -1000, 1000);
+PFLOAT_DECL(Double, double, -1000, 1000);
+
+struct PBoolean {
+ PTYPE_DECL(Boolean, uint8_t);
+};
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::RandomData(int N, double pct_null) {
+ draws_.resize(N);
+ valid_bytes_.resize(N);
+
+ test::random_null_bitmap(N, 0.5, draws_.data());
+ test::random_null_bitmap(N, pct_null, valid_bytes_.data());
+}
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::Check(
+ const std::shared_ptr<BooleanBuilder>& builder, bool nullable) {
+ int size = builder->length();
+
+ auto ex_data = test::bytes_to_null_buffer(draws_);
+
+ std::shared_ptr<Buffer> ex_null_bitmap;
+ int32_t ex_null_count = 0;
+
+ if (nullable) {
+ ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+ ex_null_count = test::null_count(valid_bytes_);
+ } else {
+ ex_null_bitmap = nullptr;
+ }
+
+ auto expected = std::make_shared<BooleanArray>(size, ex_data, ex_null_count,
+ ex_null_bitmap);
+ std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(
+ builder->Finish());
+
+ // Builder is now reset
+ ASSERT_EQ(0, builder->length());
+ ASSERT_EQ(0, builder->capacity());
+ ASSERT_EQ(0, builder->null_count());
+ ASSERT_EQ(nullptr, builder->data());
+
+ ASSERT_EQ(ex_null_count, result->null_count());
+
+ ASSERT_EQ(expected->length(), result->length());
+
+ for (int i = 0; i < result->length(); ++i) {
+ if (nullable) {
+ ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i;
+ }
+ bool actual = util::get_bit(result->raw_data(), i);
+ ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
+ }
+ ASSERT_TRUE(result->EqualsExact(*expected.get()));
+}
+
+typedef ::testing::Types<PBoolean,
+ PUInt8, PUInt16, PUInt32, PUInt64,
+ PInt8, PInt16, PInt32, PInt64,
+ PFloat, PDouble> Primitives;
TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
#define DECL_T() \
typedef typename TestFixture::T T;
+#define DECL_TYPE() \
+ typedef typename TestFixture::Type Type;
+
#define DECL_ARRAYTYPE() \
typedef typename TestFixture::ArrayType ArrayType;
TYPED_TEST(TestPrimitiveBuilder, TestInit) {
- DECL_T();
+ DECL_TYPE();
int n = 1000;
- ASSERT_OK(this->builder_->Init(n));
- ASSERT_EQ(n, this->builder_->capacity());
- ASSERT_EQ(n * sizeof(T), this->builder_->buffer()->size());
+ ASSERT_OK(this->builder_->Reserve(n));
+ ASSERT_EQ(util::next_power2(n), this->builder_->capacity());
+ ASSERT_EQ(util::next_power2(type_traits<Type>::bytes_required(n)),
+ this->builder_->data()->size());
// unsure if this should go in all builder classes
ASSERT_EQ(0, this->builder_->num_children());
@@ -235,12 +294,14 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
this->RandomData(size);
+ this->builder_->Reserve(size);
+
int i;
for (i = 0; i < size; ++i) {
if (valid_bytes[i] > 0) {
- ASSERT_OK(this->builder_->Append(draws[i]));
+ this->builder_->Append(draws[i]);
} else {
- ASSERT_OK(this->builder_->AppendNull());
+ this->builder_->AppendNull();
}
}
@@ -261,31 +322,41 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
this->RandomData(size);
+ this->builder_->Reserve(1000);
+ this->builder_nn_->Reserve(1000);
+
int i;
+ int null_count = 0;
// Append the first 1000
for (i = 0; i < 1000; ++i) {
if (valid_bytes[i] > 0) {
- ASSERT_OK(this->builder_->Append(draws[i]));
+ this->builder_->Append(draws[i]);
} else {
- ASSERT_OK(this->builder_->AppendNull());
+ this->builder_->AppendNull();
+ ++null_count;
}
- ASSERT_OK(this->builder_nn_->Append(draws[i]));
+ this->builder_nn_->Append(draws[i]);
}
+ ASSERT_EQ(null_count, this->builder_->null_count());
+
ASSERT_EQ(1000, this->builder_->length());
ASSERT_EQ(1024, this->builder_->capacity());
ASSERT_EQ(1000, this->builder_nn_->length());
ASSERT_EQ(1024, this->builder_nn_->capacity());
+ this->builder_->Reserve(size - 1000);
+ this->builder_nn_->Reserve(size - 1000);
+
// Append the next 9000
for (i = 1000; i < size; ++i) {
if (valid_bytes[i] > 0) {
- ASSERT_OK(this->builder_->Append(draws[i]));
+ this->builder_->Append(draws[i]);
} else {
- ASSERT_OK(this->builder_->AppendNull());
+ this->builder_->AppendNull();
}
- ASSERT_OK(this->builder_nn_->Append(draws[i]));
+ this->builder_nn_->Append(draws[i]);
}
ASSERT_EQ(size, this->builder_->length());
@@ -294,8 +365,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
ASSERT_EQ(size, this->builder_nn_->length());
ASSERT_EQ(util::next_power2(size), this->builder_nn_->capacity());
- this->CheckNullable();
- this->CheckNonNullable();
+ this->Check(this->builder_, true);
+ this->Check(this->builder_nn_, false);
}
@@ -327,31 +398,34 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
ASSERT_EQ(size, this->builder_->length());
ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
- this->CheckNullable();
- this->CheckNonNullable();
+ this->Check(this->builder_, true);
+ this->Check(this->builder_nn_, false);
}
TYPED_TEST(TestPrimitiveBuilder, TestAdvance) {
int n = 1000;
- ASSERT_OK(this->builder_->Init(n));
+ ASSERT_OK(this->builder_->Reserve(n));
ASSERT_OK(this->builder_->Advance(100));
ASSERT_EQ(100, this->builder_->length());
ASSERT_OK(this->builder_->Advance(900));
- ASSERT_RAISES(Invalid, this->builder_->Advance(1));
+
+ int too_many = this->builder_->capacity() - 1000 + 1;
+ ASSERT_RAISES(Invalid, this->builder_->Advance(too_many));
}
TYPED_TEST(TestPrimitiveBuilder, TestResize) {
- DECL_T();
+ DECL_TYPE();
int cap = MIN_BUILDER_CAPACITY * 2;
- ASSERT_OK(this->builder_->Resize(cap));
+ ASSERT_OK(this->builder_->Reserve(cap));
ASSERT_EQ(cap, this->builder_->capacity());
- ASSERT_EQ(cap * sizeof(T), this->builder_->buffer()->size());
- ASSERT_EQ(util::ceil_byte(cap) / 8, this->builder_->null_bitmap()->size());
+ ASSERT_EQ(type_traits<Type>::bytes_required(cap), this->builder_->data()->size());
+ ASSERT_EQ(util::bytes_for_bits(cap),
+ this->builder_->null_bitmap()->size());
}
TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index ecd5d68..c54d075 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -20,20 +20,20 @@
#include <memory>
#include "arrow/util/buffer.h"
+#include "arrow/util/logging.h"
namespace arrow {
// ----------------------------------------------------------------------
// Primitive array base
-PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length, int value_size,
+PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& data,
int32_t null_count,
const std::shared_ptr<Buffer>& null_bitmap) :
Array(type, length, null_count, null_bitmap) {
data_ = data;
raw_data_ = data == nullptr? nullptr : data_->data();
- value_size_ = value_size;
}
bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
@@ -52,12 +52,15 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
const uint8_t* this_data = raw_data_;
const uint8_t* other_data = other.raw_data_;
+ int value_size = type_->value_size();
+ DCHECK_GT(value_size, 0);
+
for (int i = 0; i < length_; ++i) {
- if (!IsNull(i) && memcmp(this_data, other_data, value_size_)) {
+ if (!IsNull(i) && memcmp(this_data, other_data, value_size)) {
return false;
}
- this_data += value_size_;
- other_data += value_size_;
+ this_data += value_size;
+ other_data += value_size;
}
return true;
} else {
@@ -73,4 +76,170 @@ bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
}
+template <typename T>
+Status PrimitiveBuilder<T>::Init(int32_t capacity) {
+ RETURN_NOT_OK(ArrayBuilder::Init(capacity));
+ data_ = std::make_shared<PoolBuffer>(pool_);
+
+ int64_t nbytes = type_traits<T>::bytes_required(capacity);
+ RETURN_NOT_OK(data_->Resize(nbytes));
+ memset(data_->mutable_data(), 0, nbytes);
+
+ raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
+ return Status::OK();
+}
+
+template <typename T>
+Status PrimitiveBuilder<T>::Resize(int32_t capacity) {
+ // XXX: Set floor size for now
+ if (capacity < MIN_BUILDER_CAPACITY) {
+ capacity = MIN_BUILDER_CAPACITY;
+ }
+
+ if (capacity_ == 0) {
+ RETURN_NOT_OK(Init(capacity));
+ } else {
+ RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
+
+ int64_t old_bytes = data_->size();
+ int64_t new_bytes = type_traits<T>::bytes_required(capacity);
+ RETURN_NOT_OK(data_->Resize(new_bytes));
+ raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
+
+ memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
+ }
+ capacity_ = capacity;
+ return Status::OK();
+}
+
+template <typename T>
+Status PrimitiveBuilder<T>::Reserve(int32_t elements) {
+ if (length_ + elements > capacity_) {
+ int32_t new_capacity = util::next_power2(length_ + elements);
+ return Resize(new_capacity);
+ }
+ return Status::OK();
+}
+
+template <typename T>
+Status PrimitiveBuilder<T>::Append(const value_type* values, int32_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(PrimitiveBuilder<T>::Reserve(length));
+
+ if (length > 0) {
+ memcpy(raw_data_ + length_, values, type_traits<T>::bytes_required(length));
+ }
+
+ if (valid_bytes != nullptr) {
+ PrimitiveBuilder<T>::AppendNulls(valid_bytes, length);
+ } else {
+ for (int i = 0; i < length; ++i) {
+ util::set_bit(null_bitmap_data_, length_ + i);
+ }
+ }
+
+ length_ += length;
+ return Status::OK();
+}
+
+template <typename T>
+void PrimitiveBuilder<T>::AppendNulls(const uint8_t* valid_bytes, int32_t length) {
+ // If valid_bytes is all not null, then none of the values are null
+ for (int i = 0; i < length; ++i) {
+ if (valid_bytes[i] == 0) {
+ ++null_count_;
+ } else {
+ util::set_bit(null_bitmap_data_, length_ + i);
+ }
+ }
+}
+
+template <typename T>
+std::shared_ptr<Array> PrimitiveBuilder<T>::Finish() {
+ std::shared_ptr<Array> result = std::make_shared<
+ typename type_traits<T>::ArrayType>(
+ type_, length_, data_, null_count_, null_bitmap_);
+
+ data_ = null_bitmap_ = nullptr;
+ capacity_ = length_ = null_count_ = 0;
+ return result;
+}
+
+template <>
+Status PrimitiveBuilder<BooleanType>::Append(const uint8_t* values, int32_t length,
+ const uint8_t* valid_bytes) {
+ RETURN_NOT_OK(Reserve(length));
+
+ for (int i = 0; i < length; ++i) {
+ if (values[i] > 0) {
+ util::set_bit(raw_data_, length_ + i);
+ } else {
+ util::clear_bit(raw_data_, length_ + i);
+ }
+ }
+
+ if (valid_bytes != nullptr) {
+ PrimitiveBuilder<BooleanType>::AppendNulls(valid_bytes, length);
+ } else {
+ for (int i = 0; i < length; ++i) {
+ util::set_bit(null_bitmap_data_, length_ + i);
+ }
+ }
+ length_ += length;
+ return Status::OK();
+}
+
+template class PrimitiveBuilder<UInt8Type>;
+template class PrimitiveBuilder<UInt16Type>;
+template class PrimitiveBuilder<UInt32Type>;
+template class PrimitiveBuilder<UInt64Type>;
+template class PrimitiveBuilder<Int8Type>;
+template class PrimitiveBuilder<Int16Type>;
+template class PrimitiveBuilder<Int32Type>;
+template class PrimitiveBuilder<Int64Type>;
+template class PrimitiveBuilder<FloatType>;
+template class PrimitiveBuilder<DoubleType>;
+template class PrimitiveBuilder<BooleanType>;
+
+BooleanArray::BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count,
+ const std::shared_ptr<Buffer>& null_bitmap) :
+ PrimitiveArray(std::make_shared<BooleanType>(), length,
+ data, null_count, null_bitmap) {}
+
+bool BooleanArray::EqualsExact(const BooleanArray& other) const {
+ if (this == &other) return true;
+ if (null_count_ != other.null_count_) {
+ return false;
+ }
+
+ if (null_count_ > 0) {
+ bool equal_bitmap = null_bitmap_->Equals(*other.null_bitmap_,
+ util::bytes_for_bits(length_));
+ if (!equal_bitmap) {
+ return false;
+ }
+
+ const uint8_t* this_data = raw_data_;
+ const uint8_t* other_data = other.raw_data_;
+
+ for (int i = 0; i < length_; ++i) {
+ if (!IsNull(i) && util::get_bit(this_data, i) != util::get_bit(other_data, i)) {
+ return false;
+ }
+ }
+ return true;
+ } else {
+ return data_->Equals(*other.data_, util::bytes_for_bits(length_));
+ }
+}
+
+bool BooleanArray::Equals(const std::shared_ptr<Array>& arr) const {
+ if (this == arr.get()) return true;
+ if (Type::BOOL != arr->type_enum()) {
+ return false;
+ }
+ return EqualsExact(*static_cast<const BooleanArray*>(arr.get()));
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 4eaff43..ec6fee3 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -21,6 +21,7 @@
#include <cstdint>
#include <cstring>
#include <memory>
+#include <vector>
#include "arrow/array.h"
#include "arrow/builder.h"
@@ -37,7 +38,7 @@ class MemoryPool;
// Base class for fixed-size logical types
class PrimitiveArray : public Array {
public:
- PrimitiveArray(const TypePtr& type, int32_t length, int value_size,
+ PrimitiveArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& data,
int32_t null_count = 0,
const std::shared_ptr<Buffer>& null_bitmap = nullptr);
@@ -51,25 +52,19 @@ class PrimitiveArray : public Array {
protected:
std::shared_ptr<Buffer> data_;
const uint8_t* raw_data_;
- int value_size_;
};
#define NUMERIC_ARRAY_DECL(NAME, TypeClass, T) \
class NAME : public PrimitiveArray { \
public: \
using value_type = T; \
- NAME(const TypePtr& type, int32_t length, \
- const std::shared_ptr<Buffer>& data, \
- int32_t null_count = 0, \
- const std::shared_ptr<Buffer>& null_bitmap = nullptr) : \
- PrimitiveArray(std::make_shared<TypeClass>(), length, \
- sizeof(T), data, null_count, null_bitmap) {} \
+ using PrimitiveArray::PrimitiveArray; \
\
NAME(int32_t length, const std::shared_ptr<Buffer>& data, \
int32_t null_count = 0, \
const std::shared_ptr<Buffer>& null_bitmap = nullptr) : \
PrimitiveArray(std::make_shared<TypeClass>(), length, \
- sizeof(T), data, null_count, null_bitmap) {} \
+ data, null_count, null_bitmap) {} \
\
bool EqualsExact(const NAME& other) const { \
return PrimitiveArray::EqualsExact( \
@@ -96,148 +91,241 @@ NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t);
NUMERIC_ARRAY_DECL(FloatArray, FloatType, float);
NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double);
-template <typename Type, typename ArrayType>
+template <typename Type>
class PrimitiveBuilder : public ArrayBuilder {
public:
typedef typename Type::c_type value_type;
explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) :
ArrayBuilder(pool, type),
- values_(nullptr) {
- elsize_ = sizeof(value_type);
- }
+ data_(nullptr) {}
virtual ~PrimitiveBuilder() {}
- Status Resize(int32_t capacity) {
- // XXX: Set floor size for now
- if (capacity < MIN_BUILDER_CAPACITY) {
- capacity = MIN_BUILDER_CAPACITY;
- }
-
- if (capacity_ == 0) {
- RETURN_NOT_OK(Init(capacity));
- } else {
- RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
- RETURN_NOT_OK(values_->Resize(capacity * elsize_));
- }
- capacity_ = capacity;
- return Status::OK();
- }
-
- Status Init(int32_t capacity) {
- RETURN_NOT_OK(ArrayBuilder::Init(capacity));
- values_ = std::make_shared<PoolBuffer>(pool_);
- return values_->Resize(capacity * elsize_);
- }
-
- Status Reserve(int32_t elements) {
- if (length_ + elements > capacity_) {
- int32_t new_capacity = util::next_power2(length_ + elements);
- return Resize(new_capacity);
- }
- return Status::OK();
- }
+ using ArrayBuilder::Advance;
- Status Advance(int32_t elements) {
- return ArrayBuilder::Advance(elements);
- }
+ // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
+ void AppendNulls(const uint8_t* valid_bytes, int32_t length);
- // Scalar append
- Status Append(value_type val) {
+ Status AppendNull() {
if (length_ == capacity_) {
// If the capacity was not already a multiple of 2, do so here
RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
}
- util::set_bit(null_bitmap_data_, length_);
- raw_buffer()[length_++] = val;
+ ++null_count_;
+ ++length_;
return Status::OK();
}
+ std::shared_ptr<Buffer> data() const {
+ return data_;
+ }
+
// Vector append
//
// If passed, valid_bytes is of equal length to values, and any zero byte
// will be considered as a null for that slot
Status Append(const value_type* values, int32_t length,
- const uint8_t* valid_bytes = nullptr) {
- if (length_ + length > capacity_) {
- int32_t new_capacity = util::next_power2(length_ + length);
- RETURN_NOT_OK(Resize(new_capacity));
- }
- if (length > 0) {
- memcpy(raw_buffer() + length_, values, length * elsize_);
- }
+ const uint8_t* valid_bytes = nullptr);
- if (valid_bytes != nullptr) {
- AppendNulls(valid_bytes, length);
- } else {
- for (int i = 0; i < length; ++i) {
- util::set_bit(null_bitmap_data_, length_ + i);
- }
- }
+ // Ensure that builder can accommodate an additional number of
+ // elements. Resizes if the current capacity is not sufficient
+ Status Reserve(int32_t elements);
- length_ += length;
- return Status::OK();
+ std::shared_ptr<Array> Finish() override;
+
+ protected:
+ std::shared_ptr<PoolBuffer> data_;
+ value_type* raw_data_;
+
+ Status Init(int32_t capacity);
+
+ // Increase the capacity of the builder to accommodate at least the indicated
+ // number of elements
+ Status Resize(int32_t capacity);
+};
+
+template <typename T>
+class NumericBuilder : public PrimitiveBuilder<T> {
+ public:
+ using typename PrimitiveBuilder<T>::value_type;
+ using PrimitiveBuilder<T>::PrimitiveBuilder;
+
+ using PrimitiveBuilder<T>::Append;
+
+ // Scalar append. Does not capacity-check; make sure to call Reserve beforehand
+ void Append(value_type val) {
+ util::set_bit(null_bitmap_data_, length_);
+ raw_data_[length_++] = val;
}
- // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
- void AppendNulls(const uint8_t* valid_bytes, int32_t length) {
- // If valid_bytes is all not null, then none of the values are null
- for (int i = 0; i < length; ++i) {
- if (valid_bytes[i] == 0) {
- ++null_count_;
- } else {
- util::set_bit(null_bitmap_data_, length_ + i);
- }
- }
+ protected:
+ using PrimitiveBuilder<T>::length_;
+ using PrimitiveBuilder<T>::null_bitmap_data_;
+ using PrimitiveBuilder<T>::raw_data_;
+
+ using PrimitiveBuilder<T>::Init;
+ using PrimitiveBuilder<T>::Resize;
+};
+
+template <>
+struct type_traits<UInt8Type> {
+ typedef UInt8Array ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements;
}
+};
- Status AppendNull() {
- if (length_ == capacity_) {
- // If the capacity was not already a multiple of 2, do so here
- RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
- }
- ++null_count_;
- ++length_;
- return Status::OK();
+template <>
+struct type_traits<Int8Type> {
+ typedef Int8Array ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements;
}
+};
- std::shared_ptr<Array> Finish() override {
- std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>(
- type_, length_, values_, null_count_, null_bitmap_);
+template <>
+struct type_traits<UInt16Type> {
+ typedef UInt16Array ArrayType;
- values_ = null_bitmap_ = nullptr;
- capacity_ = length_ = null_count_ = 0;
- return result;
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(uint16_t);
}
+};
+
+template <>
+struct type_traits<Int16Type> {
+ typedef Int16Array ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(int16_t);
+ }
+};
- value_type* raw_buffer() {
- return reinterpret_cast<value_type*>(values_->mutable_data());
+template <>
+struct type_traits<UInt32Type> {
+ typedef UInt32Array ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(uint32_t);
}
+};
- std::shared_ptr<Buffer> buffer() const {
- return values_;
+template <>
+struct type_traits<Int32Type> {
+ typedef Int32Array ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(int32_t);
}
+};
- protected:
- std::shared_ptr<PoolBuffer> values_;
- int elsize_;
+template <>
+struct type_traits<UInt64Type> {
+ typedef UInt64Array ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(uint64_t);
+ }
+};
+
+template <>
+struct type_traits<Int64Type> {
+ typedef Int64Array ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(int64_t);
+ }
+};
+template <>
+struct type_traits<FloatType> {
+ typedef FloatArray ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(float);
+ }
+};
+
+template <>
+struct type_traits<DoubleType> {
+ typedef DoubleArray ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return elements * sizeof(double);
+ }
};
// Builders
-typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
-typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
-typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
-typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
+typedef NumericBuilder<UInt8Type> UInt8Builder;
+typedef NumericBuilder<UInt16Type> UInt16Builder;
+typedef NumericBuilder<UInt32Type> UInt32Builder;
+typedef NumericBuilder<UInt64Type> UInt64Builder;
+
+typedef NumericBuilder<Int8Type> Int8Builder;
+typedef NumericBuilder<Int16Type> Int16Builder;
+typedef NumericBuilder<Int32Type> Int32Builder;
+typedef NumericBuilder<Int64Type> Int64Builder;
+
+typedef NumericBuilder<FloatType> FloatBuilder;
+typedef NumericBuilder<DoubleType> DoubleBuilder;
+
-typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
-typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
-typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
-typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
+class BooleanArray : public PrimitiveArray {
+ public:
+ using PrimitiveArray::PrimitiveArray;
+
+ BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count = 0,
+ const std::shared_ptr<Buffer>& null_bitmap = nullptr);
+
+ bool EqualsExact(const BooleanArray& other) const;
+ bool Equals(const std::shared_ptr<Array>& arr) const override;
+
+ const uint8_t* raw_data() const {
+ return reinterpret_cast<const uint8_t*>(raw_data_);
+ }
+
+ bool Value(int i) const {
+ return util::get_bit(raw_data(), i);
+ }
+};
+
+template <>
+struct type_traits<BooleanType> {
+ typedef BooleanArray ArrayType;
+
+ static inline int bytes_required(int elements) {
+ return util::bytes_for_bits(elements);
+ }
+};
+
+class BooleanBuilder : public PrimitiveBuilder<BooleanType> {
+ public:
+ explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type) :
+ PrimitiveBuilder<BooleanType>(pool, type) {}
+
+ virtual ~BooleanBuilder() {}
+
+ using PrimitiveBuilder<BooleanType>::Append;
+
+ // Scalar append
+ void Append(bool val) {
+ util::set_bit(null_bitmap_data_, length_);
+ if (val) {
+ util::set_bit(raw_data_, length_);
+ } else {
+ util::clear_bit(raw_data_, length_);
+ }
+ ++length_;
+ }
-typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
-typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
+ void Append(uint8_t val) {
+ Append(static_cast<bool>(val));
+ }
+};
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index b329b4f..d3a4cc3 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -92,7 +92,7 @@ class TestStringContainer : public ::testing::Test {
offsets_buf_ = test::to_buffer(offsets_);
- null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_.data(), valid_bytes_.size());
+ null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
null_count_ = test::null_count(valid_bytes_);
strings_ = std::make_shared<StringArray>(length_, offsets_buf_, values_,
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index fed05e3..d2a4b09 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -23,6 +23,7 @@
install(FILES
bit-util.h
buffer.h
+ logging.h
macros.h
memory-pool.h
status.h
@@ -59,7 +60,7 @@ if (ARROW_BUILD_BENCHMARKS)
)
else()
target_link_libraries(arrow_benchmark_main
- benchmark
+ benchmark
pthread
)
endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/util/bit-util.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index 292cb33..6c6d533 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -16,6 +16,7 @@
// under the License.
#include <cstring>
+#include <vector>
#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
@@ -23,25 +24,24 @@
namespace arrow {
-void util::bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits) {
- for (int i = 0; i < length; ++i) {
- if (static_cast<bool>(bytes[i])) {
+void util::bytes_to_bits(const std::vector<uint8_t>& bytes, uint8_t* bits) {
+ for (size_t i = 0; i < bytes.size(); ++i) {
+ if (bytes[i] > 0) {
set_bit(bits, i);
}
}
}
-Status util::bytes_to_bits(uint8_t* bytes, int length,
+Status util::bytes_to_bits(const std::vector<uint8_t>& bytes,
std::shared_ptr<Buffer>* out) {
- int bit_length = ceil_byte(length) / 8;
+ int bit_length = util::bytes_for_bits(bytes.size());
auto buffer = std::make_shared<PoolBuffer>();
RETURN_NOT_OK(buffer->Resize(bit_length));
memset(buffer->mutable_data(), 0, bit_length);
- bytes_to_bits(bytes, length, buffer->mutable_data());
+ bytes_to_bits(bytes, buffer->mutable_data());
*out = buffer;
-
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 08222d5..8d62871 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -20,6 +20,7 @@
#include <cstdint>
#include <memory>
+#include <vector>
namespace arrow {
@@ -43,15 +44,19 @@ static inline int64_t ceil_2bytes(int64_t size) {
static constexpr uint8_t BITMASK[] = {1, 2, 4, 8, 16, 32, 64, 128};
static inline bool get_bit(const uint8_t* bits, int i) {
- return bits[i / 8] & BITMASK[i % 8];
+ return static_cast<bool>(bits[i / 8] & BITMASK[i % 8]);
}
static inline bool bit_not_set(const uint8_t* bits, int i) {
return (bits[i / 8] & BITMASK[i % 8]) == 0;
}
+static inline void clear_bit(uint8_t* bits, int i) {
+ bits[i / 8] &= ~BITMASK[i % 8];
+}
+
static inline void set_bit(uint8_t* bits, int i) {
- bits[i / 8] |= 1 << (i % 8);
+ bits[i / 8] |= BITMASK[i % 8];
}
static inline int64_t next_power2(int64_t n) {
@@ -66,8 +71,8 @@ static inline int64_t next_power2(int64_t n) {
return n;
}
-void bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits);
-Status bytes_to_bits(uint8_t*, int, std::shared_ptr<Buffer>*);
+void bytes_to_bits(const std::vector<uint8_t>& bytes, uint8_t* bits);
+Status bytes_to_bits(const std::vector<uint8_t>&, std::shared_ptr<Buffer>*);
} // namespace util
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index e6afcbd..943a08f 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -86,6 +86,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool IsNull(int i)
+ cdef cppclass CBooleanArray" arrow::BooleanArray"(CArray):
+ c_bool Value(int i)
+
cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
uint8_t Value(int i)
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 04f013d..0d391e5 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -58,7 +58,10 @@ cdef class ArrayValue(Scalar):
cdef class BooleanValue(ArrayValue):
- pass
+
+ def as_py(self):
+ cdef CBooleanArray* ap = <CBooleanArray*> self.sp_array.get()
+ return ap.Value(self.index)
cdef class Int8Value(ArrayValue):
@@ -172,6 +175,7 @@ cdef class ListValue(ArrayValue):
cdef dict _scalar_classes = {
+ Type_BOOL: BooleanValue,
Type_UINT8: Int8Value,
Type_UINT16: Int16Value,
Type_UINT32: Int32Value,
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 25f6969..2beb6b3 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -22,7 +22,10 @@ import pyarrow
class TestConvertList(unittest.TestCase):
def test_boolean(self):
- pass
+ arr = pyarrow.from_pylist([True, None, False, None])
+ assert len(arr) == 4
+ assert arr.null_count == 2
+ assert arr.type == pyarrow.bool_()
def test_empty_list(self):
arr = pyarrow.from_pylist([])
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 021737d..4fb850a 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -16,67 +16,74 @@
# under the License.
from pyarrow.compat import unittest, u
-import pyarrow as arrow
+import pyarrow as A
class TestScalars(unittest.TestCase):
def test_null_singleton(self):
with self.assertRaises(Exception):
- arrow.NAType()
+ A.NAType()
def test_bool(self):
- pass
+ arr = A.from_pylist([True, None, False, None])
+
+ v = arr[0]
+ assert isinstance(v, A.BooleanValue)
+ assert repr(v) == "True"
+ assert v.as_py() == True
+
+ assert arr[1] is A.NA
def test_int64(self):
- arr = arrow.from_pylist([1, 2, None])
+ arr = A.from_pylist([1, 2, None])
v = arr[0]
- assert isinstance(v, arrow.Int64Value)
+ assert isinstance(v, A.Int64Value)
assert repr(v) == "1"
assert v.as_py() == 1
- assert arr[2] is arrow.NA
+ assert arr[2] is A.NA
def test_double(self):
- arr = arrow.from_pylist([1.5, None, 3])
+ arr = A.from_pylist([1.5, None, 3])
v = arr[0]
- assert isinstance(v, arrow.DoubleValue)
+ assert isinstance(v, A.DoubleValue)
assert repr(v) == "1.5"
assert v.as_py() == 1.5
- assert arr[1] is arrow.NA
+ assert arr[1] is A.NA
v = arr[2]
assert v.as_py() == 3.0
def test_string(self):
- arr = arrow.from_pylist(['foo', None, u('bar')])
+ arr = A.from_pylist(['foo', None, u('bar')])
v = arr[0]
- assert isinstance(v, arrow.StringValue)
+ assert isinstance(v, A.StringValue)
assert repr(v) == "'foo'"
assert v.as_py() == 'foo'
- assert arr[1] is arrow.NA
+ assert arr[1] is A.NA
v = arr[2].as_py()
assert v == 'bar'
assert isinstance(v, str)
def test_list(self):
- arr = arrow.from_pylist([['foo', None], None, ['bar'], []])
+ arr = A.from_pylist([['foo', None], None, ['bar'], []])
v = arr[0]
assert len(v) == 2
- assert isinstance(v, arrow.ListValue)
+ assert isinstance(v, A.ListValue)
assert repr(v) == "['foo', None]"
assert v.as_py() == ['foo', None]
assert v[0].as_py() == 'foo'
- assert v[1] is arrow.NA
+ assert v[1] is A.NA
- assert arr[1] is arrow.NA
+ assert arr[1] is A.NA
v = arr[3]
assert len(v) == 0
http://git-wip-us.apache.org/repos/asf/arrow/blob/0a8979d3/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
index acb13ac..78ef1b3 100644
--- a/python/src/pyarrow/adapters/builtin.cc
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -61,6 +61,8 @@ class ScalarVisitor {
++total_count_;
if (obj == Py_None) {
++none_count_;
+ } else if (PyBool_Check(obj)) {
+ ++bool_count_;
} else if (PyFloat_Check(obj)) {
++float_count_;
} else if (IsPyInteger(obj)) {
@@ -256,6 +258,20 @@ class TypedConverter : public SeqConverter {
class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {
public:
Status AppendData(PyObject* seq) override {
+ Py_ssize_t size = PySequence_Size(seq);
+ RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
+ for (int64_t i = 0; i < size; ++i) {
+ OwnedRef item(PySequence_GetItem(seq, i));
+ if (item.obj() == Py_None) {
+ typed_builder_->AppendNull();
+ } else {
+ if (item.obj() == Py_True) {
+ typed_builder_->Append(true);
+ } else {
+ typed_builder_->Append(false);
+ }
+ }
+ }
return Status::OK();
}
};
@@ -265,14 +281,15 @@ class Int64Converter : public TypedConverter<arrow::Int64Builder> {
Status AppendData(PyObject* seq) override {
int64_t val;
Py_ssize_t size = PySequence_Size(seq);
+ RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
for (int64_t i = 0; i < size; ++i) {
OwnedRef item(PySequence_GetItem(seq, i));
if (item.obj() == Py_None) {
- RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+ typed_builder_->AppendNull();
} else {
val = PyLong_AsLongLong(item.obj());
RETURN_IF_PYERROR();
- RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
+ typed_builder_->Append(val);
}
}
return Status::OK();
@@ -284,14 +301,15 @@ class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
Status AppendData(PyObject* seq) override {
double val;
Py_ssize_t size = PySequence_Size(seq);
+ RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size));
for (int64_t i = 0; i < size; ++i) {
OwnedRef item(PySequence_GetItem(seq, i));
if (item.obj() == Py_None) {
- RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+ typed_builder_->AppendNull();
} else {
val = PyFloat_AsDouble(item.obj());
RETURN_IF_PYERROR();
- RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
+ typed_builder_->Append(val);
}
}
return Status::OK();