You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/03 23:56:05 UTC
arrow git commit: ARROW-20: Add null_count_ member to array
containers, remove nullable_ member
Repository: arrow
Updated Branches:
refs/heads/master e41802085 -> b88b69e20
ARROW-20: Add null_count_ member to array containers, remove nullable_ member
Based off of ARROW-19.
After some contemplation / discussion, I believe it would be better to track nullability at the schema metadata level (if at all!) rather than making it a property of the data structures. This allows the data containers to be "plain ol' data" and thus both nullable data with `null_count == 0` and non-nullable data (implicitly `null_count == 0`) can be treated as semantically equivalent in algorithms code.
If it is deemed useful we can validate (cheaply) that physical data meets the metadata requirements (e.g. non-nullable type metadata cannot be associated with data containers having nulls).
Author: Wes McKinney <we...@apache.org>
Closes #9 from wesm/ARROW-20 and squashes the following commits:
98be016 [Wes McKinney] ARROW-20: Add null_count_ member to Array containers, remove nullable member
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/b88b69e2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/b88b69e2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/b88b69e2
Branch: refs/heads/master
Commit: b88b69e204b59fa8f19cd20dcb6c091fe9bde3a9
Parents: e418020
Author: Wes McKinney <we...@apache.org>
Authored: Thu Mar 3 14:56:31 2016 -0800
Committer: Wes McKinney <we...@apache.org>
Committed: Thu Mar 3 14:56:31 2016 -0800
----------------------------------------------------------------------
cpp/CMakeLists.txt | 2 +-
cpp/src/arrow/array-test.cc | 57 +++++++++---------
cpp/src/arrow/array.cc | 11 ++--
cpp/src/arrow/array.h | 37 ++++++++----
cpp/src/arrow/builder.cc | 35 +++++------
cpp/src/arrow/builder.h | 29 ++++-----
cpp/src/arrow/test-util.h | 10 ++++
cpp/src/arrow/type.h | 12 ++--
cpp/src/arrow/types/collection.h | 2 +-
cpp/src/arrow/types/datetime.h | 12 ++--
cpp/src/arrow/types/json.h | 4 +-
cpp/src/arrow/types/list-test.cc | 12 +---
cpp/src/arrow/types/list.h | 46 ++++++++-------
cpp/src/arrow/types/primitive-test.cc | 34 ++++++-----
cpp/src/arrow/types/primitive.cc | 11 ++--
cpp/src/arrow/types/primitive.h | 95 +++++++++++++++++-------------
cpp/src/arrow/types/string-test.cc | 31 ++++------
cpp/src/arrow/types/string.cc | 2 +-
cpp/src/arrow/types/string.h | 43 +++++++-------
cpp/src/arrow/types/struct-test.cc | 6 +-
cpp/src/arrow/types/struct.h | 5 +-
cpp/src/arrow/types/test-common.h | 4 +-
cpp/src/arrow/types/union.h | 10 ++--
cpp/src/arrow/util/bit-util.cc | 4 +-
cpp/src/arrow/util/bit-util.h | 4 +-
25 files changed, 265 insertions(+), 253 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d2c840a..f0eb73d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -92,7 +92,7 @@ endif()
# For CMAKE_BUILD_TYPE=Release
# -O3: Enable all compiler optimizations
# -g: Enable symbols for profiler tools (TODO: remove for shipping)
-set(CXX_FLAGS_DEBUG "-ggdb")
+set(CXX_FLAGS_DEBUG "-ggdb -O0")
set(CXX_FLAGS_FASTDEBUG "-ggdb -O1")
set(CXX_FLAGS_RELEASE "-O3 -g -DNDEBUG")
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 16afb9b..df827aa 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -20,7 +20,6 @@
#include <cstdint>
#include <cstdlib>
#include <memory>
-#include <string>
#include <vector>
#include "arrow/array.h"
@@ -32,60 +31,60 @@
#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"
-using std::string;
-using std::vector;
-
namespace arrow {
static TypePtr int32 = TypePtr(new Int32Type());
-static TypePtr int32_nn = TypePtr(new Int32Type(false));
-
class TestArray : public ::testing::Test {
public:
void SetUp() {
pool_ = GetDefaultMemoryPool();
-
- auto data = std::make_shared<PoolBuffer>(pool_);
- auto nulls = std::make_shared<PoolBuffer>(pool_);
-
- ASSERT_OK(data->Resize(400));
- ASSERT_OK(nulls->Resize(128));
-
- arr_.reset(new Int32Array(100, data, nulls));
}
protected:
MemoryPool* pool_;
- std::unique_ptr<Int32Array> arr_;
};
-TEST_F(TestArray, TestNullable) {
- std::shared_ptr<Buffer> tmp = arr_->data();
- std::unique_ptr<Int32Array> arr_nn(new Int32Array(100, tmp));
+TEST_F(TestArray, TestNullCount) {
+ auto data = std::make_shared<PoolBuffer>(pool_);
+ auto nulls = std::make_shared<PoolBuffer>(pool_);
- ASSERT_TRUE(arr_->nullable());
- ASSERT_FALSE(arr_nn->nullable());
+ std::unique_ptr<Int32Array> arr(new Int32Array(100, data, 10, nulls));
+ ASSERT_EQ(10, arr->null_count());
+
+ std::unique_ptr<Int32Array> arr_no_nulls(new Int32Array(100, data));
+ ASSERT_EQ(0, arr_no_nulls->null_count());
}
TEST_F(TestArray, TestLength) {
- ASSERT_EQ(arr_->length(), 100);
+ auto data = std::make_shared<PoolBuffer>(pool_);
+ std::unique_ptr<Int32Array> arr(new Int32Array(100, data));
+ ASSERT_EQ(arr->length(), 100);
}
TEST_F(TestArray, TestIsNull) {
- vector<uint8_t> nulls = {1, 0, 1, 1, 0, 1, 0, 0,
- 1, 0, 1, 1, 0, 1, 0, 0,
- 1, 0, 1, 1, 0, 1, 0, 0,
- 1, 0, 1, 1, 0, 1, 0, 0,
- 1, 0, 0, 1};
+ std::vector<uint8_t> nulls = {1, 0, 1, 1, 0, 1, 0, 0,
+ 1, 0, 1, 1, 0, 1, 0, 0,
+ 1, 0, 1, 1, 0, 1, 0, 0,
+ 1, 0, 1, 1, 0, 1, 0, 0,
+ 1, 0, 0, 1};
+ int32_t null_count = 0;
+ for (uint8_t x : nulls) {
+ if (x > 0) ++null_count;
+ }
- std::shared_ptr<Buffer> null_buf = bytes_to_null_buffer(nulls.data(), nulls.size());
+ std::shared_ptr<Buffer> null_buf = bytes_to_null_buffer(nulls.data(),
+ nulls.size());
std::unique_ptr<Array> arr;
- arr.reset(new Array(int32, nulls.size(), null_buf));
+ arr.reset(new Array(int32, nulls.size(), null_count, null_buf));
+
+ ASSERT_EQ(null_count, arr->null_count());
+ ASSERT_EQ(5, null_buf->size());
+
+ ASSERT_TRUE(arr->nulls()->Equals(*null_buf.get()));
- ASSERT_EQ(null_buf->size(), 5);
for (size_t i = 0; i < nulls.size(); ++i) {
ASSERT_EQ(static_cast<bool>(nulls[i]), arr->IsNull(i));
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 1726a2f..ee4ef66 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -17,6 +17,8 @@
#include "arrow/array.h"
+#include <cstdint>
+
#include "arrow/util/buffer.h"
namespace arrow {
@@ -24,18 +26,17 @@ namespace arrow {
// ----------------------------------------------------------------------
// Base array class
-Array::Array(const TypePtr& type, int64_t length,
+Array::Array(const TypePtr& type, int32_t length, int32_t null_count,
const std::shared_ptr<Buffer>& nulls) {
- Init(type, length, nulls);
+ Init(type, length, null_count, nulls);
}
-void Array::Init(const TypePtr& type, int64_t length,
+void Array::Init(const TypePtr& type, int32_t length, int32_t null_count,
const std::shared_ptr<Buffer>& nulls) {
type_ = type;
length_ = length;
+ null_count_ = null_count;
nulls_ = nulls;
-
- nullable_ = type->nullable;
if (nulls_) {
null_bits_ = nulls_->data();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 0eaa28d..3d748c1 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -30,38 +30,49 @@ namespace arrow {
class Buffer;
// Immutable data array with some logical type and some length. Any memory is
-// owned by the respective Buffer instance (or its parents). May or may not be
-// nullable.
+// owned by the respective Buffer instance (or its parents).
//
-// The base class only has a null array (if the data type is nullable)
+// The base class is only required to have a nulls buffer if the null count is
+// greater than 0
//
// Any buffers used to initialize the array have their references "stolen". If
// you wish to use the buffer beyond the lifetime of the array, you need to
// explicitly increment its reference count
class Array {
public:
- Array() : length_(0), nulls_(nullptr), null_bits_(nullptr) {}
- Array(const TypePtr& type, int64_t length,
+ Array() :
+ null_count_(0),
+ length_(0),
+ nulls_(nullptr),
+ null_bits_(nullptr) {}
+
+ Array(const TypePtr& type, int32_t length, int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr);
virtual ~Array() {}
- void Init(const TypePtr& type, int64_t length, const std::shared_ptr<Buffer>& nulls);
+ void Init(const TypePtr& type, int32_t length, int32_t null_count,
+ const std::shared_ptr<Buffer>& nulls);
- // Determine if a slot if null. For inner loops. Does *not* boundscheck
- bool IsNull(int64_t i) const {
- return nullable_ && util::get_bit(null_bits_, i);
+ // Determine if a slot is null. For inner loops. Does *not* boundscheck
+ bool IsNull(int i) const {
+ return null_count_ > 0 && util::get_bit(null_bits_, i);
}
- int64_t length() const { return length_;}
- bool nullable() const { return nullable_;}
+ int32_t length() const { return length_;}
+ int32_t null_count() const { return null_count_;}
+
const TypePtr& type() const { return type_;}
TypeEnum type_enum() const { return type_->type;}
+ const std::shared_ptr<Buffer>& nulls() const {
+ return nulls_;
+ }
+
protected:
TypePtr type_;
- bool nullable_;
- int64_t length_;
+ int32_t null_count_;
+ int32_t length_;
std::shared_ptr<Buffer> nulls_;
const uint8_t* null_bits_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index cb85067..ba70add 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -25,34 +25,29 @@
namespace arrow {
-Status ArrayBuilder::Init(int64_t capacity) {
+Status ArrayBuilder::Init(int32_t capacity) {
capacity_ = capacity;
-
- if (nullable_) {
- int64_t to_alloc = util::ceil_byte(capacity) / 8;
- nulls_ = std::make_shared<PoolBuffer>(pool_);
- RETURN_NOT_OK(nulls_->Resize(to_alloc));
- null_bits_ = nulls_->mutable_data();
- memset(null_bits_, 0, to_alloc);
- }
+ int32_t to_alloc = util::ceil_byte(capacity) / 8;
+ nulls_ = std::make_shared<PoolBuffer>(pool_);
+ RETURN_NOT_OK(nulls_->Resize(to_alloc));
+ null_bits_ = nulls_->mutable_data();
+ memset(null_bits_, 0, to_alloc);
return Status::OK();
}
-Status ArrayBuilder::Resize(int64_t new_bits) {
- if (nullable_) {
- int64_t new_bytes = util::ceil_byte(new_bits) / 8;
- int64_t old_bytes = nulls_->size();
- RETURN_NOT_OK(nulls_->Resize(new_bytes));
- null_bits_ = nulls_->mutable_data();
- if (old_bytes < new_bytes) {
- memset(null_bits_ + old_bytes, 0, new_bytes - old_bytes);
- }
+Status ArrayBuilder::Resize(int32_t new_bits) {
+ int32_t new_bytes = util::ceil_byte(new_bits) / 8;
+ int32_t old_bytes = nulls_->size();
+ RETURN_NOT_OK(nulls_->Resize(new_bytes));
+ null_bits_ = nulls_->mutable_data();
+ if (old_bytes < new_bytes) {
+ memset(null_bits_ + old_bytes, 0, new_bytes - old_bytes);
}
return Status::OK();
}
-Status ArrayBuilder::Advance(int64_t elements) {
- if (nullable_ && length_ + elements > capacity_) {
+Status ArrayBuilder::Advance(int32_t elements) {
+ if (length_ + elements > capacity_) {
return Status::Invalid("Builder must be expanded");
}
length_ += elements;
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 456bb04..491b913 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -32,7 +32,7 @@ class Array;
class MemoryPool;
class PoolBuffer;
-static constexpr int64_t MIN_BUILDER_CAPACITY = 1 << 8;
+static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 8;
// Base class for all data array builders
class ArrayBuilder {
@@ -40,8 +40,9 @@ class ArrayBuilder {
explicit ArrayBuilder(MemoryPool* pool, const TypePtr& type) :
pool_(pool),
type_(type),
- nullable_(type_->nullable),
- nulls_(nullptr), null_bits_(nullptr),
+ nulls_(nullptr),
+ null_count_(0),
+ null_bits_(nullptr),
length_(0),
capacity_(0) {}
@@ -57,21 +58,21 @@ class ArrayBuilder {
return children_.size();
}
- int64_t length() const { return length_;}
- int64_t capacity() const { return capacity_;}
- bool nullable() const { return nullable_;}
+ int32_t length() const { return length_;}
+ int32_t null_count() const { return null_count_;}
+ int32_t capacity() const { return capacity_;}
// Allocates requires memory at this level, but children need to be
// initialized independently
- Status Init(int64_t capacity);
+ Status Init(int32_t capacity);
- // Resizes the nulls array (if nullable)
- Status Resize(int64_t new_bits);
+ // Resizes the nulls array
+ Status Resize(int32_t new_bits);
// For cases where raw data was memcpy'd into the internal buffers, allows us
// to advance the length of the builder. It is your responsibility to use
// this function responsibly.
- Status Advance(int64_t elements);
+ Status Advance(int32_t elements);
const std::shared_ptr<PoolBuffer>& nulls() const { return nulls_;}
@@ -83,15 +84,15 @@ class ArrayBuilder {
MemoryPool* pool_;
TypePtr type_;
- bool nullable_;
- // If the type is not nullable, then null_ is nullptr after initialization
+ // When nulls are first appended to the builder, the null bitmap is allocated
std::shared_ptr<PoolBuffer> nulls_;
+ int32_t null_count_;
uint8_t* null_bits_;
// Array length, so far. Also, the index of the next element to be added
- int64_t length_;
- int64_t capacity_;
+ int32_t length_;
+ int32_t capacity_;
// Child value array builders. These are owned by this class
std::vector<std::unique_ptr<ArrayBuilder> > children_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 2233a4f..0898c8e 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -84,6 +84,16 @@ void random_nulls(int64_t n, double pct_null, std::vector<bool>* nulls) {
}
}
+static inline int null_count(const std::vector<uint8_t>& nulls) {
+ int result = 0;
+ for (size_t i = 0; i < nulls.size(); ++i) {
+ if (nulls[i] > 0) {
+ ++result;
+ }
+ }
+ return result;
+}
+
std::shared_ptr<Buffer> bytes_to_null_buffer(uint8_t* bytes, int length) {
std::shared_ptr<Buffer> out;
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 220f99f..12f1960 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -57,11 +57,9 @@ struct LayoutType {
// either a primitive physical type (bytes or bits of some fixed size), a
// nested type consisting of other data types, or another data type (e.g. a
// timestamp encoded as an int64)
-//
-// Any data type can be nullable
enum class TypeEnum: char {
- // A degerate NULL type represented as 0 bytes/bits
+ // A degenerate NULL type represented as 0 bytes/bits
NA = 0,
// Little-endian integer types
@@ -138,14 +136,12 @@ enum class TypeEnum: char {
struct DataType {
TypeEnum type;
- bool nullable;
- explicit DataType(TypeEnum type, bool nullable = true)
- : type(type), nullable(nullable) {}
+ explicit DataType(TypeEnum type)
+ : type(type) {}
virtual bool Equals(const DataType* other) {
- return (this == other) || (this->type == other->type &&
- this->nullable == other->nullable);
+ return this == other || this->type == other->type;
}
virtual std::string ToString() const = 0;
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/collection.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h
index 59ba614..094b63f 100644
--- a/cpp/src/arrow/types/collection.h
+++ b/cpp/src/arrow/types/collection.h
@@ -29,7 +29,7 @@ template <TypeEnum T>
struct CollectionType : public DataType {
std::vector<TypePtr> child_types_;
- explicit CollectionType(bool nullable = true) : DataType(T, nullable) {}
+ CollectionType() : DataType(T) {}
const TypePtr& child(int i) const {
return child_types_[i];
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h
index b4d6252..d90883c 100644
--- a/cpp/src/arrow/types/datetime.h
+++ b/cpp/src/arrow/types/datetime.h
@@ -31,12 +31,12 @@ struct DateType : public DataType {
Unit unit;
- explicit DateType(Unit unit = Unit::DAY, bool nullable = true)
- : DataType(TypeEnum::DATE, nullable),
+ explicit DateType(Unit unit = Unit::DAY)
+ : DataType(TypeEnum::DATE),
unit(unit) {}
DateType(const DateType& other)
- : DateType(other.unit, other.nullable) {}
+ : DateType(other.unit) {}
static char const *name() {
return "date";
@@ -58,12 +58,12 @@ struct TimestampType : public DataType {
Unit unit;
- explicit TimestampType(Unit unit = Unit::MILLI, bool nullable = true)
- : DataType(TypeEnum::TIMESTAMP, nullable),
+ explicit TimestampType(Unit unit = Unit::MILLI)
+ : DataType(TypeEnum::TIMESTAMP),
unit(unit) {}
TimestampType(const TimestampType& other)
- : TimestampType(other.unit, other.nullable) {}
+ : TimestampType(other.unit) {}
static char const *name() {
return "timestamp";
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/json.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h
index 91fd132..6c2b097 100644
--- a/cpp/src/arrow/types/json.h
+++ b/cpp/src/arrow/types/json.h
@@ -28,8 +28,8 @@ struct JSONScalar : public DataType {
static TypePtr dense_type;
static TypePtr sparse_type;
- explicit JSONScalar(bool dense = true, bool nullable = true)
- : DataType(TypeEnum::JSON_SCALAR, nullable),
+ explicit JSONScalar(bool dense = true)
+ : DataType(TypeEnum::JSON_SCALAR),
dense(dense) {}
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index abfc8a3..1d9ddbe 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -44,11 +44,7 @@ TEST(TypesTest, TestListType) {
std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
ListType list_type(vt);
- ListType list_type_nn(vt, false);
-
ASSERT_EQ(list_type.type, TypeEnum::LIST);
- ASSERT_TRUE(list_type.nullable);
- ASSERT_FALSE(list_type_nn.nullable);
ASSERT_EQ(list_type.name(), string("list"));
ASSERT_EQ(list_type.ToString(), string("list<uint8>"));
@@ -132,8 +128,8 @@ TEST_F(TestListBuilder, TestBasics) {
Done();
- ASSERT_TRUE(result_->nullable());
- ASSERT_TRUE(result_->values()->nullable());
+ ASSERT_EQ(1, result_->null_count());
+ ASSERT_EQ(0, result_->values()->null_count());
ASSERT_EQ(3, result_->length());
vector<int32_t> ex_offsets = {0, 3, 3, 7};
@@ -153,10 +149,6 @@ TEST_F(TestListBuilder, TestBasics) {
}
}
-TEST_F(TestListBuilder, TestBasicsNonNullable) {
-}
-
-
TEST_F(TestListBuilder, TestZeroLength) {
// All buffers are null
Done();
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 4ca0f13..4190b53 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -40,8 +40,8 @@ struct ListType : public DataType {
// List can contain any other logical value type
TypePtr value_type;
- explicit ListType(const TypePtr& value_type, bool nullable = true)
- : DataType(TypeEnum::LIST, nullable),
+ explicit ListType(const TypePtr& value_type)
+ : DataType(TypeEnum::LIST),
value_type(value_type) {}
static char const *name() {
@@ -56,21 +56,25 @@ class ListArray : public Array {
public:
ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {}
- ListArray(const TypePtr& type, int64_t length, std::shared_ptr<Buffer> offsets,
- const ArrayPtr& values, std::shared_ptr<Buffer> nulls = nullptr) {
- Init(type, length, offsets, values, nulls);
+ ListArray(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
+ const ArrayPtr& values,
+ int32_t null_count = 0,
+ std::shared_ptr<Buffer> nulls = nullptr) {
+ Init(type, length, offsets, values, null_count, nulls);
}
virtual ~ListArray() {}
- void Init(const TypePtr& type, int64_t length, std::shared_ptr<Buffer> offsets,
- const ArrayPtr& values, std::shared_ptr<Buffer> nulls = nullptr) {
+ void Init(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
+ const ArrayPtr& values,
+ int32_t null_count = 0,
+ std::shared_ptr<Buffer> nulls = nullptr) {
offset_buf_ = offsets;
offsets_ = offsets == nullptr? nullptr :
reinterpret_cast<const int32_t*>(offset_buf_->data());
values_ = values;
- Array::Init(type, length, nulls);
+ Array::Init(type, length, null_count, nulls);
}
// Return a shared pointer in case the requestor desires to share ownership
@@ -108,7 +112,7 @@ class ListBuilder : public Int32Builder {
value_builder_.reset(value_builder);
}
- Status Init(int64_t elements) {
+ Status Init(int32_t elements) {
// One more than requested.
//
// XXX: This is slightly imprecise, because we might trigger null mask
@@ -116,7 +120,7 @@ class ListBuilder : public Int32Builder {
return Int32Builder::Init(elements + 1);
}
- Status Resize(int64_t capacity) {
+ Status Resize(int32_t capacity) {
// Need space for the end offset
RETURN_NOT_OK(Int32Builder::Resize(capacity + 1));
@@ -129,18 +133,15 @@ class ListBuilder : public Int32Builder {
//
// If passed, null_bytes is of equal length to values, and any nonzero byte
// will be considered as a null for that slot
- Status Append(T* values, int64_t length, uint8_t* null_bytes = nullptr) {
+ Status Append(T* values, int32_t length, uint8_t* null_bytes = nullptr) {
if (length_ + length > capacity_) {
- int64_t new_capacity = util::next_power2(length_ + length);
+ int32_t new_capacity = util::next_power2(length_ + length);
RETURN_NOT_OK(Resize(new_capacity));
}
memcpy(raw_buffer() + length_, values, length * elsize_);
- if (nullable_ && null_bytes != nullptr) {
- // If null_bytes is all not null, then none of the values are null
- for (int i = 0; i < length; ++i) {
- util::set_bit(null_bits_, length_ + i, static_cast<bool>(null_bytes[i]));
- }
+ if (null_bytes != nullptr) {
+ AppendNulls(null_bytes, length);
}
length_ += length;
@@ -159,9 +160,10 @@ class ListBuilder : public Int32Builder {
raw_buffer()[length_] = child_values->length();
}
- out->Init(type_, length_, values_, ArrayPtr(child_values), nulls_);
+ out->Init(type_, length_, values_, ArrayPtr(child_values),
+ null_count_, nulls_);
values_ = nulls_ = nullptr;
- capacity_ = length_ = 0;
+ capacity_ = length_ = null_count_ = 0;
return Status::OK();
}
@@ -181,10 +183,10 @@ class ListBuilder : public Int32Builder {
// If the capacity was not already a multiple of 2, do so here
RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
}
- if (nullable_) {
- util::set_bit(null_bits_, length_, is_null);
+ if (is_null) {
+ ++null_count_;
+ util::set_bit(null_bits_, length_);
}
-
raw_buffer()[length_++] = value_builder_->length();
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 3484294..9363443 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -53,15 +53,12 @@ TEST(TypesTest, TestBytesType) {
#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \
TEST(TypesTest, TestPrimitive_##ENUM) { \
KLASS tp; \
- KLASS tp_nn(false); \
\
ASSERT_EQ(tp.type, TypeEnum::ENUM); \
ASSERT_EQ(tp.name(), string(NAME)); \
- ASSERT_TRUE(tp.nullable); \
- ASSERT_FALSE(tp_nn.nullable); \
\
- KLASS tp_copy = tp_nn; \
- ASSERT_FALSE(tp_copy.nullable); \
+ KLASS tp_copy = tp; \
+ ASSERT_EQ(tp_copy.type, TypeEnum::ENUM); \
}
PRIMITIVE_TEST(Int8Type, INT8, "int8");
@@ -100,17 +97,16 @@ class TestPrimitiveBuilder : public TestBuilder {
TestBuilder::SetUp();
type_ = Attrs::type();
- type_nn_ = Attrs::type(false);
ArrayBuilder* tmp;
ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_.reset(static_cast<BuilderType*>(tmp));
- ASSERT_OK(make_builder(pool_, type_nn_, &tmp));
+ ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_nn_.reset(static_cast<BuilderType*>(tmp));
}
- void RandomData(int64_t N, double pct_null = 0.1) {
+ void RandomData(int N, double pct_null = 0.1) {
Attrs::draw(N, &draws_);
random_nulls(N, pct_null, &nulls_);
}
@@ -118,28 +114,33 @@ class TestPrimitiveBuilder : public TestBuilder {
void CheckNullable() {
ArrayType result;
ArrayType expected;
- int64_t size = builder_->length();
+ int size = builder_->length();
- auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
+ auto ex_data = std::make_shared<Buffer>(
+ reinterpret_cast<uint8_t*>(draws_.data()),
size * sizeof(T));
auto ex_nulls = bytes_to_null_buffer(nulls_.data(), size);
- expected.Init(size, ex_data, ex_nulls);
+ int32_t ex_null_count = null_count(nulls_);
+
+ expected.Init(size, ex_data, ex_null_count, ex_nulls);
ASSERT_OK(builder_->Transfer(&result));
// Builder is now reset
ASSERT_EQ(0, builder_->length());
ASSERT_EQ(0, builder_->capacity());
+ ASSERT_EQ(0, builder_->null_count());
ASSERT_EQ(nullptr, builder_->buffer());
ASSERT_TRUE(result.Equals(expected));
+ ASSERT_EQ(ex_null_count, result.null_count());
}
void CheckNonNullable() {
ArrayType result;
ArrayType expected;
- int64_t size = builder_nn_->length();
+ int size = builder_nn_->length();
auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
size * sizeof(T));
@@ -153,6 +154,7 @@ class TestPrimitiveBuilder : public TestBuilder {
ASSERT_EQ(nullptr, builder_nn_->buffer());
ASSERT_TRUE(result.Equals(expected));
+ ASSERT_EQ(0, result.null_count());
}
protected:
@@ -171,14 +173,14 @@ class TestPrimitiveBuilder : public TestBuilder {
typedef CapType##Type Type; \
typedef c_type T; \
\
- static TypePtr type(bool nullable = true) { \
- return TypePtr(new Type(nullable)); \
+ static TypePtr type() { \
+ return TypePtr(new Type()); \
}
#define PINT_DECL(CapType, c_type, LOWER, UPPER) \
struct P##CapType { \
PTYPE_DECL(CapType, c_type); \
- static void draw(int64_t N, vector<T>* draws) { \
+ static void draw(int N, vector<T>* draws) { \
randint<T>(N, LOWER, UPPER, draws); \
} \
}
@@ -208,7 +210,7 @@ TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
TYPED_TEST(TestPrimitiveBuilder, TestInit) {
DECL_T();
- int64_t n = 1000;
+ int n = 1000;
ASSERT_OK(this->builder_->Init(n));
ASSERT_EQ(n, this->builder_->capacity());
ASSERT_EQ(n * sizeof(T), this->builder_->buffer()->size());
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index 2612e8c..c86260b 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -26,20 +26,23 @@ namespace arrow {
// ----------------------------------------------------------------------
// Primitive array base
-void PrimitiveArray::Init(const TypePtr& type, int64_t length,
+void PrimitiveArray::Init(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& data,
+ int32_t null_count,
const std::shared_ptr<Buffer>& nulls) {
- Array::Init(type, length, nulls);
+ Array::Init(type, length, null_count, nulls);
data_ = data;
raw_data_ = data == nullptr? nullptr : data_->data();
}
bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
if (this == &other) return true;
- if (type_->nullable != other.type_->nullable) return false;
+ if (null_count_ != other.null_count_) {
+ return false;
+ }
bool equal_data = data_->Equals(*other.data_, length_);
- if (type_->nullable) {
+ if (null_count_ > 0) {
return equal_data &&
nulls_->Equals(*other.nulls_, util::ceil_byte(length_) / 8);
} else {
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index c5ae0f7..aa8f351 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -36,24 +36,24 @@ class MemoryPool;
template <typename Derived>
struct PrimitiveType : public DataType {
- explicit PrimitiveType(bool nullable = true)
- : DataType(Derived::type_enum, nullable) {}
+ PrimitiveType()
+ : DataType(Derived::type_enum) {}
virtual std::string ToString() const {
return std::string(static_cast<const Derived*>(this)->name());
}
};
-#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
- typedef C_TYPE c_type; \
- static constexpr TypeEnum type_enum = TypeEnum::ENUM; \
- static constexpr int size = SIZE; \
- \
- explicit TYPENAME(bool nullable = true) \
- : PrimitiveType<TYPENAME>(nullable) {} \
- \
- static const char* name() { \
- return NAME; \
+#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
+ typedef C_TYPE c_type; \
+ static constexpr TypeEnum type_enum = TypeEnum::ENUM; \
+ static constexpr int size = SIZE; \
+ \
+ TYPENAME() \
+ : PrimitiveType<TYPENAME>() {} \
+ \
+ static const char* name() { \
+ return NAME; \
}
@@ -64,7 +64,9 @@ class PrimitiveArray : public Array {
virtual ~PrimitiveArray() {}
- void Init(const TypePtr& type, int64_t length, const std::shared_ptr<Buffer>& data,
+ void Init(const TypePtr& type, int32_t length,
+ const std::shared_ptr<Buffer>& data,
+ int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr);
const std::shared_ptr<Buffer>& data() const { return data_;}
@@ -84,15 +86,17 @@ class PrimitiveArrayImpl : public PrimitiveArray {
PrimitiveArrayImpl() : PrimitiveArray() {}
- PrimitiveArrayImpl(int64_t length, const std::shared_ptr<Buffer>& data,
+ PrimitiveArrayImpl(int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr) {
- Init(length, data, nulls);
+ Init(length, data, null_count, nulls);
}
- void Init(int64_t length, const std::shared_ptr<Buffer>& data,
+ void Init(int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr) {
- TypePtr type(new TypeClass(nulls != nullptr));
- PrimitiveArray::Init(type, length, data, nulls);
+ TypePtr type(new TypeClass());
+ PrimitiveArray::Init(type, length, data, null_count, nulls);
}
bool Equals(const PrimitiveArrayImpl& other) const {
@@ -101,7 +105,7 @@ class PrimitiveArrayImpl : public PrimitiveArray {
const T* raw_data() const { return reinterpret_cast<const T*>(raw_data_);}
- T Value(int64_t i) const {
+ T Value(int i) const {
return raw_data()[i];
}
@@ -124,7 +128,7 @@ class PrimitiveBuilder : public ArrayBuilder {
virtual ~PrimitiveBuilder() {}
- Status Resize(int64_t capacity) {
+ Status Resize(int32_t capacity) {
// XXX: Set floor size for now
if (capacity < MIN_BUILDER_CAPACITY) {
capacity = MIN_BUILDER_CAPACITY;
@@ -135,27 +139,26 @@ class PrimitiveBuilder : public ArrayBuilder {
} else {
RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
RETURN_NOT_OK(values_->Resize(capacity * elsize_));
- capacity_ = capacity;
}
+ capacity_ = capacity;
return Status::OK();
}
- Status Init(int64_t capacity) {
+ Status Init(int32_t capacity) {
RETURN_NOT_OK(ArrayBuilder::Init(capacity));
-
values_ = std::make_shared<PoolBuffer>(pool_);
return values_->Resize(capacity * elsize_);
}
- Status Reserve(int64_t elements) {
+ Status Reserve(int32_t elements) {
if (length_ + elements > capacity_) {
- int64_t new_capacity = util::next_power2(length_ + elements);
+ int32_t new_capacity = util::next_power2(length_ + elements);
return Resize(new_capacity);
}
return Status::OK();
}
- Status Advance(int64_t elements) {
+ Status Advance(int32_t elements) {
return ArrayBuilder::Advance(elements);
}
@@ -165,8 +168,9 @@ class PrimitiveBuilder : public ArrayBuilder {
// If the capacity was not already a multiple of 2, do so here
RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
}
- if (nullable_) {
- util::set_bit(null_bits_, length_, is_null);
+ if (is_null) {
+ ++null_count_;
+ util::set_bit(null_bits_, length_);
}
raw_buffer()[length_++] = val;
return Status::OK();
@@ -176,42 +180,49 @@ class PrimitiveBuilder : public ArrayBuilder {
//
// If passed, null_bytes is of equal length to values, and any nonzero byte
// will be considered as a null for that slot
- Status Append(const T* values, int64_t length, uint8_t* null_bytes = nullptr) {
+ Status Append(const T* values, int32_t length,
+ const uint8_t* null_bytes = nullptr) {
if (length_ + length > capacity_) {
- int64_t new_capacity = util::next_power2(length_ + length);
+ int32_t new_capacity = util::next_power2(length_ + length);
RETURN_NOT_OK(Resize(new_capacity));
}
memcpy(raw_buffer() + length_, values, length * elsize_);
- if (nullable_ && null_bytes != nullptr) {
- // If null_bytes is all not null, then none of the values are null
- for (int64_t i = 0; i < length; ++i) {
- util::set_bit(null_bits_, length_ + i, static_cast<bool>(null_bytes[i]));
- }
+ if (null_bytes != nullptr) {
+ AppendNulls(null_bytes, length);
}
length_ += length;
return Status::OK();
}
- Status AppendNull() {
- if (!nullable_) {
- return Status::Invalid("not nullable");
+ // Write nulls as uint8_t* into pre-allocated memory
+ void AppendNulls(const uint8_t* null_bytes, int32_t length) {
+ // If null_bytes is all not null, then none of the values are null
+ for (int i = 0; i < length; ++i) {
+ if (static_cast<bool>(null_bytes[i])) {
+ ++null_count_;
+ util::set_bit(null_bits_, length_ + i);
+ }
}
+ }
+
+ Status AppendNull() {
if (length_ == capacity_) {
// If the capacity was not already a multiple of 2, do so here
RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
}
- util::set_bit(null_bits_, length_++, true);
+ ++null_count_;
+ util::set_bit(null_bits_, length_++);
return Status::OK();
}
// Initialize an array type instance with the results of this builder
// Transfers ownership of all buffers
Status Transfer(PrimitiveArray* out) {
- out->Init(type_, length_, values_, nulls_);
+ out->Init(type_, length_, values_, null_count_, nulls_);
values_ = nulls_ = nullptr;
- capacity_ = length_ = 0;
+ capacity_ = length_ = null_count_ = 0;
return Status::OK();
}
@@ -236,7 +247,7 @@ class PrimitiveBuilder : public ArrayBuilder {
protected:
std::shared_ptr<PoolBuffer> values_;
- int64_t elsize_;
+ int elsize_;
};
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index a2d87ea..e1dcebe 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -39,7 +39,6 @@ TEST(TypesTest, TestCharType) {
CharType t1(5);
ASSERT_EQ(t1.type, TypeEnum::CHAR);
- ASSERT_TRUE(t1.nullable);
ASSERT_EQ(t1.size, 5);
ASSERT_EQ(t1.ToString(), std::string("char(5)"));
@@ -47,7 +46,6 @@ TEST(TypesTest, TestCharType) {
// Test copy constructor
CharType t2 = t1;
ASSERT_EQ(t2.type, TypeEnum::CHAR);
- ASSERT_TRUE(t2.nullable);
ASSERT_EQ(t2.size, 5);
}
@@ -56,7 +54,6 @@ TEST(TypesTest, TestVarcharType) {
VarcharType t1(5);
ASSERT_EQ(t1.type, TypeEnum::VARCHAR);
- ASSERT_TRUE(t1.nullable);
ASSERT_EQ(t1.size, 5);
ASSERT_EQ(t1.physical_type.size, 6);
@@ -65,19 +62,14 @@ TEST(TypesTest, TestVarcharType) {
// Test copy constructor
VarcharType t2 = t1;
ASSERT_EQ(t2.type, TypeEnum::VARCHAR);
- ASSERT_TRUE(t2.nullable);
ASSERT_EQ(t2.size, 5);
ASSERT_EQ(t2.physical_type.size, 6);
}
TEST(TypesTest, TestStringType) {
StringType str;
- StringType str_nn(false);
-
ASSERT_EQ(str.type, TypeEnum::STRING);
ASSERT_EQ(str.name(), std::string("string"));
- ASSERT_TRUE(str.nullable);
- ASSERT_FALSE(str_nn.nullable);
}
// ----------------------------------------------------------------------
@@ -96,7 +88,7 @@ class TestStringContainer : public ::testing::Test {
void MakeArray() {
length_ = offsets_.size() - 1;
- int64_t nchars = chars_.size();
+ int nchars = chars_.size();
value_buf_ = to_buffer(chars_);
values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));
@@ -104,7 +96,9 @@ class TestStringContainer : public ::testing::Test {
offsets_buf_ = to_buffer(offsets_);
nulls_buf_ = bytes_to_null_buffer(nulls_.data(), nulls_.size());
- strings_.Init(length_, offsets_buf_, values_, nulls_buf_);
+ null_count_ = null_count(nulls_);
+
+ strings_.Init(length_, offsets_buf_, values_, null_count_, nulls_buf_);
}
protected:
@@ -118,7 +112,8 @@ class TestStringContainer : public ::testing::Test {
std::shared_ptr<Buffer> offsets_buf_;
std::shared_ptr<Buffer> nulls_buf_;
- int64_t length_;
+ int null_count_;
+ int length_;
ArrayPtr values_;
StringArray strings_;
@@ -127,7 +122,7 @@ class TestStringContainer : public ::testing::Test {
TEST_F(TestStringContainer, TestArrayBasics) {
ASSERT_EQ(length_, strings_.length());
- ASSERT_TRUE(strings_.nullable());
+ ASSERT_EQ(1, strings_.null_count());
}
TEST_F(TestStringContainer, TestType) {
@@ -149,7 +144,8 @@ TEST_F(TestStringContainer, TestListFunctions) {
TEST_F(TestStringContainer, TestDestructor) {
- auto arr = std::make_shared<StringArray>(length_, offsets_buf_, values_, nulls_buf_);
+ auto arr = std::make_shared<StringArray>(length_, offsets_buf_, values_,
+ null_count_, nulls_buf_);
}
TEST_F(TestStringContainer, TestGetString) {
@@ -189,10 +185,6 @@ class TestStringBuilder : public TestBuilder {
std::unique_ptr<StringArray> result_;
};
-TEST_F(TestStringBuilder, TestAttrs) {
- ASSERT_FALSE(builder_->value_builder()->nullable());
-}
-
TEST_F(TestStringBuilder, TestScalarAppend) {
std::vector<std::string> strings = {"a", "bb", "", "", "ccc"};
std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
@@ -212,10 +204,11 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
Done();
ASSERT_EQ(reps * N, result_->length());
+ ASSERT_EQ(reps * null_count(is_null), result_->null_count());
ASSERT_EQ(reps * 6, result_->values()->length());
- int64_t length;
- int64_t pos = 0;
+ int32_t length;
+ int32_t pos = 0;
for (int i = 0; i < N * reps; ++i) {
if (is_null[i % N]) {
ASSERT_TRUE(result_->IsNull(i));
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/string.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc
index f3dfbdc..dea42e1 100644
--- a/cpp/src/arrow/types/string.cc
+++ b/cpp/src/arrow/types/string.cc
@@ -35,6 +35,6 @@ std::string VarcharType::ToString() const {
return s.str();
}
-TypePtr StringBuilder::value_type_ = TypePtr(new UInt8Type(false));
+TypePtr StringBuilder::value_type_ = TypePtr(new UInt8Type());
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index d0690d9..0845625 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -40,13 +40,13 @@ struct CharType : public DataType {
BytesType physical_type;
- explicit CharType(int size, bool nullable = true)
- : DataType(TypeEnum::CHAR, nullable),
+ explicit CharType(int size)
+ : DataType(TypeEnum::CHAR),
size(size),
physical_type(BytesType(size)) {}
CharType(const CharType& other)
- : CharType(other.size, other.nullable) {}
+ : CharType(other.size) {}
virtual std::string ToString() const;
};
@@ -58,12 +58,12 @@ struct VarcharType : public DataType {
BytesType physical_type;
- explicit VarcharType(int size, bool nullable = true)
- : DataType(TypeEnum::VARCHAR, nullable),
+ explicit VarcharType(int size)
+ : DataType(TypeEnum::VARCHAR),
size(size),
physical_type(BytesType(size + 1)) {}
VarcharType(const VarcharType& other)
- : VarcharType(other.size, other.nullable) {}
+ : VarcharType(other.size) {}
virtual std::string ToString() const;
};
@@ -73,11 +73,11 @@ static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1));
// String is a logical type consisting of a physical list of 1-byte values
struct StringType : public DataType {
- explicit StringType(bool nullable = true)
- : DataType(TypeEnum::STRING, nullable) {}
+ StringType()
+ : DataType(TypeEnum::STRING) {}
StringType(const StringType& other)
- : StringType(other.nullable) {}
+ : StringType() {}
const LayoutPtr& physical_type() {
return physical_string;
@@ -98,17 +98,19 @@ class StringArray : public ListArray {
public:
StringArray() : ListArray(), bytes_(nullptr), raw_bytes_(nullptr) {}
- StringArray(int64_t length, const std::shared_ptr<Buffer>& offsets,
+ StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
const ArrayPtr& values,
+ int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr) {
- Init(length, offsets, values, nulls);
+ Init(length, offsets, values, null_count, nulls);
}
- void Init(const TypePtr& type, int64_t length,
+ void Init(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& offsets,
const ArrayPtr& values,
+ int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr) {
- ListArray::Init(type, length, offsets, values, nulls);
+ ListArray::Init(type, length, offsets, values, null_count, nulls);
// TODO: type validation for values array
@@ -117,23 +119,24 @@ class StringArray : public ListArray {
raw_bytes_ = bytes_->raw_data();
}
- void Init(int64_t length, const std::shared_ptr<Buffer>& offsets,
+ void Init(int32_t length, const std::shared_ptr<Buffer>& offsets,
const ArrayPtr& values,
+ int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr) {
- TypePtr type(new StringType(nulls != nullptr));
- Init(type, length, offsets, values, nulls);
+ TypePtr type(new StringType());
+ Init(type, length, offsets, values, null_count, nulls);
}
// Compute the pointer t
- const uint8_t* GetValue(int64_t i, int64_t* out_length) const {
+ const uint8_t* GetValue(int i, int32_t* out_length) const {
int32_t pos = offsets_[i];
*out_length = offsets_[i + 1] - pos;
return raw_bytes_ + pos;
}
// Construct a std::string
- std::string GetString(int64_t i) const {
- int64_t nchars;
+ std::string GetString(int i) const {
+ int32_t nchars;
const uint8_t* str = GetValue(i, &nchars);
return std::string(reinterpret_cast<const char*>(str), nchars);
}
@@ -161,7 +164,7 @@ class StringBuilder : public ListBuilder {
value.size());
}
- Status Append(const uint8_t* value, int64_t length);
+ Status Append(const uint8_t* value, int32_t length);
Status Append(const std::vector<std::string>& values,
uint8_t* null_bytes);
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index 644b545..1a9fc6b 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -43,11 +43,7 @@ TEST(TestStructType, Basics) {
vector<Field> fields = {f0, f1, f2};
- StructType struct_type(fields, true);
- StructType struct_type_nn(fields, false);
-
- ASSERT_TRUE(struct_type.nullable);
- ASSERT_FALSE(struct_type_nn.nullable);
+ StructType struct_type(fields);
ASSERT_TRUE(struct_type.field(0).Equals(f0));
ASSERT_TRUE(struct_type.field(1).Equals(f1));
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
index 7d8885b..afba19a 100644
--- a/cpp/src/arrow/types/struct.h
+++ b/cpp/src/arrow/types/struct.h
@@ -29,9 +29,8 @@ namespace arrow {
struct StructType : public DataType {
std::vector<Field> fields_;
- StructType(const std::vector<Field>& fields,
- bool nullable = true)
- : DataType(TypeEnum::STRUCT, nullable) {
+ explicit StructType(const std::vector<Field>& fields)
+ : DataType(TypeEnum::STRUCT) {
fields_ = fields;
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 3ecb0de..1744efc 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -36,15 +36,13 @@ class TestBuilder : public ::testing::Test {
void SetUp() {
pool_ = GetDefaultMemoryPool();
type_ = TypePtr(new UInt8Type());
- type_nn_ = TypePtr(new UInt8Type(false));
builder_.reset(new UInt8Builder(pool_, type_));
- builder_nn_.reset(new UInt8Builder(pool_, type_nn_));
+ builder_nn_.reset(new UInt8Builder(pool_, type_));
}
protected:
MemoryPool* pool_;
TypePtr type_;
- TypePtr type_nn_;
unique_ptr<ArrayBuilder> builder_;
unique_ptr<ArrayBuilder> builder_nn_;
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/types/union.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h
index 7b66c3b..62a3d1c 100644
--- a/cpp/src/arrow/types/union.h
+++ b/cpp/src/arrow/types/union.h
@@ -33,9 +33,8 @@ class Buffer;
struct DenseUnionType : public CollectionType<TypeEnum::DENSE_UNION> {
typedef CollectionType<TypeEnum::DENSE_UNION> Base;
- DenseUnionType(const std::vector<TypePtr>& child_types,
- bool nullable = true)
- : Base(nullable) {
+ explicit DenseUnionType(const std::vector<TypePtr>& child_types) :
+ Base() {
child_types_ = child_types;
}
@@ -46,9 +45,8 @@ struct DenseUnionType : public CollectionType<TypeEnum::DENSE_UNION> {
struct SparseUnionType : public CollectionType<TypeEnum::SPARSE_UNION> {
typedef CollectionType<TypeEnum::SPARSE_UNION> Base;
- SparseUnionType(const std::vector<TypePtr>& child_types,
- bool nullable = true)
- : Base(nullable) {
+ explicit SparseUnionType(const std::vector<TypePtr>& child_types) :
+ Base() {
child_types_ = child_types;
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/util/bit-util.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index dbac0a4..292cb33 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -25,7 +25,9 @@ namespace arrow {
void util::bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits) {
for (int i = 0; i < length; ++i) {
- set_bit(bits, i, static_cast<bool>(bytes[i]));
+ if (static_cast<bool>(bytes[i])) {
+ set_bit(bits, i);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/b88b69e2/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 9ae6127..841f617 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -41,8 +41,8 @@ static inline bool get_bit(const uint8_t* bits, int i) {
return bits[i / 8] & (1 << (i % 8));
}
-static inline void set_bit(uint8_t* bits, int i, bool is_set) {
- bits[i / 8] |= (1 << (i % 8)) * is_set;
+static inline void set_bit(uint8_t* bits, int i) {
+ bits[i / 8] |= 1 << (i % 8);
}
static inline int64_t next_power2(int64_t n) {