You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/03 23:02:28 UTC
arrow git commit: ARROW-19: Add an externalized MemoryPool interface
for use in builder classes
Repository: arrow
Updated Branches:
refs/heads/master 1000d110c -> e41802085
ARROW-19: Add an externalized MemoryPool interface for use in builder classes
Memory management will be an ongoing concern, but this is a stride in the right direction. Applications requiring custom memory management will be able to implement a subclass of MemoryPool; we can evolve its API as user needs evolve.
Author: Wes McKinney <we...@apache.org>
Closes #8 from wesm/ARROW-19 and squashes the following commits:
08d3895 [Wes McKinney] Some include cleanup
e319a36 [Wes McKinney] cpplint fixes
abca6eb [Wes McKinney] Add a MemoryPool abstract interface, change builder instances to request memory from pool via Buffer subclass
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e4180208
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e4180208
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e4180208
Branch: refs/heads/master
Commit: e418020852ad4fa148b07f21f5b4d47230fe4c5b
Parents: 1000d11
Author: Wes McKinney <we...@apache.org>
Authored: Thu Mar 3 14:02:53 2016 -0800
Committer: Wes McKinney <we...@apache.org>
Committed: Thu Mar 3 14:02:53 2016 -0800
----------------------------------------------------------------------
cpp/CMakeLists.txt | 2 +-
cpp/src/arrow/array-test.cc | 10 +++-
cpp/src/arrow/array.h | 1 -
cpp/src/arrow/builder.cc | 2 +-
cpp/src/arrow/builder.h | 22 ++++----
cpp/src/arrow/types/construct.cc | 13 ++---
cpp/src/arrow/types/construct.h | 4 +-
cpp/src/arrow/types/list-test.cc | 2 +-
cpp/src/arrow/types/list.h | 7 ++-
cpp/src/arrow/types/primitive-test.cc | 5 +-
cpp/src/arrow/types/primitive.h | 12 +++--
cpp/src/arrow/types/string-test.cc | 29 +++++------
cpp/src/arrow/types/string.h | 9 ++--
cpp/src/arrow/types/struct.cc | 1 +
cpp/src/arrow/types/test-common.h | 8 ++-
cpp/src/arrow/types/union.cc | 1 +
cpp/src/arrow/util/CMakeLists.txt | 3 ++
cpp/src/arrow/util/bit-util.cc | 2 +-
cpp/src/arrow/util/bit-util.h | 3 +-
cpp/src/arrow/util/buffer-test.cc | 6 +--
cpp/src/arrow/util/buffer.cc | 36 +++++++++----
cpp/src/arrow/util/buffer.h | 36 +++++++++----
cpp/src/arrow/util/memory-pool-test.cc | 47 +++++++++++++++++
cpp/src/arrow/util/memory-pool.cc | 78 +++++++++++++++++++++++++++++
cpp/src/arrow/util/memory-pool.h | 41 +++++++++++++++
25 files changed, 301 insertions(+), 79 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5ddd9da..d2c840a 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -434,7 +434,7 @@ if (UNIX)
add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
--verbose=2
--linelength=90
- --filter=-whitespace/comments,-readability/todo,-build/header_guard
+ --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
endif (UNIX)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 5ecf916..16afb9b 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -18,6 +18,7 @@
#include <gtest/gtest.h>
#include <cstdint>
+#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
@@ -28,6 +29,8 @@
#include "arrow/types/integer.h"
#include "arrow/types/primitive.h"
#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
using std::string;
using std::vector;
@@ -41,8 +44,10 @@ static TypePtr int32_nn = TypePtr(new Int32Type(false));
class TestArray : public ::testing::Test {
public:
void SetUp() {
- auto data = std::make_shared<OwnedMutableBuffer>();
- auto nulls = std::make_shared<OwnedMutableBuffer>();
+ pool_ = GetDefaultMemoryPool();
+
+ auto data = std::make_shared<PoolBuffer>(pool_);
+ auto nulls = std::make_shared<PoolBuffer>(pool_);
ASSERT_OK(data->Resize(400));
ASSERT_OK(nulls->Resize(128));
@@ -51,6 +56,7 @@ class TestArray : public ::testing::Test {
}
protected:
+ MemoryPool* pool_;
std::unique_ptr<Int32Array> arr_;
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index c95450d..0eaa28d 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -19,7 +19,6 @@
#define ARROW_ARRAY_H
#include <cstdint>
-#include <cstdlib>
#include <memory>
#include "arrow/type.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 1fd7471..cb85067 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -30,7 +30,7 @@ Status ArrayBuilder::Init(int64_t capacity) {
if (nullable_) {
int64_t to_alloc = util::ceil_byte(capacity) / 8;
- nulls_ = std::make_shared<OwnedMutableBuffer>();
+ nulls_ = std::make_shared<PoolBuffer>(pool_);
RETURN_NOT_OK(nulls_->Resize(to_alloc));
null_bits_ = nulls_->mutable_data();
memset(null_bits_, 0, to_alloc);
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index b43668a..456bb04 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -23,25 +23,27 @@
#include <vector>
#include "arrow/type.h"
-#include "arrow/util/buffer.h"
#include "arrow/util/macros.h"
#include "arrow/util/status.h"
namespace arrow {
class Array;
+class MemoryPool;
+class PoolBuffer;
static constexpr int64_t MIN_BUILDER_CAPACITY = 1 << 8;
// Base class for all data array builders
class ArrayBuilder {
public:
- explicit ArrayBuilder(const TypePtr& type)
- : type_(type),
- nullable_(type_->nullable),
- nulls_(nullptr), null_bits_(nullptr),
- length_(0),
- capacity_(0) {}
+ explicit ArrayBuilder(MemoryPool* pool, const TypePtr& type) :
+ pool_(pool),
+ type_(type),
+ nullable_(type_->nullable),
+ nulls_(nullptr), null_bits_(nullptr),
+ length_(0),
+ capacity_(0) {}
virtual ~ArrayBuilder() {}
@@ -71,18 +73,20 @@ class ArrayBuilder {
// this function responsibly.
Status Advance(int64_t elements);
- const std::shared_ptr<OwnedMutableBuffer>& nulls() const { return nulls_;}
+ const std::shared_ptr<PoolBuffer>& nulls() const { return nulls_;}
// Creates new array object to hold the contents of the builder and transfers
// ownership of the data
virtual Status ToArray(Array** out) = 0;
protected:
+ MemoryPool* pool_;
+
TypePtr type_;
bool nullable_;
// If the type is not nullable, then null_ is nullptr after initialization
- std::shared_ptr<OwnedMutableBuffer> nulls_;
+ std::shared_ptr<PoolBuffer> nulls_;
uint8_t* null_bits_;
// Array length, so far. Also, the index of the next element to be added
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 5176caf..e1bb990 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -32,12 +32,13 @@ class ArrayBuilder;
// Initially looked at doing this with vtables, but shared pointers makes it
// difficult
-#define BUILDER_CASE(ENUM, BuilderType) \
- case TypeEnum::ENUM: \
- *out = static_cast<ArrayBuilder*>(new BuilderType(type)); \
+#define BUILDER_CASE(ENUM, BuilderType) \
+ case TypeEnum::ENUM: \
+ *out = static_cast<ArrayBuilder*>(new BuilderType(pool, type)); \
return Status::OK();
-Status make_builder(const TypePtr& type, ArrayBuilder** out) {
+Status make_builder(MemoryPool* pool, const TypePtr& type,
+ ArrayBuilder** out) {
switch (type->type) {
BUILDER_CASE(UINT8, UInt8Builder);
BUILDER_CASE(INT8, Int8Builder);
@@ -59,10 +60,10 @@ Status make_builder(const TypePtr& type, ArrayBuilder** out) {
{
ListType* list_type = static_cast<ListType*>(type.get());
ArrayBuilder* value_builder;
- RETURN_NOT_OK(make_builder(list_type->value_type, &value_builder));
+ RETURN_NOT_OK(make_builder(pool, list_type->value_type, &value_builder));
// The ListBuilder takes ownership of the value_builder
- ListBuilder* builder = new ListBuilder(type, value_builder);
+ ListBuilder* builder = new ListBuilder(pool, type, value_builder);
*out = static_cast<ArrayBuilder*>(builder);
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index c0bfedd..b5ba436 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -23,9 +23,11 @@
namespace arrow {
class ArrayBuilder;
+class MemoryPool;
class Status;
-Status make_builder(const TypePtr& type, ArrayBuilder** out);
+Status make_builder(MemoryPool* pool, const TypePtr& type,
+ ArrayBuilder** out);
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 47673ff..abfc8a3 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -76,7 +76,7 @@ class TestListBuilder : public TestBuilder {
type_ = TypePtr(new ListType(value_type_));
ArrayBuilder* tmp;
- ASSERT_OK(make_builder(type_, &tmp));
+ ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_.reset(static_cast<ListBuilder*>(tmp));
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 0f11162..4ca0f13 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -34,6 +34,8 @@
namespace arrow {
+class MemoryPool;
+
struct ListType : public DataType {
// List can contain any other logical value type
TypePtr value_type;
@@ -100,8 +102,9 @@ class ListArray : public Array {
// have been appended to the child array)
class ListBuilder : public Int32Builder {
public:
- ListBuilder(const TypePtr& type, ArrayBuilder* value_builder)
- : Int32Builder(type) {
+ ListBuilder(MemoryPool* pool, const TypePtr& type,
+ ArrayBuilder* value_builder)
+ : Int32Builder(pool, type) {
value_builder_.reset(value_builder);
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 1296860..3484294 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -18,7 +18,6 @@
#include <gtest/gtest.h>
#include <cstdint>
-#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
@@ -104,10 +103,10 @@ class TestPrimitiveBuilder : public TestBuilder {
type_nn_ = Attrs::type(false);
ArrayBuilder* tmp;
- ASSERT_OK(make_builder(type_, &tmp));
+ ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_.reset(static_cast<BuilderType*>(tmp));
- ASSERT_OK(make_builder(type_nn_, &tmp));
+ ASSERT_OK(make_builder(pool_, type_nn_, &tmp));
builder_nn_.reset(static_cast<BuilderType*>(tmp));
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index a419112..c5ae0f7 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -20,6 +20,7 @@
#include <cstdint>
#include <cstring>
+#include <memory>
#include <string>
#include "arrow/array.h"
@@ -31,6 +32,8 @@
namespace arrow {
+class MemoryPool;
+
template <typename Derived>
struct PrimitiveType : public DataType {
explicit PrimitiveType(bool nullable = true)
@@ -113,8 +116,9 @@ class PrimitiveBuilder : public ArrayBuilder {
public:
typedef typename Type::c_type T;
- explicit PrimitiveBuilder(const TypePtr& type)
- : ArrayBuilder(type), values_(nullptr) {
+ explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) :
+ ArrayBuilder(pool, type),
+ values_(nullptr) {
elsize_ = sizeof(T);
}
@@ -139,7 +143,7 @@ class PrimitiveBuilder : public ArrayBuilder {
Status Init(int64_t capacity) {
RETURN_NOT_OK(ArrayBuilder::Init(capacity));
- values_ = std::make_shared<OwnedMutableBuffer>();
+ values_ = std::make_shared<PoolBuffer>(pool_);
return values_->Resize(capacity * elsize_);
}
@@ -231,7 +235,7 @@ class PrimitiveBuilder : public ArrayBuilder {
}
protected:
- std::shared_ptr<OwnedMutableBuffer> values_;
+ std::shared_ptr<PoolBuffer> values_;
int64_t elsize_;
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 6dba3fd..a2d87ea 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -31,12 +31,9 @@
#include "arrow/types/test-common.h"
#include "arrow/util/status.h"
-using std::string;
-using std::unique_ptr;
-using std::vector;
-
namespace arrow {
+class Buffer;
TEST(TypesTest, TestCharType) {
CharType t1(5);
@@ -45,7 +42,7 @@ TEST(TypesTest, TestCharType) {
ASSERT_TRUE(t1.nullable);
ASSERT_EQ(t1.size, 5);
- ASSERT_EQ(t1.ToString(), string("char(5)"));
+ ASSERT_EQ(t1.ToString(), std::string("char(5)"));
// Test copy constructor
CharType t2 = t1;
@@ -63,7 +60,7 @@ TEST(TypesTest, TestVarcharType) {
ASSERT_EQ(t1.size, 5);
ASSERT_EQ(t1.physical_type.size, 6);
- ASSERT_EQ(t1.ToString(), string("varchar(5)"));
+ ASSERT_EQ(t1.ToString(), std::string("varchar(5)"));
// Test copy constructor
VarcharType t2 = t1;
@@ -78,7 +75,7 @@ TEST(TypesTest, TestStringType) {
StringType str_nn(false);
ASSERT_EQ(str.type, TypeEnum::STRING);
- ASSERT_EQ(str.name(), string("string"));
+ ASSERT_EQ(str.name(), std::string("string"));
ASSERT_TRUE(str.nullable);
ASSERT_FALSE(str_nn.nullable);
}
@@ -111,11 +108,11 @@ class TestStringContainer : public ::testing::Test {
}
protected:
- vector<int32_t> offsets_;
- vector<char> chars_;
- vector<uint8_t> nulls_;
+ std::vector<int32_t> offsets_;
+ std::vector<char> chars_;
+ std::vector<uint8_t> nulls_;
- vector<string> expected_;
+ std::vector<std::string> expected_;
std::shared_ptr<Buffer> value_buf_;
std::shared_ptr<Buffer> offsets_buf_;
@@ -175,7 +172,7 @@ class TestStringBuilder : public TestBuilder {
type_ = TypePtr(new StringType());
ArrayBuilder* tmp;
- ASSERT_OK(make_builder(type_, &tmp));
+ ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_.reset(static_cast<StringBuilder*>(tmp));
}
@@ -188,8 +185,8 @@ class TestStringBuilder : public TestBuilder {
protected:
TypePtr type_;
- unique_ptr<StringBuilder> builder_;
- unique_ptr<StringArray> result_;
+ std::unique_ptr<StringBuilder> builder_;
+ std::unique_ptr<StringArray> result_;
};
TEST_F(TestStringBuilder, TestAttrs) {
@@ -197,8 +194,8 @@ TEST_F(TestStringBuilder, TestAttrs) {
}
TEST_F(TestStringBuilder, TestScalarAppend) {
- vector<string> strings = {"a", "bb", "", "", "ccc"};
- vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+ std::vector<std::string> strings = {"a", "bb", "", "", "ccc"};
+ std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
int N = strings.size();
int reps = 1000;
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index 30d6e24..d0690d9 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -27,12 +27,13 @@
#include "arrow/type.h"
#include "arrow/types/integer.h"
#include "arrow/types/list.h"
-#include "arrow/util/buffer.h"
#include "arrow/util/status.h"
namespace arrow {
class ArrayBuilder;
+class Buffer;
+class MemoryPool;
struct CharType : public DataType {
int size;
@@ -148,8 +149,9 @@ class StringArray : public ListArray {
class StringBuilder : public ListBuilder {
public:
- explicit StringBuilder(const TypePtr& type) :
- ListBuilder(type, static_cast<ArrayBuilder*>(new UInt8Builder(value_type_))) {
+ explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :
+ ListBuilder(pool, type,
+ static_cast<ArrayBuilder*>(new UInt8Builder(pool, value_type_))) {
byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
}
@@ -171,6 +173,7 @@ class StringBuilder : public ListBuilder {
}
protected:
+ std::shared_ptr<ListBuilder> list_builder_;
UInt8Builder* byte_builder_;
static TypePtr value_type_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index b7be5d8..a245656 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -17,6 +17,7 @@
#include "arrow/types/struct.h"
+#include <cstdlib>
#include <memory>
#include <sstream>
#include <string>
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 267e48a..3ecb0de 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -25,6 +25,7 @@
#include "arrow/test-util.h"
#include "arrow/type.h"
+#include "arrow/util/memory-pool.h"
using std::unique_ptr;
@@ -33,12 +34,15 @@ namespace arrow {
class TestBuilder : public ::testing::Test {
public:
void SetUp() {
+ pool_ = GetDefaultMemoryPool();
type_ = TypePtr(new UInt8Type());
type_nn_ = TypePtr(new UInt8Type(false));
- builder_.reset(new UInt8Builder(type_));
- builder_nn_.reset(new UInt8Builder(type_nn_));
+ builder_.reset(new UInt8Builder(pool_, type_));
+ builder_nn_.reset(new UInt8Builder(pool_, type_nn_));
}
protected:
+ MemoryPool* pool_;
+
TypePtr type_;
TypePtr type_nn_;
unique_ptr<ArrayBuilder> builder_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/types/union.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.cc b/cpp/src/arrow/types/union.cc
index 54f41a7..db3f817 100644
--- a/cpp/src/arrow/types/union.cc
+++ b/cpp/src/arrow/types/union.cc
@@ -17,6 +17,7 @@
#include "arrow/types/union.h"
+#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index ff8db6a..c53f307 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -22,6 +22,7 @@
set(UTIL_SRCS
bit-util.cc
buffer.cc
+ memory-pool.cc
status.cc
)
@@ -39,6 +40,7 @@ install(FILES
bit-util.h
buffer.h
macros.h
+ memory-pool.h
status.h
DESTINATION include/arrow/util)
@@ -79,3 +81,4 @@ endif()
ADD_ARROW_TEST(bit-util-test)
ADD_ARROW_TEST(buffer-test)
+ADD_ARROW_TEST(memory-pool-test)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/bit-util.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc
index d2ddd65..dbac0a4 100644
--- a/cpp/src/arrow/util/bit-util.cc
+++ b/cpp/src/arrow/util/bit-util.cc
@@ -33,7 +33,7 @@ Status util::bytes_to_bits(uint8_t* bytes, int length,
std::shared_ptr<Buffer>* out) {
int bit_length = ceil_byte(length) / 8;
- auto buffer = std::make_shared<OwnedMutableBuffer>();
+ auto buffer = std::make_shared<PoolBuffer>();
RETURN_NOT_OK(buffer->Resize(bit_length));
memset(buffer->mutable_data(), 0, bit_length);
bytes_to_bits(bytes, length, buffer->mutable_data());
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 61dffa3..9ae6127 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -22,10 +22,9 @@
#include <cstdlib>
#include <memory>
-#include "arrow/util/buffer.h"
-
namespace arrow {
+class Buffer;
class Status;
namespace util {
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc
index edfd08e..9f1fd91 100644
--- a/cpp/src/arrow/util/buffer-test.cc
+++ b/cpp/src/arrow/util/buffer-test.cc
@@ -16,10 +16,8 @@
// under the License.
#include <gtest/gtest.h>
-#include <cstdlib>
#include <cstdint>
#include <limits>
-#include <memory>
#include <string>
#include "arrow/test-util.h"
@@ -34,7 +32,7 @@ class TestBuffer : public ::testing::Test {
};
TEST_F(TestBuffer, Resize) {
- OwnedMutableBuffer buf;
+ PoolBuffer buf;
ASSERT_EQ(0, buf.size());
ASSERT_OK(buf.Resize(100));
@@ -49,7 +47,7 @@ TEST_F(TestBuffer, Resize) {
TEST_F(TestBuffer, ResizeOOM) {
// realloc fails, even though there may be no explicit limit
- OwnedMutableBuffer buf;
+ PoolBuffer buf;
ASSERT_OK(buf.Resize(100));
int64_t to_alloc = std::numeric_limits<int64_t>::max();
ASSERT_RAISES(OutOfMemory, buf.Resize(to_alloc));
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 2fb34d5..3f3807d 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -19,6 +19,7 @@
#include <cstdint>
+#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"
namespace arrow {
@@ -34,19 +35,34 @@ std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
return std::make_shared<Buffer>(this->get_shared_ptr(), 0, size());
}
-OwnedMutableBuffer::OwnedMutableBuffer() :
- MutableBuffer(nullptr, 0) {}
+PoolBuffer::PoolBuffer(MemoryPool* pool) :
+ ResizableBuffer(nullptr, 0) {
+ if (pool == nullptr) {
+ pool = GetDefaultMemoryPool();
+ }
+ pool_ = pool;
+}
-Status OwnedMutableBuffer::Resize(int64_t new_size) {
- size_ = new_size;
- try {
- buffer_owner_.resize(new_size);
- } catch (const std::bad_alloc& e) {
- return Status::OutOfMemory("resize failed");
+Status PoolBuffer::Reserve(int64_t new_capacity) {
+ if (!mutable_data_ || new_capacity > capacity_) {
+ uint8_t* new_data;
+ if (mutable_data_) {
+ RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
+ memcpy(new_data, mutable_data_, size_);
+ pool_->Free(mutable_data_, capacity_);
+ } else {
+ RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
+ }
+ mutable_data_ = new_data;
+ data_ = mutable_data_;
+ capacity_ = new_capacity;
}
- data_ = buffer_owner_.data();
- mutable_data_ = buffer_owner_.data();
+ return Status::OK();
+}
+Status PoolBuffer::Resize(int64_t new_size) {
+ RETURN_NOT_OK(Reserve(new_size));
+ size_ = new_size;
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h
index 3e41839..8704723 100644
--- a/cpp/src/arrow/util/buffer.h
+++ b/cpp/src/arrow/util/buffer.h
@@ -19,15 +19,14 @@
#define ARROW_UTIL_BUFFER_H
#include <cstdint>
-#include <cstdlib>
#include <cstring>
#include <memory>
-#include <vector>
#include "arrow/util/macros.h"
namespace arrow {
+class MemoryPool;
class Status;
// ----------------------------------------------------------------------
@@ -115,17 +114,34 @@ class MutableBuffer : public Buffer {
uint8_t* mutable_data_;
};
-// A MutableBuffer whose memory is owned by the class instance. For example,
-// for reading data out of files that you want to deallocate when this class is
-// garbage-collected
-class OwnedMutableBuffer : public MutableBuffer {
+class ResizableBuffer : public MutableBuffer {
public:
- OwnedMutableBuffer();
- Status Resize(int64_t new_size);
+ // Change buffer reported size to indicated size, allocating memory if
+ // necessary
+ virtual Status Resize(int64_t new_size) = 0;
+
+ // Ensure that buffer has enough memory allocated to fit the indicated
+ // capacity. Does not change buffer's reported size
+ virtual Status Reserve(int64_t new_capacity) = 0;
+
+ protected:
+ ResizableBuffer(uint8_t* data, int64_t size) :
+ MutableBuffer(data, size),
+ capacity_(size) {}
+
+ int64_t capacity_;
+};
+
+// A Buffer whose lifetime is tied to a particular MemoryPool
+class PoolBuffer : public ResizableBuffer {
+ public:
+ explicit PoolBuffer(MemoryPool* pool = nullptr);
+
+ virtual Status Resize(int64_t new_size);
+ virtual Status Reserve(int64_t new_capacity);
private:
- // TODO: aligned allocations
- std::vector<uint8_t> buffer_owner_;
+ MemoryPool* pool_;
};
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/memory-pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc
new file mode 100644
index 0000000..954b5f9
--- /dev/null
+++ b/cpp/src/arrow/util/memory-pool-test.cc
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <cstdint>
+#include <limits>
+
+#include "arrow/test-util.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+TEST(DefaultMemoryPool, MemoryTracking) {
+ MemoryPool* pool = GetDefaultMemoryPool();
+
+ uint8_t* data;
+ ASSERT_OK(pool->Allocate(100, &data));
+ ASSERT_EQ(100, pool->bytes_allocated());
+
+ pool->Free(data, 100);
+ ASSERT_EQ(0, pool->bytes_allocated());
+}
+
+TEST(DefaultMemoryPool, OOM) {
+ MemoryPool* pool = GetDefaultMemoryPool();
+
+ uint8_t* data;
+ int64_t to_alloc = std::numeric_limits<int64_t>::max();
+ ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/memory-pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc
new file mode 100644
index 0000000..5820346
--- /dev/null
+++ b/cpp/src/arrow/util/memory-pool.cc
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/memory-pool.h"
+
+#include <cstdlib>
+#include <sstream>
+#include <mutex>
+
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+MemoryPool::~MemoryPool() {}
+
+class InternalMemoryPool : public MemoryPool {
+ public:
+ InternalMemoryPool() : bytes_allocated_(0) {}
+ virtual ~InternalMemoryPool();
+
+ Status Allocate(int64_t size, uint8_t** out) override;
+
+ void Free(uint8_t* buffer, int64_t size) override;
+
+ int64_t bytes_allocated() const override;
+
+ private:
+ mutable std::mutex pool_lock_;
+ int64_t bytes_allocated_;
+};
+
+Status InternalMemoryPool::Allocate(int64_t size, uint8_t** out) {
+ std::lock_guard<std::mutex> guard(pool_lock_);
+ *out = static_cast<uint8_t*>(std::malloc(size));
+ if (*out == nullptr) {
+ std::stringstream ss;
+ ss << "malloc of size " << size << " failed";
+ return Status::OutOfMemory(ss.str());
+ }
+
+ bytes_allocated_ += size;
+
+ return Status::OK();
+}
+
+int64_t InternalMemoryPool::bytes_allocated() const {
+ std::lock_guard<std::mutex> guard(pool_lock_);
+ return bytes_allocated_;
+}
+
+void InternalMemoryPool::Free(uint8_t* buffer, int64_t size) {
+ std::lock_guard<std::mutex> guard(pool_lock_);
+ std::free(buffer);
+ bytes_allocated_ -= size;
+}
+
+InternalMemoryPool::~InternalMemoryPool() {}
+
+MemoryPool* GetDefaultMemoryPool() {
+ static InternalMemoryPool default_memory_pool;
+ return &default_memory_pool;
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/e4180208/cpp/src/arrow/util/memory-pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.h b/cpp/src/arrow/util/memory-pool.h
new file mode 100644
index 0000000..a7cb10d
--- /dev/null
+++ b/cpp/src/arrow/util/memory-pool.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_MEMORY_POOL_H
+#define ARROW_UTIL_MEMORY_POOL_H
+
+#include <cstdint>
+
+namespace arrow {
+
+class Status;
+
+class MemoryPool {
+ public:
+ virtual ~MemoryPool();
+
+ virtual Status Allocate(int64_t size, uint8_t** out) = 0;
+ virtual void Free(uint8_t* buffer, int64_t size) = 0;
+
+ virtual int64_t bytes_allocated() const = 0;
+};
+
+MemoryPool* GetDefaultMemoryPool();
+
+} // namespace arrow
+
+#endif // ARROW_UTIL_MEMORY_POOL_H