You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/06 17:24:14 UTC
arrow git commit: ARROW-1405: [Python] Expose LoggingMemoryPool in
Python API
Repository: arrow
Updated Branches:
refs/heads/master 5abd12321 -> 54d624b2e
ARROW-1405: [Python] Expose LoggingMemoryPool in Python API
I removed the pointer values from the LoggingMemoryPool log output: because they are `uint8_t*`, `std::cout` printed them as character strings rather than addresses, which showed up as garbage in the console. Example:
```
In [1]: import pyarrow as pa
In [2]: import pyarrow.parquet as pq
In [3]: pa.log_memory_allocations(True)
In [4]: t = pq.read_table('/home/wesm/Downloads/part-00000-6570e34b-b42c-4a39-8adf-21d3a97fb87d.snappy.parquet')
Allocate: size = 320
Allocate: size = 64
Allocate: size = 192
Free: size = 192
Allocate: size = 320
Allocate: size = 64
Allocate: size = 192
Allocate: size = 192
Allocate: size = 1152
Allocate: size = 64
Allocate: size = 64
Allocate: size = 64
Reallocate: old_size = 64 - new_size = 128
Reallocate: old_size = 128 - new_size = 192
Reallocate: old_size = 192 - new_size = 320
Reallocate: old_size = 320 - new_size = 576
Reallocate: old_size = 64 - new_size = 128
Reallocate: old_size = 576 - new_size = 1088
Reallocate: old_size = 128 - new_size = 256
```
Author: Wes McKinney <we...@twosigma.com>
Closes #1044 from wesm/ARROW-1405 and squashes the following commits:
33111644 [Wes McKinney] Restore includes to prior revision
41d8506f [Wes McKinney] Docstring
9d5e96a4 [Wes McKinney] Expose LoggingMemoryPool in Python API
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/54d624b2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/54d624b2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/54d624b2
Branch: refs/heads/master
Commit: 54d624b2eeb90afb2724b5137bed5dd0e5d8b0a1
Parents: 5abd123
Author: Wes McKinney <we...@twosigma.com>
Authored: Wed Sep 6 13:24:09 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Sep 6 13:24:09 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/memory_pool.cc | 8 ++++----
python/doc/source/api.rst | 1 +
python/pyarrow/__init__.py | 5 ++++-
python/pyarrow/lib.pxd | 4 ----
python/pyarrow/memory.pxi | 27 ++++++++++++++++++++++++++-
5 files changed, 35 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/cpp/src/arrow/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index b5b4461..7fd999e 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -162,20 +162,20 @@ LoggingMemoryPool::LoggingMemoryPool(MemoryPool* pool) : pool_(pool) {}
Status LoggingMemoryPool::Allocate(int64_t size, uint8_t** out) {
Status s = pool_->Allocate(size, out);
- std::cout << "Allocate: size = " << size << " - out = " << *out << std::endl;
+ std::cout << "Allocate: size = " << size << std::endl;
return s;
}
Status LoggingMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
Status s = pool_->Reallocate(old_size, new_size, ptr);
- std::cout << "Reallocate: old_size = " << old_size << " - new_size = " << new_size
- << " - ptr = " << *ptr << std::endl;
+ std::cout << "Reallocate: old_size = " << old_size
+ << " - new_size = " << new_size << std::endl;
return s;
}
void LoggingMemoryPool::Free(uint8_t* buffer, int64_t size) {
pool_->Free(buffer, size);
- std::cout << "Free: buffer = " << buffer << " - size = " << size << std::endl;
+ std::cout << "Free: size = " << size << std::endl;
}
int64_t LoggingMemoryPool::bytes_allocated() const {
http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/doc/source/api.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 473b16d..cc24e49 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -222,6 +222,7 @@ Memory Pools
default_memory_pool
total_allocated_bytes
set_memory_pool
+ log_memory_allocations
.. _api.type_classes:
http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index a4a6ed4..2b6c9fe 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -76,9 +76,12 @@ from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
have_libhdfs, have_libhdfs3, MockOutputStream)
from pyarrow.lib import (MemoryPool, total_allocated_bytes,
- set_memory_pool, default_memory_pool)
+ set_memory_pool, default_memory_pool,
+ log_memory_allocations)
+
from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table,
concat_tables)
+
from pyarrow.lib import (ArrowException,
ArrowKeyError,
ArrowInvalid,
http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/pyarrow/lib.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 48a58f7..8fdcf55 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -35,10 +35,6 @@ cdef class MemoryPool:
cdef void init(self, CMemoryPool* pool)
-cdef class LoggingMemoryPool(MemoryPool):
- pass
-
-
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/pyarrow/memory.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi
index 6671a01..3d2601f 100644
--- a/python/pyarrow/memory.pxi
+++ b/python/pyarrow/memory.pxi
@@ -36,7 +36,12 @@ cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
cdef class LoggingMemoryPool(MemoryPool):
- pass
+ cdef:
+ unique_ptr[CLoggingMemoryPool] logging_pool
+
+ def __cinit__(self, MemoryPool pool):
+ self.logging_pool.reset(new CLoggingMemoryPool(pool.pool))
+ self.init(self.logging_pool.get())
def default_memory_pool():
@@ -50,6 +55,26 @@ def set_memory_pool(MemoryPool pool):
c_set_default_memory_pool(pool.pool)
+cdef MemoryPool _default_memory_pool = default_memory_pool()
+cdef LoggingMemoryPool _logging_memory_pool = (
+ LoggingMemoryPool(_default_memory_pool))
+
+
+def log_memory_allocations(enable=True):
+ """
+ Enable or disable memory allocator logging for debugging purposes
+
+ Parameters
+ ----------
+ enable : boolean, default True
+ Pass False to disable logging
+ """
+ if enable:
+ set_memory_pool(_logging_memory_pool)
+ else:
+ set_memory_pool(_default_memory_pool)
+
+
def total_allocated_bytes():
cdef CMemoryPool* pool = c_get_memory_pool()
return pool.bytes_allocated()