You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/06 17:24:14 UTC

arrow git commit: ARROW-1405: [Python] Expose LoggingMemoryPool in Python API

Repository: arrow
Updated Branches:
  refs/heads/master 5abd12321 -> 54d624b2e


ARROW-1405: [Python] Expose LoggingMemoryPool in Python API

I removed the pointer values from the LoggingMemoryPool log output: they are `uint8_t*`, which `std::cout` prints as a C string rather than an address, so they were showing up as garbage in the console. Example of the new output:

```
In [1]: import pyarrow as pa

In [2]: import pyarrow.parquet as pq

In [3]: pa.log_memory_allocations(True)

In [4]: t = pq.read_table('/home/wesm/Downloads/part-00000-6570e34b-b42c-4a39-8adf-21d3a97fb87d.snappy.parquet')
Allocate: size = 320
Allocate: size = 64
Allocate: size = 192
Free: size = 192
Allocate: size = 320
Allocate: size = 64
Allocate: size = 192
Allocate: size = 192
Allocate: size = 1152
Allocate: size = 64
Allocate: size = 64
Allocate: size = 64
Reallocate: old_size = 64 - new_size = 128
Reallocate: old_size = 128 - new_size = 192
Reallocate: old_size = 192 - new_size = 320
Reallocate: old_size = 320 - new_size = 576
Reallocate: old_size = 64 - new_size = 128
Reallocate: old_size = 576 - new_size = 1088
Reallocate: old_size = 128 - new_size = 256
```

Author: Wes McKinney <we...@twosigma.com>

Closes #1044 from wesm/ARROW-1405 and squashes the following commits:

33111644 [Wes McKinney] Restore includes to prior revision
41d8506f [Wes McKinney] Docstring
9d5e96a4 [Wes McKinney] Expose LoggingMemoryPool in Python API


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/54d624b2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/54d624b2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/54d624b2

Branch: refs/heads/master
Commit: 54d624b2eeb90afb2724b5137bed5dd0e5d8b0a1
Parents: 5abd123
Author: Wes McKinney <we...@twosigma.com>
Authored: Wed Sep 6 13:24:09 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Sep 6 13:24:09 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/memory_pool.cc |  8 ++++----
 python/doc/source/api.rst    |  1 +
 python/pyarrow/__init__.py   |  5 ++++-
 python/pyarrow/lib.pxd       |  4 ----
 python/pyarrow/memory.pxi    | 27 ++++++++++++++++++++++++++-
 5 files changed, 35 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/cpp/src/arrow/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index b5b4461..7fd999e 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -162,20 +162,20 @@ LoggingMemoryPool::LoggingMemoryPool(MemoryPool* pool) : pool_(pool) {}
 
 Status LoggingMemoryPool::Allocate(int64_t size, uint8_t** out) {
   Status s = pool_->Allocate(size, out);
-  std::cout << "Allocate: size = " << size << " - out = " << *out << std::endl;
+  std::cout << "Allocate: size = " << size << std::endl;
   return s;
 }
 
 Status LoggingMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
   Status s = pool_->Reallocate(old_size, new_size, ptr);
-  std::cout << "Reallocate: old_size = " << old_size << " - new_size = " << new_size
-            << " - ptr = " << *ptr << std::endl;
+  std::cout << "Reallocate: old_size = " << old_size
+            << " - new_size = " << new_size << std::endl;
   return s;
 }
 
 void LoggingMemoryPool::Free(uint8_t* buffer, int64_t size) {
   pool_->Free(buffer, size);
-  std::cout << "Free: buffer = " << buffer << " - size = " << size << std::endl;
+  std::cout << "Free: size = " << size << std::endl;
 }
 
 int64_t LoggingMemoryPool::bytes_allocated() const {

http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/doc/source/api.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 473b16d..cc24e49 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -222,6 +222,7 @@ Memory Pools
    default_memory_pool
    total_allocated_bytes
    set_memory_pool
+   log_memory_allocations
 
 .. _api.type_classes:
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index a4a6ed4..2b6c9fe 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -76,9 +76,12 @@ from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
                          have_libhdfs, have_libhdfs3, MockOutputStream)
 
 from pyarrow.lib import (MemoryPool, total_allocated_bytes,
-                         set_memory_pool, default_memory_pool)
+                         set_memory_pool, default_memory_pool,
+                         log_memory_allocations)
+
 from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table,
                          concat_tables)
+
 from pyarrow.lib import (ArrowException,
                          ArrowKeyError,
                          ArrowInvalid,

http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/pyarrow/lib.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 48a58f7..8fdcf55 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -35,10 +35,6 @@ cdef class MemoryPool:
     cdef void init(self, CMemoryPool* pool)
 
 
-cdef class LoggingMemoryPool(MemoryPool):
-    pass
-
-
 cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
 
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/54d624b2/python/pyarrow/memory.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi
index 6671a01..3d2601f 100644
--- a/python/pyarrow/memory.pxi
+++ b/python/pyarrow/memory.pxi
@@ -36,7 +36,12 @@ cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
 
 
 cdef class LoggingMemoryPool(MemoryPool):
-    pass
+    cdef:
+        unique_ptr[CLoggingMemoryPool] logging_pool
+
+    def __cinit__(self, MemoryPool pool):
+        self.logging_pool.reset(new CLoggingMemoryPool(pool.pool))
+        self.init(self.logging_pool.get())
 
 
 def default_memory_pool():
@@ -50,6 +55,26 @@ def set_memory_pool(MemoryPool pool):
     c_set_default_memory_pool(pool.pool)
 
 
+cdef MemoryPool _default_memory_pool = default_memory_pool()
+cdef LoggingMemoryPool _logging_memory_pool = (
+    LoggingMemoryPool(_default_memory_pool))
+
+
+def log_memory_allocations(enable=True):
+    """
+    Enable or disable memory allocator logging for debugging purposes
+
+    Parameters
+    ----------
+    enable : boolean, default True
+        Pass False to disable logging
+    """
+    if enable:
+        set_memory_pool(_logging_memory_pool)
+    else:
+        set_memory_pool(_default_memory_pool)
+
+
 def total_allocated_bytes():
     cdef CMemoryPool* pool = c_get_memory_pool()
     return pool.bytes_allocated()