You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/16 13:28:41 UTC

arrow git commit: ARROW-830: [Python] Expose jemalloc memory pool and other memory pool functions in public pyarrow API

Repository: arrow
Updated Branches:
  refs/heads/master 30e03a907 -> ee5cb2ad1


ARROW-830: [Python] Expose jemalloc memory pool and other memory pool functions in public pyarrow API

Author: Wes McKinney <we...@twosigma.com>

Closes #550 from wesm/ARROW-830 and squashes the following commits:

c1ca9fb [Wes McKinney] Expose jemalloc memory pool and other memory pool functions in public pyarrow API


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ee5cb2ad
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ee5cb2ad
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ee5cb2ad

Branch: refs/heads/master
Commit: ee5cb2ad171f0f4c7673f2937dc226d62aad972c
Parents: 30e03a9
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun Apr 16 09:28:34 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Apr 16 09:28:34 2017 -0400

----------------------------------------------------------------------
 python/README.md                      |  2 +-
 python/doc/source/api.rst             | 12 +++++++++++
 python/doc/source/jemalloc.rst        |  8 ++-----
 python/pyarrow/__init__.py            | 13 +++++++++++-
 python/pyarrow/_memory.pyx            | 12 ++++++++---
 python/pyarrow/tests/test_jemalloc.py | 34 +++++++++++++++++-------------
 6 files changed, 55 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/README.md
----------------------------------------------------------------------
diff --git a/python/README.md b/python/README.md
index 25a3a67..ed008ea 100644
--- a/python/README.md
+++ b/python/README.md
@@ -89,7 +89,7 @@ export PYARROW_CMAKE_OPTIONS=-DPYARROW_BUILD_PARQUET=on
 
 ```bash
 pip install -r doc/requirements.txt
-python setup.py build_sphinx
+python setup.py build_sphinx -s doc/source
 ```
 
 [1]: https://github.com/apache/parquet-cpp
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/doc/source/api.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 514dcf9..801ab34 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -151,3 +151,15 @@ Interprocess Communication and Messaging
    FileWriter
    StreamReader
    StreamWriter
+
+Memory Pools
+------------
+
+.. autosummary::
+   :toctree: generated/
+
+   MemoryPool
+   default_memory_pool
+   jemalloc_memory_pool
+   total_allocated_bytes
+   set_memory_pool

http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/doc/source/jemalloc.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/jemalloc.rst b/python/doc/source/jemalloc.rst
index 33fe617..8d7a5dc 100644
--- a/python/doc/source/jemalloc.rst
+++ b/python/doc/source/jemalloc.rst
@@ -35,18 +35,14 @@ operations.
 .. code:: python
 
     import pyarrow as pa
-    import pyarrow.jemalloc
-    import pyarrow.memory
 
-    jemalloc_pool = pyarrow.jemalloc.default_pool()
+    jemalloc_pool = pyarrow.jemalloc_memory_pool()
 
     # Explicitly use jemalloc for allocating memory for an Arrow Table object
     array = pa.Array.from_pylist([1, 2, 3], memory_pool=jemalloc_pool)
 
     # Set the global pool
-    pyarrow.memory.set_default_pool(jemalloc_pool)
+    pyarrow.set_memory_pool(jemalloc_pool)
     # This operation has no explicit MemoryPool specified and will thus
     # also use jemalloc for its allocations.
     array = pa.Array.from_pylist([1, 2, 3])
-
-

http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 66bde49..506d567 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -62,7 +62,8 @@ from pyarrow._io import (HdfsFile, NativeFile, PythonFileInterface,
                          memory_map, create_memory_map,
                          get_record_batch_size, get_tensor_size)
 
-from pyarrow._memory import MemoryPool, total_allocated_bytes
+from pyarrow._memory import (MemoryPool, total_allocated_bytes,
+                             set_memory_pool, default_memory_pool)
 from pyarrow._table import Column, RecordBatch, Table, concat_tables
 from pyarrow._error import (ArrowException,
                             ArrowKeyError,
@@ -72,6 +73,16 @@ from pyarrow._error import (ArrowException,
                             ArrowNotImplementedError,
                             ArrowTypeError)
 
+
+def jemalloc_memory_pool():
+    """
+    Returns a jemalloc-based memory allocator, which can be passed to
+    pyarrow.set_memory_pool
+    """
+    from pyarrow._jemalloc import default_pool
+    return default_pool()
+
+
 from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem
 
 from pyarrow.ipc import FileReader, FileWriter, StreamReader, StreamWriter

http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/pyarrow/_memory.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_memory.pyx b/python/pyarrow/_memory.pyx
index 98dbf66..8b73a17 100644
--- a/python/pyarrow/_memory.pyx
+++ b/python/pyarrow/_memory.pyx
@@ -22,6 +22,7 @@
 from pyarrow.includes.libarrow cimport CMemoryPool, CLoggingMemoryPool
 from pyarrow.includes.pyarrow cimport set_default_memory_pool, get_memory_pool
 
+
 cdef class MemoryPool:
     cdef init(self, CMemoryPool* pool):
         self.pool = pool
@@ -29,24 +30,29 @@ cdef class MemoryPool:
     def bytes_allocated(self):
         return self.pool.bytes_allocated()
 
+
 cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
     if memory_pool is None:
         return get_memory_pool()
     else:
         return memory_pool.pool
 
+
 cdef class LoggingMemoryPool(MemoryPool):
     pass
 
-def default_pool():
-    cdef: 
+
+def default_memory_pool():
+    cdef:
         MemoryPool pool = MemoryPool()
     pool.init(get_memory_pool())
     return pool
 
-def set_default_pool(MemoryPool pool):
+
+def set_memory_pool(MemoryPool pool):
     set_default_memory_pool(pool.pool)
 
+
 def total_allocated_bytes():
     cdef CMemoryPool* pool = get_memory_pool()
     return pool.bytes_allocated()

http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/pyarrow/tests/test_jemalloc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_jemalloc.py b/python/pyarrow/tests/test_jemalloc.py
index c6cc2cc..0a4d8a6 100644
--- a/python/pyarrow/tests/test_jemalloc.py
+++ b/python/pyarrow/tests/test_jemalloc.py
@@ -18,12 +18,16 @@
 import gc
 import pytest
 
+import pyarrow as pa
+
+
 try:
-    import pyarrow.jemalloc
+    pa.jemalloc_memory_pool()
     HAVE_JEMALLOC = True
 except ImportError:
     HAVE_JEMALLOC = False
 
+
 jemalloc = pytest.mark.skipif(not HAVE_JEMALLOC,
                               reason='jemalloc support not built')
 
@@ -31,33 +35,33 @@ jemalloc = pytest.mark.skipif(not HAVE_JEMALLOC,
 @jemalloc
 def test_different_memory_pool():
     gc.collect()
-    bytes_before_default = pyarrow.total_allocated_bytes()
-    bytes_before_jemalloc = pyarrow.jemalloc.default_pool().bytes_allocated()
+    bytes_before_default = pa.total_allocated_bytes()
+    bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()
 
     # it works
-    array = pyarrow.from_pylist([1, None, 3, None],  # noqa
-                                memory_pool=pyarrow.jemalloc.default_pool())
+    array = pa.from_pylist([1, None, 3, None],  # noqa
+                                memory_pool=pa.jemalloc_memory_pool())
     gc.collect()
-    assert pyarrow.total_allocated_bytes() == bytes_before_default
-    assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
+    assert pa.total_allocated_bytes() == bytes_before_default
+    assert (pa.jemalloc_memory_pool().bytes_allocated() >
             bytes_before_jemalloc)
 
 
 @jemalloc
 def test_default_memory_pool():
     gc.collect()
-    bytes_before_default = pyarrow.total_allocated_bytes()
-    bytes_before_jemalloc = pyarrow.jemalloc.default_pool().bytes_allocated()
+    bytes_before_default = pa.total_allocated_bytes()
+    bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()
 
-    old_memory_pool = pyarrow.memory.default_pool()
-    pyarrow.memory.set_default_pool(pyarrow.jemalloc.default_pool())
+    old_memory_pool = pa.default_memory_pool()
+    pa.set_memory_pool(pa.jemalloc_memory_pool())
 
-    array = pyarrow.from_pylist([1, None, 3, None])  # noqa
+    array = pa.from_pylist([1, None, 3, None])  # noqa
 
-    pyarrow.memory.set_default_pool(old_memory_pool)
+    pa.set_memory_pool(old_memory_pool)
     gc.collect()
 
-    assert pyarrow.total_allocated_bytes() == bytes_before_default
+    assert pa.total_allocated_bytes() == bytes_before_default
 
-    assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
+    assert (pa.jemalloc_memory_pool().bytes_allocated() >
             bytes_before_jemalloc)