You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/16 13:28:41 UTC
arrow git commit: ARROW-830: [Python] Expose jemalloc memory pool and
other memory pool functions in public pyarrow API
Repository: arrow
Updated Branches:
refs/heads/master 30e03a907 -> ee5cb2ad1
ARROW-830: [Python] Expose jemalloc memory pool and other memory pool functions in public pyarrow API
Author: Wes McKinney <we...@twosigma.com>
Closes #550 from wesm/ARROW-830 and squashes the following commits:
c1ca9fb [Wes McKinney] Expose jemalloc memory pool and other memory pool functions in public pyarrow API
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ee5cb2ad
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ee5cb2ad
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ee5cb2ad
Branch: refs/heads/master
Commit: ee5cb2ad171f0f4c7673f2937dc226d62aad972c
Parents: 30e03a9
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun Apr 16 09:28:34 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Apr 16 09:28:34 2017 -0400
----------------------------------------------------------------------
python/README.md | 2 +-
python/doc/source/api.rst | 12 +++++++++++
python/doc/source/jemalloc.rst | 8 ++-----
python/pyarrow/__init__.py | 13 +++++++++++-
python/pyarrow/_memory.pyx | 12 ++++++++---
python/pyarrow/tests/test_jemalloc.py | 34 +++++++++++++++++-------------
6 files changed, 55 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/README.md
----------------------------------------------------------------------
diff --git a/python/README.md b/python/README.md
index 25a3a67..ed008ea 100644
--- a/python/README.md
+++ b/python/README.md
@@ -89,7 +89,7 @@ export PYARROW_CMAKE_OPTIONS=-DPYARROW_BUILD_PARQUET=on
```bash
pip install -r doc/requirements.txt
-python setup.py build_sphinx
+python setup.py build_sphinx -s doc/source
```
[1]: https://github.com/apache/parquet-cpp
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/doc/source/api.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 514dcf9..801ab34 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -151,3 +151,15 @@ Interprocess Communication and Messaging
FileWriter
StreamReader
StreamWriter
+
+Memory Pools
+------------
+
+.. autosummary::
+ :toctree: generated/
+
+ MemoryPool
+ default_memory_pool
+ jemalloc_memory_pool
+ total_allocated_bytes
+ set_memory_pool
http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/doc/source/jemalloc.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/jemalloc.rst b/python/doc/source/jemalloc.rst
index 33fe617..8d7a5dc 100644
--- a/python/doc/source/jemalloc.rst
+++ b/python/doc/source/jemalloc.rst
@@ -35,18 +35,14 @@ operations.
.. code:: python
import pyarrow as pa
- import pyarrow.jemalloc
- import pyarrow.memory
- jemalloc_pool = pyarrow.jemalloc.default_pool()
+ jemalloc_pool = pyarrow.jemalloc_memory_pool()
# Explicitly use jemalloc for allocating memory for an Arrow Table object
array = pa.Array.from_pylist([1, 2, 3], memory_pool=jemalloc_pool)
# Set the global pool
- pyarrow.memory.set_default_pool(jemalloc_pool)
+ pyarrow.set_memory_pool(jemalloc_pool)
# This operation has no explicit MemoryPool specified and will thus
# also use jemalloc for its allocations.
array = pa.Array.from_pylist([1, 2, 3])
-
-
http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 66bde49..506d567 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -62,7 +62,8 @@ from pyarrow._io import (HdfsFile, NativeFile, PythonFileInterface,
memory_map, create_memory_map,
get_record_batch_size, get_tensor_size)
-from pyarrow._memory import MemoryPool, total_allocated_bytes
+from pyarrow._memory import (MemoryPool, total_allocated_bytes,
+ set_memory_pool, default_memory_pool)
from pyarrow._table import Column, RecordBatch, Table, concat_tables
from pyarrow._error import (ArrowException,
ArrowKeyError,
@@ -72,6 +73,16 @@ from pyarrow._error import (ArrowException,
ArrowNotImplementedError,
ArrowTypeError)
+
+def jemalloc_memory_pool():
+ """
+ Returns a jemalloc-based memory allocator, which can be passed to
+ pyarrow.set_memory_pool
+ """
+ from pyarrow._jemalloc import default_pool
+ return default_pool()
+
+
from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem
from pyarrow.ipc import FileReader, FileWriter, StreamReader, StreamWriter
http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/pyarrow/_memory.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_memory.pyx b/python/pyarrow/_memory.pyx
index 98dbf66..8b73a17 100644
--- a/python/pyarrow/_memory.pyx
+++ b/python/pyarrow/_memory.pyx
@@ -22,6 +22,7 @@
from pyarrow.includes.libarrow cimport CMemoryPool, CLoggingMemoryPool
from pyarrow.includes.pyarrow cimport set_default_memory_pool, get_memory_pool
+
cdef class MemoryPool:
cdef init(self, CMemoryPool* pool):
self.pool = pool
@@ -29,24 +30,29 @@ cdef class MemoryPool:
def bytes_allocated(self):
return self.pool.bytes_allocated()
+
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
if memory_pool is None:
return get_memory_pool()
else:
return memory_pool.pool
+
cdef class LoggingMemoryPool(MemoryPool):
pass
-def default_pool():
- cdef:
+
+def default_memory_pool():
+ cdef:
MemoryPool pool = MemoryPool()
pool.init(get_memory_pool())
return pool
-def set_default_pool(MemoryPool pool):
+
+def set_memory_pool(MemoryPool pool):
set_default_memory_pool(pool.pool)
+
def total_allocated_bytes():
cdef CMemoryPool* pool = get_memory_pool()
return pool.bytes_allocated()
http://git-wip-us.apache.org/repos/asf/arrow/blob/ee5cb2ad/python/pyarrow/tests/test_jemalloc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_jemalloc.py b/python/pyarrow/tests/test_jemalloc.py
index c6cc2cc..0a4d8a6 100644
--- a/python/pyarrow/tests/test_jemalloc.py
+++ b/python/pyarrow/tests/test_jemalloc.py
@@ -18,12 +18,16 @@
import gc
import pytest
+import pyarrow as pa
+
+
try:
- import pyarrow.jemalloc
+ pa.jemalloc_memory_pool()
HAVE_JEMALLOC = True
except ImportError:
HAVE_JEMALLOC = False
+
jemalloc = pytest.mark.skipif(not HAVE_JEMALLOC,
reason='jemalloc support not built')
@@ -31,33 +35,33 @@ jemalloc = pytest.mark.skipif(not HAVE_JEMALLOC,
@jemalloc
def test_different_memory_pool():
gc.collect()
- bytes_before_default = pyarrow.total_allocated_bytes()
- bytes_before_jemalloc = pyarrow.jemalloc.default_pool().bytes_allocated()
+ bytes_before_default = pa.total_allocated_bytes()
+ bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()
# it works
- array = pyarrow.from_pylist([1, None, 3, None], # noqa
- memory_pool=pyarrow.jemalloc.default_pool())
+ array = pa.from_pylist([1, None, 3, None], # noqa
+ memory_pool=pa.jemalloc_memory_pool())
gc.collect()
- assert pyarrow.total_allocated_bytes() == bytes_before_default
- assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
+ assert pa.total_allocated_bytes() == bytes_before_default
+ assert (pa.jemalloc_memory_pool().bytes_allocated() >
bytes_before_jemalloc)
@jemalloc
def test_default_memory_pool():
gc.collect()
- bytes_before_default = pyarrow.total_allocated_bytes()
- bytes_before_jemalloc = pyarrow.jemalloc.default_pool().bytes_allocated()
+ bytes_before_default = pa.total_allocated_bytes()
+ bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated()
- old_memory_pool = pyarrow.memory.default_pool()
- pyarrow.memory.set_default_pool(pyarrow.jemalloc.default_pool())
+ old_memory_pool = pa.default_memory_pool()
+ pa.set_memory_pool(pa.jemalloc_memory_pool())
- array = pyarrow.from_pylist([1, None, 3, None]) # noqa
+ array = pa.from_pylist([1, None, 3, None]) # noqa
- pyarrow.memory.set_default_pool(old_memory_pool)
+ pa.set_memory_pool(old_memory_pool)
gc.collect()
- assert pyarrow.total_allocated_bytes() == bytes_before_default
+ assert pa.total_allocated_bytes() == bytes_before_default
- assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
+ assert (pa.jemalloc_memory_pool().bytes_allocated() >
bytes_before_jemalloc)