You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/07/30 13:31:38 UTC
[arrow] branch master updated: ARROW-2660: [Python] Experimental zero-copy pickling
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2422d9c ARROW-2660: [Python] Experimental zero-copy pickling
2422d9c is described below
commit 2422d9c821e8998e65df4e48920864c7f268c2bb
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Mon Jul 30 15:31:21 2018 +0200
ARROW-2660: [Python] Experimental zero-copy pickling
Zero-copy pickling of buffers and buffer-based objects will be possible using PEP 574 (if/when accepted). The PyPI backport "pickle5" helps us test that possibility.
Author: Antoine Pitrou <an...@python.org>
Closes #2161 from pitrou/ARROW-2660-zero-copy-pickling and squashes the following commits:
50f0491 <Antoine Pitrou> Fix test on Python 2.7 (hopefully)
132939c <Antoine Pitrou> Add pickle5 to CI environments
892302a <Antoine Pitrou> ARROW-2660: Zero-copy pickling
---
ci/cpp-python-msvc-build.bat | 4 +++-
ci/travis_script_python.sh | 3 +++
python/pyarrow/compat.py | 10 +++++-----
python/pyarrow/io.pxi | 9 ++++++---
python/pyarrow/tests/test_array.py | 38 ++++++++++++++++++++++++++++++++++----
5 files changed, 51 insertions(+), 13 deletions(-)
diff --git a/ci/cpp-python-msvc-build.bat b/ci/cpp-python-msvc-build.bat
index d3f540b..ecc68e0 100644
--- a/ci/cpp-python-msvc-build.bat
+++ b/ci/cpp-python-msvc-build.bat
@@ -133,6 +133,8 @@ popd
pushd python
+pip install pickle5
+
set PYARROW_CXXFLAGS=/WX
set PYARROW_CMAKE_GENERATOR=%GENERATOR%
set PYARROW_BUNDLE_ARROW_CPP=ON
@@ -167,6 +169,6 @@ pip install %WHEEL_PATH% || exit /B
python -c "import pyarrow" || exit /B
python -c "import pyarrow.parquet" || exit /B
-pip install pandas pytest pytest-faulthandler
+pip install pandas pickle5 pytest pytest-faulthandler
py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 4eeb103..0743f86 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -102,6 +102,9 @@ pushd $ARROW_PYTHON_DIR
# Other stuff pip install
pip install -q -r requirements.txt
+if [ "$PYTHON_VERSION" == "3.6" ]; then
+ pip install -q pickle5
+fi
if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then
export PYARROW_GENERATE_COVERAGE=1
pip install -q coverage
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 44e156e..bbb1bd8 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -107,7 +107,10 @@ if PY2:
def unichar(s):
return unichr(s)
else:
- import pickle as builtin_pickle
+ try:
+ import pickle5 as builtin_pickle
+ except ImportError:
+ import pickle as builtin_pickle
unicode_type = str
def lzip(*x):
@@ -142,10 +145,7 @@ else:
try:
import cloudpickle as pickle
except ImportError:
- try:
- import cPickle as pickle
- except ImportError:
- import pickle
+ pickle = builtin_pickle
def encode_file_path(path):
import os
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 4566477..f0e98ab 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -19,7 +19,7 @@
# arrow::ipc
from libc.stdlib cimport malloc, free
-from pyarrow.compat import frombytes, tobytes, encode_file_path
+from pyarrow.compat import builtin_pickle, frombytes, tobytes, encode_file_path
from io import BufferedIOBase, UnsupportedOperation
import re
@@ -823,8 +823,11 @@ cdef class Buffer:
else:
return NotImplemented
- def __reduce__(self):
- return py_buffer, (self.to_pybytes(),)
+ def __reduce_ex__(self, protocol):
+ if protocol >= 5:
+ return py_buffer, (builtin_pickle.PickleBuffer(self),)
+ else:
+ return py_buffer, (self.to_pybytes(),)
def to_pybytes(self):
return cp.PyBytes_FromStringAndSize(
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 7ab54dd..af2708f 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -17,6 +17,7 @@
import collections
import datetime
+import pickle
import pytest
import struct
import sys
@@ -24,7 +25,10 @@ import sys
import numpy as np
import pandas as pd
import pandas.util.testing as tm
-import pickle
+try:
+ import pickle5
+except ImportError:
+ pickle5 = None
import pyarrow as pa
from pyarrow.pandas_compat import get_logical_type
@@ -633,7 +637,7 @@ def test_cast_date64_to_int():
assert result.equals(expected)
-@pytest.mark.parametrize(
+pickle_test_parametrize = pytest.mark.parametrize(
('data', 'typ'),
[
([True, False, True, True], pa.bool_()),
@@ -647,12 +651,38 @@ def test_cast_date64_to_int():
pa.struct([pa.field('a', pa.int64()), pa.field('b', pa.string())]))
]
)
+
+
+@pickle_test_parametrize
def test_array_pickle(data, typ):
# Allocate here so that we don't have any Arrow data allocated.
# This is needed to ensure that allocator tests can be reliable.
array = pa.array(data, type=typ)
- result = pickle.loads(pickle.dumps(array))
- assert array.equals(result)
+ for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
+ result = pickle.loads(pickle.dumps(array, proto))
+ assert array.equals(result)
+
+
+@pickle_test_parametrize
+def test_array_pickle5(data, typ):
+ # Test zero-copy pickling with protocol 5 (PEP 574)
+ picklemod = pickle5 or pickle
+ if pickle5 is None and picklemod.HIGHEST_PROTOCOL < 5:
+ pytest.skip("need pickle5 package or Python 3.8+")
+
+ array = pa.array(data, type=typ)
+ addresses = [buf.address if buf is not None else 0
+ for buf in array.buffers()]
+
+ for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+ buffers = []
+ pickled = picklemod.dumps(array, proto, buffer_callback=buffers.append)
+ result = picklemod.loads(pickled, buffers=buffers)
+ assert array.equals(result)
+
+ result_addresses = [buf.address if buf is not None else 0
+ for buf in result.buffers()]
+ assert result_addresses == addresses
@pytest.mark.parametrize(