Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/31 03:35:11 UTC
[arrow] branch master updated: ARROW-2062: [Python] Do not use memory maps in test_serialization.py to try to improve Travis CI flakiness
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e112995 ARROW-2062: [Python] Do not use memory maps in test_serialization.py to try to improve Travis CI flakiness
e112995 is described below
commit e112995fdfa4917ec5b683eead5b07a7921d1600
Author: Wes McKinney <we...@twosigma.com>
AuthorDate: Tue Jan 30 22:35:06 2018 -0500
ARROW-2062: [Python] Do not use memory maps in test_serialization.py to try to improve Travis CI flakiness
Author: Wes McKinney <we...@twosigma.com>
Closes #1536 from wesm/ARROW-2062 and squashes the following commits:
22300cd8 [Wes McKinney] Do not use memory maps in serialization tests
ff5141fc [Wes McKinney] Add large_buffer fixture
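
For readers skimming the patch: the tests stop writing through a memory-mapped temp file and instead reuse a single in-memory buffer. Below is a minimal, illustrative sketch of the buffer round trip that the new serialization_roundtrip helper performs. It assumes a pyarrow version contemporary with this commit (pa.serialize_to and pa.deserialize_from were deprecated in later releases); the example value and buffer size are made up and not part of the patch.

    import pyarrow as pa

    # Reusable in-memory scratch space (the patch allocates 100 MB once per session).
    scratch = pa.allocate_buffer(1024 * 1024)  # 1 MB is enough for this example

    value = {"ints": list(range(10)), "label": "example"}

    # Serialize into the buffer through a file-like writer...
    writer = pa.FixedSizeBufferWriter(scratch)
    pa.serialize_to(value, writer)

    # ...then deserialize it back through a reader over the same buffer.
    reader = pa.BufferReader(scratch)
    result = pa.deserialize_from(reader, None)

    assert result == value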
---
python/pyarrow/tests/test_serialization.py | 103 +++++++++++++++--------------
1 file changed, 52 insertions(+), 51 deletions(-)
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 9cad81f..284c7fb 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -210,11 +210,12 @@ def make_serialization_context():
serialization_context = make_serialization_context()
-def serialization_roundtrip(value, f, ctx=serialization_context):
-    f.seek(0)
-    pa.serialize_to(value, f, ctx)
-    f.seek(0)
-    result = pa.deserialize_from(f, None, ctx)
+def serialization_roundtrip(value, scratch_buffer, ctx=serialization_context):
+    writer = pa.FixedSizeBufferWriter(scratch_buffer)
+    pa.serialize_to(value, writer, ctx)
+
+    reader = pa.BufferReader(scratch_buffer)
+    result = pa.deserialize_from(reader, None, ctx)
    assert_equal(value, result)
    _check_component_roundtrip(value)
@@ -230,6 +231,10 @@ def _check_component_roundtrip(value):
@pytest.yield_fixture(scope='session')
+def large_buffer(size=100*1024*1024):
+    return pa.allocate_buffer(size)
+
+
def large_memory_map(tmpdir_factory, size=100*1024*1024):
    path = (tmpdir_factory.mktemp('data')
            .join('pyarrow-serialization-tmp-file').strpath)
@@ -243,11 +248,11 @@ def large_memory_map(tmpdir_factory, size=100*1024*1024):
    return path
-def test_primitive_serialization(large_memory_map):
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        for obj in PRIMITIVE_OBJECTS:
-            serialization_roundtrip(obj, mmap)
-            serialization_roundtrip(obj, mmap, pa.pandas_serialization_context)
+def test_primitive_serialization(large_buffer):
+    for obj in PRIMITIVE_OBJECTS:
+        serialization_roundtrip(obj, large_buffer)
+        serialization_roundtrip(obj, large_buffer,
+                                pa.pandas_serialization_context)
def test_serialize_to_buffer():
@@ -258,34 +263,31 @@ def test_serialize_to_buffer():
        assert_equal(value, result)
-def test_complex_serialization(large_memory_map):
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        for obj in COMPLEX_OBJECTS:
-            serialization_roundtrip(obj, mmap)
+def test_complex_serialization(large_buffer):
+    for obj in COMPLEX_OBJECTS:
+        serialization_roundtrip(obj, large_buffer)
-def test_custom_serialization(large_memory_map):
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        for obj in CUSTOM_OBJECTS:
-            serialization_roundtrip(obj, mmap)
+def test_custom_serialization(large_buffer):
+    for obj in CUSTOM_OBJECTS:
+        serialization_roundtrip(obj, large_buffer)
-def test_default_dict_serialization(large_memory_map):
+def test_default_dict_serialization(large_buffer):
    pytest.importorskip("cloudpickle")
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        obj = defaultdict(lambda: 0, [("hello", 1), ("world", 2)])
-        serialization_roundtrip(obj, mmap)
+
+    obj = defaultdict(lambda: 0, [("hello", 1), ("world", 2)])
+    serialization_roundtrip(obj, large_buffer)
-def test_numpy_serialization(large_memory_map):
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        for t in ["bool", "int8", "uint8", "int16", "uint16", "int32",
-                  "uint32", "float16", "float32", "float64"]:
-            obj = np.random.randint(0, 10, size=(100, 100)).astype(t)
-            serialization_roundtrip(obj, mmap)
+def test_numpy_serialization(large_buffer):
+    for t in ["bool", "int8", "uint8", "int16", "uint16", "int32",
+              "uint32", "float16", "float32", "float64"]:
+        obj = np.random.randint(0, 10, size=(100, 100)).astype(t)
+        serialization_roundtrip(obj, large_buffer)
-def test_datetime_serialization(large_memory_map):
+def test_datetime_serialization(large_buffer):
    data = [
        # Principia Mathematica published
        datetime.datetime(year=1687, month=7, day=5),
@@ -309,32 +311,31 @@ def test_datetime_serialization(large_memory_map):
        datetime.datetime(year=1970, month=1, day=3, hour=4,
                          minute=0, second=0)
    ]
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        for d in data:
-            serialization_roundtrip(d, mmap)
+    for d in data:
+        serialization_roundtrip(d, large_buffer)
-def test_torch_serialization(large_memory_map):
+def test_torch_serialization(large_buffer):
    pytest.importorskip("torch")
    import torch
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        # These are the only types that are supported for the
-        # PyTorch to NumPy conversion
-        for t in ["float32", "float64",
-                  "uint8", "int16", "int32", "int64"]:
-            obj = torch.from_numpy(np.random.randn(1000).astype(t))
-            serialization_roundtrip(obj, mmap)
-
-
-def test_numpy_immutable(large_memory_map):
-    with pa.memory_map(large_memory_map, mode="r+") as mmap:
-        obj = np.zeros([10])
-        mmap.seek(0)
-        pa.serialize_to(obj, mmap, serialization_context)
-        mmap.seek(0)
-        result = pa.deserialize_from(mmap, None, serialization_context)
-        with pytest.raises(ValueError):
-            result[0] = 1.0
+    # These are the only types that are supported for the
+    # PyTorch to NumPy conversion
+    for t in ["float32", "float64",
+              "uint8", "int16", "int32", "int64"]:
+        obj = torch.from_numpy(np.random.randn(1000).astype(t))
+        serialization_roundtrip(obj, large_buffer)
+
+
+def test_numpy_immutable(large_buffer):
+    obj = np.zeros([10])
+
+    writer = pa.FixedSizeBufferWriter(large_buffer)
+    pa.serialize_to(obj, writer, serialization_context)
+
+    reader = pa.BufferReader(large_buffer)
+    result = pa.deserialize_from(reader, None, serialization_context)
+    with pytest.raises(ValueError):
+        result[0] = 1.0
# see https://issues.apache.org/jira/browse/ARROW-1695
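
Side note on the new fixture: large_buffer is session-scoped, so the 100 MB allocation happens once and is shared by every test that requests it. A self-contained sketch of that pattern, with hypothetical fixture and test names that are not part of the patch, and again assuming a pyarrow version where pa.serialize_to / pa.deserialize_from are still available:

    import pyarrow as pa
    import pytest


    @pytest.fixture(scope='session')
    def scratch_buffer():
        # Allocated once per test session and reused by all tests below.
        return pa.allocate_buffer(10 * 1024 * 1024)


    def test_list_roundtrip(scratch_buffer):
        value = [1, 2, 3, "four"]
        writer = pa.FixedSizeBufferWriter(scratch_buffer)
        pa.serialize_to(value, writer)
        reader = pa.BufferReader(scratch_buffer)
        assert pa.deserialize_from(reader, None) == value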