You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/16 01:38:32 UTC
arrow git commit: ARROW-961: [Python] Rename InMemoryOutputStream to
BufferOutputStream
Repository: arrow
Updated Branches:
refs/heads/master 86a905562 -> 47e289a9a
ARROW-961: [Python] Rename InMemoryOutputStream to BufferOutputStream
Better API naming consistency with C++
Author: Wes McKinney <we...@twosigma.com>
Closes #690 from wesm/ARROW-961 and squashes the following commits:
85b352c [Wes McKinney] Add more graceful deprecation warnings for renamed classes, test suite
69a99cd [Wes McKinney] Fix Cython compilation
a15910a [Wes McKinney] Rename InMemoryOutputStream to BufferOutputStream
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/47e289a9
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/47e289a9
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/47e289a9
Branch: refs/heads/master
Commit: 47e289a9a6d38fe9b0581dbc73a464cae084ef75
Parents: 86a9055
Author: Wes McKinney <we...@twosigma.com>
Authored: Mon May 15 21:38:15 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon May 15 21:38:15 2017 -0400
----------------------------------------------------------------------
python/doc/source/api.rst | 2 +-
python/doc/source/ipc.rst | 4 +-
python/doc/source/memory.rst | 12 ++---
python/pyarrow/__init__.py | 37 ++++++++++++++-
python/pyarrow/includes/libarrow.pxd | 5 +-
python/pyarrow/io.pxi | 4 +-
python/pyarrow/tests/test_deprecations.py | 64 ++++++++++++++++++++++++++
python/pyarrow/tests/test_io.py | 6 +--
python/pyarrow/tests/test_ipc.py | 4 +-
python/pyarrow/tests/test_parquet.py | 2 +-
10 files changed, 120 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/doc/source/api.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index e7bea70..c145935 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -162,7 +162,7 @@ Input / Output and Shared Memory
Buffer
BufferReader
- InMemoryOutputStream
+ BufferOutputStream
NativeFile
MemoryMappedFile
memory_map
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/doc/source/ipc.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/ipc.rst b/python/doc/source/ipc.rst
index c7de9c4..f0844cd 100644
--- a/python/doc/source/ipc.rst
+++ b/python/doc/source/ipc.rst
@@ -60,7 +60,7 @@ this we use :class:`~pyarrow.RecordBatchStreamWriter`, which can write to a writ
.. ipython:: python
- sink = pa.InMemoryOutputStream()
+ sink = pa.BufferOutputStream()
writer = pa.RecordBatchStreamWriter(sink, batch.schema)
Here we used an in-memory Arrow buffer stream, but this could have been a
@@ -109,7 +109,7 @@ The :class:`~pyarrow.RecordBatchFileWriter` has the same API as
.. ipython:: python
- sink = pa.InMemoryOutputStream()
+ sink = pa.BufferOutputStream()
writer = pa.RecordBatchFileWriter(sink, batch.schema)
for i in range(10):
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/doc/source/memory.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/memory.rst b/python/doc/source/memory.rst
index d1020da..ccc6298 100644
--- a/python/doc/source/memory.rst
+++ b/python/doc/source/memory.rst
@@ -104,8 +104,8 @@ There are several kinds of :class:`~pyarrow.NativeFile` options available:
memory maps
* :class:`~pyarrow.BufferReader`, for reading :class:`~pyarrow.Buffer` objects
as a file
-* :class:`~pyarrow.InMemoryOutputStream`, for writing data in-memory, producing
- a Buffer at the end
+* :class:`~pyarrow.BufferOutputStream`, for writing data in-memory, producing a
+ Buffer at the end
* :class:`~pyarrow.HdfsFile`, for reading and writing data to the Hadoop Filesystem
* :class:`~pyarrow.PythonFile`, for interfacing with Python file objects in C++
@@ -124,11 +124,11 @@ then precisely track amount of memory that has been allocated:
PyArrow uses a default built-in memory pool, but in the future there may be
additional memory pools (and subpools) to choose from. Let's consider an
-``InMemoryOutputStream``, which is like a ``BytesIO``:
+``BufferOutputStream``, which is like a ``BytesIO``:
.. ipython:: python
- stream = pa.InMemoryOutputStream()
+ stream = pa.BufferOutputStream()
stream.write(b'foo')
pa.total_allocated_bytes()
for i in range(1024): stream.write(b'foo')
@@ -150,7 +150,7 @@ pass in a custom memory pool:
my_pool = pa.jemalloc_memory_pool()
my_pool
my_pool.bytes_allocated()
- stream = pa.InMemoryOutputStream(my_pool)
+ stream = pa.BufferOutputStream(my_pool)
stream.write(b'foo')
my_pool.bytes_allocated()
@@ -215,7 +215,7 @@ file interfaces that can read and write to Arrow Buffers.
.. ipython:: python
- writer = pa.InMemoryOutputStream()
+ writer = pa.BufferOutputStream()
writer.write(b'hello, friends')
buf = writer.get_result()
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index d6d2aa4..632a443 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -70,7 +70,7 @@ from pyarrow.lib import (null, bool_,
Date32Value, Date64Value, TimestampValue)
from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
- Buffer, BufferReader, InMemoryOutputStream,
+ Buffer, BufferReader, BufferOutputStream,
OSFile, MemoryMappedFile, memory_map,
frombuffer, read_tensor, write_tensor,
memory_map, create_memory_map,
@@ -108,3 +108,38 @@ from pyarrow.ipc import (RecordBatchFileReader, RecordBatchFileWriter,
localfs = LocalFilesystem.get_instance()
+
+
+# ----------------------------------------------------------------------
+# 0.4.0 deprecations
+
+import warnings
+
+def _deprecate_class(old_name, new_name, klass, next_version='0.5.0'):
+    msg = ('pyarrow.{0} has been renamed to '
+           '{1}, will be removed in {2}'
+           .format(old_name, new_name, next_version))  # built once, reused on every call
+    def deprecated_factory(*args, **kwargs):  # callable that gets bound to the old name
+        warnings.warn(msg, FutureWarning)
+        return klass(*args, **kwargs)  # forward keywords too; they used to be dropped
+    return deprecated_factory
+
+FileReader = _deprecate_class('FileReader',
+ 'RecordBatchFileReader',
+ RecordBatchFileReader, '0.5.0')
+
+FileWriter = _deprecate_class('FileWriter',
+ 'RecordBatchFileWriter',
+ RecordBatchFileWriter, '0.5.0')
+
+StreamReader = _deprecate_class('StreamReader',
+ 'RecordBatchStreamReader',
+ RecordBatchStreamReader, '0.5.0')
+
+StreamWriter = _deprecate_class('StreamWriter',
+ 'RecordBatchStreamWriter',
+ RecordBatchStreamWriter, '0.5.0')
+
+InMemoryOutputStream = _deprecate_class('InMemoryOutputStream',
+ 'BufferOutputStream',
+ BufferOutputStream, '0.5.0')
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index b03dd59..a7e2733 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -507,8 +507,9 @@ cdef extern from "arrow/io/memory.h" namespace "arrow::io" nogil:
CBufferReader(const shared_ptr[CBuffer]& buffer)
CBufferReader(const uint8_t* data, int64_t nbytes)
- cdef cppclass BufferOutputStream(OutputStream):
- BufferOutputStream(const shared_ptr[ResizableBuffer]& buffer)
+ cdef cppclass CBufferOutputStream" arrow::io::BufferOutputStream"\
+ (OutputStream):
+ CBufferOutputStream(const shared_ptr[ResizableBuffer]& buffer)
cdef extern from "arrow/ipc/metadata.h" namespace "arrow::ipc" nogil:
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/io.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 4cbf603..a153f22 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -515,14 +515,14 @@ cdef shared_ptr[PoolBuffer] allocate_buffer(CMemoryPool* pool):
return result
-cdef class InMemoryOutputStream(NativeFile):
+cdef class BufferOutputStream(NativeFile):
cdef:
shared_ptr[PoolBuffer] buffer
def __cinit__(self, MemoryPool memory_pool=None):
self.buffer = allocate_buffer(maybe_unbox_memory_pool(memory_pool))
- self.wr_file.reset(new BufferOutputStream(
+ self.wr_file.reset(new CBufferOutputStream(
<shared_ptr[ResizableBuffer]> self.buffer))
self.is_readable = 0
self.is_writeable = 1
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_deprecations.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_deprecations.py b/python/pyarrow/tests/test_deprecations.py
new file mode 100644
index 0000000..62b9666
--- /dev/null
+++ b/python/pyarrow/tests/test_deprecations.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Check that various deprecation warnings are raised
+
+import pyarrow as pa
+import pytest
+
+
+def test_inmemory_output_stream():
+ with pytest.warns(FutureWarning):  # calling the old name must emit a FutureWarning
+ stream = pa.InMemoryOutputStream()
+ assert isinstance(stream, pa.BufferOutputStream)  # and still yield the renamed class
+
+
+def test_file_reader_writer():
+ data = [  # three columns, two of which contain a None entry
+ pa.array([1, 2, 3, 4]),
+ pa.array(['foo', 'bar', 'baz', None]),
+ pa.array([True, None, False, True])
+ ]
+ batch = pa.RecordBatch.from_arrays(data, ['f0', 'f1', 'f2'])
+
+ sink = pa.BufferOutputStream()  # sink for the stream-format writer
+
+ with pytest.warns(FutureWarning):  # each old name below must warn when called
+ stream_writer = pa.StreamWriter(sink, batch.schema)
+ assert isinstance(stream_writer, pa.RecordBatchStreamWriter)
+
+ sink2 = pa.BufferOutputStream()  # separate sink for the file-format writer
+ with pytest.warns(FutureWarning):
+ file_writer = pa.FileWriter(sink2, batch.schema)
+ assert isinstance(file_writer, pa.RecordBatchFileWriter)
+
+ file_writer.write_batch(batch)  # write one batch to each sink so the readers have input
+ stream_writer.write_batch(batch)
+
+ file_writer.close()
+ stream_writer.close()
+
+ buf = sink.get_result()  # results of the two sinks, handed to the readers below
+ buf2 = sink2.get_result()
+
+ with pytest.warns(FutureWarning):
+ stream_reader = pa.StreamReader(buf)
+ assert isinstance(stream_reader, pa.RecordBatchStreamReader)
+
+ with pytest.warns(FutureWarning):
+ file_reader = pa.FileReader(buf2)
+ assert isinstance(file_reader, pa.RecordBatchFileReader)
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_io.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 610dedc..f91dc6f 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -179,7 +179,7 @@ def test_memory_output_stream():
# 10 bytes
val = b'dataabcdef'
- f = pa.InMemoryOutputStream()
+ f = pa.BufferOutputStream()
K = 1000
for i in range(K):
@@ -192,7 +192,7 @@ def test_memory_output_stream():
def test_inmemory_write_after_closed():
- f = pa.InMemoryOutputStream()
+ f = pa.BufferOutputStream()
f.write(b'ok')
f.get_result()
@@ -212,7 +212,7 @@ def test_buffer_protocol_ref_counting():
def test_nativefile_write_memoryview():
- f = pa.InMemoryOutputStream()
+ f = pa.BufferOutputStream()
data = b'ok'
arr = np.frombuffer(data, dtype='S1')
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_ipc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 4d19804..994876d 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -209,7 +209,7 @@ class TestSocket(MessagingTest, unittest.TestCase):
class TestInMemoryFile(TestFile):
def _get_sink(self):
- return pa.InMemoryOutputStream()
+ return pa.BufferOutputStream()
def _get_source(self):
return self.sink.get_result()
@@ -219,7 +219,7 @@ def test_ipc_zero_copy_numpy():
df = pd.DataFrame({'foo': [1.5]})
batch = pa.RecordBatch.from_pandas(df)
- sink = pa.InMemoryOutputStream()
+ sink = pa.BufferOutputStream()
write_file(batch, sink)
buffer = sink.get_result()
reader = pa.BufferReader(buffer)
http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 7144de2..5dbe657 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -179,7 +179,7 @@ def _test_dataframe(size=10000, seed=0):
def test_pandas_parquet_native_file_roundtrip(tmpdir):
df = _test_dataframe(10000)
arrow_table = pa.Table.from_pandas(df)
- imos = pa.InMemoryOutputStream()
+ imos = pa.BufferOutputStream()
pq.write_table(arrow_table, imos, version="2.0")
buf = imos.get_result()
reader = pa.BufferReader(buf)