You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/16 01:38:32 UTC

arrow git commit: ARROW-961: [Python] Rename InMemoryOutputStream to BufferOutputStream

Repository: arrow
Updated Branches:
  refs/heads/master 86a905562 -> 47e289a9a


ARROW-961: [Python] Rename InMemoryOutputStream to BufferOutputStream

Better API naming consistency with C++

Author: Wes McKinney <we...@twosigma.com>

Closes #690 from wesm/ARROW-961 and squashes the following commits:

85b352c [Wes McKinney] Add more graceful deprecation warnings for renamed classes, test suite
69a99cd [Wes McKinney] Fix Cython compilation
a15910a [Wes McKinney] Rename InMemoryOutputStream to BufferOutputStream


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/47e289a9
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/47e289a9
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/47e289a9

Branch: refs/heads/master
Commit: 47e289a9a6d38fe9b0581dbc73a464cae084ef75
Parents: 86a9055
Author: Wes McKinney <we...@twosigma.com>
Authored: Mon May 15 21:38:15 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon May 15 21:38:15 2017 -0400

----------------------------------------------------------------------
 python/doc/source/api.rst                 |  2 +-
 python/doc/source/ipc.rst                 |  4 +-
 python/doc/source/memory.rst              | 12 ++---
 python/pyarrow/__init__.py                | 37 ++++++++++++++-
 python/pyarrow/includes/libarrow.pxd      |  5 +-
 python/pyarrow/io.pxi                     |  4 +-
 python/pyarrow/tests/test_deprecations.py | 64 ++++++++++++++++++++++++++
 python/pyarrow/tests/test_io.py           |  6 +--
 python/pyarrow/tests/test_ipc.py          |  4 +-
 python/pyarrow/tests/test_parquet.py      |  2 +-
 10 files changed, 120 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/doc/source/api.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index e7bea70..c145935 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -162,7 +162,7 @@ Input / Output and Shared Memory
 
    Buffer
    BufferReader
-   InMemoryOutputStream
+   BufferOutputStream
    NativeFile
    MemoryMappedFile
    memory_map

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/doc/source/ipc.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/ipc.rst b/python/doc/source/ipc.rst
index c7de9c4..f0844cd 100644
--- a/python/doc/source/ipc.rst
+++ b/python/doc/source/ipc.rst
@@ -60,7 +60,7 @@ this we use :class:`~pyarrow.RecordBatchStreamWriter`, which can write to a writ
 
 .. ipython:: python
 
-   sink = pa.InMemoryOutputStream()
+   sink = pa.BufferOutputStream()
    writer = pa.RecordBatchStreamWriter(sink, batch.schema)
 
 Here we used an in-memory Arrow buffer stream, but this could have been a
@@ -109,7 +109,7 @@ The :class:`~pyarrow.RecordBatchFileWriter` has the same API as
 
 .. ipython:: python
 
-   sink = pa.InMemoryOutputStream()
+   sink = pa.BufferOutputStream()
    writer = pa.RecordBatchFileWriter(sink, batch.schema)
 
    for i in range(10):

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/doc/source/memory.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/memory.rst b/python/doc/source/memory.rst
index d1020da..ccc6298 100644
--- a/python/doc/source/memory.rst
+++ b/python/doc/source/memory.rst
@@ -104,8 +104,8 @@ There are several kinds of :class:`~pyarrow.NativeFile` options available:
   memory maps
 * :class:`~pyarrow.BufferReader`, for reading :class:`~pyarrow.Buffer` objects
   as a file
-* :class:`~pyarrow.InMemoryOutputStream`, for writing data in-memory, producing
-  a Buffer at the end
+* :class:`~pyarrow.BufferOutputStream`, for writing data in-memory, producing a
+  Buffer at the end
 * :class:`~pyarrow.HdfsFile`, for reading and writing data to the Hadoop Filesystem
 * :class:`~pyarrow.PythonFile`, for interfacing with Python file objects in C++
 
@@ -124,11 +124,11 @@ then precisely track amount of memory that has been allocated:
 
 PyArrow uses a default built-in memory pool, but in the future there may be
 additional memory pools (and subpools) to choose from. Let's consider an
-``InMemoryOutputStream``, which is like a ``BytesIO``:
+``BufferOutputStream``, which is like a ``BytesIO``:
 
 .. ipython:: python
 
-   stream = pa.InMemoryOutputStream()
+   stream = pa.BufferOutputStream()
    stream.write(b'foo')
    pa.total_allocated_bytes()
    for i in range(1024): stream.write(b'foo')
@@ -150,7 +150,7 @@ pass in a custom memory pool:
    my_pool = pa.jemalloc_memory_pool()
    my_pool
    my_pool.bytes_allocated()
-   stream = pa.InMemoryOutputStream(my_pool)
+   stream = pa.BufferOutputStream(my_pool)
    stream.write(b'foo')
    my_pool.bytes_allocated()
 
@@ -215,7 +215,7 @@ file interfaces that can read and write to Arrow Buffers.
 
 .. ipython:: python
 
-   writer = pa.InMemoryOutputStream()
+   writer = pa.BufferOutputStream()
    writer.write(b'hello, friends')
 
    buf = writer.get_result()

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index d6d2aa4..632a443 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -70,7 +70,7 @@ from pyarrow.lib import (null, bool_,
                          Date32Value, Date64Value, TimestampValue)
 
 from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
-                         Buffer, BufferReader, InMemoryOutputStream,
+                         Buffer, BufferReader, BufferOutputStream,
                          OSFile, MemoryMappedFile, memory_map,
                          frombuffer, read_tensor, write_tensor,
                          memory_map, create_memory_map,
@@ -108,3 +108,38 @@ from pyarrow.ipc import (RecordBatchFileReader, RecordBatchFileWriter,
 
 
 localfs = LocalFilesystem.get_instance()
+
+
+# ----------------------------------------------------------------------
+# 0.4.0 deprecations
+
+import warnings
+
+def _deprecate_class(old_name, new_name, klass, next_version='0.5.0'):
+    """Return a factory that warns FutureWarning, then constructs klass."""
+    msg = ('pyarrow.{0} has been renamed to {1}, will be removed in {2}'
+           .format(old_name, new_name, next_version))
+    def deprecated_factory(*args, **kwargs):
+        warnings.warn(msg, FutureWarning, stacklevel=2)  # blame the caller
+        return klass(*args, **kwargs)  # forward keyword args too
+    return deprecated_factory
+
+FileReader = _deprecate_class(
+    'FileReader', 'RecordBatchFileReader', RecordBatchFileReader,
+    next_version='0.5.0')
+
+FileWriter = _deprecate_class(
+    'FileWriter', 'RecordBatchFileWriter', RecordBatchFileWriter,
+    next_version='0.5.0')
+
+StreamReader = _deprecate_class(
+    'StreamReader', 'RecordBatchStreamReader', RecordBatchStreamReader,
+    next_version='0.5.0')
+
+StreamWriter = _deprecate_class(
+    'StreamWriter', 'RecordBatchStreamWriter', RecordBatchStreamWriter,
+    next_version='0.5.0')
+
+InMemoryOutputStream = _deprecate_class(
+    'InMemoryOutputStream', 'BufferOutputStream', BufferOutputStream,
+    next_version='0.5.0')

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index b03dd59..a7e2733 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -507,8 +507,9 @@ cdef extern from "arrow/io/memory.h" namespace "arrow::io" nogil:
         CBufferReader(const shared_ptr[CBuffer]& buffer)
         CBufferReader(const uint8_t* data, int64_t nbytes)
 
-    cdef cppclass BufferOutputStream(OutputStream):
-        BufferOutputStream(const shared_ptr[ResizableBuffer]& buffer)
+    cdef cppclass CBufferOutputStream" arrow::io::BufferOutputStream"\
+        (OutputStream):
+        CBufferOutputStream(const shared_ptr[ResizableBuffer]& buffer)
 
 
 cdef extern from "arrow/ipc/metadata.h" namespace "arrow::ipc" nogil:

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/io.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 4cbf603..a153f22 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -515,14 +515,14 @@ cdef shared_ptr[PoolBuffer] allocate_buffer(CMemoryPool* pool):
     return result
 
 
-cdef class InMemoryOutputStream(NativeFile):
+cdef class BufferOutputStream(NativeFile):
 
     cdef:
         shared_ptr[PoolBuffer] buffer
 
     def __cinit__(self, MemoryPool memory_pool=None):
         self.buffer = allocate_buffer(maybe_unbox_memory_pool(memory_pool))
-        self.wr_file.reset(new BufferOutputStream(
+        self.wr_file.reset(new CBufferOutputStream(
             <shared_ptr[ResizableBuffer]> self.buffer))
         self.is_readable = 0
         self.is_writeable = 1

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_deprecations.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_deprecations.py b/python/pyarrow/tests/test_deprecations.py
new file mode 100644
index 0000000..62b9666
--- /dev/null
+++ b/python/pyarrow/tests/test_deprecations.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Check that various deprecation warnings are raised
+
+import pyarrow as pa
+import pytest
+
+
+def test_inmemory_output_stream():  # old stream name warns, still works
+    with pytest.warns(FutureWarning):  # fails if no FutureWarning is raised
+        stream = pa.InMemoryOutputStream()
+        assert isinstance(stream, pa.BufferOutputStream)
+
+
+def test_file_reader_writer():  # deprecated aliases warn, build new classes
+    data = [
+        pa.array([1, 2, 3, 4]),
+        pa.array(['foo', 'bar', 'baz', None]),
+        pa.array([True, None, False, True])
+    ]
+    batch = pa.RecordBatch.from_arrays(data, ['f0', 'f1', 'f2'])
+
+    sink = pa.BufferOutputStream()  # target of stream_writer
+
+    with pytest.warns(FutureWarning):
+        stream_writer = pa.StreamWriter(sink, batch.schema)
+        assert isinstance(stream_writer, pa.RecordBatchStreamWriter)
+
+    sink2 = pa.BufferOutputStream()  # target of file_writer
+    with pytest.warns(FutureWarning):
+        file_writer = pa.FileWriter(sink2, batch.schema)
+        assert isinstance(file_writer, pa.RecordBatchFileWriter)
+
+    file_writer.write_batch(batch)
+    stream_writer.write_batch(batch)
+
+    file_writer.close()
+    stream_writer.close()
+
+    buf = sink.get_result()  # what stream_writer wrote
+    buf2 = sink2.get_result()  # what file_writer wrote
+
+    with pytest.warns(FutureWarning):
+        stream_reader = pa.StreamReader(buf)
+        assert isinstance(stream_reader, pa.RecordBatchStreamReader)
+
+    with pytest.warns(FutureWarning):
+        file_reader = pa.FileReader(buf2)
+        assert isinstance(file_reader, pa.RecordBatchFileReader)

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_io.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 610dedc..f91dc6f 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -179,7 +179,7 @@ def test_memory_output_stream():
     # 10 bytes
     val = b'dataabcdef'
 
-    f = pa.InMemoryOutputStream()
+    f = pa.BufferOutputStream()
 
     K = 1000
     for i in range(K):
@@ -192,7 +192,7 @@ def test_memory_output_stream():
 
 
 def test_inmemory_write_after_closed():
-    f = pa.InMemoryOutputStream()
+    f = pa.BufferOutputStream()
     f.write(b'ok')
     f.get_result()
 
@@ -212,7 +212,7 @@ def test_buffer_protocol_ref_counting():
 
 
 def test_nativefile_write_memoryview():
-    f = pa.InMemoryOutputStream()
+    f = pa.BufferOutputStream()
     data = b'ok'
 
     arr = np.frombuffer(data, dtype='S1')

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_ipc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 4d19804..994876d 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -209,7 +209,7 @@ class TestSocket(MessagingTest, unittest.TestCase):
 class TestInMemoryFile(TestFile):
 
     def _get_sink(self):
-        return pa.InMemoryOutputStream()
+        return pa.BufferOutputStream()
 
     def _get_source(self):
         return self.sink.get_result()
@@ -219,7 +219,7 @@ def test_ipc_zero_copy_numpy():
     df = pd.DataFrame({'foo': [1.5]})
 
     batch = pa.RecordBatch.from_pandas(df)
-    sink = pa.InMemoryOutputStream()
+    sink = pa.BufferOutputStream()
     write_file(batch, sink)
     buffer = sink.get_result()
     reader = pa.BufferReader(buffer)

http://git-wip-us.apache.org/repos/asf/arrow/blob/47e289a9/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 7144de2..5dbe657 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -179,7 +179,7 @@ def _test_dataframe(size=10000, seed=0):
 def test_pandas_parquet_native_file_roundtrip(tmpdir):
     df = _test_dataframe(10000)
     arrow_table = pa.Table.from_pandas(df)
-    imos = pa.InMemoryOutputStream()
+    imos = pa.BufferOutputStream()
     pq.write_table(arrow_table, imos, version="2.0")
     buf = imos.get_result()
     reader = pa.BufferReader(buf)