You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2018/12/22 23:59:18 UTC
[arrow] branch master updated: ARROW-4098: [Python] Deprecate open_file/open_stream top level APIs in favor of using ipc namespace

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e179dda  ARROW-4098: [Python] Deprecate open_file/open_stream top level APIs in favor of using ipc namespace
e179dda is described below

commit e179dda432e1f67020a0c832a11fc496eec67e7f
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Sun Dec 23 00:59:06 2018 +0100

    ARROW-4098: [Python] Deprecate open_file/open_stream top level APIs in favor of using ipc namespace
    
    This will mean some user code will have to change (e.g. https://github.com/apache/spark/blob/8edae94fa7ec1a1cc2c69e0924da0da85d4aac83/python/pyspark/serializers.py#L240) but it is the most maintainable option for the long term. We should not remove the deprecated APIs until we are confident that at least our open-source downstream dependencies are taken care of.
    
    Author: Krisztián Szűcs <sz...@gmail.com>
    Author: Wes McKinney <we...@apache.org>
    
    Closes #3244 from wesm/ARROW-4098 and squashes the following commits:
    
    ec3c54be <Krisztián Szűcs> update ipc doc
    9017e7ff <Krisztián Szűcs> remove accidentally committed file
    36b6a861 <Wes McKinney> Fix up API docs
    7ed5343e <Wes McKinney> Deprecate pyarrow.open_stream/open_file in favor of ipc-namespaced versions
---
 docs/source/python/api.rst       |  4 ++--
 docs/source/python/ipc.rst       | 10 +++++-----
 python/pyarrow/__init__.py       | 22 ++++++++++++++++++++++
 python/pyarrow/tests/test_ipc.py | 40 ++++++++++++++++++++--------------------
 4 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/docs/source/python/api.rst b/docs/source/python/api.rst
index 40ccb68..0bad76f 100644
--- a/docs/source/python/api.rst
+++ b/docs/source/python/api.rst
@@ -259,14 +259,14 @@ Serialization and IPC
 .. autosummary::
    :toctree: generated/
 
+   ipc.open_file
+   ipc.open_stream
    Message
    MessageReader
    RecordBatchFileReader
    RecordBatchFileWriter
    RecordBatchStreamReader
    RecordBatchStreamWriter
-   open_file
-   open_stream
    read_message
    read_record_batch
    get_record_batch_size
diff --git a/docs/source/python/ipc.rst b/docs/source/python/ipc.rst
index 3f7e787..812d843 100644
--- a/docs/source/python/ipc.rst
+++ b/docs/source/python/ipc.rst
@@ -84,11 +84,11 @@ particular stream. Now we can do:
 
 Now ``buf`` contains the complete stream as an in-memory byte buffer. We can
 read such a stream with :class:`~pyarrow.RecordBatchStreamReader` or the
-convenience function ``pyarrow.open_stream``:
+convenience function ``pyarrow.ipc.open_stream``:
 
 .. ipython:: python
 
-   reader = pa.open_stream(buf)
+   reader = pa.ipc.open_stream(buf)
    reader.schema
 
    batches = [b for b in reader]
@@ -125,11 +125,11 @@ The :class:`~pyarrow.RecordBatchFileWriter` has the same API as
 The difference between :class:`~pyarrow.RecordBatchFileReader` and
 :class:`~pyarrow.RecordBatchStreamReader` is that the input source must have a
 ``seek`` method for random access. The stream reader only requires read
-operations. We can also use the ``pyarrow.open_file`` method to open a file:
+operations. We can also use the ``pyarrow.ipc.open_file`` method to open a file:
 
 .. ipython:: python
 
-   reader = pa.open_file(buf)
+   reader = pa.ipc.open_file(buf)
 
 Because we have access to the entire payload, we know the number of record
 batches in the file, and can read any at random:
@@ -149,7 +149,7 @@ DataFrame output:
 
 .. ipython:: python
 
-   df = pa.open_file(buf).read_pandas()
+   df = pa.ipc.open_file(buf).read_pandas()
    df[:5]
 
 Arbitrary Object Serialization
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 7f0a371..0d1c1be 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -146,6 +146,28 @@ from pyarrow.ipc import (Message, MessageReader,
                          open_stream,
                          open_file,
                          serialize_pandas, deserialize_pandas)
+import pyarrow.ipc as ipc
+
+
+def open_stream(source):
+    """
+    pyarrow.open_stream deprecated since 0.12, use pyarrow.ipc.open_stream
+    """
+    import warnings
+    warnings.warn("pyarrow.open_stream is deprecated, please use "
+                  "pyarrow.ipc.open_stream")
+    return ipc.open_stream(source)
+
+
+def open_file(source):
+    """
+    pyarrow.open_file deprecated since 0.12, use pyarrow.ipc.open_file
+    """
+    import warnings
+    warnings.warn("pyarrow.open_file is deprecated, please use "
+                  "pyarrow.ipc.open_file")
+    return ipc.open_file(source)
+
 
 localfs = LocalFileSystem.get_instance()
 
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 0fb66f8..67a91b9 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -80,7 +80,7 @@ class FileFormatFixture(IpcFixture):
         _, batches = self.write_batches(as_table=as_table)
         file_contents = pa.BufferReader(self.get_source())
 
-        reader = pa.open_file(file_contents)
+        reader = pa.ipc.open_file(file_contents)
 
         assert reader.num_record_batches == len(batches)
 
@@ -121,7 +121,7 @@ def stream_fixture():
 def test_empty_file():
     buf = b''
     with pytest.raises(pa.ArrowInvalid):
-        pa.open_file(pa.BufferReader(buf))
+        pa.ipc.open_file(pa.BufferReader(buf))
 
 
 def test_file_simple_roundtrip(file_fixture):
@@ -142,7 +142,7 @@ def test_file_read_all(sink_factory):
     _, batches = fixture.write_batches()
     file_contents = pa.BufferReader(fixture.get_source())
 
-    reader = pa.open_file(file_contents)
+    reader = pa.ipc.open_file(file_contents)
 
     result = reader.read_all()
     expected = pa.Table.from_batches(batches)
@@ -154,8 +154,8 @@ def test_open_file_from_buffer(file_fixture):
     _, batches = file_fixture.write_batches()
     source = file_fixture.get_source()
 
-    reader1 = pa.open_file(source)
-    reader2 = pa.open_file(pa.BufferReader(source))
+    reader1 = pa.ipc.open_file(source)
+    reader2 = pa.ipc.open_file(pa.BufferReader(source))
     reader3 = pa.RecordBatchFileReader(source)
 
     result1 = reader1.read_all()
@@ -170,7 +170,7 @@ def test_file_read_pandas(file_fixture):
     frames, _ = file_fixture.write_batches()
 
     file_contents = pa.BufferReader(file_fixture.get_source())
-    reader = pa.open_file(file_contents)
+    reader = pa.ipc.open_file(file_contents)
     result = reader.read_pandas()
 
     expected = pd.concat(frames)
@@ -189,8 +189,8 @@ def test_file_pathlib(file_fixture, tmpdir):
     with open(path, 'wb') as f:
         f.write(source)
 
-    t1 = pa.open_file(pathlib.Path(path)).read_all()
-    t2 = pa.open_file(pa.OSFile(path)).read_all()
+    t1 = pa.ipc.open_file(pathlib.Path(path)).read_all()
+    t2 = pa.ipc.open_file(pa.OSFile(path)).read_all()
 
     assert t1.equals(t2)
 
@@ -198,7 +198,7 @@ def test_file_pathlib(file_fixture, tmpdir):
 def test_empty_stream():
     buf = io.BytesIO(b'')
     with pytest.raises(pa.ArrowInvalid):
-        pa.open_stream(buf)
+        pa.ipc.open_stream(buf)
 
 
 def test_stream_categorical_roundtrip(stream_fixture):
@@ -213,7 +213,7 @@ def test_stream_categorical_roundtrip(stream_fixture):
     writer.write_batch(pa.RecordBatch.from_pandas(df))
     writer.close()
 
-    table = (pa.open_stream(pa.BufferReader(stream_fixture.get_source()))
+    table = (pa.ipc.open_stream(pa.BufferReader(stream_fixture.get_source()))
              .read_all())
     assert_frame_equal(table.to_pandas(), df)
 
@@ -223,8 +223,8 @@ def test_open_stream_from_buffer(stream_fixture):
     _, batches = stream_fixture.write_batches()
     source = stream_fixture.get_source()
 
-    reader1 = pa.open_stream(source)
-    reader2 = pa.open_stream(pa.BufferReader(source))
+    reader1 = pa.ipc.open_stream(source)
+    reader2 = pa.ipc.open_stream(pa.BufferReader(source))
     reader3 = pa.RecordBatchStreamReader(source)
 
     result1 = reader1.read_all()
@@ -250,7 +250,7 @@ def test_stream_write_dispatch(stream_fixture):
     writer.write(batch)
     writer.close()
 
-    table = (pa.open_stream(pa.BufferReader(stream_fixture.get_source()))
+    table = (pa.ipc.open_stream(pa.BufferReader(stream_fixture.get_source()))
              .read_all())
     assert_frame_equal(table.to_pandas(),
                        pd.concat([df, df], ignore_index=True))
@@ -271,7 +271,7 @@ def test_stream_write_table_batches(stream_fixture):
     writer.write_table(table, chunksize=15)
     writer.close()
 
-    batches = list(pa.open_stream(stream_fixture.get_source()))
+    batches = list(pa.ipc.open_stream(stream_fixture.get_source()))
 
     assert list(map(len, batches)) == [10, 15, 5, 10]
     result_table = pa.Table.from_batches(batches)
@@ -283,7 +283,7 @@ def test_stream_write_table_batches(stream_fixture):
 def test_stream_simple_roundtrip(stream_fixture):
     _, batches = stream_fixture.write_batches()
     file_contents = pa.BufferReader(stream_fixture.get_source())
-    reader = pa.open_stream(file_contents)
+    reader = pa.ipc.open_stream(file_contents)
 
     assert reader.schema.equals(batches[0].schema)
 
@@ -301,7 +301,7 @@ def test_stream_simple_roundtrip(stream_fixture):
 def test_stream_read_all(stream_fixture):
     _, batches = stream_fixture.write_batches()
     file_contents = pa.BufferReader(stream_fixture.get_source())
-    reader = pa.open_stream(file_contents)
+    reader = pa.ipc.open_stream(file_contents)
 
     result = reader.read_all()
     expected = pa.Table.from_batches(batches)
@@ -311,7 +311,7 @@ def test_stream_read_all(stream_fixture):
 def test_stream_read_pandas(stream_fixture):
     frames, _ = stream_fixture.write_batches()
     file_contents = stream_fixture.get_source()
-    reader = pa.open_stream(file_contents)
+    reader = pa.ipc.open_stream(file_contents)
     result = reader.read_pandas()
 
     expected = pd.concat(frames)
@@ -393,7 +393,7 @@ class StreamReaderServer(threading.Thread):
         connection, client_address = self._sock.accept()
         try:
             source = connection.makefile(mode='rb')
-            reader = pa.open_stream(source)
+            reader = pa.ipc.open_stream(source)
             self._schema = reader.schema
             if self._do_read_all:
                 self._table = reader.read_all()
@@ -494,7 +494,7 @@ def test_ipc_stream_no_batches():
     writer.close()
 
     source = sink.getvalue()
-    reader = pa.open_stream(source)
+    reader = pa.ipc.open_stream(source)
     result = reader.read_all()
 
     assert result.schema.equals(table.schema)
@@ -636,7 +636,7 @@ def write_file(batch, sink):
 
 
 def read_file(source):
-    reader = pa.open_file(source)
+    reader = pa.ipc.open_file(source)
     return [reader.get_batch(i)
             for i in range(reader.num_record_batches)]