Posted to commits@arrow.apache.org by we...@apache.org on 2017/01/13 18:59:46 UTC

arrow git commit: ARROW-481: [Python] Fix 2.7 regression in the Parquet path-to-open-file code path

Repository: arrow
Updated Branches:
  refs/heads/master cb83b8d30 -> 281cb9697


ARROW-481: [Python] Fix 2.7 regression in the Parquet path-to-open-file code path

Author: Wes McKinney <we...@twosigma.com>

Closes #283 from wesm/ARROW-481 and squashes the following commits:

8ed919e [Wes McKinney] Fix Python 2.7 regression in the Parquet path-to-open-file code path
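
For context: on Python 2.7, str and bytes are the same type, so the old isinstance(source, bytes) check in get_reader() intercepted file path strings before the six.string_types branch was ever reached, and a Parquet file path ended up wrapped as an in-memory byte buffer instead of being opened. A minimal sketch of the dispatch difference (classify is a hypothetical stand-in for get_reader's branching, not pyarrow code):

import six

def classify(source):
    # Old branch order: bytes was tested first. On Python 2.7 a
    # file path such as '/tmp/data.parquet' is an instance of
    # bytes (str is bytes there), so it matched this branch and
    # was treated as an in-memory buffer rather than a path.
    if isinstance(source, bytes):
        return 'in-memory buffer'
    elif isinstance(source, six.string_types):
        return 'memory-mapped file'
    return 'unknown'

print(classify('/tmp/data.parquet'))
# Python 2.7: 'in-memory buffer'   (the regression)
# Python 3:   'memory-mapped file' (the intended behavior)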


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/281cb969
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/281cb969
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/281cb969

Branch: refs/heads/master
Commit: 281cb969772a5878cd64a487cf2be3944f801c3d
Parents: cb83b8d
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Jan 13 13:59:40 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Jan 13 13:59:40 2017 -0500

----------------------------------------------------------------------
 python/pyarrow/io.pyx            |  6 ++----
 python/pyarrow/tests/test_ipc.py | 12 ++++++------
 2 files changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/281cb969/python/pyarrow/io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 2d8e4e8..1939fe8 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -395,13 +395,11 @@ def buffer_from_bytes(object obj):
 cdef get_reader(object source, shared_ptr[ReadableFileInterface]* reader):
     cdef NativeFile nf
 
-    if isinstance(source, bytes):
-        source = BytesReader(source)
+    if isinstance(source, six.string_types):
+        source = MemoryMappedFile(source, mode='r')
     elif not isinstance(source, NativeFile) and hasattr(source, 'read'):
         # Optimistically hope this is file-like
         source = PythonFileInterface(source, mode='r')
-    elif isinstance(source, six.string_types):
-        source = MemoryMappedFile(source, mode='r')
 
     if isinstance(source, NativeFile):
         nf = source
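
With the branches reordered, any string source is treated as a file path and memory-mapped, and raw bytes are no longer wrapped automatically; callers wrap them explicitly. A usage sketch against the pyarrow API of this era (the path and payload below are hypothetical):

import pyarrow.io as aio

# A string source is now always treated as a path to memory-map.
path_source = aio.MemoryMappedFile('/tmp/data.arrow', mode='r')

# Raw bytes must be wrapped explicitly by the caller.
bytes_source = aio.BytesReader(b'<raw arrow stream bytes>')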

http://git-wip-us.apache.org/repos/asf/arrow/blob/281cb969/python/pyarrow/tests/test_ipc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 14cbb30..368a9d4 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -23,7 +23,7 @@ from pandas.util.testing import assert_frame_equal
 import pandas as pd
 
 import pyarrow as A
-import pyarrow.io as arrow_io
+import pyarrow.io as aio
 import pyarrow.ipc as ipc
 
 
@@ -63,7 +63,7 @@ class RoundtripTest(object):
         writer.close()
 
         file_contents = self._get_source()
-        reader = ipc.ArrowFileReader(file_contents)
+        reader = ipc.ArrowFileReader(aio.BytesReader(file_contents))
 
         assert reader.num_record_batches == num_batches
 
@@ -76,7 +76,7 @@ class RoundtripTest(object):
 class InMemoryStreamTest(RoundtripTest):
 
     def _get_sink(self):
-        return arrow_io.InMemoryOutputStream()
+        return aio.InMemoryOutputStream()
 
     def _get_source(self):
         return self.sink.get_result()
@@ -91,10 +91,10 @@ def test_ipc_zero_copy_numpy():
     df = pd.DataFrame({'foo': [1.5]})
 
     batch = A.RecordBatch.from_pandas(df)
-    sink = arrow_io.InMemoryOutputStream()
+    sink = aio.InMemoryOutputStream()
     write_file(batch, sink)
     buffer = sink.get_result()
-    reader = arrow_io.BufferReader(buffer)
+    reader = aio.BufferReader(buffer)
 
     batches = read_file(reader)
 
@@ -118,7 +118,7 @@ def big_batch():
 
 
 def write_to_memory2(batch):
-    sink = arrow_io.InMemoryOutputStream()
+    sink = aio.InMemoryOutputStream()
     write_file(batch, sink)
     return sink.get_result()
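
The test changes follow from the same behavior change: because get_reader no longer auto-wraps bytes, the tests pass the raw stream contents through an explicit aio.BytesReader. A condensed sketch of the pattern, where file_contents stands in for the bytes returned by the test's sink:

import pyarrow.io as aio
import pyarrow.ipc as ipc

def open_file_reader(file_contents):
    # file_contents is raw bytes; wrap it explicitly now that a
    # bare bytes object is no longer accepted as a source.
    return ipc.ArrowFileReader(aio.BytesReader(file_contents))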