You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/09/17 16:11:21 UTC

[arrow] branch master updated: ARROW-3227: [Python] Require bytes-like input to NativeFile.write

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4488110  ARROW-3227: [Python] Require bytes-like input to NativeFile.write
4488110 is described below

commit 4488110ee710c86dd861857b46675d0b3325bc3e
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Sep 17 12:11:10 2018 -0400

    ARROW-3227: [Python] Require bytes-like input to NativeFile.write
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #2570 from wesm/ARROW-3227 and squashes the following commits:
    
    77c850762 <Wes McKinney> Better test name
    c1becfb68 <Wes McKinney> Require bytes-like input to NativeFile.write
---
 python/pyarrow/io.pxi           | 12 +++++++++---
 python/pyarrow/tests/test_io.py | 15 +++++++++++----
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 8414f07..0b740f2 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -204,18 +204,24 @@ cdef class NativeFile:
         """
         Write byte from any object implementing buffer protocol (bytes,
         bytearray, ndarray, pyarrow.Buffer)
+
+        Parameters
+        ----------
+        data : bytes-like object or exporter of buffer protocol
+
+        Returns
+        -------
+        nbytes : number of bytes written
         """
         self._assert_writable()
 
-        if isinstance(data, six.string_types):
-            data = tobytes(data)
-
         cdef Buffer arrow_buffer = py_buffer(data)
 
         cdef const uint8_t* buf = arrow_buffer.buffer.get().data()
         cdef int64_t bufsize = len(arrow_buffer)
         with nogil:
             check_status(self.wr_file.get().Write(buf, bufsize))
+        return bufsize
 
     def read(self, nbytes=None):
         """
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index c1a210d..f5e5339 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -64,7 +64,7 @@ def test_python_file_write():
     s1 = b'enga\xc3\xb1ado'
     s2 = b'foobar'
 
-    f.write(s1.decode('utf8'))
+    f.write(s1)
     assert f.tell() == len(s1)
 
     f.write(s2)
@@ -745,7 +745,7 @@ def test_memory_map_writer(tmpdir):
     f = pa.memory_map(path, mode='r+b')
 
     f.seek(10)
-    f.write('peekaboo')
+    f.write(b'peekaboo')
     assert f.tell() == 18
 
     f.seek(10)
@@ -762,7 +762,7 @@ def test_memory_map_writer(tmpdir):
 
     # Does not truncate file
     f3 = pa.memory_map(path, mode='w')
-    f3.write('foo')
+    f3.write(b'foo')
 
     with pa.memory_map(path) as f4:
         assert f4.size() == SIZE
@@ -817,7 +817,7 @@ def test_os_file_writer(tmpdir):
 
     # Truncates file
     f2 = pa.OSFile(path, mode='w')
-    f2.write('foo')
+    f2.write(b'foo')
 
     with pa.OSFile(path) as f3:
         assert f3.size() == 3
@@ -826,6 +826,13 @@ def test_os_file_writer(tmpdir):
         f2.read(5)
 
 
+def test_native_file_write_reject_unicode():
+    # ARROW-3227
+    nf = pa.BufferOutputStream()
+    with pytest.raises(TypeError):
+        nf.write(u'foo')
+
+
 def test_native_file_modes(tmpdir):
     path = os.path.join(str(tmpdir), guid())
     with open(path, 'wb') as f: