You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/08/29 10:24:53 UTC

[arrow] branch master updated: ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f302a3070 ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921)
6f302a3070 is described below

commit 6f302a3070200f9404da30b6785620b2e72a968f
Author: Miles Granger <mi...@gmail.com>
AuthorDate: Mon Aug 29 12:24:44 2022 +0200

    ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921)
    
    Example:
    ```python
    In [1]: import io
    In [2]: import pyarrow as pa
    
    In [3]: pa.PythonFile(io.BytesIO())
    Out[3]: <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
    
    In [4]: pa.Codec('gzip')
    Out[4]: <pyarrow.Codec name=gzip compression_level=9>
    
    In [5]: pool = pa.default_memory_pool()
    In [6]: pool
    Out[6]: <pyarrow.MemoryPool backend_name=jemalloc bytes_allocated=0 max_memory=0>
    
    In [7]: pa.allocate_buffer(1024, memory_pool=pool)
    Out[7]: <pyarrow.Buffer address=0x7fd660a08000 size=1024 is_cpu=True is_mutable=True>
    ```
    
    Authored-by: Miles Granger <mi...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/io.pxi           | 45 ++++++++++++++++++++++++++++++++++++++++-
 python/pyarrow/memory.pxi       | 11 ++++++++++
 python/pyarrow/table.pxi        |  2 +-
 python/pyarrow/tests/test_io.py |  6 ++++++
 python/pyarrow/types.pxi        |  2 +-
 5 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index d1d3feb3c1..3dd60735c3 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -121,6 +121,15 @@ cdef class NativeFile(_Weakrefable):
     def __exit__(self, exc_type, exc_value, tb):
         self.close()
 
+    def __repr__(self):
+        name = f"pyarrow.{self.__class__.__name__}"
+        return (f"<{name} "
+                f"closed={self.closed} "
+                f"own_file={self.own_file} "
+                f"is_seekable={self.is_seekable} "
+                f"is_writable={self.is_writable} "
+                f"is_readable={self.is_readable}>")
+
     @property
     def mode(self):
         """
@@ -766,6 +775,13 @@ cdef class PythonFile(NativeFile):
     As a downside, there is a non-zero redirection cost in translating
     Arrow stream calls to Python method calls.  Furthermore, Python's
     Global Interpreter Lock may limit parallelism in some situations.
+
+    Examples
+    --------
+    >>> import io
+    >>> import pyarrow as pa
+    >>> pa.PythonFile(io.BytesIO())
+    <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
     """
     cdef:
         object handle
@@ -1053,6 +1069,14 @@ cdef class Buffer(_Weakrefable):
     def __len__(self):
         return self.size
 
+    def __repr__(self):
+        name = f"pyarrow.{self.__class__.__name__}"
+        return (f"<{name} "
+                f"address={hex(self.address)} "
+                f"size={self.size} "
+                f"is_cpu={self.is_cpu} "
+                f"is_mutable={self.is_mutable}>")
+
     @property
     def size(self):
         """
@@ -1843,6 +1867,17 @@ cdef class Codec(_Weakrefable):
     ------
     ValueError
         If invalid compression value is passed.
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> pa.Codec.is_available('gzip')
+    True
+    >>> codec = pa.Codec('gzip')
+    >>> codec.name
+    'gzip'
+    >>> codec.compression_level
+    9
     """
 
     def __init__(self, str compression not None, compression_level=None):
@@ -1964,7 +1999,9 @@ cdef class Codec(_Weakrefable):
     @property
     def compression_level(self):
         """Returns the compression level parameter of the codec"""
-        return frombytes(self.unwrap().compression_level())
+        if self.name == 'snappy':
+            return None
+        return self.unwrap().compression_level()
 
     def compress(self, object buf, asbytes=False, memory_pool=None):
         """
@@ -2080,6 +2117,12 @@ cdef class Codec(_Weakrefable):
 
         return pybuf if asbytes else out_buf
 
+    def __repr__(self):
+        name = f"pyarrow.{self.__class__.__name__}"
+        return (f"<{name} "
+                f"name={self.name} "
+                f"compression_level={self.compression_level}>")
+
 
 def compress(object buf, codec='lz4', asbytes=False, memory_pool=None):
     """
diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi
index 2258be78d5..1ddcb01ccb 100644
--- a/python/pyarrow/memory.pxi
+++ b/python/pyarrow/memory.pxi
@@ -76,6 +76,12 @@ cdef class MemoryPool(_Weakrefable):
         """
         return frombytes(self.pool.backend_name())
 
+    def __repr__(self):
+        name = f"pyarrow.{self.__class__.__name__}"
+        return (f"<{name} "
+                f"backend_name={self.backend_name} "
+                f"bytes_allocated={self.bytes_allocated()} "
+                f"max_memory={self.max_memory()}>")
 
 cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
     if memory_pool is None:
@@ -118,6 +124,11 @@ cdef class ProxyMemoryPool(MemoryPool):
 def default_memory_pool():
     """
     Return the process-global memory pool.
+
+    Examples
+    --------
+    >>> default_memory_pool()
+    <pyarrow.MemoryPool backend_name=... bytes_allocated=0 max_memory=...>
     """
     cdef:
         MemoryPool pool = MemoryPool.__new__(MemoryPool)
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index b8c98df1f0..931677f984 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -2002,7 +2002,7 @@ cdef class RecordBatch(_PandasConvertible):
         >>> batch = pa.RecordBatch.from_arrays([n_legs, animals],
         ...                                     names=["n_legs", "animals"])
         >>> batch.serialize()
-        <pyarrow.lib.Buffer object at ...>
+        <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
         """
         cdef shared_ptr[CBuffer] buffer
         cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults()
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index ca49c5218e..a6488d70df 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -719,6 +719,12 @@ def test_compression_level(compression):
     if not Codec.is_available(compression):
         pytest.skip("{} support is not built".format(compression))
 
+    codec = Codec(compression)
+    if codec.name == "snappy":
+        assert codec.compression_level is None
+    else:
+        assert isinstance(codec.compression_level, int)
+
     # These codecs do not support a compression level
     no_level = ['snappy']
     if compression in no_level:
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 1babbc4154..d37363e06f 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -2127,7 +2127,7 @@ cdef class Schema(_Weakrefable):
         Write schema to Buffer:
 
         >>> schema.serialize()
-        <pyarrow.lib.Buffer object at ...>
+        <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
         """
         cdef:
             shared_ptr[CBuffer] buffer