You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/08/29 10:24:53 UTC
[arrow] branch master updated: ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921)
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 6f302a3070 ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921)
6f302a3070 is described below
commit 6f302a3070200f9404da30b6785620b2e72a968f
Author: Miles Granger <mi...@gmail.com>
AuthorDate: Mon Aug 29 12:24:44 2022 +0200
ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921)
Example:
```python
In [1]: import io
In [2]: import pyarrow as pa
In [3]: pa.PythonFile(io.BytesIO())
Out[3]: <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
In [4]: pa.Codec('gzip')
Out[4]: <pyarrow.Codec name=gzip compression_level=9>
In [5]: pool = pa.default_memory_pool()
In [6]: pool
Out[6]: <pyarrow.MemoryPool backend_name=jemalloc bytes_allocated=0 max_memory=0>
In [7]: pa.allocate_buffer(1024, memory_pool=pool)
Out[7]: <pyarrow.Buffer address=0x7fd660a08000 size=1024 is_cpu=True is_mutable=True>
```
Authored-by: Miles Granger <mi...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/io.pxi | 45 ++++++++++++++++++++++++++++++++++++++++-
python/pyarrow/memory.pxi | 11 ++++++++++
python/pyarrow/table.pxi | 2 +-
python/pyarrow/tests/test_io.py | 6 ++++++
python/pyarrow/types.pxi | 2 +-
5 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index d1d3feb3c1..3dd60735c3 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -121,6 +121,15 @@ cdef class NativeFile(_Weakrefable):
def __exit__(self, exc_type, exc_value, tb):
self.close()
+ def __repr__(self):
+ name = f"pyarrow.{self.__class__.__name__}"
+ return (f"<{name} "
+ f"closed={self.closed} "
+ f"own_file={self.own_file} "
+ f"is_seekable={self.is_seekable} "
+ f"is_writable={self.is_writable} "
+ f"is_readable={self.is_readable}>")
+
@property
def mode(self):
"""
@@ -766,6 +775,13 @@ cdef class PythonFile(NativeFile):
As a downside, there is a non-zero redirection cost in translating
Arrow stream calls to Python method calls. Furthermore, Python's
Global Interpreter Lock may limit parallelism in some situations.
+
+ Examples
+ --------
+ >>> import io
+ >>> import pyarrow as pa
+ >>> pa.PythonFile(io.BytesIO())
+ <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
"""
cdef:
object handle
@@ -1053,6 +1069,14 @@ cdef class Buffer(_Weakrefable):
def __len__(self):
return self.size
+ def __repr__(self):
+ name = f"pyarrow.{self.__class__.__name__}"
+ return (f"<{name} "
+ f"address={hex(self.address)} "
+ f"size={self.size} "
+ f"is_cpu={self.is_cpu} "
+ f"is_mutable={self.is_mutable}>")
+
@property
def size(self):
"""
@@ -1843,6 +1867,17 @@ cdef class Codec(_Weakrefable):
------
ValueError
If invalid compression value is passed.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> pa.Codec.is_available('gzip')
+ True
+ >>> codec = pa.Codec('gzip')
+ >>> codec.name
+ 'gzip'
+ >>> codec.compression_level
+ 9
"""
def __init__(self, str compression not None, compression_level=None):
@@ -1964,7 +1999,9 @@ cdef class Codec(_Weakrefable):
@property
def compression_level(self):
"""Returns the compression level parameter of the codec"""
- return frombytes(self.unwrap().compression_level())
+ if self.name == 'snappy':
+ return None
+ return self.unwrap().compression_level()
def compress(self, object buf, asbytes=False, memory_pool=None):
"""
@@ -2080,6 +2117,12 @@ cdef class Codec(_Weakrefable):
return pybuf if asbytes else out_buf
+ def __repr__(self):
+ name = f"pyarrow.{self.__class__.__name__}"
+ return (f"<{name} "
+ f"name={self.name} "
+ f"compression_level={self.compression_level}>")
+
def compress(object buf, codec='lz4', asbytes=False, memory_pool=None):
"""
diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi
index 2258be78d5..1ddcb01ccb 100644
--- a/python/pyarrow/memory.pxi
+++ b/python/pyarrow/memory.pxi
@@ -76,6 +76,12 @@ cdef class MemoryPool(_Weakrefable):
"""
return frombytes(self.pool.backend_name())
+ def __repr__(self):
+ name = f"pyarrow.{self.__class__.__name__}"
+ return (f"<{name} "
+ f"backend_name={self.backend_name} "
+ f"bytes_allocated={self.bytes_allocated()} "
+ f"max_memory={self.max_memory()}>")
cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
if memory_pool is None:
@@ -118,6 +124,11 @@ cdef class ProxyMemoryPool(MemoryPool):
def default_memory_pool():
"""
Return the process-global memory pool.
+
+ Examples
+ --------
+ >>> default_memory_pool()
+ <pyarrow.MemoryPool backend_name=... bytes_allocated=0 max_memory=...>
"""
cdef:
MemoryPool pool = MemoryPool.__new__(MemoryPool)
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index b8c98df1f0..931677f984 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -2002,7 +2002,7 @@ cdef class RecordBatch(_PandasConvertible):
>>> batch = pa.RecordBatch.from_arrays([n_legs, animals],
... names=["n_legs", "animals"])
>>> batch.serialize()
- <pyarrow.lib.Buffer object at ...>
+ <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
"""
cdef shared_ptr[CBuffer] buffer
cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults()
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index ca49c5218e..a6488d70df 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -719,6 +719,12 @@ def test_compression_level(compression):
if not Codec.is_available(compression):
pytest.skip("{} support is not built".format(compression))
+ codec = Codec(compression)
+ if codec.name == "snappy":
+ assert codec.compression_level is None
+ else:
+ assert isinstance(codec.compression_level, int)
+
# These codecs do not support a compression level
no_level = ['snappy']
if compression in no_level:
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 1babbc4154..d37363e06f 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -2127,7 +2127,7 @@ cdef class Schema(_Weakrefable):
Write schema to Buffer:
>>> schema.serialize()
- <pyarrow.lib.Buffer object at ...>
+ <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
"""
cdef:
shared_ptr[CBuffer] buffer