You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/04/07 14:12:08 UTC

[GitHub] [arrow] lidavidm commented on a diff in pull request #12800: ARROW-15776: [Python] Expose IpcReadOptions

lidavidm commented on code in PR #12800:
URL: https://github.com/apache/arrow/pull/12800#discussion_r845179940


##########
python/pyarrow/ipc.pxi:
##########
@@ -100,6 +100,74 @@ cdef _wrap_read_stats(CIpcReadStats c):
                      c.num_replaced_dictionaries)
 
 
+cdef class IpcReadOptions(_Weakrefable):
+    """
+    Serialization options for reading IPC format.
+
+    Parameters
+    ----------
+    use_threads : bool
+        Whether to use the global CPU thread pool to parallelize any
+        computational tasks like decompression.
+    ensure_native_endian : bool
+        Whether to convert incoming data to platform-native endianness.
+        Default is true.
+    included_fields : list
+        If empty (the default), return all deserialized fields.
+        If non-empty, the values are the indices of fields to read on
+        the top-level schema.
+    memory_pool : MemoryPool, default None
+        Uses default memory pool if not specified
+    """
+    __slots__ = ()
+
+    # cdef block is in lib.pxd
+
+    def __init__(self, *, bint ensure_native_endian=True,
+                 bint use_threads=True, list included_fields=None,
+                 MemoryPool memory_pool=None):
+        self.c_options = CIpcReadOptions.Defaults()
+        self.ensure_native_endian = ensure_native_endian
+        self.use_threads = use_threads
+        self.included_fields = included_fields
+        self.memory_pool = memory_pool
+
+    @property
+    def ensure_native_endian(self):
+        return self.c_options.ensure_native_endian
+
+    @ensure_native_endian.setter
+    def ensure_native_endian(self, bint value):
+        self.c_options.ensure_native_endian = value
+
+    @property
+    def use_threads(self):
+        return self.c_options.use_threads
+
+    @use_threads.setter
+    def use_threads(self, bint value):
+        self.c_options.use_threads = value
+
+    @property
+    def included_fields(self):
+        return self.c_options.included_fields
+
+    @included_fields.setter
+    def included_fields(self, list value):
+        self.c_options.included_fields = value or list()

Review Comment:
   Hmm vector here isn't nullable so they'd be effectively the same? (We should probably accept both None and empty list though.)



##########
python/pyarrow/ipc.pxi:
##########
@@ -100,6 +100,74 @@ cdef _wrap_read_stats(CIpcReadStats c):
                      c.num_replaced_dictionaries)
 
 
+cdef class IpcReadOptions(_Weakrefable):
+    """
+    Serialization options for reading IPC format.
+
+    Parameters
+    ----------
+    use_threads : bool
+        Whether to use the global CPU thread pool to parallelize any
+        computational tasks like decompression.
+    ensure_native_endian : bool
+        Whether to convert incoming data to platform-native endianness.
+        Default is true.
+    included_fields : list
+        If empty (the default), return all deserialized fields.
+        If non-empty, the values are the indices of fields to read on
+        the top-level schema.
+    memory_pool : MemoryPool, default None
+        Uses default memory pool if not specified
+    """
+    __slots__ = ()
+
+    # cdef block is in lib.pxd
+
+    def __init__(self, *, bint ensure_native_endian=True,
+                 bint use_threads=True, list included_fields=None,
+                 MemoryPool memory_pool=None):
+        self.c_options = CIpcReadOptions.Defaults()
+        self.ensure_native_endian = ensure_native_endian
+        self.use_threads = use_threads
+        self.included_fields = included_fields
+        self.memory_pool = memory_pool
+
+    @property
+    def ensure_native_endian(self):
+        return self.c_options.ensure_native_endian
+
+    @ensure_native_endian.setter
+    def ensure_native_endian(self, bint value):
+        self.c_options.ensure_native_endian = value
+
+    @property
+    def use_threads(self):
+        return self.c_options.use_threads
+
+    @use_threads.setter
+    def use_threads(self, bint value):
+        self.c_options.use_threads = value
+
+    @property
+    def included_fields(self):
+        return self.c_options.included_fields
+
+    @included_fields.setter
+    def included_fields(self, list value):
+        self.c_options.included_fields = value or list()
+
+    @property
+    def memory_pool(self):
+        cdef:
+            MemoryPool pool = MemoryPool.__new__(MemoryPool)
+        pool.init(self.c_options.memory_pool)
+        return pool

Review Comment:
   It certainly seems like for consistency we should keep memory_pool separate, yeah.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org