You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "paleolimbot (via GitHub)" <gi...@apache.org> on 2023/06/14 14:44:55 UTC

[GitHub] [arrow-nanoarrow] paleolimbot commented on a diff in pull request #117: feat(python): Python schema, array, and array view skeleton

paleolimbot commented on code in PR #117:
URL: https://github.com/apache/arrow-nanoarrow/pull/117#discussion_r1229740544


##########
.github/workflows/python.yaml:
##########
@@ -57,3 +57,31 @@ jobs:
       - name: Run tests
         run: |
           pytest python/tests -v -s
+
+      - name: Run doctests
+        if: success() && matrix.python-version == '3.10'
+        run: |
+          # Needs editable install to run --doctest-cython
+          pip install pytest-cython
+          pip install -e python

Review Comment:
   I think it's installed without `-e` above (maybe I'll just add it there, and maybe also add `pytest-cython` to pyproject.toml while I'm at it).



##########
python/nanoarrow/_lib.pyx:
##########
@@ -0,0 +1,869 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+# cython: linetrace=True
+
+"""Low-level nanoarrow Python bindings
+
+This Cython extension provides low-level Python wrappers around the
+Arrow C Data and Arrow C Stream interface structs. In general, there
+is one wrapper per C struct and pointer validity is managed by keeping
+strong references to Python objects. These wrappers are intended to
+be literal and stay close to the structure definitions.
+"""
+
+from libc.stdint cimport uintptr_t, int64_t
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.bytes cimport PyBytes_FromStringAndSize
+from cpython cimport Py_buffer
+from nanoarrow_c cimport *
+
+def c_version():
+    """Return the nanoarrow C library version string
+    """
+    return ArrowNanoarrowVersion().decode("UTF-8")
+
+cdef class SchemaHolder:
+    """Memory holder for an ArrowSchema
+
+    This class is responsible for the lifecycle of the ArrowSchema
+    whose memory it is responsible. When this object is deleted,
+    a non-NULL release callback is invoked.
+    """
+    cdef ArrowSchema c_schema
+
+    def __cinit__(self):
+        self.c_schema.release = NULL
+
+    def __dealloc__(self):
+        if self.c_schema.release != NULL:
+          self.c_schema.release(&self.c_schema)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_schema
+
+cdef class ArrayHolder:
+    """Memory holder for an ArrowArray
+
+    This class is responsible for the lifecycle of the ArrowArray
+    whose memory it is responsible. When this object is deleted,
+    a non-NULL release callback is invoked.
+    """
+    cdef ArrowArray c_array
+
+    def __cinit__(self):
+        self.c_array.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array.release != NULL:
+          self.c_array.release(&self.c_array)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_array
+
+cdef class ArrayStreamHolder:
+    """Memory holder for an ArrowArrayStream
+
+    This class is responsible for the lifecycle of the ArrowArrayStream
+    whose memory it is responsible. When this object is deleted,
+    a non-NULL release callback is invoked.
+    """
+    cdef ArrowArrayStream c_array_stream
+
+    def __cinit__(self):
+        self.c_array_stream.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array_stream.release != NULL:
+          self.c_array_stream.release(&self.c_array_stream)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_array_stream
+
+cdef class ArrayViewHolder:
+    """Memory holder for an ArrowArrayView
+
+    This class is responsible for the lifecycle of the ArrowArrayView
+    whose memory it is responsible. When this object is deleted,
+    ArrowArrayViewReset() is called on the contents.
+    """
+    cdef ArrowArrayView c_array_view
+
+    def __init__(self):
+        ArrowArrayViewInitFromType(&self.c_array_view, NANOARROW_TYPE_UNINITIALIZED)
+
+    def __dealloc__(self):
+        ArrowArrayViewReset(&self.c_array_view)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_array_view
+
+
+class NanoarrowException(RuntimeError):
+    """An error resulting from a call to the nanoarrow C library
+
+    Calls to the nanoarrow C library and/or the Arrow C Stream interface
+    callbacks return an errno error code and sometimes a message with extra
+    detail. This exception wraps a RuntimeError to format a suitable message
+    and store the components of the original error.
+    """
+
+    def __init__(self, what, code, message):
+        self.what = what
+        self.code = code
+        self.message = message
+
+        if self.message == "":
+            super().__init__(f"{self.what} failed ({self.code})")
+        else:
+            super().__init__(f"{self.what} failed ({self.code}): {self.message}")
+
+
+cdef class Error:
+    """Memory holder for an ArrowError
+
+    ArrowError is the C struct that is optionally passed to nanoarrow functions
+    when a detailed error message might be returned. This class holds a C
+    reference to the object and provides helpers for raising exceptions based
+    on the contained message.
+    """
+    cdef ArrowError c_error
+
+    def __cinit__(self):
+        self.c_error.message[0] = 0
+
+    def raise_message(self, what, code):
+        """Raise a NanoarrowException from this message
+        """
+        raise NanoarrowException(what, code, self.c_error.message.decode("UTF-8"))
+
+    @staticmethod
+    def raise_error(what, code):
+        """Raise a NanoarrowException without a message
+        """
+        raise NanoarrowException(what, code, "")
+
+
+cdef class Schema:
+    """ArrowSchema wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowSchema as defined in the Arrow C Data interface. These objects
+    are usually created using `nanoarrow.schema()`. This Python wrapper
+    allows access to schema fields but does not automatically deserialize
+    their content: use `.view()` to validate and deserialize the content
+    into a more easily inspectable object.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> schema = na.schema(pa.int32())
+    >>> schema.is_valid()
+    True
+    >>> schema.format
+    'i'
+    >>> schema.name
+    ''
+    >>> schema_view = schema.view()
+    >>> schema_view.type
+    'int32'
+    """
+    cdef object _base
+    cdef ArrowSchema* _ptr
+
+    @staticmethod
+    def allocate():
+        base = SchemaHolder()
+        return Schema(base, base._addr())
+
+    def __init__(self, object base, uintptr_t addr):
+        self._base = base,

Review Comment:
   Definitely not!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org