You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/07 15:27:00 UTC

[arrow] branch master updated: ARROW-15431: [Python] Address docstrings in Schema

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 76d064c729 ARROW-15431: [Python] Address docstrings in Schema
76d064c729 is described below

commit 76d064c729f5e2287bf2a2d5e02d1fb192ae5738
Author: Alenka Frim <fr...@gmail.com>
AuthorDate: Thu Apr 7 17:26:51 2022 +0200

    ARROW-15431: [Python] Address docstrings in Schema
    
    This PR is adding docstring examples to:
    
    - `pyarrow.Schema` class methods and attributes.
    
    Closes #12783 from AlenkaF/ARROW-15431
    
    Lead-authored-by: Alenka Frim <fr...@gmail.com>
    Co-authored-by: Alenka Frim <Al...@users.noreply.github.com>
    Co-authored-by: Will Jones <wi...@gmail.com>
    Co-authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/types.pxi | 313 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 307 insertions(+), 6 deletions(-)

diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 569a4b61a0..0a54b401b1 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1367,6 +1367,42 @@ cdef class Field(_Weakrefable):
 
 
 cdef class Schema(_Weakrefable):
+    """
+    A named collection of types, a.k.a. a schema. A schema defines the
+    column names and types in a record batch or table data structure.
+    Schemas also contain metadata about the columns. For example, schemas
+    converted from pandas contain metadata about their original pandas
+    types so they can be converted back to the same types.
+
+    Warnings
+    --------
+    Do not call this class's constructor directly. Instead use
+    :func:`pyarrow.schema` factory function which makes a new Arrow
+    Schema object.
+
+    Examples
+    --------
+    Create a new Arrow Schema object:
+
+    >>> import pyarrow as pa
+    >>> pa.schema([
+    ...     ('some_int', pa.int32()),
+    ...     ('some_string', pa.string())
+    ... ])
+    some_int: int32
+    some_string: string
+
+    Create Arrow Schema with metadata:
+
+    >>> pa.schema([
+    ...     pa.field('n_legs', pa.int64()),
+    ...     pa.field('animals', pa.string())],
+    ...     metadata={"n_legs": "Number of legs per animal"})
+    n_legs: int64
+    animals: string
+    -- schema metadata --
+    n_legs: 'Number of legs per animal'
+    """
 
     def __cinit__(self):
         pass
@@ -1413,6 +1449,19 @@ cdef class Schema(_Weakrefable):
     def pandas_metadata(self):
         """
         Return deserialized-from-JSON pandas metadata field (if it exists)
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import pandas as pd
+        >>> df = pd.DataFrame({'n_legs': [2, 4, 5, 100],
+        ...                    'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
+        >>> schema = pa.Table.from_pandas(df).schema
+
+        Select pandas metadata field from Arrow Schema:
+
+        >>> schema.pandas_metadata
+        {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, 'stop': 4, 'step': 1}], ...
         """
         metadata = self.metadata
         key = b'pandas'
@@ -1430,6 +1479,18 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         list of str
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Get the names of the schema's fields:
+
+        >>> schema.names
+        ['n_legs', 'animals']
         """
         cdef int i
         result = []
@@ -1446,11 +1507,43 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         list of DataType
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Get the types of the schema's fields:
+
+        >>> schema.types
+        [DataType(int64), DataType(string)]
         """
         return [field.type for field in self]
 
     @property
     def metadata(self):
+        """
+        The schema's metadata.
+
+        Returns
+        -------
+        metadata: dict
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())],
+        ...     metadata={"n_legs": "Number of legs per animal"})
+
+        Get the schema's metadata:
+
+        >>> schema.metadata
+        {b'n_legs': b'Number of legs per animal'}
+        """
         wrapped = pyarrow_wrap_metadata(self.schema.metadata())
         if wrapped is not None:
             return wrapped.to_dict()
@@ -1470,6 +1563,23 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         table: pyarrow.Table
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Create an empty table with the schema's fields:
+
+        >>> schema.empty_table()
+        pyarrow.Table
+        n_legs: int64
+        animals: string
+        ----
+        n_legs: [[]]
+        animals: [[]]
         """
         arrays = [_empty_array(field.type) for field in self]
         return Table.from_arrays(arrays, schema=self)
@@ -1487,6 +1597,28 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         is_equal : bool
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema1 = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())],
+        ...     metadata={"n_legs": "Number of legs per animal"})
+        >>> schema2 = pa.schema([
+        ...     ('some_int', pa.int32()),
+        ...     ('some_string', pa.string())
+        ... ])
+
+        Test two equal schemas:
+
+        >>> schema1.equals(schema1)
+        True
+
+        Test two unequal schemas:
+
+        >>> schema1.equals(schema2)
+        False
         """
         return self.sp_schema.get().Equals(deref(other.schema),
                                            check_metadata)
@@ -1512,17 +1644,20 @@ cdef class Schema(_Weakrefable):
 
         Examples
         --------
-
         >>> import pandas as pd
         >>> import pyarrow as pa
         >>> df = pd.DataFrame({
-            ...     'int': [1, 2],
-            ...     'str': ['a', 'b']
-            ... })
+        ...     'int': [1, 2],
+        ...     'str': ['a', 'b']
+        ... })
+
+        Create an Arrow Schema from the schema of a pandas dataframe:
+
         >>> pa.Schema.from_pandas(df)
         int: int64
         str: string
-        __index_level_0__: int64
+        -- schema metadata --
+        pandas: '{"index_columns": [{"kind": "range", "name": null, ...
         """
         from pyarrow.pandas_compat import dataframe_to_types
         names, types, metadata = dataframe_to_types(
@@ -1545,6 +1680,23 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         pyarrow.Field
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Select the second field:
+
+        >>> schema.field(1)
+        pyarrow.Field<animals: string>
+
+        Select the field of the column named 'n_legs':
+
+        >>> schema.field('n_legs')
+        pyarrow.Field<n_legs: int64>
         """
         if isinstance(i, (bytes, str)):
             field_index = self.get_field_index(i)
@@ -1574,7 +1726,7 @@ cdef class Schema(_Weakrefable):
 
     def field_by_name(self, name):
         """
-        Access a field by its name rather than the column index.
+        DEPRECATED
 
         Parameters
         ----------
@@ -1616,6 +1768,28 @@ cdef class Schema(_Weakrefable):
             The index of the field with the given name; -1 if the
             name isn't found or there are several fields with the given
             name.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Get the index of the field named 'animals':
+
+        >>> schema.get_field_index("animals")
+        1
+
+        Index in case of several fields with the given name:
+
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string()),
+        ...     pa.field('animals', pa.bool_())],
+        ...     metadata={"n_legs": "Number of legs per animal"})
+        >>> schema.get_field_index("animals")
+        -1
         """
         return self.schema.GetFieldIndex(tobytes(name))
 
@@ -1631,6 +1805,19 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         indices : List[int]
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string()),
+        ...     pa.field('animals', pa.bool_())])
+
+        Get the indices of the fields named 'animals':
+
+        >>> schema.get_all_field_indices("animals")
+        [1, 2]
         """
         return self.schema.GetAllFieldIndices(tobytes(name))
 
@@ -1649,6 +1836,27 @@ cdef class Schema(_Weakrefable):
         -------
         schema: Schema
             New object with appended field.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Append a field 'extra' at the end of the schema:
+
+        >>> schema_new = schema.append(pa.field('extra', pa.bool_()))
+        >>> schema_new
+        n_legs: int64
+        animals: string
+        extra: bool
+
+        Original schema is unmodified:
+
+        >>> schema
+        n_legs: int64
+        animals: string
         """
         return self.insert(self.schema.num_fields(), field)
 
@@ -1664,6 +1872,20 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         schema: Schema
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Insert a new field at the second position:
+
+        >>> schema.insert(1, pa.field('extra', pa.bool_()))
+        n_legs: int64
+        extra: bool
+        animals: string
         """
         cdef:
             shared_ptr[CSchema] new_schema
@@ -1687,6 +1909,18 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         schema: Schema
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Remove the second field of the schema:
+
+        >>> schema.remove(1)
+        n_legs: int64
         """
         cdef shared_ptr[CSchema] new_schema
 
@@ -1707,6 +1941,19 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         schema: Schema
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Replace the second field of the schema with a new field 'replaced':
+
+        >>> schema.set(1, pa.field('replaced', pa.bool_()))
+        n_legs: int64
+        replaced: bool
         """
         cdef:
             shared_ptr[CSchema] new_schema
@@ -1720,6 +1967,14 @@ cdef class Schema(_Weakrefable):
         return pyarrow_wrap_schema(new_schema)
 
     def add_metadata(self, metadata):
+        """
+        DEPRECATED, use 'with_metadata' instead.
+
+        Parameters
+        ----------
+        metadata : dict
+            Keys and values must be string-like / coercible to bytes
+        """
         warnings.warn("The 'add_metadata' method is deprecated, use "
                       "'with_metadata' instead", FutureWarning, stacklevel=2)
         return self.with_metadata(metadata)
@@ -1736,6 +1991,21 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         schema : pyarrow.Schema
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Add metadata to an existing schema:
+
+        >>> schema.with_metadata({"n_legs": "Number of legs per animal"})
+        n_legs: int64
+        animals: string
+        -- schema metadata --
+        n_legs: 'Number of legs per animal'
         """
         cdef shared_ptr[CSchema] c_schema
 
@@ -1757,6 +2027,18 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         serialized : Buffer
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())])
+
+        Write schema to Buffer:
+
+        >>> schema.serialize()
+        <pyarrow.lib.Buffer object at ...>
         """
         cdef:
             shared_ptr[CBuffer] buffer
@@ -1774,6 +2056,25 @@ cdef class Schema(_Weakrefable):
         Returns
         -------
         schema : pyarrow.Schema
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> schema = pa.schema([
+        ...     pa.field('n_legs', pa.int64()),
+        ...     pa.field('animals', pa.string())],
+        ...     metadata={"n_legs": "Number of legs per animal"})
+        >>> schema
+        n_legs: int64
+        animals: string
+        -- schema metadata --
+        n_legs: 'Number of legs per animal'
+
+        Create a new schema by removing the metadata from the original:
+
+        >>> schema.remove_metadata()
+        n_legs: int64
+        animals: string
         """
         cdef shared_ptr[CSchema] new_schema
         with nogil: