You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/07 15:27:00 UTC
[arrow] branch master updated: ARROW-15431: [Python] Address docstrings in Schema
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 76d064c729 ARROW-15431: [Python] Address docstrings in Schema
76d064c729 is described below
commit 76d064c729f5e2287bf2a2d5e02d1fb192ae5738
Author: Alenka Frim <fr...@gmail.com>
AuthorDate: Thu Apr 7 17:26:51 2022 +0200
ARROW-15431: [Python] Address docstrings in Schema
This PR is adding docstring examples to:
- `pyarrow.Schema` class methods and attributes.
Closes #12783 from AlenkaF/ARROW-15431
Lead-authored-by: Alenka Frim <fr...@gmail.com>
Co-authored-by: Alenka Frim <Al...@users.noreply.github.com>
Co-authored-by: Will Jones <wi...@gmail.com>
Co-authored-by: Joris Van den Bossche <jo...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/types.pxi | 313 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 307 insertions(+), 6 deletions(-)
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 569a4b61a0..0a54b401b1 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -1367,6 +1367,42 @@ cdef class Field(_Weakrefable):
cdef class Schema(_Weakrefable):
+ """
+ A named collection of types, a.k.a. a schema. A schema defines the
+ column names and types in a record batch or table data structure.
+ Schemas also contain metadata about the columns. For example, schemas
+ converted from Pandas contain metadata about their original Pandas
+ types so they can be converted back to the same types.
+
+ Warnings
+ --------
+ Do not call this class's constructor directly. Instead use
+ :func:`pyarrow.schema` factory function which makes a new Arrow
+ Schema object.
+
+ Examples
+ --------
+ Create a new Arrow Schema object:
+
+ >>> import pyarrow as pa
+ >>> pa.schema([
+ ... ('some_int', pa.int32()),
+ ... ('some_string', pa.string())
+ ... ])
+ some_int: int32
+ some_string: string
+
+ Create Arrow Schema with metadata:
+
+ >>> pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())],
+ ... metadata={"n_legs": "Number of legs per animal"})
+ n_legs: int64
+ animals: string
+ -- schema metadata --
+ n_legs: 'Number of legs per animal'
+ """
def __cinit__(self):
pass
@@ -1413,6 +1449,19 @@ cdef class Schema(_Weakrefable):
def pandas_metadata(self):
"""
Return deserialized-from-JSON pandas metadata field (if it exists)
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> import pandas as pd
+ >>> df = pd.DataFrame({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
+ >>> schema = pa.Table.from_pandas(df).schema
+
+ Select pandas metadata field from Arrow Schema:
+
+ >>> schema.pandas_metadata
+ {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, 'stop': 4, 'step': 1}], ...
"""
metadata = self.metadata
key = b'pandas'
@@ -1430,6 +1479,18 @@ cdef class Schema(_Weakrefable):
Returns
-------
list of str
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Get the names of the schema's fields:
+
+ >>> schema.names
+ ['n_legs', 'animals']
"""
cdef int i
result = []
@@ -1446,11 +1507,43 @@ cdef class Schema(_Weakrefable):
Returns
-------
list of DataType
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Get the types of the schema's fields:
+
+ >>> schema.types
+ [DataType(int64), DataType(string)]
"""
return [field.type for field in self]
@property
def metadata(self):
+ """
+ The schema's metadata.
+
+ Returns
+ -------
+ metadata: dict
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())],
+ ... metadata={"n_legs": "Number of legs per animal"})
+
+ Get the schema's metadata:
+
+ >>> schema.metadata
+ {b'n_legs': b'Number of legs per animal'}
+ """
wrapped = pyarrow_wrap_metadata(self.schema.metadata())
if wrapped is not None:
return wrapped.to_dict()
@@ -1470,6 +1563,23 @@ cdef class Schema(_Weakrefable):
Returns
-------
table: pyarrow.Table
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Create an empty table with the schema's fields:
+
+ >>> schema.empty_table()
+ pyarrow.Table
+ n_legs: int64
+ animals: string
+ ----
+ n_legs: [[]]
+ animals: [[]]
"""
arrays = [_empty_array(field.type) for field in self]
return Table.from_arrays(arrays, schema=self)
@@ -1487,6 +1597,28 @@ cdef class Schema(_Weakrefable):
Returns
-------
is_equal : bool
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema1 = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())],
+ ... metadata={"n_legs": "Number of legs per animal"})
+ >>> schema2 = pa.schema([
+ ... ('some_int', pa.int32()),
+ ... ('some_string', pa.string())
+ ... ])
+
+ Test two equal schemas:
+
+ >>> schema1.equals(schema1)
+ True
+
+ Test two unequal schemas:
+
+ >>> schema1.equals(schema2)
+ False
"""
return self.sp_schema.get().Equals(deref(other.schema),
check_metadata)
@@ -1512,17 +1644,20 @@ cdef class Schema(_Weakrefable):
Examples
--------
-
>>> import pandas as pd
>>> import pyarrow as pa
>>> df = pd.DataFrame({
- ... 'int': [1, 2],
- ... 'str': ['a', 'b']
- ... })
+ ... 'int': [1, 2],
+ ... 'str': ['a', 'b']
+ ... })
+
+ Create an Arrow Schema from the schema of a pandas dataframe:
+
>>> pa.Schema.from_pandas(df)
int: int64
str: string
- __index_level_0__: int64
+ -- schema metadata --
+ pandas: '{"index_columns": [{"kind": "range", "name": null, ...
"""
from pyarrow.pandas_compat import dataframe_to_types
names, types, metadata = dataframe_to_types(
@@ -1545,6 +1680,23 @@ cdef class Schema(_Weakrefable):
Returns
-------
pyarrow.Field
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Select the second field:
+
+ >>> schema.field(1)
+ pyarrow.Field<animals: string>
+
+ Select the field of the column named 'n_legs':
+
+ >>> schema.field('n_legs')
+ pyarrow.Field<n_legs: int64>
"""
if isinstance(i, (bytes, str)):
field_index = self.get_field_index(i)
@@ -1574,7 +1726,7 @@ cdef class Schema(_Weakrefable):
def field_by_name(self, name):
"""
- Access a field by its name rather than the column index.
+ DEPRECATED
Parameters
----------
@@ -1616,6 +1768,28 @@ cdef class Schema(_Weakrefable):
The index of the field with the given name; -1 if the
name isn't found or there are several fields with the given
name.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Get the index of the field named 'animals':
+
+ >>> schema.get_field_index("animals")
+ 1
+
+ Index in case of several fields with the given name:
+
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string()),
+ ... pa.field('animals', pa.bool_())],
+ ... metadata={"n_legs": "Number of legs per animal"})
+ >>> schema.get_field_index("animals")
+ -1
"""
return self.schema.GetFieldIndex(tobytes(name))
@@ -1631,6 +1805,19 @@ cdef class Schema(_Weakrefable):
Returns
-------
indices : List[int]
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string()),
+ ... pa.field('animals', pa.bool_())])
+
+ Get the indexes of the fields named 'animals':
+
+ >>> schema.get_all_field_indices("animals")
+ [1, 2]
"""
return self.schema.GetAllFieldIndices(tobytes(name))
@@ -1649,6 +1836,27 @@ cdef class Schema(_Weakrefable):
-------
schema: Schema
New object with appended field.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Append a field 'extra' at the end of the schema:
+
+ >>> schema_new = schema.append(pa.field('extra', pa.bool_()))
+ >>> schema_new
+ n_legs: int64
+ animals: string
+ extra: bool
+
+ Original schema is unmodified:
+
+ >>> schema
+ n_legs: int64
+ animals: string
"""
return self.insert(self.schema.num_fields(), field)
@@ -1664,6 +1872,20 @@ cdef class Schema(_Weakrefable):
Returns
-------
schema: Schema
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Insert a new field at the second position:
+
+ >>> schema.insert(1, pa.field('extra', pa.bool_()))
+ n_legs: int64
+ extra: bool
+ animals: string
"""
cdef:
shared_ptr[CSchema] new_schema
@@ -1687,6 +1909,18 @@ cdef class Schema(_Weakrefable):
Returns
-------
schema: Schema
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Remove the second field of the schema:
+
+ >>> schema.remove(1)
+ n_legs: int64
"""
cdef shared_ptr[CSchema] new_schema
@@ -1707,6 +1941,19 @@ cdef class Schema(_Weakrefable):
Returns
-------
schema: Schema
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Replace the second field of the schema with a new field 'replaced':
+
+ >>> schema.set(1, pa.field('replaced', pa.bool_()))
+ n_legs: int64
+ replaced: bool
"""
cdef:
shared_ptr[CSchema] new_schema
@@ -1720,6 +1967,14 @@ cdef class Schema(_Weakrefable):
return pyarrow_wrap_schema(new_schema)
def add_metadata(self, metadata):
+ """
+ DEPRECATED
+
+ Parameters
+ ----------
+ metadata : dict
+ Keys and values must be string-like / coercible to bytes
+ """
warnings.warn("The 'add_metadata' method is deprecated, use "
"'with_metadata' instead", FutureWarning, stacklevel=2)
return self.with_metadata(metadata)
@@ -1736,6 +1991,21 @@ cdef class Schema(_Weakrefable):
Returns
-------
schema : pyarrow.Schema
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Add metadata to an existing schema:
+
+ >>> schema.with_metadata({"n_legs": "Number of legs per animal"})
+ n_legs: int64
+ animals: string
+ -- schema metadata --
+ n_legs: 'Number of legs per animal'
"""
cdef shared_ptr[CSchema] c_schema
@@ -1757,6 +2027,18 @@ cdef class Schema(_Weakrefable):
Returns
-------
serialized : Buffer
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())])
+
+ Write schema to Buffer:
+
+ >>> schema.serialize()
+ <pyarrow.lib.Buffer object at ...>
"""
cdef:
shared_ptr[CBuffer] buffer
@@ -1774,6 +2056,25 @@ cdef class Schema(_Weakrefable):
Returns
-------
schema : pyarrow.Schema
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> schema = pa.schema([
+ ... pa.field('n_legs', pa.int64()),
+ ... pa.field('animals', pa.string())],
+ ... metadata={"n_legs": "Number of legs per animal"})
+ >>> schema
+ n_legs: int64
+ animals: string
+ -- schema metadata --
+ n_legs: 'Number of legs per animal'
+
+ Create a new schema by removing the metadata from the original:
+
+ >>> schema.remove_metadata()
+ n_legs: int64
+ animals: string
"""
cdef shared_ptr[CSchema] new_schema
with nogil: