Posted to commits@arrow.apache.org by we...@apache.org on 2019/08/06 00:37:05 UTC
[arrow] branch master updated: ARROW-3325: [Python][FOLLOWUP] In Python 2.7, a class's __doc__ member is not writable (#5018)
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 42f4f34 ARROW-3325: [Python][FOLLOWUP] In Python 2.7, a class's __doc__ member is not writable (#5018)
42f4f34 is described below
commit 42f4f344f33e6754f28d91ea035e24ded7aa834a
Author: Wes McKinney <we...@users.noreply.github.com>
AuthorDate: Mon Aug 5 19:36:50 2019 -0500
ARROW-3325: [Python][FOLLOWUP] In Python 2.7, a class's __doc__ member is not writable (#5018)
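
For context, a minimal sketch of the pattern the patch switches to. The class name and docstring text below are illustrative, not the real pyarrow code: in Python 2.7, assigning to `SomeClass.__doc__` after the class is defined raises `AttributeError: attribute '__doc__' of 'type' objects is not writable`, so the formatted docstring has to be built inside the class body instead.

    # Sketch only; names and docstring text are made up for illustration.

    _read_docstring_common = """\
    memory_map : boolean, default True
        If the source is a file path, use a memory map to read the file"""


    class ExampleDataset(object):
        # Building __doc__ inside the class body works on both Python 2.7
        # and Python 3, because the docstring is set at class-creation time
        # rather than assigned to the finished class object.
        __doc__ = """
    Example reader.

    Parameters
    ----------
    {0}
    """.format(_read_docstring_common)


    # The previous pattern fails on Python 2.7:
    #   ExampleDataset.__doc__ = "..."   # AttributeError: attribute
    #   '__doc__' of 'type' objects is not writable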
---
python/pyarrow/parquet.py | 112 +++++++++++++++++++++++-----------------------
1 file changed, 56 insertions(+), 56 deletions(-)
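
As a reference for the ``filters`` and ``read_dictionary`` parameters described in the moved docstring below, here is a hypothetical usage sketch. The dataset path, partition keys, and column name are invented; the structure follows the docstring's DNF description, with inner lists AND-ed and the outer list OR-ed.

    import pyarrow.parquet as pq

    # Hypothetical partitioned layout: dataset_root/year=.../month=.../part.parquet
    # Keep files where (year == 2018 AND month == 12) OR (year == 2019):
    dataset = pq.ParquetDataset(
        'dataset_root/',
        filters=[
            [('year', '=', 2018), ('month', '=', 12)],  # inner list: AND
            [('year', '=', 2019)],                      # outer list: OR
        ],
        # Columns listed here are read directly as DictionaryArray
        # (BYTE_ARRAY columns only); the column name is made up.
        read_dictionary=['category'],
    )
    table = dataset.read()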
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index eb00a35..82536e4 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -890,7 +890,63 @@ def _open_dataset_file(dataset, path, meta=None):
common_metadata=dataset.common_metadata)
+_read_docstring_common = """\
+read_dictionary : list, default None
+ List of names or column paths (for nested types) to read directly
+ as DictionaryArray. Only supported for BYTE_ARRAY storage. To read
+ a flat column as dictionary-encoded pass the column name. For
+ nested types, you must pass the full column "path", which could be
+ something like level1.level2.list.item. Refer to the Parquet
+ file's schema to obtain the paths.
+memory_map : boolean, default True
+ If the source is a file path, use a memory map to read the file, which
+ can improve performance in some environments"""
+
+
class ParquetDataset(object):
+
+ __doc__ = """
+Encapsulates details of reading a complete Parquet dataset possibly
+consisting of multiple files and partitions in subdirectories
+
+Parameters
+----------
+path_or_paths : str or List[str]
+ A directory name, single file name, or list of file names
+filesystem : FileSystem, default None
+ If nothing passed, paths assumed to be found in the local on-disk
+ filesystem
+metadata : pyarrow.parquet.FileMetaData
+ Use metadata obtained elsewhere to validate file schemas
+schema : pyarrow.parquet.Schema
+ Use schema obtained elsewhere to validate file schemas. Alternative to
+ metadata parameter
+split_row_groups : boolean, default False
+ Divide files into pieces for each row group in the file
+validate_schema : boolean, default True
+ Check that individual file schemas are all the same / compatible
+filters : List[Tuple] or List[List[Tuple]] or None (default)
+ List of filters to apply, like ``[[('x', '=', 0), ...], ...]``. This
+ implements partition-level (hive) filtering only, i.e., to prevent the
+ loading of some files of the dataset.
+
+ Predicates are expressed in disjunctive normal form (DNF). This means
+ that each innermost tuple describes a single column predicate. These
+ inner predicates are combined with a conjunction (AND) into a
+ larger predicate. The outermost list then combines all filters
+ with a disjunction (OR). This makes it possible to express any
+ filter that can be written in boolean logic.
+
+ Filters may also be passed as a flat List[Tuple]; such predicates
+ are evaluated as a conjunction. To express OR in predicates, one must
+ use the (preferred) List[List[Tuple]] notation.
+metadata_nthreads: int, default 1
+ How many threads to allow in the thread pool that is used to read the
+ dataset metadata. Increasing this is helpful when reading partitioned
+ datasets.
+{0}
+""".format(_read_docstring_common)
+
def __init__(self, path_or_paths, filesystem=None, schema=None,
metadata=None, split_row_groups=False, validate_schema=True,
filters=None, metadata_nthreads=1,
@@ -1105,62 +1161,6 @@ def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1,
return pieces, partitions, common_metadata_path, metadata_path
-_read_docstring_common = """\
-read_dictionary : list, default None
- List of names or column paths (for nested types) to read directly
- as DictionaryArray. Only supported for BYTE_ARRAY storage. To read
- a flat column as dictionary-encoded pass the column name. For
- nested types, you must pass the full column "path", which could be
- something like level1.level2.list.item. Refer to the Parquet
- file's schema to obtain the paths.
-memory_map : boolean, default True
- If the source is a file path, use a memory map to read the file, which
- can improve performance in some environments"""
-
-
-ParquetDataset.__doc__ = """
-Encapsulates details of reading a complete Parquet dataset possibly
-consisting of multiple files and partitions in subdirectories
-
-Parameters
-----------
-path_or_paths : str or List[str]
- A directory name, single file name, or list of file names
-filesystem : FileSystem, default None
- If nothing passed, paths assumed to be found in the local on-disk
- filesystem
-metadata : pyarrow.parquet.FileMetaData
- Use metadata obtained elsewhere to validate file schemas
-schema : pyarrow.parquet.Schema
- Use schema obtained elsewhere to validate file schemas. Alternative to
- metadata parameter
-split_row_groups : boolean, default False
- Divide files into pieces for each row group in the file
-validate_schema : boolean, default True
- Check that individual file schemas are all the same / compatible
-filters : List[Tuple] or List[List[Tuple]] or None (default)
- List of filters to apply, like ``[[('x', '=', 0), ...], ...]``. This
- implements partition-level (hive) filtering only, i.e., to prevent the
- loading of some files of the dataset.
-
- Predicates are expressed in disjunctive normal form (DNF). This means
- that each innermost tuple describes a single column predicate. These
- inner predicates are combined with a conjunction (AND) into a
- larger predicate. The outermost list then combines all filters
- with a disjunction (OR). This makes it possible to express any
- filter that can be written in boolean logic.
-
- Filters may also be passed as a flat List[Tuple]; such predicates
- are evaluated as a conjunction. To express OR in predicates, one must
- use the (preferred) List[List[Tuple]] notation.
-metadata_nthreads: int, default 1
- How many threads to allow in the thread pool that is used to read the
- dataset metadata. Increasing this is helpful when reading partitioned
- datasets.
-{0}
-""".format(_read_docstring_common)
-
-
_read_table_docstring = """
{0}