Posted to commits@arrow.apache.org by jo...@apache.org on 2022/07/20 06:23:23 UTC
[arrow] branch master updated: ARROW-16094: [Python] Address docstrings in Filesystems (Utilities) (#13582)
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 39980dcdbf ARROW-16094: [Python] Address docstrings in Filesystems (Utilities) (#13582)
39980dcdbf is described below
commit 39980dcdbfbc74ee1bdd345591a728dbc6e21dfe
Author: Alenka Frim <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 20 08:23:18 2022 +0200
ARROW-16094: [Python] Address docstrings in Filesystems (Utilities) (#13582)
Authored-by: Alenka Frim <fr...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/_s3fs.pyx | 8 ++++++--
python/pyarrow/conftest.py | 23 +++++++++++++++++++++++
python/pyarrow/fs.py | 24 ++++++++++++++++++------
3 files changed, 47 insertions(+), 8 deletions(-)
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index fe22612059..d9335995dc 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -44,6 +44,10 @@ def initialize_s3(S3LogLevel log_level=S3LogLevel.Fatal):
----------
log_level : S3LogLevel
level of logging
+
+ Examples
+ --------
+ >>> fs.initialize_s3(fs.S3LogLevel.Error) # doctest: +SKIP
"""
cdef CS3GlobalOptions options
options.log_level = <CS3LogLevel> log_level
@@ -70,8 +74,8 @@ def resolve_s3_region(bucket):
Examples
--------
- >>> resolve_s3_region('ursa-labs-taxi-data')
- 'us-east-2'
+ >>> fs.resolve_s3_region('registry.opendata.aws')
+ 'us-east-1'
"""
cdef:
c_string c_bucket
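The two updated _s3fs.pyx doctests above can also be exercised as ordinary code. A minimal sketch, assuming network access and using the same public bucket as the doctest:

    from pyarrow import fs

    # Initialize S3 support with a chosen SDK log level.
    fs.initialize_s3(fs.S3LogLevel.Error)

    # Resolve the region a public bucket is hosted in.
    region = fs.resolve_s3_region('registry.opendata.aws')
    print(region)  # expected: 'us-east-1', per the doctest above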
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index 638dad8568..b956dbef3c 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -17,6 +17,7 @@
import pytest
from pyarrow import Codec
+from pyarrow import fs
groups = [
'brotli',
@@ -241,3 +242,25 @@ def _docdir(request):
else:
yield
+
+
+# Define doctest_namespace for fs module docstring import
+@pytest.fixture(autouse=True)
+def add_fs(doctest_namespace, request, tmp_path):
+
+ # Trigger ONLY for the doctests
+ doctest_m = request.config.option.doctestmodules
+ doctest_c = getattr(request.config.option, "doctest_cython", False)
+
+ if doctest_m or doctest_c:
+ # fs import
+ doctest_namespace["fs"] = fs
+
+ # Creation of an object and file with data
+ local = fs.LocalFileSystem()
+ path = tmp_path / 'fileinfo.dat'
+ with local.open_output_stream(str(path)) as stream:
+ stream.write(b'data')
+ doctest_namespace["local"] = local
+ doctest_namespace["local_path"] = tmp_path
+ yield
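With this fixture in place, docstrings in the fs module can refer to the injected names fs, local and local_path directly; the fixture only activates when pytest is invoked with --doctest-modules (or with --doctest-cython from the pytest-cython plugin). A sketch of a doctest that could rely on the injected objects; the FileInfo repr shown is an assumption based on the 4-byte 'fileinfo.dat' the fixture writes:

    >>> local.get_file_info(str(local_path) + '/fileinfo.dat')
    <FileInfo for '.../fileinfo.dat': type=FileType.File, size=4>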
diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py
index 932fc82789..b2db818a9a 100644
--- a/python/pyarrow/fs.py
+++ b/python/pyarrow/fs.py
@@ -227,16 +227,28 @@ def copy_files(source, destination,
Examples
--------
- Copy an S3 bucket's files to a local directory:
+ Inspect an S3 bucket's files:
- >>> copy_files("s3://your-bucket-name",
- ... "local-directory") # doctest: +SKIP
+ >>> s3, path = fs.FileSystem.from_uri(
+ ... "s3://registry.opendata.aws/roda/ndjson/")
+ >>> selector = fs.FileSelector(path)
+ >>> s3.get_file_info(selector)
+ [<FileInfo for 'registry.opendata.aws/roda/ndjson/index.ndjson':...]
- Using a FileSystem object:
+ Copy one file from S3 bucket to a local directory:
- >>> copy_files("your-bucket-name", "local-directory",
- ... source_filesystem=S3FileSystem(...)) # doctest: +SKIP
+ >>> fs.copy_files("s3://registry.opendata.aws/roda/ndjson/index.ndjson",
+ ... "file:///{}/index_copy.ndjson".format(local_path))
+ >>> fs.LocalFileSystem().get_file_info(str(local_path)+
+ ... '/index_copy.ndjson')
+ <FileInfo for '.../index_copy.ndjson': type=FileType.File, size=...>
+
+ Copy file using a FileSystem object:
+
+ >>> fs.copy_files("registry.opendata.aws/roda/ndjson/index.ndjson",
+ ... "file:///{}/index_copy.ndjson".format(local_path),
+ ... source_filesystem=fs.S3FileSystem())
"""
source_fs, source_path = _resolve_filesystem_and_path(
source, source_filesystem
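The new copy_files doctests depend on S3 access; the same pattern also works purely locally. A self-contained sketch under that assumption (the temporary paths are hypothetical; no network needed):

    import pathlib
    import tempfile

    from pyarrow import fs

    tmp = pathlib.Path(tempfile.mkdtemp())
    src = tmp / 'index.ndjson'
    src.write_bytes(b'{"name": "example"}\n')

    # Both URIs resolve to LocalFileSystem, mirroring the
    # "file:///{}/index_copy.ndjson" destination used above.
    fs.copy_files('file://' + str(src),
                  'file://' + str(tmp / 'index_copy.ndjson'))

    info = fs.LocalFileSystem().get_file_info(str(tmp / 'index_copy.ndjson'))
    print(info.type, info.size)  # expected: FileType.File 20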