You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/07/20 06:23:23 UTC

[arrow] branch master updated: ARROW-16094: [Python] Address docstrings in Filesystems (Utilities) (#13582)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 39980dcdbf ARROW-16094: [Python] Address docstrings in Filesystems (Utilities) (#13582)
39980dcdbf is described below

commit 39980dcdbfbc74ee1bdd345591a728dbc6e21dfe
Author: Alenka Frim <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 20 08:23:18 2022 +0200

    ARROW-16094: [Python] Address docstrings in Filesystems (Utilities) (#13582)
    
    Authored-by: Alenka Frim <fr...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/_s3fs.pyx   |  8 ++++++--
 python/pyarrow/conftest.py | 23 +++++++++++++++++++++++
 python/pyarrow/fs.py       | 24 ++++++++++++++++++------
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index fe22612059..d9335995dc 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -44,6 +44,10 @@ def initialize_s3(S3LogLevel log_level=S3LogLevel.Fatal):
     ----------
     log_level : S3LogLevel
         level of logging
+
+    Examples
+    --------
+    >>> fs.initialize_s3(fs.S3LogLevel.Error) # doctest: +SKIP
     """
     cdef CS3GlobalOptions options
     options.log_level = <CS3LogLevel> log_level
@@ -70,8 +74,8 @@ def resolve_s3_region(bucket):
 
     Examples
     --------
-    >>> resolve_s3_region('ursa-labs-taxi-data')
-    'us-east-2'
+    >>> fs.resolve_s3_region('registry.opendata.aws')
+    'us-east-1'
     """
     cdef:
         c_string c_bucket
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index 638dad8568..b956dbef3c 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -17,6 +17,7 @@
 
 import pytest
 from pyarrow import Codec
+from pyarrow import fs
 
 groups = [
     'brotli',
@@ -241,3 +242,25 @@ def _docdir(request):
 
     else:
         yield
+
+
+# Define doctest_namespace for fs module docstring import
+@pytest.fixture(autouse=True)
+def add_fs(doctest_namespace, request, tmp_path):
+
+    # Trigger ONLY for the doctests
+    doctest_m = request.config.option.doctestmodules
+    doctest_c = getattr(request.config.option, "doctest_cython", False)
+
+    if doctest_m or doctest_c:
+        # fs import
+        doctest_namespace["fs"] = fs
+
+        # Creation of an object and file with data
+        local = fs.LocalFileSystem()
+        path = tmp_path / 'fileinfo.dat'
+        with local.open_output_stream(str(path)) as stream:
+            stream.write(b'data')
+        doctest_namespace["local"] = local
+        doctest_namespace["local_path"] = tmp_path
+    yield
diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py
index 932fc82789..b2db818a9a 100644
--- a/python/pyarrow/fs.py
+++ b/python/pyarrow/fs.py
@@ -227,16 +227,28 @@ def copy_files(source, destination,
 
     Examples
     --------
-    Copy an S3 bucket's files to a local directory:
+    Inspect an S3 bucket's files:
 
-    >>> copy_files("s3://your-bucket-name",
-    ...            "local-directory") # doctest: +SKIP
+    >>> s3, path = fs.FileSystem.from_uri(
+    ...            "s3://registry.opendata.aws/roda/ndjson/")
+    >>> selector = fs.FileSelector(path)
+    >>> s3.get_file_info(selector)
+    [<FileInfo for 'registry.opendata.aws/roda/ndjson/index.ndjson':...]
 
-    Using a FileSystem object:
+    Copy one file from S3 bucket to a local directory:
 
-    >>> copy_files("your-bucket-name", "local-directory",
-    ...            source_filesystem=S3FileSystem(...)) # doctest: +SKIP
+    >>> fs.copy_files("s3://registry.opendata.aws/roda/ndjson/index.ndjson",
+    ...               "file:///{}/index_copy.ndjson".format(local_path))
 
+    >>> fs.LocalFileSystem().get_file_info(str(local_path)+
+    ...                                    '/index_copy.ndjson')
+    <FileInfo for '.../index_copy.ndjson': type=FileType.File, size=...>
+
+    Copy file using a FileSystem object:
+
+    >>> fs.copy_files("registry.opendata.aws/roda/ndjson/index.ndjson",
+    ...               "file:///{}/index_copy.ndjson".format(local_path),
+    ...               source_filesystem=fs.S3FileSystem())
     """
     source_fs, source_path = _resolve_filesystem_and_path(
         source, source_filesystem