You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/07/21 15:15:34 UTC
[arrow] branch master updated: ARROW-15430: [Python] Address docstrings in Filesystems (Interface) (#13564)
This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8a2acaa40b ARROW-15430: [Python] Address docstrings in Filesystems (Interface) (#13564)
8a2acaa40b is described below
commit 8a2acaa40b97e6eafba196812d15a71f49f69d6a
Author: Alenka Frim <Al...@users.noreply.github.com>
AuthorDate: Thu Jul 21 17:15:26 2022 +0200
ARROW-15430: [Python] Address docstrings in Filesystems (Interface) (#13564)
Authored-by: Alenka Frim <fr...@gmail.com>
Signed-off-by: Alenka Frim <fr...@gmail.com>
---
python/pyarrow/_fs.pyx | 201 +++++++++++++++++++++++++++++++++++++++++++--
python/pyarrow/conftest.py | 5 +-
2 files changed, 197 insertions(+), 9 deletions(-)
diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx
index e5a7d7cb14..e7b028a07d 100644
--- a/python/pyarrow/_fs.pyx
+++ b/python/pyarrow/_fs.pyx
@@ -100,6 +100,48 @@ cdef class FileInfo(_Weakrefable):
If given, the filesystem entry size in bytes. This should only
be given if `type` is `FileType.File`.
+ Examples
+ --------
+ Generate a file:
+
+ >>> from pyarrow import fs
+ >>> local = fs.LocalFileSystem()
+ >>> path_fs = local_path + '/pyarrow-fs-example.dat'
+ >>> with local.open_output_stream(path_fs) as stream:
+ ... stream.write(b'data')
+ 4
+
+ Get FileInfo object using ``get_file_info()``:
+
+ >>> file_info = local.get_file_info(path_fs)
+ >>> file_info
+ <FileInfo for '.../pyarrow-fs-example.dat': type=FileType.File, size=4>
+
+ Inspect FileInfo attributes:
+
+ >>> file_info.type
+ <FileType.File: 2>
+
+ >>> file_info.is_file
+ True
+
+ >>> file_info.path
+ '/.../pyarrow-fs-example.dat'
+
+ >>> file_info.base_name
+ 'pyarrow-fs-example.dat'
+
+ >>> file_info.size
+ 4
+
+ >>> file_info.extension
+ 'dat'
+
+ >>> file_info.mtime # doctest: +SKIP
+ datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc)
+
+ >>> file_info.mtime_ns # doctest: +SKIP
+ 1656489370873922073
"""
def __init__(self, path, FileType type=FileType.Unknown, *,
@@ -179,6 +221,12 @@ cdef class FileInfo(_Weakrefable):
def path(self):
"""
The full file path in the filesystem.
+
+ Examples
+ --------
+ >>> file_info = local.get_file_info(path)
+ >>> file_info.path
+ '/.../pyarrow-fs-example.dat'
"""
return frombytes(self.info.path())
@@ -188,6 +236,12 @@ cdef class FileInfo(_Weakrefable):
The file base name.
Component after the last directory separator.
+
+ Examples
+ --------
+ >>> file_info = local.get_file_info(path)
+ >>> file_info.base_name
+ 'pyarrow-fs-example.dat'
"""
return frombytes(self.info.base_name())
@@ -210,6 +264,12 @@ cdef class FileInfo(_Weakrefable):
def extension(self):
"""
The file extension.
+
+ Examples
+ --------
+ >>> file_info = local.get_file_info(path)
+ >>> file_info.extension
+ 'dat'
"""
return frombytes(self.info.extension())
@@ -221,6 +281,12 @@ cdef class FileInfo(_Weakrefable):
Returns
-------
mtime : datetime.datetime or None
+
+ Examples
+ --------
+ >>> file_info = local.get_file_info(path)
+ >>> file_info.mtime # doctest: +SKIP
+ datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc)
"""
cdef int64_t nanoseconds
nanoseconds = TimePoint_to_ns(self.info.mtime())
@@ -236,6 +302,12 @@ cdef class FileInfo(_Weakrefable):
Returns
-------
mtime_ns : int or None
+
+ Examples
+ --------
+ >>> file_info = local.get_file_info(path)
+ >>> file_info.mtime_ns # doctest: +SKIP
+ 1656489370873922073
"""
cdef int64_t nanoseconds
nanoseconds = TimePoint_to_ns(self.info.mtime())
@@ -260,6 +332,31 @@ cdef class FileSelector(_Weakrefable):
If true, an empty selection is returned.
recursive : bool, default False
Whether to recurse into subdirectories.
+
+ Examples
+ --------
+ List the contents of a directory and subdirectories:
+
+ >>> selector_1 = fs.FileSelector(local_path, recursive=True)
+ >>> local.get_file_info(selector_1) # doctest: +SKIP
+ [<FileInfo for 'tmp/alphabet/example.dat': type=FileType.File, size=4>,
+ <FileInfo for 'tmp/alphabet/subdir': type=FileType.Directory>,
+ <FileInfo for 'tmp/alphabet/subdir/example_copy.dat': type=FileType.File, size=4>]
+
+ List only the contents of the base directory:
+
+ >>> selector_2 = fs.FileSelector(local_path)
+ >>> local.get_file_info(selector_2) # doctest: +SKIP
+ [<FileInfo for 'tmp/alphabet/example.dat': type=FileType.File, size=4>,
+ <FileInfo for 'tmp/alphabet/subdir': type=FileType.Directory>]
+
+ Return empty selection if the directory doesn't exist:
+
+ >>> selector_not_found = fs.FileSelector(local_path + '/missing',
+ ... recursive=True,
+ ... allow_not_found=True)
+ >>> local.get_file_info(selector_not_found)
+ []
"""
def __init__(self, base_dir, bint allow_not_found=False,
@@ -335,6 +432,22 @@ cdef class FileSystem(_Weakrefable):
tuple of (FileSystem, str path)
With (filesystem, path) tuple where path is the abstract path
inside the FileSystem instance.
+
+ Examples
+ --------
+ Create a new FileSystem subclass from a URI:
+
+ >>> uri = 'file:///{}/pyarrow-fs-example.dat'.format(local_path)
+ >>> local_new, path_new = fs.FileSystem.from_uri(uri)
+ >>> local_new
+ <pyarrow._fs.LocalFileSystem object at ...
+ >>> path_new
+ '/.../pyarrow-fs-example.dat'
+
+ Or from an S3 bucket:
+
+ >>> fs.FileSystem.from_uri("s3://usgs-landsat/collection02/")
+ (<pyarrow._s3fs.S3FileSystem object at ...>, 'usgs-landsat/collection02')
"""
cdef:
c_string c_path
@@ -422,6 +535,13 @@ cdef class FileSystem(_Weakrefable):
FileInfo or list of FileInfo
Single FileInfo object is returned for a single path, otherwise
a list of FileInfo objects is returned.
+
+ Examples
+ --------
+ >>> local
+ <pyarrow._fs.LocalFileSystem object at ...>
+ >>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path))
+ <FileInfo for '/.../pyarrow-fs-example.dat': type=FileType.File, size=4>
"""
cdef:
CFileInfo info
@@ -521,6 +641,28 @@ cdef class FileSystem(_Weakrefable):
The path of the file or the directory to be moved.
dest : str
The destination path where the file or directory is moved to.
+
+ Examples
+ --------
+ Create a new folder with a file:
+
+ >>> local.create_dir('/tmp/other_dir')
+ >>> local.copy_file(path,'/tmp/move_example.dat')
+
+ Move the file:
+
+ >>> local.move('/tmp/move_example.dat',
+ ... '/tmp/other_dir/move_example_2.dat')
+
+ Inspect the file info:
+
+ >>> local.get_file_info('/tmp/other_dir/move_example_2.dat')
+ <FileInfo for '/tmp/other_dir/move_example_2.dat': type=FileType.File, size=4>
+ >>> local.get_file_info('/tmp/move_example.dat')
+ <FileInfo for '/tmp/move_example.dat': type=FileType.NotFound>
+
+ Delete the folder:
+ >>> local.delete_dir('/tmp/other_dir')
"""
cdef:
c_string source = _path_as_bytes(src)
@@ -541,6 +683,18 @@ cdef class FileSystem(_Weakrefable):
The path of the file to be copied from.
dest : str
The destination path where the file is copied to.
+
+ Examples
+ --------
+ >>> local.copy_file(path,
+ ... local_path + '/pyarrow-fs-example_copy.dat')
+
+ Inspect the file info:
+
+ >>> local.get_file_info(local_path + '/pyarrow-fs-example_copy.dat')
+ <FileInfo for '/.../pyarrow-fs-example_copy.dat': type=FileType.File, size=4>
+ >>> local.get_file_info(path)
+ <FileInfo for '/.../pyarrow-fs-example.dat': type=FileType.File, size=4>
"""
cdef:
c_string source = _path_as_bytes(src)
@@ -591,6 +745,14 @@ cdef class FileSystem(_Weakrefable):
Returns
-------
stream : NativeFile
+
+ Examples
+ --------
+ Print the data from the file with `open_input_file()`:
+
+ >>> with local.open_input_file(path) as f:
+ ... print(f.readall())
+ b'data'
"""
cdef:
c_string pathstr = _path_as_bytes(path)
@@ -625,6 +787,14 @@ cdef class FileSystem(_Weakrefable):
Returns
-------
stream : NativeFile
+
+ Examples
+ --------
+ Print the data from the file with `open_input_stream()`:
+
+ >>> with local.open_input_stream(path) as f:
+ ... print(f.readall())
+ b'data'
"""
cdef:
c_string pathstr = _path_as_bytes(path)
@@ -670,6 +840,13 @@ cdef class FileSystem(_Weakrefable):
Returns
-------
stream : NativeFile
+
+ Examples
+ --------
+ >>> local = fs.LocalFileSystem()
+ >>> with local.open_output_stream(path) as stream:
+ ... stream.write(b'data')
+ 4
"""
cdef:
c_string pathstr = _path_as_bytes(path)
@@ -727,6 +904,20 @@ cdef class FileSystem(_Weakrefable):
Returns
-------
stream : NativeFile
+
+ Examples
+ --------
+ Append new data to a nonempty file using a FileSystem subclass:
+
+ >>> with local.open_append_stream(path) as f:
+ ... f.write(b'+newly added')
+ 12
+
+ Print out the content of the file:
+
+ >>> with local.open_input_file(path) as f:
+ ... print(f.readall())
+ b'data+newly added'
"""
cdef:
c_string pathstr = _path_as_bytes(path)
@@ -796,11 +987,9 @@ cdef class LocalFileSystem(FileSystem):
>>> with local.open_output_stream('/tmp/local_fs.dat') as stream:
... stream.write(b'data')
- ...
4
>>> with local.open_input_stream('/tmp/local_fs.dat') as stream:
... print(stream.readall())
- ...
b'data'
Create a FileSystem object inferred from a URI of the saved file:
@@ -834,11 +1023,10 @@ cdef class LocalFileSystem(FileSystem):
>>> with local.open_append_stream('/tmp/local_fs-copy.dat') as f:
... f.write(b'+newly added')
- ...
12
+
>>> with local.open_input_stream('/tmp/local_fs-copy.dat') as f:
... print(f.readall())
- ...
b'data+newly added'
Create a directory, copy a file into it and then delete the whole directory:
@@ -939,8 +1127,7 @@ cdef class SubTreeFileSystem(FileSystem):
>>> from pyarrow import fs
>>> local = fs.LocalFileSystem()
>>> with local.open_output_stream('/tmp/local_fs.dat') as stream:
- ... stream.write(b'data')
- ...
+ ... stream.write(b'data')
4
Create a directory and a SubTreeFileSystem instance:
@@ -952,7 +1139,6 @@ cdef class SubTreeFileSystem(FileSystem):
>>> with subtree.open_append_stream('sub_tree_fs.dat') as f:
... f.write(b'+newly added')
- ...
12
Print out the attributes:
@@ -971,6 +1157,7 @@ cdef class SubTreeFileSystem(FileSystem):
Delete the file and directory:
+ >>> subtree.delete_file('sub_tree_fs.dat')
>>> local.delete_dir('/tmp/sub_tree')
>>> local.delete_file('/tmp/local_fs.dat')
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index b956dbef3c..bea735bd3a 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -258,9 +258,10 @@ def add_fs(doctest_namespace, request, tmp_path):
# Creation of an object and file with data
local = fs.LocalFileSystem()
- path = tmp_path / 'fileinfo.dat'
+ path = tmp_path / 'pyarrow-fs-example.dat'
with local.open_output_stream(str(path)) as stream:
stream.write(b'data')
doctest_namespace["local"] = local
- doctest_namespace["local_path"] = tmp_path
+ doctest_namespace["local_path"] = str(tmp_path)
+ doctest_namespace["path"] = str(path)
yield