You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/07/09 09:36:40 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #7688: ARROW-9383: [Python] Support fsspec filesystems in Dataset API

pitrou commented on a change in pull request #7688:
URL: https://github.com/apache/arrow/pull/7688#discussion_r452090022



##########
File path: python/pyarrow/dataset.py
##########
@@ -239,15 +240,18 @@ def _ensure_filesystem(fs_or_uri):
                 )
             filesystem = SubTreeFileSystem(prefix, filesystem)
         return filesystem, is_local
-    elif isinstance(fs_or_uri, (LocalFileSystem, _MockFileSystem)):
-        return fs_or_uri, True
-    elif isinstance(fs_or_uri, FileSystem):
-        return fs_or_uri, False
-    else:
+
+    try:
+        filesystem = _ensure_filesystem(fs_or_uri)

Review comment:
       It's a bit confusing to have those two functions named the same. Can you rename one or other to distinguish them better?

##########
File path: python/pyarrow/fs.py
##########
@@ -63,6 +63,31 @@ def __getattr__(name):
     )
 
 
+def _ensure_filesystem(filesystem):
+    if isinstance(filesystem, FileSystem):
+        return filesystem
+
+    # handle fsspec-compatible filesystems
+    try:
+        import fsspec
+        if isinstance(filesystem, fsspec.AbstractFileSystem):
+            if type(filesystem).__name__ == 'LocalFileSystem':
+                # In case its a simple LocalFileSystem, use native arrow one
+                return LocalFileSystem()
+            return PyFileSystem(FSSpecHandler(filesystem))
+    except ImportError:
+        pass

Review comment:
       For readability, I would prefer:
   ```python
   try:
       import fsspec
   except ImportError:
       pass
   else:
       ...
   ```

##########
File path: python/pyarrow/fs.py
##########
@@ -63,6 +63,31 @@ def __getattr__(name):
     )
 
 
+def _ensure_filesystem(filesystem):
+    if isinstance(filesystem, FileSystem):
+        return filesystem
+
+    # handle fsspec-compatible filesystems
+    try:
+        import fsspec
+        if isinstance(filesystem, fsspec.AbstractFileSystem):
+            if type(filesystem).__name__ == 'LocalFileSystem':
+                # In case its a simple LocalFileSystem, use native arrow one
+                return LocalFileSystem()
+            return PyFileSystem(FSSpecHandler(filesystem))
+    except ImportError:
+        pass
+
+    from pyarrow.filesystem import LocalFileSystem as LegacyLocalFileSystem
+
+    # map old filesystems to new ones
+    if isinstance(filesystem, LegacyLocalFileSystem):
+        return LocalFileSystem()
+    # TODO handle HDFS?
+
+    raise TypeError("Unrecognized fielsystem: {}".format(type(filesystem)))

Review comment:
       typo: "filesystem"




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org