You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2020/04/13 07:25:03 UTC
[arrow] branch master updated: ARROW-8408: [Python] Add memory_map argument to feather.read_feather

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e93d42  ARROW-8408: [Python] Add memory_map argument to feather.read_feather
3e93d42 is described below

commit 3e93d42cfa2125762448cbdbfd3c2a1b19a45fde
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Apr 13 09:24:32 2020 +0200

    ARROW-8408: [Python] Add memory_map argument to feather.read_feather
    
    While most people don't need to care about this, it helps with producing apples-to-apples performance numbers when comparing compressed versus uncompressed read performance.
    
    Closes #6905 from wesm/ARROW-8408
    
    Authored-by: Wes McKinney <we...@apache.org>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/feather.py            | 9 +++++----
 python/pyarrow/tests/test_feather.py | 3 +++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 3aec187..6d909ef 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -184,7 +184,7 @@ def write_feather(df, dest, compression=None, compression_level=None,
         raise
 
 
-def read_feather(source, columns=None, use_threads=True):
+def read_feather(source, columns=None, use_threads=True, memory_map=True):
     """
     Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
     feather.read_table.
@@ -197,15 +197,16 @@ def read_feather(source, columns=None, use_threads=True):
         read.
     use_threads: bool, default True
         Whether to parallelize reading using multiple threads.
+    memory_map : boolean, default True
+        Use memory mapping when opening file on disk
 
     Returns
     -------
     df : pandas.DataFrame
     """
     _check_pandas_version()
-    return read_table(source, columns=columns).to_pandas(
-        use_threads=use_threads
-    )
+    return (read_table(source, columns=columns, memory_map=memory_map)
+            .to_pandas(use_threads=use_threads))
 
 
 def read_table(source, columns=None, memory_map=True):
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index b5d77fd..779649b 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -160,6 +160,9 @@ def test_read_table(version):
     result = read_table(path, memory_map=False)
     assert_frame_equal(table.to_pandas(), result.to_pandas())
 
+    result = read_feather(path, memory_map=False)
+    assert_frame_equal(table.to_pandas(), result)
+
 
 @pytest.mark.pandas
 def test_float_nulls(version):