You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2020/04/13 07:25:03 UTC
[arrow] branch master updated: ARROW-8408: [Python] Add memory_map
argument to feather.read_feather
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3e93d42 ARROW-8408: [Python] Add memory_map argument to feather.read_feather
3e93d42 is described below
commit 3e93d42cfa2125762448cbdbfd3c2a1b19a45fde
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Apr 13 09:24:32 2020 +0200
ARROW-8408: [Python] Add memory_map argument to feather.read_feather
While most people don't need to care about this, it helps produce apples-to-apples performance numbers when comparing compressed versus uncompressed read performance.
Closes #6905 from wesm/ARROW-8408
Authored-by: Wes McKinney <we...@apache.org>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/feather.py | 9 +++++----
python/pyarrow/tests/test_feather.py | 3 +++
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 3aec187..6d909ef 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -184,7 +184,7 @@ def write_feather(df, dest, compression=None, compression_level=None,
raise
-def read_feather(source, columns=None, use_threads=True):
+def read_feather(source, columns=None, use_threads=True, memory_map=True):
"""
Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
feather.read_table.
@@ -197,15 +197,16 @@ def read_feather(source, columns=None, use_threads=True):
read.
use_threads: bool, default True
Whether to parallelize reading using multiple threads.
+ memory_map : boolean, default True
+ Use memory mapping when opening file on disk
Returns
-------
df : pandas.DataFrame
"""
_check_pandas_version()
- return read_table(source, columns=columns).to_pandas(
- use_threads=use_threads
- )
+ return (read_table(source, columns=columns, memory_map=memory_map)
+ .to_pandas(use_threads=use_threads))
def read_table(source, columns=None, memory_map=True):
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index b5d77fd..779649b 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -160,6 +160,9 @@ def test_read_table(version):
result = read_table(path, memory_map=False)
assert_frame_equal(table.to_pandas(), result.to_pandas())
+ result = read_feather(path, memory_map=False)
+ assert_frame_equal(table.to_pandas(), result)
+
@pytest.mark.pandas
def test_float_nulls(version):