You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/05/14 15:23:30 UTC
arrow git commit: ARROW-1022: [Python] Add multithreaded read option to read_feather
Repository: arrow
Updated Branches:
refs/heads/master 5739e04b3 -> d8d3d8435
ARROW-1022: [Python] Add multithreaded read option to read_feather
Author: Wes McKinney <we...@twosigma.com>
Closes #682 from wesm/ARROW-1022 and squashes the following commits:
8fd241e [Wes McKinney] Add multithreaded read option to read_feather
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d8d3d843
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d8d3d843
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d8d3d843
Branch: refs/heads/master
Commit: d8d3d84354d827e45c8267cd05aecd2aa36cf60b
Parents: 5739e04
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun May 14 17:23:26 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Sun May 14 17:23:26 2017 +0200
----------------------------------------------------------------------
python/pyarrow/feather.py | 10 ++++++----
python/pyarrow/tests/test_feather.py | 11 +++++++++--
2 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/d8d3d843/python/pyarrow/feather.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 3754aec..34783a7 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -37,7 +37,7 @@ class FeatherReader(ext.FeatherReader):
self.source = source
self.open(source)
- def read(self, columns=None):
+ def read(self, columns=None, nthreads=1):
if columns is not None:
column_set = set(columns)
else:
@@ -53,7 +53,7 @@ class FeatherReader(ext.FeatherReader):
names.append(name)
table = Table.from_arrays(columns, names=names)
- return table.to_pandas()
+ return table.to_pandas(nthreads=nthreads)
class FeatherWriter(object):
@@ -118,7 +118,7 @@ def write_feather(df, dest):
raise
-def read_feather(source, columns=None):
+def read_feather(source, columns=None, nthreads=1):
"""
Read a pandas.DataFrame from Feather format
@@ -128,10 +128,12 @@ def read_feather(source, columns=None):
columns : sequence, optional
Only read a specific set of columns. If not provided, all columns are
read
+ nthreads : int, default 1
+ Number of CPU threads to use when reading to pandas.DataFrame
Returns
-------
df : pandas.DataFrame
"""
reader = FeatherReader(source)
- return reader.read(columns=columns)
+ return reader.read(columns=columns, nthreads=nthreads)
http://git-wip-us.apache.org/repos/asf/arrow/blob/d8d3d843/python/pyarrow/tests/test_feather.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 69c32be..287e0da 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -61,7 +61,8 @@ class TestFeatherReader(unittest.TestCase):
return counts
def _check_pandas_roundtrip(self, df, expected=None, path=None,
- columns=None, null_counts=None):
+ columns=None, null_counts=None,
+ nthreads=1):
if path is None:
path = random_path()
@@ -70,7 +71,7 @@ class TestFeatherReader(unittest.TestCase):
if not os.path.exists(path):
raise Exception('file not written')
- result = read_feather(path, columns)
+ result = read_feather(path, columns, nthreads=nthreads)
if expected is None:
expected = df
@@ -293,6 +294,12 @@ class TestFeatherReader(unittest.TestCase):
df = pd.DataFrame({'strings': [''] * 10})
self._check_pandas_roundtrip(df)
+ def test_multithreaded_read(self):
+ data = {'c{0}'.format(i): [''] * 10
+ for i in range(100)}
+ df = pd.DataFrame(data)
+ self._check_pandas_roundtrip(df, nthreads=4)
+
def test_nan_as_null(self):
# Create a nan that is not numpy.nan
values = np.array(['foo', np.nan, np.nan * 2, 'bar'] * 10)