You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/05/02 04:55:34 UTC
[arrow] branch master updated: ARROW-2332: Feather Reader option to return Table
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5bef985 ARROW-2332: Feather Reader option to return Table
5bef985 is described below
commit 5bef985faea5c8dfdf8f25abd503ed0c9983f661
Author: Dhruv Madeka <ma...@bu.edu>
AuthorDate: Wed May 2 06:55:21 2018 +0200
ARROW-2332: Feather Reader option to return Table
Author: Dhruv Madeka <ma...@bu.edu>
Closes #1960 from dmadeka/feather-table and squashes the following commits:
cfb4c204 <Dhruv Madeka> Create read_table function
1ae2edd9 <Dhruv Madeka> Deprecate read and move to read_table
a12e8b77 <Dhruv Madeka> Fix Pep8 Issues causing build fails
14afeec6 <Dhruv Madeka> ARROW-2332 Table Read
---
python/pyarrow/feather.py | 34 +++++++++++++++++++++++++++++++---
python/pyarrow/tests/test_feather.py | 25 ++++++++++++++++++++++++-
2 files changed, 55 insertions(+), 4 deletions(-)
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 3ba9d65..6ebf900 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -20,6 +20,7 @@ import os
import six
import pandas as pd
+import warnings
from pyarrow.compat import pdapi
from pyarrow.lib import FeatherError # noqa
@@ -42,7 +43,12 @@ class FeatherReader(ext.FeatherReader):
self.source = source
self.open(source)
- def read(self, columns=None, nthreads=1):
+ def read(self, *args, **kwargs):
+ warnings.warn("read has been deprecated. Use read_pandas instead.",
+ DeprecationWarning)
+ return self.read_pandas(*args, **kwargs)
+
+ def read_table(self, columns=None):
if columns is not None:
column_set = set(columns)
else:
@@ -58,7 +64,10 @@ class FeatherReader(ext.FeatherReader):
names.append(name)
table = Table.from_arrays(columns, names=names)
- return table.to_pandas(nthreads=nthreads)
+ return table
+
+ def read_pandas(self, columns=None, nthreads=1):
+ return self.read_table(columns=columns).to_pandas(nthreads=nthreads)
class FeatherWriter(object):
@@ -129,4 +138,23 @@ def read_feather(source, columns=None, nthreads=1):
df : pandas.DataFrame
"""
reader = FeatherReader(source)
- return reader.read(columns=columns, nthreads=nthreads)
+ return reader.read_pandas(columns=columns, nthreads=nthreads)
+
+
+def read_table(source, columns=None):
+ """
+ Read a pyarrow.Table from Feather format
+
+ Parameters
+ ----------
+ source : string file path, or file-like object
+ columns : sequence, optional
+ Only read a specific set of columns. If not provided, all columns are
+ read
+
+ Returns
+ -------
+ table : pyarrow.Table
+ """
+ reader = FeatherReader(source)
+ return reader.read_table(columns=columns)
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 171f28d..9bbfe05 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -29,7 +29,7 @@ import pandas as pd
import pyarrow as pa
from pyarrow.feather import (read_feather, write_feather,
- FeatherReader)
+ read_table, FeatherReader)
from pyarrow.lib import FeatherWriter
@@ -129,6 +129,29 @@ class TestFeatherReader(unittest.TestCase):
df = pd.DataFrame(data)
self._check_pandas_roundtrip(df)
+ def test_read_table(self):
+ num_values = (100, 100)
+ path = random_path()
+
+ self.test_files.append(path)
+ writer = FeatherWriter()
+ writer.open(path)
+
+ values = np.random.randint(0, 100, size=num_values)
+
+ for i in range(100):
+ writer.write_array('col_' + str(i), values[:, i])
+
+ writer.close()
+
+ data = pd.DataFrame(values,
+ columns=['col_' + str(i) for i in range(100)])
+ table = pa.Table.from_pandas(data)
+
+ result = read_table(path)
+
+ assert_frame_equal(table.to_pandas(), result.to_pandas())
+
def test_float_nulls(self):
num_values = 100
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.