You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/05/02 04:55:34 UTC
[arrow] branch master updated: ARROW-2332: Feather Reader option to return Table

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5bef985  ARROW-2332: Feather Reader option to return Table
5bef985 is described below

commit 5bef985faea5c8dfdf8f25abd503ed0c9983f661
Author: Dhruv Madeka <ma...@bu.edu>
AuthorDate: Wed May 2 06:55:21 2018 +0200

    ARROW-2332: Feather Reader option to return Table
    
    Author: Dhruv Madeka <ma...@bu.edu>
    
    Closes #1960 from dmadeka/feather-table and squashes the following commits:
    
    cfb4c204 <Dhruv Madeka> Create read_table function
    1ae2edd9 <Dhruv Madeka> Deprecate read and move to read_table
    a12e8b77 <Dhruv Madeka> Fix Pep8 Issues causing build fails
    14afeec6 <Dhruv Madeka> ARROW-2332 Table Read
---
 python/pyarrow/feather.py            | 34 +++++++++++++++++++++++++++++++---
 python/pyarrow/tests/test_feather.py | 25 ++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 3ba9d65..6ebf900 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -20,6 +20,7 @@ import os
 
 import six
 import pandas as pd
+import warnings
 
 from pyarrow.compat import pdapi
 from pyarrow.lib import FeatherError  # noqa
@@ -42,7 +43,12 @@ class FeatherReader(ext.FeatherReader):
         self.source = source
         self.open(source)
 
-    def read(self, columns=None, nthreads=1):
+    def read(self, *args, **kwargs):
+        warnings.warn("read has been deprecated. Use read_pandas instead.",
+                      DeprecationWarning)
+        return self.read_pandas(*args, **kwargs)
+
+    def read_table(self, columns=None):
         if columns is not None:
             column_set = set(columns)
         else:
@@ -58,7 +64,10 @@ class FeatherReader(ext.FeatherReader):
                 names.append(name)
 
         table = Table.from_arrays(columns, names=names)
-        return table.to_pandas(nthreads=nthreads)
+        return table
+
+    def read_pandas(self, columns=None, nthreads=1):
+        return self.read_table(columns=columns).to_pandas(nthreads=nthreads)
 
 
 class FeatherWriter(object):
@@ -129,4 +138,23 @@ def read_feather(source, columns=None, nthreads=1):
     df : pandas.DataFrame
     """
     reader = FeatherReader(source)
-    return reader.read(columns=columns, nthreads=nthreads)
+    return reader.read_pandas(columns=columns, nthreads=nthreads)
+
+
+def read_table(source, columns=None):
+    """
+    Read a pyarrow.Table from Feather format
+
+    Parameters
+    ----------
+    source : string file path, or file-like object
+    columns : sequence, optional
+        Only read a specific set of columns. If not provided, all columns are
+        read
+
+    Returns
+    -------
+    table : pyarrow.Table
+    """
+    reader = FeatherReader(source)
+    return reader.read_table(columns=columns)
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 171f28d..9bbfe05 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -29,7 +29,7 @@ import pandas as pd
 
 import pyarrow as pa
 from pyarrow.feather import (read_feather, write_feather,
-                             FeatherReader)
+                             read_table, FeatherReader)
 from pyarrow.lib import FeatherWriter
 
 
@@ -129,6 +129,29 @@ class TestFeatherReader(unittest.TestCase):
         df = pd.DataFrame(data)
         self._check_pandas_roundtrip(df)
 
+    def test_read_table(self):
+        num_values = (100, 100)
+        path = random_path()
+
+        self.test_files.append(path)
+        writer = FeatherWriter()
+        writer.open(path)
+
+        values = np.random.randint(0, 100, size=num_values)
+
+        for i in range(100):
+            writer.write_array('col_' + str(i), values[:, i])
+
+        writer.close()
+
+        data = pd.DataFrame(values,
+                            columns=['col_' + str(i) for i in range(100)])
+        table = pa.Table.from_pandas(data)
+
+        result = read_table(path)
+
+        assert_frame_equal(table.to_pandas(), result.to_pandas())
+
     def test_float_nulls(self):
         num_values = 100
 

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.