You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/07/01 22:12:15 UTC
arrow git commit: ARROW-1125: partial schemas for Table.from_pandas
Repository: arrow
Updated Branches:
refs/heads/master 930db87d6 -> c294ec3db
ARROW-1125: partial schemas for Table.from_pandas
Author: fjetter <fl...@blue-yonder.com>
Closes #790 from fjetter/ARROW-1125-partial-schemas and squashes the following commits:
0a58b708 [fjetter] Remove trailing whitespaces
87ccb0c4 [fjetter] Fix indentation to respect max line length
92001422 [fjetter] Remove template from TypeNotImplemented status message
67dbba5d [fjetter] Remove range from test due to pandas bug on Windows
4890b5af [fjetter] Refactor TypeNotImplemented message
9de8611c [fjetter] Partial schema test in test_convert_pandas.py
dcf44f09 [fjetter] Allow partial schemas in Table.from_pandas again
66671a27 [fjetter] Improved NotImplemented messages in PandasConverter
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c294ec3d
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c294ec3d
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c294ec3d
Branch: refs/heads/master
Commit: c294ec3dbdd3f5cfa55a6eb5c7b27535b240ccf0
Parents: 930db87
Author: fjetter <fl...@blue-yonder.com>
Authored: Sat Jul 1 18:12:09 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sat Jul 1 18:12:09 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/python/pandas_convert.cc | 22 ++++++++++++----------
cpp/src/arrow/type.h | 3 +++
python/pyarrow/table.pxi | 3 ++-
python/pyarrow/tests/test_convert_pandas.py | 24 ++++++++++++++++++++++--
python/pyarrow/tests/test_table.py | 18 ------------------
5 files changed, 39 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/c294ec3d/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index 2918f9e..9b65570 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -362,23 +362,25 @@ class PandasConverter {
Status Visit(const Time32Type& type) { return VisitNative<Int32Type>(); }
Status Visit(const Time64Type& type) { return VisitNative<Int64Type>(); }
- Status Visit(const NullType& type) { return Status::NotImplemented("null"); }
+ Status TypeNotImplemented(std::string type_name) {
+ std::stringstream ss;
+ ss << "PandasConverter doesn't implement <" << type_name << "> conversion. ";
+ return Status::NotImplemented(ss.str());
+ }
- Status Visit(const BinaryType& type) { return Status::NotImplemented(type.ToString()); }
+ Status Visit(const NullType& type) { return TypeNotImplemented(type.ToString()); }
+
+ Status Visit(const BinaryType& type) { return TypeNotImplemented(type.ToString()); }
Status Visit(const FixedSizeBinaryType& type) {
- return Status::NotImplemented(type.ToString());
+ return TypeNotImplemented(type.ToString());
}
- Status Visit(const DecimalType& type) {
- return Status::NotImplemented(type.ToString());
- }
+ Status Visit(const DecimalType& type) { return TypeNotImplemented(type.ToString()); }
- Status Visit(const DictionaryType& type) {
- return Status::NotImplemented(type.ToString());
- }
+ Status Visit(const DictionaryType& type) { return TypeNotImplemented(type.ToString()); }
- Status Visit(const NestedType& type) { return Status::NotImplemented(type.ToString()); }
+ Status Visit(const NestedType& type) { return TypeNotImplemented(type.ToString()); }
Status Convert() {
if (PyArray_NDIM(arr_) != 1) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/c294ec3d/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 3e85291..8338800 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -194,6 +194,7 @@ class ARROW_EXPORT FloatingPoint : public PrimitiveCType {
class ARROW_EXPORT NestedType : public DataType {
public:
using DataType::DataType;
+ static std::string name() { return "nested"; }
};
class NoExtraMeta {};
@@ -406,6 +407,7 @@ class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType {
Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
+ static std::string name() {return "fixed_size_binary"; }
std::vector<BufferDescr> GetBufferLayout() const override;
@@ -674,6 +676,7 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType {
Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
+ static std::string name() { return "dictionary"; }
bool ordered() const { return ordered_; }
http://git-wip-us.apache.org/repos/asf/arrow/blob/c294ec3d/python/pyarrow/table.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 7d44f2e..ef83636 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -333,7 +333,8 @@ cdef tuple _dataframe_to_arrays(
for name in df.columns:
col = df[name]
if schema is not None:
- type = schema.field_by_name(name).type
+ field = schema.field_by_name(name)
+ type = getattr(field, "type", None)
arr = arrays.append(
Array.from_pandas(
http://git-wip-us.apache.org/repos/asf/arrow/blob/c294ec3d/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 49b7eb7..9cce7bb 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -102,13 +102,11 @@ class TestPandasConversion(unittest.TestCase):
df = pd.DataFrame({'a': [None, None, None]})
self._check_pandas_roundtrip(df)
-
def test_all_none_category(self):
df = pd.DataFrame({'a': [None, None, None]})
df['a'] = df['a'].astype('category')
self._check_pandas_roundtrip(df)
-
def test_float_no_nulls(self):
data = {}
fields = []
@@ -654,3 +652,25 @@ class TestPandasConversion(unittest.TestCase):
table = pa.Table.from_pandas(df)
result_df = table.to_pandas()
tm.assert_frame_equal(result_df, df)
+
+ def test_partial_schema(self):
+ data = OrderedDict([
+ ('a', [0, 1, 2, 3, 4]),
+ ('b', np.array([-10, -5, 0, 5, 10], dtype=np.int32)),
+ ('c', [-10, -5, 0, 5, 10])
+ ])
+ df = pd.DataFrame(data)
+
+ partial_schema = pa.schema([
+ pa.field('a', pa.int64()),
+ pa.field('b', pa.int32())
+ ])
+
+ expected_schema = pa.schema([
+ pa.field('a', pa.int64()),
+ pa.field('b', pa.int32()),
+ pa.field('c', pa.int64())
+ ])
+
+ self._check_pandas_roundtrip(df, schema=partial_schema,
+ expected_schema=expected_schema)
http://git-wip-us.apache.org/repos/asf/arrow/blob/c294ec3d/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index afc9520..3198941 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -238,24 +238,6 @@ def test_concat_tables():
assert result.equals(expected)
-def test_table_pandas():
- data = [
- pa.array(range(5)),
- pa.array([-10, -5, 0, 5, 10])
- ]
- table = pa.Table.from_arrays(data, names=('a', 'b'))
-
- # TODO: Use this part once from_pandas is implemented
- # data = {'a': range(5), 'b': [-10, -5, 0, 5, 10]}
- # df = pd.DataFrame(data)
- # pa.Table.from_pandas(df)
-
- df = table.to_pandas()
- assert set(df.columns) == set(('a', 'b'))
- assert df.shape == (5, 2)
- assert df.loc[0, 'b'] == -10
-
-
def test_table_negative_indexing():
data = [
pa.array(range(5)),