You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/09/22 17:50:30 UTC
[arrow] branch master updated: ARROW-3269: [Python] Fix warnings in unit test suite

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4b53b30  ARROW-3269: [Python] Fix warnings in unit test suite
4b53b30 is described below

commit 4b53b30e135a3ef6686867ab0cdd13a30fa01dac
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Sat Sep 22 13:50:21 2018 -0400

    ARROW-3269: [Python] Fix warnings in unit test suite
    
    The remaining warnings are outside of our control:
    - `source:1396: DeprecationWarning: invalid escape sequence \(` is a strange heisenwarning
    - `FutureWarning: In the future, NAT != NAT will be True rather than False.` comes from `tm.assert_frame_equal`, but we need to check `NaT` values - I can silence it though
    
    ```
    ================================================== warnings summary ===================================================
    source:1396: DeprecationWarning: invalid escape sequence \(
    source:1401: DeprecationWarning: invalid escape sequence \(
    
    /Users/krisz/.conda/envs/arrow36/lib/python3.6/site-packages/pandas/core/dtypes/missing.py:431: FutureWarning: In the future, NAT != NAT will be True rather than False.
      if left_value != right_value:
    
    /Users/krisz/.conda/envs/arrow36/lib/python3.6/site-packages/pandas/core/dtypes/missing.py:431: DeprecationWarning: elementwise != comparison failed; this will raise an error in the future.
      if left_value != right_value:
    
    /Users/krisz/.conda/envs/arrow36/lib/python3.6/site-packages/pandas/core/dtypes/missing.py:431: FutureWarning: In the future, NAT != NAT will be True rather than False.
      if left_value != right_value:
    
    /Users/krisz/.conda/envs/arrow36/lib/python3.6/site-packages/pandas/core/dtypes/missing.py:431: FutureWarning: In the future, NAT != NAT will be True rather than False.
      if left_value != right_value:
    
    -- Docs: https://docs.pytest.org/en/latest/warnings.html
    ```
    
    Author: Krisztián Szűcs <sz...@gmail.com>
    
    Closes #2595 from kszucs/ARROW-3269 and squashes the following commits:
    
    bc61ac060 <Krisztián Szűcs> fix arrow related warnings
---
 python/pyarrow/tests/test_array.py          |  7 +++----
 python/pyarrow/tests/test_convert_pandas.py |  5 +++--
 python/pyarrow/tests/test_ipc.py            |  2 +-
 python/pyarrow/tests/test_parquet.py        | 18 ++++++++++++------
 4 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 410c9d0..9dd443c 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -32,7 +32,6 @@ except ImportError:
 
 import pyarrow as pa
 from pyarrow.pandas_compat import get_logical_type
-import pyarrow.formatting as fmt
 
 
 def test_total_bytes_allocated():
@@ -53,7 +52,7 @@ def test_constructor_raises():
 
 def test_list_format():
     arr = pa.array([[1], None, [2, 3, None]])
-    result = fmt.array_format(arr)
+    result = arr.format()
     expected = """\
 [
   [
@@ -71,7 +70,7 @@ def test_list_format():
 
 def test_string_format():
     arr = pa.array([u'', None, u'foo'])
-    result = fmt.array_format(arr)
+    result = arr.format()
     expected = """\
 [
   "",
@@ -83,7 +82,7 @@ def test_string_format():
 
 def test_long_array_format():
     arr = pa.array(range(100))
-    result = fmt.array_format(arr, window=2)
+    result = arr.format(window=2)
     expected = """\
 [
   0,
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index bb53c14..db87d9a 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -2069,11 +2069,12 @@ class TestConvertMisc(object):
             assert isinstance(converted, pa.NullArray)
             assert len(converted) == 3
             assert converted.null_count == 3
-            assert converted[0] is pa.NA
+            for item in converted:
+                assert item is pa.NA
 
         _check_series(pd.Series([None] * 3, dtype=object))
         _check_series(pd.Series([np.nan] * 3, dtype=object))
-        _check_series(pd.Series([np.sqrt(-1)] * 3, dtype=object))
+        _check_series(pd.Series([None, np.nan, None], dtype=object))
 
     def test_partial_schema(self):
         data = OrderedDict([
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 296882f..b1fa06f 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -295,7 +295,7 @@ def test_stream_simple_roundtrip(stream_fixture):
     assert total == len(batches)
 
     with pytest.raises(StopIteration):
-        reader.get_next_batch()
+        reader.read_next_batch()
 
 
 def test_stream_read_all(stream_fixture):
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 64fd82d..e7970bb 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -1523,6 +1523,11 @@ def _filter_partition(df, part_keys):
     to_drop = []
     for name, value in part_keys:
         to_drop.append(name)
+
+        # to avoid pandas warning
+        if isinstance(value, (datetime.date, datetime.datetime)):
+            value = pd.Timestamp(value)
+
         predicate &= df[name] == value
 
     return df[predicate].drop(to_drop, axis=1)
@@ -1970,9 +1975,8 @@ carat        cut  color  clarity  depth  table  price     x     y     z
  0.26  Very Good      H      SI1   61.9   55.0    337  4.07  4.11  2.53
  0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
  0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39"""
-    expected = pd.read_csv(
-        io.BytesIO(expected_string), sep=r'\s{2,}', index_col=None, header=0
-    )
+    expected = pd.read_csv(io.BytesIO(expected_string), sep=r'\s{2,}',
+                           index_col=None, header=0, engine='python')
     table = _read_table(datadir / 'v0.7.1.parquet')
     result = table.to_pandas()
     tm.assert_frame_equal(result, expected)
@@ -1992,8 +1996,9 @@ carat        cut  color  clarity  depth  table  price     x     y     z
  0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
  0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39"""
     expected = pd.read_csv(
-        io.BytesIO(expected_string),
-        sep=r'\s{2,}', index_col=['cut', 'color', 'clarity'], header=0
+        io.BytesIO(expected_string), sep=r'\s{2,}',
+        index_col=['cut', 'color', 'clarity'],
+        header=0, engine='python'
     ).sort_index()
 
     table = _read_table(datadir / 'v0.7.1.all-named-index.parquet')
@@ -2016,7 +2021,8 @@ carat        cut  color  clarity  depth  table  price     x     y     z
  0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39"""
     expected = pd.read_csv(
         io.BytesIO(expected_string),
-        sep=r'\s{2,}', index_col=['cut', 'color', 'clarity'], header=0
+        sep=r'\s{2,}', index_col=['cut', 'color', 'clarity'],
+        header=0, engine='python'
     ).sort_index()
     expected.index = expected.index.set_names(['cut', None, 'clarity'])