You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "AlenkaF (via GitHub)" <gi...@apache.org> on 2023/06/19 10:30:51 UTC

[GitHub] [arrow] AlenkaF commented on a diff in pull request #36162: GH-21761: [Python] accept pyarrow scalars in array constructor

AlenkaF commented on code in PR #36162:
URL: https://github.com/apache/arrow/pull/36162#discussion_r1233863309


##########
python/pyarrow/tests/test_convert_builtin.py:
##########
@@ -2363,3 +2363,113 @@ def test_array_from_pylist_offset_overflow():
     assert isinstance(arr, pa.ChunkedArray)
     assert len(arr) == 2**31
     assert len(arr.chunks) > 1
+
+
+@parametrize_with_collections_types
+@pytest.mark.parametrize(('data', 'scalar_data'), [
+    ([True, False, None], [pa.scalar(True), pa.scalar(False), None]),
+    ([1, 2, None], [pa.scalar(1), pa.scalar(2), None]),
+    ([1, None, None], [pa.scalar(1), None, pa.scalar(None, type=pa.int64())]),
+    ([None, None], [pa.scalar(None), pa.scalar(None)]),
+    ([1., 2., None], [pa.scalar(1.), pa.scalar(2.), None]),
+    ([None, datetime.date.today()], [None, pa.scalar(datetime.date.today())]),
+    ([datetime.time(1, 1, 1), None], [pa.scalar(datetime.time(1, 1, 1)), None]),
+    ([datetime.timedelta(seconds=10)], [pa.scalar(datetime.timedelta(seconds=10))]),
+    ([None, datetime.datetime(2014, 1, 1)], [
+     None, pa.scalar(datetime.datetime(2014, 1, 1))]),
+    ([pa.MonthDayNano([1, -1, -10100])], [pa.scalar(pa.MonthDayNano([1, -1, -10100]))]),
+    (["a", "b"], [pa.scalar("a"), pa.scalar("b")]),
+    ([b"a", b"b"], [pa.scalar(b"a"), pa.scalar(b"b")]),
+    ([1, 2, 3], pa.scalar([1, 2, 3])),
+    (["a", "b"], pa.scalar(["a", "b"])),
+])
+def test_array_accepts_pyarrow_scalar(seq, data, scalar_data):
+    if type(seq(scalar_data)) == set:
+        pytest.skip("TODO: look at the reordering of the elements in the set")
+    expect = pa.array(data)
+    result = pa.array(seq(scalar_data))
+    assert expect.equals(result)
+
+
+@parametrize_with_collections_types
+@pytest.mark.parametrize(('data', 'scalar_data', 'value_type'), [
+    ([1, 2, None], [pa.scalar(1, type=pa.int8()),
+     pa.scalar(2, type=pa.int8()), None], pa.int8()),
+    ([1, None], [pa.scalar(1.0, type=pa.int32()), None], pa.int32()),
+    (["aaa", "bbb"], [pa.scalar("aaa", type=pa.binary(3)),
+     pa.scalar("bbb", type=pa.binary(3))], pa.binary(3)),
+    ([b"a"], [pa.scalar("a", type=pa.large_binary())], pa.large_binary()),
+    (["a"], [pa.scalar("a", type=pa.large_string())], pa.large_string()),
+    (
+        ["a"],
+        [pa.scalar("a", type=pa.dictionary(pa.int64(), pa.string()))],
+        pa.dictionary(pa.int64(), pa.string())
+    ),
+    (
+        ["a", "b"],
+        [pa.scalar("a", pa.dictionary(pa.int64(), pa.string())),
+         pa.scalar("b", pa.dictionary(pa.int64(), pa.string()))],
+        pa.dictionary(pa.int64(), pa.string())
+    ),
+    (
+        [1],
+        [pa.scalar(1, type=pa.dictionary(pa.int64(), pa.int32()))],
+        pa.dictionary(pa.int64(), pa.int32())
+    ),
+    (
+        [(1, 2)],
+        [pa.scalar([('a', 1), ('b', 2)], type=pa.struct(
+            [('a', pa.int8()), ('b', pa.int8())]))],
+        pa.struct([('a', pa.int8()), ('b', pa.int8())])
+    ),
+    (
+        [(1, 'bar')],
+        [pa.scalar([('a', 1), ('b', 'bar')], type=pa.struct(
+            [('a', pa.int8()), ('b', pa.string())]))],
+        pa.struct([('a', pa.int8()), ('b', pa.string())])
+    )
+])
+def test_array_accepts_pyarrow_scalar_with_type(seq, data, scalar_data, value_type):
+    if type(seq(scalar_data)) == set:
+        pytest.skip("TODO: look at the reordering of the elements in the set")
+    expect = pa.array(data, type=value_type)
+    result = pa.array(seq(scalar_data), type=value_type)
+    assert expect.equals(result)
+
+
+def test_array_accepts_pyarrow_scalar_something():
+    arr = pa.array([1, 2, 3])
+    result = pa.array([arr.sum()])
+    expect = pa.array([6])
+    assert expect.equals(result)
+
+
+@parametrize_with_collections_types
+def test_array_accepts_pyarrow_scalar_errors(seq):
+    sequence = seq([pa.scalar(1), pa.scalar("a"), pa.scalar(3.0)])
+    with pytest.raises(pa.ArrowInvalid,
+                       match="cannot mix scalars with different types"):
+        pa.array(sequence)
+
+    sequence = seq([1, pa.scalar("a"), None])
+    with pytest.raises(pa.ArrowInvalid,
+                       match="pyarrow scalars cannot be mixed with other "
+                             "Python scalar values currently"):
+        pa.array(sequence)
+
+    sequence = seq([np.float16("0.1"), pa.scalar("a"), None])
+    with pytest.raises(pa.ArrowInvalid,
+                       match="pyarrow scalars cannot be mixed with other "
+                             "Python scalar values currently"):
+        pa.array(sequence)
+
+    sequence = seq([pa.scalar("a"), np.float16("0.1"), None])
+    with pytest.raises(pa.ArrowInvalid,
+                       match="pyarrow scalars cannot be mixed with other "
+                             "Python scalar values currently"):
+        pa.array(sequence)
+
+    with pytest.raises(pa.ArrowInvalid,
+                       match="Cannot append scalar of type string "
+                             "to builder for type int32"):
+        pa.array([pa.scalar("a")], type=pa.int32())

Review Comment:
   This last case (a string scalar passed with `type=pa.int32()`) should fail with a different error message.
   Also, casting `int64` to `int8` should work, but it currently doesn't.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org