You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/04 08:48:02 UTC

[arrow] branch master updated: ARROW-15783: [Python] Initialize static pandas data on write

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ac8301592 ARROW-15783: [Python] Initialize static pandas data on write
9ac8301592 is described below

commit 9ac8301592c075901893f1b472c01b75aba26c36
Author: emkornfield <em...@gmail.com>
AuthorDate: Mon Apr 4 10:47:53 2022 +0200

    ARROW-15783: [Python] Initialize static pandas data on write
    
    I'm not sure if this is the best place to ensure we always set this, but it seemed reasonable, happy to change it.
    
    The underlying issue is that this was the first type where we required C++ code for writing pandas data.
    
    Closes #12753 from emkornfield/interval_conversion
    
    Lead-authored-by: emkornfield <em...@gmail.com>
    Co-authored-by: Micah Kornfield <mi...@google.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 cpp/src/arrow/python/arrow_to_pandas.cc   |  6 ++++--
 python/pyarrow/tests/read_record_batch.py | 25 +++++++++++++++++++++++++
 python/pyarrow/tests/test_ipc.py          | 21 ++++++++++++++++++++-
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index c078a58d5d..957dd3c5ca 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -348,7 +348,10 @@ class PandasWriter {
   };
 
   PandasWriter(const PandasOptions& options, int64_t num_rows, int num_columns)
-      : options_(options), num_rows_(num_rows), num_columns_(num_columns) {}
+      : options_(options), num_rows_(num_rows), num_columns_(num_columns) {
+    PyAcquireGIL lock;
+    internal::InitPandasStaticData();
+  }
   virtual ~PandasWriter() {}
 
   void SetBlockData(PyObject* arr) {
@@ -371,7 +374,6 @@ class PandasWriter {
       return Status::OK();
     }
     PyAcquireGIL lock;
-
     npy_intp placement_dims[1] = {num_columns_};
     PyObject* placement_arr = PyArray_SimpleNew(1, placement_dims, NPY_INT64);
     RETURN_IF_PYERROR();
diff --git a/python/pyarrow/tests/read_record_batch.py b/python/pyarrow/tests/read_record_batch.py
new file mode 100644
index 0000000000..d565d25414
--- /dev/null
+++ b/python/pyarrow/tests/read_record_batch.py
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file is called from a test in test_ipc.py.
+
+import sys
+
+import pyarrow as pa
+
+with open(sys.argv[1], 'rb') as f:
+    pa.ipc.open_file(f).read_all().to_pandas()
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 8a61876f7c..917e2b3ab8 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -26,7 +26,7 @@ import weakref
 import numpy as np
 
 import pyarrow as pa
-from pyarrow.tests.util import changed_environ
+from pyarrow.tests.util import changed_environ, invoke_script
 
 
 try:
@@ -240,6 +240,25 @@ def test_empty_stream():
         pa.ipc.open_stream(buf)
 
 
+@pytest.mark.pandas
+def test_read_year_month_nano_interval(tmpdir):
+    """ARROW-15783: Verify to_pandas works for interval types.
+
+    Interval types require static structures to be enabled. This test verifies
+    that they are when no other library functions are invoked.
+    """
+    mdn_interval_type = pa.month_day_nano_interval()
+    schema = pa.schema([pa.field('nums', mdn_interval_type)])
+
+    path = tmpdir.join('file.arrow').strpath
+    with pa.OSFile(path, 'wb') as sink:
+        with pa.ipc.new_file(sink, schema) as writer:
+            interval_array = pa.array([(1, 2, 3)], type=mdn_interval_type)
+            batch = pa.record_batch([interval_array], schema)
+            writer.write(batch)
+    invoke_script('read_record_batch.py', path)
+
+
 @pytest.mark.pandas
 def test_stream_categorical_roundtrip(stream_fixture):
     df = pd.DataFrame({