You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/02/24 14:35:29 UTC

[GitHub] [arrow] jorisvandenbossche commented on a change in pull request #12505: ARROW-15291: [C++][Python] Segfault in StructArray.to_numpy and to_pandas if it contains an ExtensionArray

jorisvandenbossche commented on a change in pull request #12505:
URL: https://github.com/apache/arrow/pull/12505#discussion_r813939597



##########
File path: cpp/src/arrow/python/arrow_to_pandas.cc
##########
@@ -656,8 +656,19 @@ Status ConvertStruct(PandasOptions options, const ChunkedArray& data,
     // Convert the struct arrays first
     for (int32_t i = 0; i < num_fields; i++) {
       const auto field = arr->field(static_cast<int>(i));
-      RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
-                                         fields_data[i + fields_data_offset].ref()));
+      // In case the field is an extension array, use .storage() to convert to Pandas
+      if (field->type()->id() == Type::EXTENSION){
+        // Save the field object as an Extension Array
+        const ExtensionArray& arr_ext = checked_cast<const ExtensionArray&>(*field);
+        // Save the storage Array and use it to convert to Pandas
+        const std::shared_ptr<Array> field_ext = arr_ext.storage();
+        RETURN_NOT_OK(ConvertArrayToPandas(options, field_ext, nullptr,
+                                           fields_data[i + fields_data_offset].ref()));
+      }
+      else{
+        RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
+                                           fields_data[i + fields_data_offset].ref()));
+      }

Review comment:
       We tried that when we were pairing on this earlier, and it gives a compilation error (see below). I wasn't able to figure out right away how to "overwrite" the `field` variable, which is why, for now, we repeat the `ConvertArrayToPandas` call in both the if and the else clause.
   
   <details>
   
   ```
   FAILED: src/arrow/python/CMakeFiles/arrow_python_objlib.dir/arrow_to_pandas.cc.o 
   /home/joris/miniconda3/envs/arrow-dev/bin/ccache /home/joris/miniconda3/envs/arrow-dev/bin/x86_64-conda-linux-gnu-c++ -DARROW_EXTRA_ERROR_CONTEXT -DARROW_HAVE_RUNTIME_AVX2 -DARROW_HAVE_RUNTIME_AVX512 -DARROW_HAVE_RUNTIME_BMI2 -DARROW_HAVE_RUNTIME_SSE4_2 -DARROW_HAVE_SSE4_2 -DARROW_HDFS -DARROW_JEMALLOC -DARROW_JEMALLOC_INCLUDE_DIR="" -DARROW_PYTHON_EXPORTING -DARROW_WITH_BROTLI -DARROW_WITH_BZ2 -DARROW_WITH_LZ4 -DARROW_WITH_RE2 -DARROW_WITH_SNAPPY -DARROW_WITH_TIMING_TESTS -DARROW_WITH_UTF8PROC -DARROW_WITH_ZLIB -DARROW_WITH_ZSTD -DURI_STATIC_BUILD -Isrc -I../src -I../src/generated -isystem ../thirdparty/flatbuffers/include -isystem jemalloc_ep-prefix/src -isystem xsimd_ep/src/xsimd_ep-install/include -isystem ../thirdparty/hadoop/include -isystem /home/joris/miniconda3/envs/arrow-dev/lib/python3.8/site-packages/numpy/core/include -isystem /home/joris/miniconda3/envs/arrow-dev/include/python3.8 -Wno-noexcept-type -fvisibility-inlines-hidden -std=c++17 -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/joris/miniconda3/envs/arrow-dev/include -fdiagnostics-color=always -ggdb -O0  -Wall -Wno-conversion -Wno-deprecated-declarations -Wno-sign-conversion -Wunused-result -Werror -fno-semantic-interposition -msse4.2  -g -fPIC -std=c++11 -MD -MT src/arrow/python/CMakeFiles/arrow_python_objlib.dir/arrow_to_pandas.cc.o -MF src/arrow/python/CMakeFiles/arrow_python_objlib.dir/arrow_to_pandas.cc.o.d -o src/arrow/python/CMakeFiles/arrow_python_objlib.dir/arrow_to_pandas.cc.o -c ../src/arrow/python/arrow_to_pandas.cc
   ../src/arrow/python/arrow_to_pandas.cc: In function 'arrow::Status arrow::py::{anonymous}::ConvertStruct(arrow::py::PandasOptions, const arrow::ChunkedArray&, PyObject**)':
   ../src/arrow/python/arrow_to_pandas.cc:662:33: error: no match for 'operator=' (operand types are 'const std::shared_ptr<arrow::Array>' and 'const std::shared_ptr<arrow::Array>')
     662 |         field = arr_ext.storage();
         |                                 ^
   In file included from /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/memory:81,
                    from ../src/arrow/python/arrow_to_pandas.h:25,
                    from ../src/arrow/python/arrow_to_pandas.cc:20:
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:309:19: note: candidate: 'std::shared_ptr<_Tp>& std::shared_ptr<_Tp>::operator=(const std::shared_ptr<_Tp>&) [with _Tp = arrow::Array]' <near match>
     309 |       shared_ptr& operator=(const shared_ptr&) noexcept = default;
         |                   ^~~~~~~~
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:309:19: note:   passing 'const std::shared_ptr<arrow::Array>*' as 'this' argument discards qualifiers
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:313:2: note: candidate: 'std::shared_ptr<_Tp>::_Assignable<const std::shared_ptr<_Yp>&> std::shared_ptr<_Tp>::operator=(const std::shared_ptr<_Yp>&) [with _Yp = arrow::Array; _Tp = arrow::Array; std::shared_ptr<_Tp>::_Assignable<const std::shared_ptr<_Yp>&> = std::shared_ptr<arrow::Array>&]' <near match>
     313 |  operator=(const shared_ptr<_Yp>& __r) noexcept
         |  ^~~~~~~~
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:313:2: note:   passing 'const std::shared_ptr<arrow::Array>*' as 'this' argument discards qualifiers
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:324:2: note: candidate: 'template<class _Yp> std::shared_ptr<_Tp>::_Assignable<std::auto_ptr<_Up> > std::shared_ptr<_Tp>::operator=(std::auto_ptr<_Up>&&) [with _Yp = _Yp; _Tp = arrow::Array]'
     324 |  operator=(auto_ptr<_Yp>&& __r)
         |  ^~~~~~~~
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:324:2: note:   template argument deduction/substitution failed:
   ../src/arrow/python/arrow_to_pandas.cc:662:33: note:   types 'std::auto_ptr<_Up>' and 'const std::shared_ptr<arrow::Array>' have incompatible cv-qualifiers
     662 |         field = arr_ext.storage();
         |                                 ^
   In file included from /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/memory:81,
                    from ../src/arrow/python/arrow_to_pandas.h:25,
                    from ../src/arrow/python/arrow_to_pandas.cc:20:
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:333:7: note: candidate: 'std::shared_ptr<_Tp>& std::shared_ptr<_Tp>::operator=(std::shared_ptr<_Tp>&&) [with _Tp = arrow::Array]' <near match>
     333 |       operator=(shared_ptr&& __r) noexcept
         |       ^~~~~~~~
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:333:7: note:   conversion of argument 1 would be ill-formed:
   ../src/arrow/python/arrow_to_pandas.cc:662:32: error: cannot bind rvalue reference of type 'std::shared_ptr<arrow::Array>&&' to lvalue of type 'const std::shared_ptr<arrow::Array>'
     662 |         field = arr_ext.storage();
         |                 ~~~~~~~~~~~~~~~^~
   In file included from /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/memory:81,
                    from ../src/arrow/python/arrow_to_pandas.h:25,
                    from ../src/arrow/python/arrow_to_pandas.cc:20:
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:341:2: note: candidate: 'template<class _Yp> std::shared_ptr<_Tp>::_Assignable<std::shared_ptr<_Yp> > std::shared_ptr<_Tp>::operator=(std::shared_ptr<_Yp>&&) [with _Yp = _Yp; _Tp = arrow::Array]'
     341 |  operator=(shared_ptr<_Yp>&& __r) noexcept
         |  ^~~~~~~~
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:341:2: note:   template argument deduction/substitution failed:
   ../src/arrow/python/arrow_to_pandas.cc:662:33: note:   types 'std::shared_ptr<_Tp>' and 'const std::shared_ptr<arrow::Array>' have incompatible cv-qualifiers
     662 |         field = arr_ext.storage();
         |                                 ^
   In file included from /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/memory:81,
                    from ../src/arrow/python/arrow_to_pandas.h:25,
                    from ../src/arrow/python/arrow_to_pandas.cc:20:
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:349:2: note: candidate: 'template<class _Yp, class _Del> std::shared_ptr<_Tp>::_Assignable<std::unique_ptr<_Up, _Ep> > std::shared_ptr<_Tp>::operator=(std::unique_ptr<_Up, _Ep>&&) [with _Yp = _Yp; _Del = _Del; _Tp = arrow::Array]'
     349 |  operator=(unique_ptr<_Yp, _Del>&& __r)
         |  ^~~~~~~~
   /home/joris/miniconda3/envs/arrow-dev/x86_64-conda-linux-gnu/include/c++/9.3.0/bits/shared_ptr.h:349:2: note:   template argument deduction/substitution failed:
   ../src/arrow/python/arrow_to_pandas.cc:662:33: note:   types 'std::unique_ptr<_Tp, _Dp>' and 'const std::shared_ptr<arrow::Array>' have incompatible cv-qualifiers
     662 |         field = arr_ext.storage();
         |                                 ^
   ninja: build stopped: subcommand failed.
   [1/6] Building CXX object src/arrow/python/CMakeFiles/arrow_python_objlib.dir/arrow_to_pandas.cc.o
   ```
   
   </details>




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org