You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@arrow.apache.org by 1057445597 <10...@qq.com> on 2022/12/20 02:37:59 UTC

[c++][parquet][8.0.0] Core dump occurs when constructing a parquet FileReader

arrow version 8.0.0 linux


The crash does not necessarily occur every time; it appears after the program has been running for a while.


code


```
do {
          auto access_file_result =
              s3fs_->OpenInputFile(dataset()->parquet_files_[file_index]);
          if (!access_file_result.ok()) {
            res =
                errors::InvalidArgument(access_file_result.status().ToString());
            break;
          }


          auto access_file = access_file_result.ValueOrDie();


          parquet::ArrowReaderProperties properties;
          properties.set_use_threads(true);
          properties.set_pre_buffer(true);
          parquet::ReaderProperties parquet_properties =
              parquet::default_reader_properties();


          std::shared_ptr<parquet::arrow::FileReaderBuilder> builder =
              std::make_shared<parquet::arrow::FileReaderBuilder>();
          builder->Open(access_file, parquet_properties);


          std::unique_ptr<parquet::arrow::FileReader> reader;
          builder->properties(properties)->Build(&reader);
....
}while(0);
```


call stack


```
(gdb) bt
#0  0x00007f34f9dd6704 in parquet::ParquetFileReader::metadata() const ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so
#1  0x00007f34f9f572b9 in parquet::arrow::FileReader::Make(arrow::MemoryPool*, std::unique_ptr<parquet::ParquetFileReader, std::default_delete<parquet::ParquetFileReader> >, parquet::ArrowReaderProperties const&, std::unique_ptr<parquet::arrow::FileReader, std::default_delete<parquet::arrow::FileReader> >*) () from /usr/local/lib/python3.8/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so
#2  0x00007f34f9f573ec in parquet::arrow::FileReaderBuilder::Build(std::unique_ptr<parquet::arrow::FileReader, std::default_delete<parquet::arrow::FileReader> >*) () from /usr/local/lib/python3.8/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so
#3  0x00007f34f9cdd611 in tensorflow::data::ArrowS3DatasetOp::Dataset::Iterator::ReadFile(int, bool) ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so
#4  0x00007f34f9cded3c in tensorflow::data::ArrowS3DatasetOp::Dataset::Iterator::SetupStreamsLocked(tensorflow::Env*) ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so
#5  0x00007f34f9cff311 in tensorflow::data::ArrowDatasetBase::ArrowBaseIterator<tensorflow::data::ArrowS3DatasetOp::Dataset>::GetNextInternal(tensorflow::data::IteratorContext*, std::vector<tensorflow::Tensor, std::allocator<tensorflow::Tensor> >*, bool*) ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so
#6  0x00007f3e57badaa4 in tensorflow::data::DatasetBaseIterator::GetNext(tensorflow::data::IteratorContext*, std::vector<tensorflow::Tensor, std::allocator<tensorflow::Tensor> >*, bool*) ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow/python/../libtensorflow_framework.so.2
#7  0x00007f3e720938f6 in tensorflow::data::ParallelMapDatasetOp::Dataset::Iterator::CallFunction(std::shared_ptr<tensorflow::data::IteratorContext> const&, std::shared_ptr<tensorflow::data::ParallelMapDatasetOp::Dataset::Iterator::InvocationResult> const&) ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#8  0x00007f3e72099022 in tensorflow::data::ParallelMapDatasetOp::Dataset::Iterator::RunnerThread(std::shared_ptr<tensorflow::data::IteratorContext> const&) () from /usr/local/lib/python3.8/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#9  0x00007f3e70c6e145 in tensorflow::data::(anonymous namespace)::WorkQueueFunc(std::function<void ()> const&, std::shared_ptr<tensorflow::Notification>) () from /usr/local/lib/python3.8/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#10 0x00007f3e70c6ef5d in std::_Function_handler<void (), std::_Bind<void (*(std::function<void ()>, std::shared_ptr<tensorflow::Notification>))(std::function<void ()> const&, std::shared_ptr<tensorflow::Notification>)> >::_M_invoke(std::_Any_data const&) ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
#11 0x00007f3e58356791 in tensorflow::UnboundedWorkQueue::PooledThreadFunc() ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow/python/../libtensorflow_framework.so.2
#12 0x00007f3e5835d5d8 in tensorflow::(anonymous namespace)::PThread::ThreadFn(void*) ()
   from /usr/local/lib/python3.8/dist-packages/tensorflow/python/../libtensorflow_framework.so.2
#13 0x00007f3f3f931609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#14 0x00007f3f3fa6b133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
```


Are there any tips to troubleshoot this error?


### Component(s)


C++





1057445597
1057445597@qq.com



&nbsp;