You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@parquet.apache.org by "Wes McKinney (JIRA)" <ji...@apache.org> on 2019/08/16 14:05:00 UTC

[jira] [Resolved] (PARQUET-1169) [C++] Segment fault when using NextBatch of parquet::arrow::ColumnReader in parquet-cpp

     [ https://issues.apache.org/jira/browse/PARQUET-1169?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Wes McKinney resolved PARQUET-1169.
-----------------------------------
    Resolution: Cannot Reproduce

I tried the updated code below against master, and the issue seems to be fixed.

{code}
#include <iostream>
#include <string>

#include "arrow/api.h"
#include "arrow/io/file.h"
#include "arrow/testing/gtest_util.h"
#include "parquet/arrow/reader.h"

using arrow::Array;
using arrow::ChunkedArray;
using arrow::default_memory_pool;
using arrow::io::FileMode;
using arrow::io::MemoryMappedFile;
using parquet::arrow::ColumnReader;
using parquet::arrow::FileReader;
using parquet::arrow::OpenFile;

// Reproduction driver for PARQUET-1169.
//
// Opens the Parquet file named by the first command-line argument, obtains a
// reader for column 0, and requests two consecutive batches of one record
// each, printing the length of every batch to stdout. When no path is given
// the program does nothing. Always returns 0 when it runs to completion.
// NOTE(review): ABORT_NOT_OK comes from arrow/testing/gtest_util.h and
// presumably terminates the process on a non-OK status -- confirm.
int main(int argc, char** argv) {
  // Without a file argument there is nothing to read.
  if (argc <= 1) {
    return 0;
  }

  std::string parquet_path = argv[1];

  // Open the input read-only through MemoryMappedFile.
  std::shared_ptr<MemoryMappedFile> input;
  ABORT_NOT_OK(MemoryMappedFile::Open(parquet_path, FileMode::READ, &input));

  // Wrap the file in a Parquet reader using the default memory pool.
  std::unique_ptr<FileReader> reader;
  ABORT_NOT_OK(OpenFile(input, default_memory_pool(), &reader));

  // Only the first column (index 0) is exercised.
  std::unique_ptr<ColumnReader> column;
  ABORT_NOT_OK(reader->GetColumn(0, &column));

  // First batch of one record.
  std::shared_ptr<ChunkedArray> first_batch;
  ABORT_NOT_OK(column->NextBatch(1, &first_batch));
  std::cout << "length " << first_batch->length() << std::endl;

  // Second batch of one record from the same column reader; this is the call
  // that the original report described as crashing with a segmentation fault.
  std::shared_ptr<ChunkedArray> second_batch;
  ABORT_NOT_OK(column->NextBatch(1, &second_batch));
  std::cout << "length " << second_batch->length() << std::endl;

  return 0;
}
{code}

> [C++] Segment fault when using NextBatch of parquet::arrow::ColumnReader in parquet-cpp
> ---------------------------------------------------------------------------------------
>
>                 Key: PARQUET-1169
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1169
>             Project: Parquet
>          Issue Type: Bug
>          Components: parquet-cpp
>            Reporter: Jian Fang
>            Priority: Major
>             Fix For: cpp-1.6.0
>
>         Attachments: test.parquet
>
>
> When I run the code below, I consistently get a segmentation fault. I am not sure whether this is a bug or whether I did something wrong. Could anyone here help me take a look?
> {code:c++}
> #include <iostream>
> #include <string>
> #include "arrow/array.h"
> #include "arrow/io/file.h"
> #include "arrow/test-util.h"
> #include "parquet/arrow/reader.h"
> using arrow::Array;
> using arrow::default_memory_pool;
> using arrow::io::FileMode;
> using arrow::io::MemoryMappedFile;
> using parquet::arrow::ColumnReader;
> using parquet::arrow::FileReader;
> using parquet::arrow::OpenFile;
> int main(int argc, char** argv) {
>   if (argc > 1) {
>     std::string file_name = argv[1];
>     std::shared_ptr<MemoryMappedFile> file;
>     ABORT_NOT_OK(MemoryMappedFile::Open(file_name, FileMode::READ, &file));
>     std::unique_ptr<FileReader> file_reader;
>     ABORT_NOT_OK(OpenFile(file, default_memory_pool(), &file_reader));
>     std::unique_ptr<ColumnReader> column_reader;
>     ABORT_NOT_OK(file_reader->GetColumn(0, &column_reader));
>     std::shared_ptr<Array> array1;
>     ABORT_NOT_OK(column_reader->NextBatch(1, &array1));
>     std::cout << "length " << array1->length() << std::endl;
>     std::shared_ptr<Array> array2;
>     // segment fault
>     ABORT_NOT_OK(column_reader->NextBatch(1, &array2));
>     std::cout << "length " << array2->length() << std::endl;
>   }
>   return 0;
> }
> {code}
> Command to compile this program:
> {code}
> g++ test.c -I/usr/local/include/arrow -I/usr/local/include/parquet --std=c++11 -lparquet -larrow -lgtest -o parquet_test
> {code}
> Command to run the program:
> {code}
> ./parquet_test test.parquet
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)