You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@arrow.apache.org by "zzh (Jira)" <ji...@apache.org> on 2022/03/10 01:33:00 UTC

[jira] [Updated] (ARROW-15881) [c++] When link to libavrocpp.so, call parquet::arrow::WriteTable in a child thread get segmentation fault

     [ https://issues.apache.org/jira/browse/ARROW-15881?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

zzh updated ARROW-15881:
------------------------
    Description: 
When I try to use Arrow to write Parquet files, I encounter an error. Calling parquet::arrow::WriteTable outside a child thread succeeds, but calling parquet::arrow::WriteTable inside a child thread causes a segmentation fault.

In my CMakeLists.txt, I link libavrocpp.so because I want to use Avro in the project after finishing this test. I found that if I remove the link to libavrocpp.so, the code runs successfully.

The code looks like this:
{code:cpp}
arrow::Int64Builder test_a;
for (int i = 0; i < 1e7; ++i) {
  PARQUET_THROW_NOT_OK(test_a.Append(i));
}
auto sc = arrow::schema({arrow::field("A", arrow::int64())});
auto table = arrow::Table::Make(sc,{test_a.Finish().ValueOrDie()});
const string &fileid = sole::uuid4().str();
string filename = "test.parq";
try {
  std::shared_ptr<arrow::io::FileOutputStream> outfile;
  PARQUET_ASSIGN_OR_THROW(
          outfile,arrow::io::FileOutputStream::Open(filename)
  );
  PARQUET_THROW_NOT_OK(
          parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, table->num_rows())
  );
} catch (exception &ex) {
  cout << ex.what() << endl;
}
shared_ptr<std::thread> thread = make_shared<std::thread>([=]() {
    arrow::Int64Builder test_a;
      for (int i = 0; i < 1e7; ++i) {
        PARQUET_THROW_NOT_OK(test_a.Append(i));
    }
    auto sc = arrow::schema({arrow::field("A", arrow::int64())});
    auto table = arrow::Table::Make(sc,{test_a.Finish().ValueOrDie()});
    const string &fileid = sole::uuid4().str();
    string filename = "test.parq";
    try {
        std::shared_ptr<arrow::io::FileOutputStream> outfile;
        PARQUET_ASSIGN_OR_THROW(
                outfile,arrow::io::FileOutputStream::Open(filename)
        );
        PARQUET_THROW_NOT_OK(
                parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, table->num_rows())
        );
    } catch (exception &ex) {
        cout << ex.what() << endl;
    }
};{code}
 

The stack trace is shown in the attached picture.

 

  was:
When I try to use Arrow to write parquet files, I encounter a error. The parquet::arrow::WriteTable out child thread can call successful, but  parquet::arrow::WriteTable in child thread while cause Segmentation fault.

The code like this:
{code:cpp}
arrow::Int64Builder test_a;
for (int i = 0; i < 1e7; ++i) {
  PARQUET_THROW_NOT_OK(test_a.Append(i));
}
auto sc = arrow::schema({arrow::field("A", arrow::int64())});
auto table = arrow::Table::Make(sc,{test_a.Finish().ValueOrDie()});
const string &fileid = sole::uuid4().str();
string filename = "test.parq";
try {
  std::shared_ptr<arrow::io::FileOutputStream> outfile;
  PARQUET_ASSIGN_OR_THROW(
          outfile,arrow::io::FileOutputStream::Open(filename)
  );
  PARQUET_THROW_NOT_OK(
          parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, table->num_rows())
  );
} catch (exception &ex) {
  cout << ex.what() << endl;
}
shared_ptr<std::thread> thread = make_shared<std::thread>([=]() {
    arrow::Int64Builder test_a;
      for (int i = 0; i < 1e7; ++i) {
        PARQUET_THROW_NOT_OK(test_a.Append(i));
    }
    auto sc = arrow::schema({arrow::field("A", arrow::int64())});
    auto table = arrow::Table::Make(sc,{test_a.Finish().ValueOrDie()});
    const string &fileid = sole::uuid4().str();
    string filename = "test.parq";
    try {
        std::shared_ptr<arrow::io::FileOutputStream> outfile;
        PARQUET_ASSIGN_OR_THROW(
                outfile,arrow::io::FileOutputStream::Open(filename)
        );
        PARQUET_THROW_NOT_OK(
                parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, table->num_rows())
        );
    } catch (exception &ex) {
        cout << ex.what() << endl;
    }
};{code}
 

The stack message is in the picture in attachment.

 

        Summary: [c++] When link to libavrocpp.so, call parquet::arrow::WriteTable in a child thread get segmentation fault  (was: [c++] call parquet::arrow::WriteTable in a child thread get segmentation fault)

> [c++] When link to libavrocpp.so, call parquet::arrow::WriteTable in a child thread get segmentation fault
> ----------------------------------------------------------------------------------------------------------
>
>                 Key: ARROW-15881
>                 URL: https://issues.apache.org/jira/browse/ARROW-15881
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++
>    Affects Versions: 7.0.0
>         Environment: CentOS7,gcc7.0+,C++17
>            Reporter: zzh
>            Priority: Major
>              Labels: newbie
>         Attachments: message.png
>
>
> When I try to use Arrow to write Parquet files, I encounter an error. Calling parquet::arrow::WriteTable outside a child thread succeeds, but calling parquet::arrow::WriteTable inside a child thread causes a segmentation fault.
> In my CMakeLists.txt, I link libavrocpp.so because I want to use Avro in the project after finishing this test. I found that if I remove the link to libavrocpp.so, the code runs successfully.
> The code looks like this:
> {code:cpp}
> arrow::Int64Builder test_a;
> for (int i = 0; i < 1e7; ++i) {
>   PARQUET_THROW_NOT_OK(test_a.Append(i));
> }
> auto sc = arrow::schema({arrow::field("A", arrow::int64())});
> auto table = arrow::Table::Make(sc,{test_a.Finish().ValueOrDie()});
> const string &fileid = sole::uuid4().str();
> string filename = "test.parq";
> try {
>   std::shared_ptr<arrow::io::FileOutputStream> outfile;
>   PARQUET_ASSIGN_OR_THROW(
>           outfile,arrow::io::FileOutputStream::Open(filename)
>   );
>   PARQUET_THROW_NOT_OK(
>           parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, table->num_rows())
>   );
> } catch (exception &ex) {
>   cout << ex.what() << endl;
> }
> shared_ptr<std::thread> thread = make_shared<std::thread>([=]() {
>     arrow::Int64Builder test_a;
>       for (int i = 0; i < 1e7; ++i) {
>         PARQUET_THROW_NOT_OK(test_a.Append(i));
>     }
>     auto sc = arrow::schema({arrow::field("A", arrow::int64())});
>     auto table = arrow::Table::Make(sc,{test_a.Finish().ValueOrDie()});
>     const string &fileid = sole::uuid4().str();
>     string filename = "test.parq";
>     try {
>         std::shared_ptr<arrow::io::FileOutputStream> outfile;
>         PARQUET_ASSIGN_OR_THROW(
>                 outfile,arrow::io::FileOutputStream::Open(filename)
>         );
>         PARQUET_THROW_NOT_OK(
>                 parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile, table->num_rows())
>         );
>     } catch (exception &ex) {
>         cout << ex.what() << endl;
>     }
> };{code}
>  
> The stack trace is shown in the attached picture.
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)