You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/12/03 05:57:04 UTC

[GitHub] [arrow] hu6360567 opened a new issue #11846: Crash when import/export between C++ and Rust

hu6360567 opened a new issue #11846:
URL: https://github.com/apache/arrow/issues/11846


   The following code works fine when it is compiled in CMake debug mode,
   but it crashes when it is compiled in CMake release mode.
   As far as I can tell, the underlying array data is released after `import_array` consumes it.
   
   environment:
   MacOS 10.15, Apple clang version 12.0.0, rustc 1.56.0
   
   ```cpp
   #include <arrow/api.h>
   #include <arrow/c/abi.h>
   #include <arrow/c/bridge.h>
   #include <arrow/c/helpers.h>
   #include <iostream>
   
   // rust ffi method
   /*
   #[no_mangle]
   pub extern "C" fn import_array(content: *const FFI_ArrowArray, schema: *const FFI_ArrowSchema) {
       let array = match unsafe { ArrowArray::try_from_raw(content, schema) } {
           Ok(arr) => { arr },
           Err(e) => {
               eprintln!("{}", e);
               return;
           }
       };
       let array = StructArray::from(array.to_data().unwrap());
       let array = RecordBatch::from(&array);
       println!("{:?}", array);
   
     #[test]
     fn test_import() {
         let col1 = StringArray::from(vec!["a", "b", "c"]);
         let col2 = Int8Array::from(vec![1, 2, 3]);
   
         let rb_schema = Schema::new(
             vec![
                 Field::new("col1", col1.data_type().clone(), false),
                 Field::new("col2", col2.data_type().clone(), false),
             ]
         );
   
         let rb = match RecordBatch::try_new(Arc::new(rb_schema), vec![Arc::new(col1), Arc::new(col2)]) {
             Ok(rb) => rb,
             Err(e) => {
                 eprintln!("{}", e);
                 return;
             }
         };
   
         let (content, schema) = StructArray::from(rb).to_raw().unwrap();
   
         import_array(content, schema);
     }
   }
    */
   // C-linkage declaration of the Rust `import_array` function whose source is quoted in the comment above.
   extern "C" void import_array(const ArrowArray *content, const ArrowSchema *schema);
   
   
   // Prints `msg` followed by whether the given array and schema structs are
   // reported as released by ArrowArrayIsReleased / ArrowSchemaIsReleased.
   // `msg` must be a string literal: it is concatenated with the format string.
   // The do { ... } while(0) wrapper lets the macro be used as a single statement.
   #define DEBUG_C_INTERFACE(msg, content_ptr, schema_ptr)         \
   do {\
   printf(msg ": ArrayIsRelease[%s], SchemaIsRelease[%s]\n",       \
           ArrowArrayIsReleased(content_ptr) ? "TRUE" : "FALSE",   \
           ArrowSchemaIsReleased(schema_ptr) ? "TRUE" : "FALSE");  \
   } while(0)
   
   
   std::shared_ptr<arrow::RecordBatch> generateRB() {
       auto key_builder = arrow::StringBuilder();
       auto value_builder = arrow::StringBuilder();
       key_builder.Append("key0");
       value_builder.Append("value0");
   
       auto key_array = *key_builder.Finish();
       auto value_array = *value_builder.Finish();
   
       auto schema = arrow::schema({
                                           arrow::field("key", key_array->type(), false),
                                           arrow::field("value", value_array->type(), false)
                                   });
   
       return arrow::RecordBatch::Make(schema, 1, {key_array, value_array});
   }
   
   // Round trip that stays entirely in C++: export the record batch into the
   // C structs, then import it back with arrow::ImportArray. The release state
   // of the structs and the use_count of both columns' data are printed around
   // every step so they can be compared with test2.
   void test1() {
       auto property = generateRB();
       // Value-initialized structs; the posted output reports them as released at this point.
       ArrowArray array{};
       ArrowSchema schema{};
       DEBUG_C_INTERFACE("TEST1 Before Export", &array, &schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
       // NOTE(review): whatever ExportRecordBatch returns is discarded here.
       arrow::ExportRecordBatch(*property, &array, &schema);
       DEBUG_C_INTERFACE("TEST1 After Export", &array, &schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
   
       DEBUG_C_INTERFACE("TEST1 Before Import", &array, &schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
       // Import on the C++ side; the posted output shows both structs released after this call.
       auto arrow_array = *arrow::ImportArray(&array, &schema);
       DEBUG_C_INTERFACE("TEST1 After Import", &array, &schema);
       std::cout << arrow_array->ToString() << std::endl;
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
   }
   
   // Same export as test1, but the import is done by the Rust `import_array`
   // function. This is the reproduction of the reported crash: the structs
   // passed to Rust live on this function's stack.
   void test2() {
       auto property = generateRB();
       // Stack-allocated, value-initialized structs owned by C++.
       ArrowArray array{};
       ArrowSchema schema{};
       DEBUG_C_INTERFACE("TEST2 Before Export", &array, &schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
       // NOTE(review): whatever ExportRecordBatch returns is discarded here.
       arrow::ExportRecordBatch(*property, &array, &schema);
       DEBUG_C_INTERFACE("TEST2 After Export", &array, &schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
   
       DEBUG_C_INTERFACE("TEST2 Before Import", &array, &schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
       // Hand the C++-owned structs to Rust. In the posted output both structs are
       // still reported as not released after this call, and the process then
       // terminates with SIGSEGV.
       import_array(&array, &schema);
       DEBUG_C_INTERFACE("TEST2 After Import", &array, &schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
   }
   
   // Runs the C++-only round trip first, then the C++ -> Rust hand-off,
   // printing a banner before each so the two outputs can be told apart.
   int main() {
       std::cout << "TEST1" << std::endl;
       test1();
   
       std::cout << "TEST2" << std::endl;
       test2();
   }
   ```
   
   OUTPUT
   ```
   TEST1
   TEST1 Before Export: ArrayIsRelease[TRUE], SchemaIsRelease[TRUE]
   3,3
   TEST1 After Export: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
   5,5
   TEST1 Before Import: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
   5,5
   TEST1 After Import: ArrayIsRelease[TRUE], SchemaIsRelease[TRUE]
   -- is_valid: all not null
   -- child 0 type: string
     [
       "key0"
     ]
   -- child 1 type: string
     [
       "value0"
     ]
   5,5
   TEST2
   TEST2 Before Export: ArrayIsRelease[TRUE], SchemaIsRelease[TRUE]
   3,3
   TEST2 After Export: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
   5,5
   TEST2 Before Import: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
   5,5
   RecordBatch { schema: Schema { fields: [Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }], metadata: {} }, columns: [StringArray
   [
     "key0",
   ], StringArray
   [
     "value0",
   ]] }
   TEST2 After Import: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
   
   Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] hu6360567 closed issue #11846: Crash when import/export between C++ and Rust

Posted by GitBox <gi...@apache.org>.
hu6360567 closed issue #11846:
URL: https://github.com/apache/arrow/issues/11846


   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] westonpace commented on issue #11846: Crash when import/export between C++ and Rust

Posted by GitBox <gi...@apache.org>.
westonpace commented on issue #11846:
URL: https://github.com/apache/arrow/issues/11846#issuecomment-1006754217


   @hu6360567 I noticed that you reopened this issue.  Is there still work to be done here?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] hu6360567 commented on issue #11846: Crash when import/export between C++ and Rust

Posted by GitBox <gi...@apache.org>.
hu6360567 commented on issue #11846:
URL: https://github.com/apache/arrow/issues/11846#issuecomment-985286141


   I've figured out the key problem: I mixed up Rust-managed and C++-managed FFI pointers.
   `import_array` consumes the pointers it is given, but in my code those pointers were managed by C++.
   
   As mentioned in https://github.com/apache/arrow-rs/issues/994#issuecomment-985254643, the output pointers passed to `arrow::ExportRecordBatch` should be created in Rust.
   
   The fix is shown below:
   ```rust
   /// Creates an empty `ArrowArray`, converts it into a raw array/schema
   /// pointer pair, and returns both pointers through the out-parameters.
   ///
   /// # Safety
   /// `content` and `schema` are written through without any null check, so
   /// both must point to valid, writable pointer slots.
   #[no_mangle]
   pub extern "C" fn create_array(content: *mut *const FFI_ArrowArray, schema: *mut *const FFI_ArrowSchema) {
       unsafe {
           let array = ArrowArray::empty();
           // Turn the array into its raw (array, schema) pointer pair.
           let (c, s) = ArrowArray::into_raw(array);
           // Publish the pointers to the caller.
           *content = c;
           *schema = s;
       }
   }
   ```
   
   ```c++
   // Corrected version of test2: the array and schema structs are obtained
   // from the Rust `create_array` function instead of living on the C++ stack,
   // so the pointers later given to `import_array` were created in Rust.
   // NOTE(review): the extern "C" declaration of create_array is not shown in this snippet.
   void test2() {
       auto property = generateRB();
       const ArrowArray *array = nullptr;
       const ArrowSchema *schema = nullptr;
       // Rust fills in both pointers.
       create_array(&array, &schema);
       printf("array@%p, schema@%p\n", array, schema);
       DEBUG_C_INTERFACE("TEST2 Before Export", array, schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
       // The pointers are declared pointer-to-const, so constness is cast away
       // for the export call, which writes into the structs.
       arrow::ExportRecordBatch(*property, const_cast<ArrowArray *>(array), const_cast<ArrowSchema *>(schema));
       DEBUG_C_INTERFACE("TEST2 After Export", array, schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
   
       DEBUG_C_INTERFACE("TEST2 Before Import", array, schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
       // Hand the Rust-created structs back to Rust for import.
       import_array(array, schema);
       // NOTE(review): presumably the pointers must not be used after import_array
       // consumes them, yet the macro below still reads them - confirm this is safe.
       DEBUG_C_INTERFACE("TEST2 After Import", array, schema);
       std::cout << property->column_data(0).use_count() << "," << property->column_data(1).use_count() << std::endl;
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org