You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ee...@apache.org on 2023/06/14 19:04:05 UTC

[arrow] branch main updated: GH-35988: [C#] The C data interface implementation can leak on import (#35996)

This is an automated email from the ASF dual-hosted git repository.

eerhardt pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1e2dfceb3a GH-35988: [C#] The C data interface implementation can leak on import (#35996)
1e2dfceb3a is described below

commit 1e2dfceb3a6bba1c34585ecc015214f24548dbd3
Author: Curt Hagenlocher <cu...@hagenlocher.org>
AuthorDate: Wed Jun 14 12:03:58 2023 -0700

    GH-35988: [C#] The C data interface implementation can leak on import (#35996)
    
    ### What changes are included in this PR?
    
    To ensure proper cleanup, immediately copies the contents of the C structure into the imported class for arrays and streams.
    Relaxes the requirement when exporting that the target structure appear uninitialized.
    
    ### Are these changes tested?
    
    Existing tests pass. We don't as yet seem to have a good way to test for memory leaks so no new tests have been added.
    
    ### Are there any user-facing changes?
    
    No.
    
    * Closes: #35988
    
    Authored-by: Curt Hagenlocher <cu...@hagenlocher.org>
    Signed-off-by: Eric Erhardt <er...@microsoft.com>
---
 csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs   |  4 --
 csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs   | 31 +++++++++----
 .../Apache.Arrow/C/CArrowArrayStreamExporter.cs    |  4 --
 .../Apache.Arrow/C/CArrowArrayStreamImporter.cs    | 54 +++++++++-------------
 csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs  |  4 --
 .../Apache.Arrow.Tests/CDataInterfaceDataTests.cs  |  1 +
 .../CDataInterfacePythonTests.cs                   |  5 ++
 7 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
index 4e11291cde..aafb3b8987 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
@@ -49,10 +49,6 @@ namespace Apache.Arrow.C
             {
                 throw new ArgumentNullException(nameof(cArray));
             }
-            if (cArray->release != null)
-            {
-                throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArray));
-            }
 
             ExportedAllocationOwner allocationOwner = new ExportedAllocationOwner();
             try
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
index db5181b56e..4375b382d9 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
@@ -42,6 +42,9 @@ namespace Apache.Arrow.C
         /// IArrowArray importedArray = CArrowArrayImporter.ImportArray(importedPtr);
         /// </code>
         /// </examples>
+        /// <param name="ptr">The pointer to the array being imported</param>
+        /// <param name="type">The type of the array being imported</param>
+        /// <returns>The imported C# array</returns>
         public static unsafe IArrowArray ImportArray(CArrowArray* ptr, IArrowType type)
         {
             ImportedArrowArray importedArray = null;
@@ -74,6 +77,9 @@ namespace Apache.Arrow.C
         /// RecordBatch batch = CArrowArrayImporter.ImportRecordBatch(importedPtr, schema);
         /// </code>
         /// </examples>
+        /// <param name="ptr">The pointer to the record batch being imported</param>
+        /// <param name="schema">The schema type of the record batch being imported</param>
+        /// <returns>The imported C# record batch</returns>
         public static unsafe RecordBatch ImportRecordBatch(CArrowArray* ptr, Schema schema)
         {
             ImportedArrowArray importedArray = null;
@@ -90,7 +96,7 @@ namespace Apache.Arrow.C
 
         private sealed unsafe class ImportedArrowArray : ImportedAllocationOwner
         {
-            private readonly CArrowArray* _cArray;
+            private readonly CArrowArray _cArray;
 
             public ImportedArrowArray(CArrowArray* cArray)
             {
@@ -98,34 +104,41 @@ namespace Apache.Arrow.C
                 {
                     throw new ArgumentNullException(nameof(cArray));
                 }
-                _cArray = cArray;
-                if (_cArray->release == null)
+                if (cArray->release == null)
                 {
                     throw new ArgumentException("Tried to import an array that has already been released.", nameof(cArray));
                 }
+                _cArray = *cArray;
+                cArray->release = null;
             }
 
             protected override void FinalRelease()
             {
-                if (_cArray->release != null)
+                if (_cArray.release != null)
                 {
-                    _cArray->release(_cArray);
+                    fixed (CArrowArray* cArray = &_cArray)
+                    {
+                        cArray->release(cArray);
+                    }
                 }
             }
 
             public IArrowArray GetAsArray(IArrowType type)
             {
-                return ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray, type));
+                fixed (CArrowArray* cArray = &_cArray)
+                {
+                    return ArrowArrayFactory.BuildArray(GetAsArrayData(cArray, type));
+                }
             }
 
             public RecordBatch GetAsRecordBatch(Schema schema)
             {
                 IArrowArray[] arrays = new IArrowArray[schema.FieldsList.Count];
-                for (int i = 0; i < _cArray->n_children; i++)
+                for (int i = 0; i < _cArray.n_children; i++)
                 {
-                    arrays[i] = ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray->children[i], schema.FieldsList[i].DataType));
+                    arrays[i] = ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray.children[i], schema.FieldsList[i].DataType));
                 }
-                return new RecordBatch(schema, arrays, checked((int)_cArray->length));
+                return new RecordBatch(schema, arrays, checked((int)_cArray.length));
             }
 
             private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type)
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs
index 2b4a5b2165..911dd7a153 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs
@@ -53,10 +53,6 @@ namespace Apache.Arrow.C
             {
                 throw new ArgumentNullException(nameof(arrayStream));
             }
-            if (cArrayStream->release != null)
-            {
-                throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArrayStream));
-            }
 
             cArrayStream->private_data = ExportedArrayStream.Export(arrayStream);
             cArrayStream->get_schema = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int>)s_getSchemaArrayStream.Pointer;
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs
index 4a37438828..b2558f07a4 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs
@@ -42,6 +42,8 @@ namespace Apache.Arrow.C
         /// IArrowArrayStream importedStream = CArrowArrayStreamImporter.ImportStream(importedPtr);
         /// </code>
         /// </examples>
+        /// <param name="ptr">The pointer to the stream being imported</param>
+        /// <returns>The imported C# array stream</returns>
         public static unsafe IArrowArrayStream ImportArrayStream(CArrowArrayStream* ptr)
         {
             return new ImportedArrowArrayStream(ptr);
@@ -49,7 +51,7 @@ namespace Apache.Arrow.C
 
         private sealed unsafe class ImportedArrowArrayStream : IArrowArrayStream
         {
-            private readonly CArrowArrayStream* _cArrayStream;
+            private readonly CArrowArrayStream _cArrayStream;
             private readonly Schema _schema;
             private bool _disposed;
 
@@ -59,29 +61,21 @@ namespace Apache.Arrow.C
                 {
                     throw new ArgumentNullException(nameof(cArrayStream));
                 }
-                _cArrayStream = cArrayStream;
-                if (_cArrayStream->release == null)
+                if (cArrayStream->release == null)
                 {
                     throw new ArgumentException("Tried to import an array stream that has already been released.", nameof(cArrayStream));
                 }
 
-                CArrowSchema* cSchema = CArrowSchema.Create();
-                try
+                CArrowSchema cSchema = new CArrowSchema();
+                int errno = cArrayStream->get_schema(cArrayStream, &cSchema);
+                if (errno != 0)
                 {
-                    int errno = _cArrayStream->get_schema(_cArrayStream, cSchema);
-                    if (errno != 0)
-                    {
-                        throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
-                    }
-                    _schema = CArrowSchemaImporter.ImportSchema(cSchema);
-                }
-                finally
-                {
-                    if (_schema == null)
-                    {
-                        CArrowSchema.Free(cSchema);
-                    }
+                    throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
                 }
+                _schema = CArrowSchemaImporter.ImportSchema(&cSchema);
+
+                _cArrayStream = *cArrayStream;
+                cArrayStream->release = null;
             }
 
             ~ImportedArrowArrayStream()
@@ -99,24 +93,17 @@ namespace Apache.Arrow.C
                 }
 
                 RecordBatch result = null;
-                CArrowArray* cArray = CArrowArray.Create();
-                try
+                CArrowArray cArray = new CArrowArray();
+                fixed (CArrowArrayStream* cArrayStream = &_cArrayStream)
                 {
-                    int errno = _cArrayStream->get_next(_cArrayStream, cArray);
+                    int errno = cArrayStream->get_next(cArrayStream, &cArray);
                     if (errno != 0)
                     {
                         throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
                     }
-                    if (cArray->release != null)
+                    if (cArray.release != null)
                     {
-                        result = CArrowArrayImporter.ImportRecordBatch(cArray, _schema);
-                    }
-                }
-                finally
-                {
-                    if (result == null)
-                    {
-                        CArrowArray.Free(cArray);
+                        result = CArrowArrayImporter.ImportRecordBatch(&cArray, _schema);
                     }
                 }
 
@@ -125,10 +112,13 @@ namespace Apache.Arrow.C
 
             public void Dispose()
             {
-                if (!_disposed && _cArrayStream->release != null)
+                if (!_disposed && _cArrayStream.release != null)
                 {
                     _disposed = true;
-                    _cArrayStream->release(_cArrayStream);
+                    fixed (CArrowArrayStream * cArrayStream = &_cArrayStream)
+                    {
+                        cArrayStream->release(cArrayStream);
+                    }
                 }
                 GC.SuppressFinalize(this);
             }
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
index c2745c4cc4..fae2455560 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
@@ -50,10 +50,6 @@ namespace Apache.Arrow.C
             {
                 throw new ArgumentNullException(nameof(schema));
             }
-            if (schema->release != null)
-            {
-                throw new ArgumentException("Cannot export schema to a struct that is already initialized.");
-            }
 
             schema->format = StringUtil.ToCStringUtf8(GetFormat(datatype));
             schema->name = null;
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
index 7039ccdfe2..89a346f9f7 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
@@ -85,6 +85,7 @@ namespace Apache.Arrow.Tests
                 CArrowArrayImporter.ImportArray(cArray, GetTestArray().Data.DataType);
             });
             Assert.True(wasCalled);
+
             CArrowArray.Free(cArray);
 
             GC.KeepAlive(releaseCallback);
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index 32c8127ae6..8172c4f420 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -445,6 +445,8 @@ namespace Apache.Arrow.Tests
             Assert.Null(importedArray.GetString(2));
             Assert.Equal("foo", importedArray.GetString(3));
             Assert.Equal("bar", importedArray.GetString(4));
+
+            CArrowArray.Free(cArray);
         }
 
         [SkippableFact]
@@ -488,6 +490,7 @@ namespace Apache.Arrow.Tests
 
             Schema schema = CArrowSchemaImporter.ImportSchema(cSchema);
             RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema);
+            CArrowArray.Free(cArray);
 
             Assert.Equal(5, recordBatch.Length);
 
@@ -563,6 +566,8 @@ namespace Apache.Arrow.Tests
             }
 
             IArrowArrayStream stream = CArrowArrayStreamImporter.ImportArrayStream(cArrayStream);
+            CArrowArrayStream.Free(cArrayStream);
+
             var batch1 = stream.ReadNextRecordBatchAsync().Result;
             Assert.Equal(5, batch1.Length);