You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ee...@apache.org on 2023/06/14 19:04:05 UTC
[arrow] branch main updated: GH-35988: [C#] The C data interface implementation can leak on import (#35996)
This is an automated email from the ASF dual-hosted git repository.
eerhardt pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1e2dfceb3a GH-35988: [C#] The C data interface implementation can leak on import (#35996)
1e2dfceb3a is described below
commit 1e2dfceb3a6bba1c34585ecc015214f24548dbd3
Author: Curt Hagenlocher <cu...@hagenlocher.org>
AuthorDate: Wed Jun 14 12:03:58 2023 -0700
GH-35988: [C#] The C data interface implementation can leak on import (#35996)
### What changes are included in this PR?
To ensure proper cleanup, immediately copies the contents of the C structure into the imported class for arrays and streams.
Relaxes the requirement when exporting that the target structure appear uninitialized.
### Are these changes tested?
Existing tests pass. We don't as yet seem to have a good way to test for memory leaks so no new tests have been added.
### Are there any user-facing changes?
No.
* Closes: #35988
Authored-by: Curt Hagenlocher <cu...@hagenlocher.org>
Signed-off-by: Eric Erhardt <er...@microsoft.com>
---
csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs | 4 --
csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs | 31 +++++++++----
.../Apache.Arrow/C/CArrowArrayStreamExporter.cs | 4 --
.../Apache.Arrow/C/CArrowArrayStreamImporter.cs | 54 +++++++++-------------
csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs | 4 --
.../Apache.Arrow.Tests/CDataInterfaceDataTests.cs | 1 +
.../CDataInterfacePythonTests.cs | 5 ++
7 files changed, 50 insertions(+), 53 deletions(-)
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
index 4e11291cde..aafb3b8987 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs
@@ -49,10 +49,6 @@ namespace Apache.Arrow.C
{
throw new ArgumentNullException(nameof(cArray));
}
- if (cArray->release != null)
- {
- throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArray));
- }
ExportedAllocationOwner allocationOwner = new ExportedAllocationOwner();
try
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
index db5181b56e..4375b382d9 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
@@ -42,6 +42,9 @@ namespace Apache.Arrow.C
/// IArrowArray importedArray = CArrowArrayImporter.ImportArray(importedPtr);
/// </code>
/// </examples>
+ /// <param name="ptr">The pointer to the array being imported</param>
+ /// <param name="type">The type of the array being imported</param>
+ /// <returns>The imported C# array</returns>
public static unsafe IArrowArray ImportArray(CArrowArray* ptr, IArrowType type)
{
ImportedArrowArray importedArray = null;
@@ -74,6 +77,9 @@ namespace Apache.Arrow.C
/// RecordBatch batch = CArrowArrayImporter.ImportRecordBatch(importedPtr, schema);
/// </code>
/// </examples>
+ /// <param name="ptr">The pointer to the record batch being imported</param>
+ /// <param name="schema">The schema type of the record batch being imported</param>
+ /// <returns>The imported C# record batch</returns>
public static unsafe RecordBatch ImportRecordBatch(CArrowArray* ptr, Schema schema)
{
ImportedArrowArray importedArray = null;
@@ -90,7 +96,7 @@ namespace Apache.Arrow.C
private sealed unsafe class ImportedArrowArray : ImportedAllocationOwner
{
- private readonly CArrowArray* _cArray;
+ private readonly CArrowArray _cArray;
public ImportedArrowArray(CArrowArray* cArray)
{
@@ -98,34 +104,41 @@ namespace Apache.Arrow.C
{
throw new ArgumentNullException(nameof(cArray));
}
- _cArray = cArray;
- if (_cArray->release == null)
+ if (cArray->release == null)
{
throw new ArgumentException("Tried to import an array that has already been released.", nameof(cArray));
}
+ _cArray = *cArray;
+ cArray->release = null;
}
protected override void FinalRelease()
{
- if (_cArray->release != null)
+ if (_cArray.release != null)
{
- _cArray->release(_cArray);
+ fixed (CArrowArray* cArray = &_cArray)
+ {
+ cArray->release(cArray);
+ }
}
}
public IArrowArray GetAsArray(IArrowType type)
{
- return ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray, type));
+ fixed (CArrowArray* cArray = &_cArray)
+ {
+ return ArrowArrayFactory.BuildArray(GetAsArrayData(cArray, type));
+ }
}
public RecordBatch GetAsRecordBatch(Schema schema)
{
IArrowArray[] arrays = new IArrowArray[schema.FieldsList.Count];
- for (int i = 0; i < _cArray->n_children; i++)
+ for (int i = 0; i < _cArray.n_children; i++)
{
- arrays[i] = ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray->children[i], schema.FieldsList[i].DataType));
+ arrays[i] = ArrowArrayFactory.BuildArray(GetAsArrayData(_cArray.children[i], schema.FieldsList[i].DataType));
}
- return new RecordBatch(schema, arrays, checked((int)_cArray->length));
+ return new RecordBatch(schema, arrays, checked((int)_cArray.length));
}
private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type)
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs
index 2b4a5b2165..911dd7a153 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs
@@ -53,10 +53,6 @@ namespace Apache.Arrow.C
{
throw new ArgumentNullException(nameof(arrayStream));
}
- if (cArrayStream->release != null)
- {
- throw new ArgumentException("Cannot export array to a struct that is already initialized.", nameof(cArrayStream));
- }
cArrayStream->private_data = ExportedArrayStream.Export(arrayStream);
cArrayStream->get_schema = (delegate* unmanaged[Stdcall]<CArrowArrayStream*, CArrowSchema*, int>)s_getSchemaArrayStream.Pointer;
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs
index 4a37438828..b2558f07a4 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamImporter.cs
@@ -42,6 +42,8 @@ namespace Apache.Arrow.C
/// IArrowArrayStream importedStream = CArrowArrayStreamImporter.ImportStream(importedPtr);
/// </code>
/// </examples>
+ /// <param name="ptr">The pointer to the stream being imported</param>
+ /// <returns>The imported C# array stream</returns>
public static unsafe IArrowArrayStream ImportArrayStream(CArrowArrayStream* ptr)
{
return new ImportedArrowArrayStream(ptr);
@@ -49,7 +51,7 @@ namespace Apache.Arrow.C
private sealed unsafe class ImportedArrowArrayStream : IArrowArrayStream
{
- private readonly CArrowArrayStream* _cArrayStream;
+ private readonly CArrowArrayStream _cArrayStream;
private readonly Schema _schema;
private bool _disposed;
@@ -59,29 +61,21 @@ namespace Apache.Arrow.C
{
throw new ArgumentNullException(nameof(cArrayStream));
}
- _cArrayStream = cArrayStream;
- if (_cArrayStream->release == null)
+ if (cArrayStream->release == null)
{
throw new ArgumentException("Tried to import an array stream that has already been released.", nameof(cArrayStream));
}
- CArrowSchema* cSchema = CArrowSchema.Create();
- try
+ CArrowSchema cSchema = new CArrowSchema();
+ int errno = cArrayStream->get_schema(cArrayStream, &cSchema);
+ if (errno != 0)
{
- int errno = _cArrayStream->get_schema(_cArrayStream, cSchema);
- if (errno != 0)
- {
- throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
- }
- _schema = CArrowSchemaImporter.ImportSchema(cSchema);
- }
- finally
- {
- if (_schema == null)
- {
- CArrowSchema.Free(cSchema);
- }
+ throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
}
+ _schema = CArrowSchemaImporter.ImportSchema(&cSchema);
+
+ _cArrayStream = *cArrayStream;
+ cArrayStream->release = null;
}
~ImportedArrowArrayStream()
@@ -99,24 +93,17 @@ namespace Apache.Arrow.C
}
RecordBatch result = null;
- CArrowArray* cArray = CArrowArray.Create();
- try
+ CArrowArray cArray = new CArrowArray();
+ fixed (CArrowArrayStream* cArrayStream = &_cArrayStream)
{
- int errno = _cArrayStream->get_next(_cArrayStream, cArray);
+ int errno = cArrayStream->get_next(cArrayStream, &cArray);
if (errno != 0)
{
throw new Exception($"Unexpected error recieved from external stream. Errno: {errno}");
}
- if (cArray->release != null)
+ if (cArray.release != null)
{
- result = CArrowArrayImporter.ImportRecordBatch(cArray, _schema);
- }
- }
- finally
- {
- if (result == null)
- {
- CArrowArray.Free(cArray);
+ result = CArrowArrayImporter.ImportRecordBatch(&cArray, _schema);
}
}
@@ -125,10 +112,13 @@ namespace Apache.Arrow.C
public void Dispose()
{
- if (!_disposed && _cArrayStream->release != null)
+ if (!_disposed && _cArrayStream.release != null)
{
_disposed = true;
- _cArrayStream->release(_cArrayStream);
+ fixed (CArrowArrayStream * cArrayStream = &_cArrayStream)
+ {
+ cArrayStream->release(cArrayStream);
+ }
}
GC.SuppressFinalize(this);
}
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
index c2745c4cc4..fae2455560 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
@@ -50,10 +50,6 @@ namespace Apache.Arrow.C
{
throw new ArgumentNullException(nameof(schema));
}
- if (schema->release != null)
- {
- throw new ArgumentException("Cannot export schema to a struct that is already initialized.");
- }
schema->format = StringUtil.ToCStringUtf8(GetFormat(datatype));
schema->name = null;
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
index 7039ccdfe2..89a346f9f7 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
@@ -85,6 +85,7 @@ namespace Apache.Arrow.Tests
CArrowArrayImporter.ImportArray(cArray, GetTestArray().Data.DataType);
});
Assert.True(wasCalled);
+
CArrowArray.Free(cArray);
GC.KeepAlive(releaseCallback);
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index 32c8127ae6..8172c4f420 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -445,6 +445,8 @@ namespace Apache.Arrow.Tests
Assert.Null(importedArray.GetString(2));
Assert.Equal("foo", importedArray.GetString(3));
Assert.Equal("bar", importedArray.GetString(4));
+
+ CArrowArray.Free(cArray);
}
[SkippableFact]
@@ -488,6 +490,7 @@ namespace Apache.Arrow.Tests
Schema schema = CArrowSchemaImporter.ImportSchema(cSchema);
RecordBatch recordBatch = CArrowArrayImporter.ImportRecordBatch(cArray, schema);
+ CArrowArray.Free(cArray);
Assert.Equal(5, recordBatch.Length);
@@ -563,6 +566,8 @@ namespace Apache.Arrow.Tests
}
IArrowArrayStream stream = CArrowArrayStreamImporter.ImportArrayStream(cArrayStream);
+ CArrowArrayStream.Free(cArrayStream);
+
var batch1 = stream.ReadNextRecordBatchAsync().Result;
Assert.Equal(5, batch1.Length);