You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ee...@apache.org on 2021/12/28 17:18:32 UTC
[arrow] branch master updated: ARROW-15071: [C#] Fixed a bug in Column.cs ValidateArrayDataTypes method
This is an automated email from the ASF dual-hosted git repository.
eerhardt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 06b1013 ARROW-15071: [C#] Fixed a bug in Column.cs ValidateArrayDataTypes method
06b1013 is described below
commit 06b10133e486ff736e657f79ffad7b029150cfcd
Author: Zixi <zi...@gmail.com>
AuthorDate: Tue Dec 28 11:14:00 2021 -0600
ARROW-15071: [C#] Fixed a bug in Column.cs ValidateArrayDataTypes method
Fixed a bug in Column.cs ValidateArrayDataTypes method:
From: if (Data.Array(i).Data.DataType != Field.DataType)
To: if (Data.Array(i).Data.DataType.TypeId != Field.DataType.TypeId)
Added unit test in TestTableBasics and others.
Closes #11931 from zixi-bwang/CSharpUnitTesting
Lead-authored-by: Zixi <zi...@gmail.com>
Co-authored-by: Zixi <89...@users.noreply.github.com>
Signed-off-by: Eric Erhardt <er...@microsoft.com>
---
.../Apache.Arrow/Arrays/ArrayDataTypeComparer.cs | 147 +++++++++++++++++++++
csharp/src/Apache.Arrow/Arrays/FieldComparer.cs | 60 +++++++++
csharp/src/Apache.Arrow/Column.cs | 11 +-
.../test/Apache.Arrow.Tests/ArrayBuilderTests.cs | 18 +++
.../Apache.Arrow.Tests/ArrowFileReaderTests.cs | 16 +++
csharp/test/Apache.Arrow.Tests/TableTests.cs | 28 ++++
6 files changed, 279 insertions(+), 1 deletion(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
new file mode 100644
index 0000000..1e0524e
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+ internal sealed class ArrayDataTypeComparer :
+ IArrowTypeVisitor<TimestampType>,
+ IArrowTypeVisitor<Date32Type>,
+ IArrowTypeVisitor<Date64Type>,
+ IArrowTypeVisitor<Time32Type>,
+ IArrowTypeVisitor<Time64Type>,
+ IArrowTypeVisitor<FixedSizeBinaryType>,
+ IArrowTypeVisitor<ListType>,
+ IArrowTypeVisitor<StructType>
+ {
+ private readonly IArrowType _expectedType;
+ private bool _dataTypeMatch;
+
+ public ArrayDataTypeComparer(IArrowType expectedType)
+ {
+ _expectedType = expectedType;
+ }
+
+ public bool DataTypeMatch => _dataTypeMatch;
+
+ public void Visit(TimestampType actualType)
+ {
+ if (_expectedType is TimestampType expectedType
+ && expectedType.Timezone == actualType.Timezone
+ && expectedType.Unit == actualType.Unit)
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ public void Visit(Date32Type actualType)
+ {
+ if (_expectedType is Date32Type expectedType
+ && expectedType.Unit == actualType.Unit)
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ public void Visit(Date64Type actualType)
+ {
+ if (_expectedType is Date64Type expectedType
+ && expectedType.Unit == actualType.Unit)
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ public void Visit(Time32Type actualType)
+ {
+ if (_expectedType is Time32Type expectedType
+ && expectedType.Unit == actualType.Unit)
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ public void Visit(Time64Type actualType)
+ {
+ if (_expectedType is Time64Type expectedType
+ && expectedType.Unit == actualType.Unit)
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ public void Visit(FixedSizeBinaryType actualType)
+ {
+ if (_expectedType is FixedSizeBinaryType expectedType
+ && expectedType.ByteWidth == actualType.ByteWidth)
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ public void Visit(ListType actualType)
+ {
+ if (_expectedType is ListType expectedType
+ && CompareNested(expectedType, actualType))
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ public void Visit(StructType actualType)
+ {
+ if (_expectedType is StructType expectedType
+ && CompareNested(expectedType, actualType))
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
+ private static bool CompareNested(NestedType expectedType, NestedType actualType)
+ {
+ if (expectedType.Fields.Count != actualType.Fields.Count)
+ {
+ return false;
+ }
+
+ for (int i = 0; i < expectedType.Fields.Count; i++)
+ {
+ if (expectedType.Fields[i].DataType.TypeId != actualType.Fields[i].DataType.TypeId)
+ {
+ return false;
+ }
+
+ var dataTypeMatch = FieldComparer.Compare(expectedType.Fields[i], actualType.Fields[i]);
+
+ if (!dataTypeMatch)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ public void Visit(IArrowType actualType)
+ {
+ if (_expectedType.TypeId == actualType.TypeId)
+ {
+ _dataTypeMatch = true;
+ }
+ }
+ }
+}
diff --git a/csharp/src/Apache.Arrow/Arrays/FieldComparer.cs b/csharp/src/Apache.Arrow/Arrays/FieldComparer.cs
new file mode 100644
index 0000000..9c5192c
--- /dev/null
+++ b/csharp/src/Apache.Arrow/Arrays/FieldComparer.cs
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System.Linq;
+
+namespace Apache.Arrow
+{
+ internal static class FieldComparer
+ {
+ public static bool Compare(Field expected, Field actual)
+ {
+ if (ReferenceEquals(expected, actual))
+ {
+ return true;
+ }
+
+ if (expected.Name != actual.Name || expected.IsNullable != actual.IsNullable ||
+ expected.HasMetadata != actual.HasMetadata)
+ {
+ return false;
+ }
+
+ if (expected.HasMetadata)
+ {
+ if (expected.Metadata.Count != actual.Metadata.Count)
+ {
+ return false;
+ }
+
+ if (!expected.Metadata.Keys.All(k => actual.Metadata.ContainsKey(k) && expected.Metadata[k] == actual.Metadata[k]))
+ {
+ return false;
+ }
+ }
+
+ var dataTypeComparer = new ArrayDataTypeComparer(expected.DataType);
+
+ actual.DataType.Accept(dataTypeComparer);
+
+ if (!dataTypeComparer.DataTypeMatch)
+ {
+ return false;
+ }
+
+ return true;
+ }
+ }
+}
diff --git a/csharp/src/Apache.Arrow/Column.cs b/csharp/src/Apache.Arrow/Column.cs
index 6e76e67..4eaf9a5 100644
--- a/csharp/src/Apache.Arrow/Column.cs
+++ b/csharp/src/Apache.Arrow/Column.cs
@@ -60,9 +60,18 @@ namespace Apache.Arrow
private bool ValidateArrayDataTypes()
{
+ var dataTypeComparer = new ArrayDataTypeComparer(Field.DataType);
+
for (int i = 0; i < Data.ArrayCount; i++)
{
- if (Data.Array(i).Data.DataType != Field.DataType)
+ if (Data.Array(i).Data.DataType.TypeId != Field.DataType.TypeId)
+ {
+ return false;
+ }
+
+ Data.Array(i).Data.DataType.Accept(dataTypeComparer);
+
+ if (!dataTypeComparer.DataTypeMatch)
{
return false;
}
diff --git a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
index 4107899..0b0754c 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
@@ -101,6 +101,24 @@ namespace Apache.Arrow.Tests
new List<string> { "444", null, "555", "666" },
ConvertStringArrayToList(list.GetSlicedValues(3) as StringArray));
+ Assert.Throws<ArgumentOutOfRangeException>(() => list.GetValueLength(-1));
+ Assert.Throws<ArgumentOutOfRangeException>(() => list.GetValueLength(4));
+
+ listBuilder.Resize(2);
+ var truncatedList = listBuilder.Build();
+
+ Assert.Equal(
+ new List<string> { "22", "33", "444", null, "555", "666" },
+ ConvertStringArrayToList(truncatedList.GetSlicedValues(2) as StringArray));
+
+ Assert.Throws<ArgumentOutOfRangeException>(() => truncatedList.GetSlicedValues(-1));
+ Assert.Throws<ArgumentOutOfRangeException>(() => truncatedList.GetSlicedValues(3));
+
+ listBuilder.Clear();
+ var emptyList = listBuilder.Build();
+
+ Assert.Equal(0, emptyList.Length);
+
List<string> ConvertStringArrayToList(StringArray array)
{
var length = array.Length;
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs
index f0876c8..cc7f005 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs
@@ -15,7 +15,9 @@
using Apache.Arrow.Ipc;
using Apache.Arrow.Memory;
+using Apache.Arrow.Types;
using System;
+using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Xunit;
@@ -155,5 +157,19 @@ namespace Apache.Arrow.Tests
ArrowReaderVerifier.CompareBatches(originalBatch1, readBatch3);
}
}
+
+ [Fact]
+ public void TestRecordBatchBasics()
+ {
+ RecordBatch recordBatch = TestData.CreateSampleRecordBatch(length: 1);
+ Assert.Throws<ArgumentOutOfRangeException>(() => new RecordBatch(recordBatch.Schema, recordBatch.Arrays, -1));
+
+ var col1 = recordBatch.Column(0);
+ var col2 = recordBatch.Column("list0");
+ ArrowReaderVerifier.CompareArrays(col1, col2);
+
+ recordBatch.Dispose();
+ }
+
}
}
diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs
index b919bf3..8b7b5b6 100644
--- a/csharp/test/Apache.Arrow.Tests/TableTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs
@@ -52,6 +52,34 @@ namespace Apache.Arrow.Tests
}
[Fact]
+ public void TestTableFromRecordBatches()
+ {
+ RecordBatch recordBatch1 = TestData.CreateSampleRecordBatch(length: 10, true);
+ RecordBatch recordBatch2 = TestData.CreateSampleRecordBatch(length: 10, true);
+ IList<RecordBatch> recordBatches = new List<RecordBatch>() { recordBatch1, recordBatch2 };
+
+ Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema, recordBatches);
+ Assert.Equal(20, table1.RowCount);
+ Assert.Equal(21, table1.ColumnCount);
+
+ FixedSizeBinaryType type = new FixedSizeBinaryType(17);
+ Field newField1 = new Field(type.Name, type, false);
+ Schema newSchema1 = recordBatch1.Schema.SetField(20, newField1);
+ Assert.Throws<ArgumentException>(() => Table.TableFromRecordBatches(newSchema1, recordBatches));
+
+ List<Field> fields = new List<Field>();
+ Field.Builder fieldBuilder = new Field.Builder();
+ fields.Add(fieldBuilder.Name("Ints").DataType(Int32Type.Default).Nullable(true).Build());
+ fieldBuilder = new Field.Builder();
+ fields.Add(fieldBuilder.Name("Strings").DataType(StringType.Default).Nullable(true).Build());
+ StructType structType = new StructType(fields);
+
+ Field newField2 = new Field(structType.Name, structType, false);
+ Schema newSchema2 = recordBatch1.Schema.SetField(16, newField2);
+ Assert.Throws<ArgumentException>(() => Table.TableFromRecordBatches(newSchema2, recordBatches));
+ }
+
+ [Fact]
public void TestTableAddRemoveAndSetColumn()
{
Table table = MakeTableWithOneColumnOfTwoIntArrays(10);