You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "westonpace (via GitHub)" <gi...@apache.org> on 2023/04/18 08:14:11 UTC

[GitHub] [arrow] westonpace commented on a diff in pull request #35153: GH-35152: [C#] Improve Field interoperability with C# Type

westonpace commented on code in PR #35153:
URL: https://github.com/apache/arrow/pull/35153#discussion_r1169642803


##########
csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs:
##########
@@ -87,5 +88,130 @@ public static IArrowArray BuildArray(ArrayData data)
                     throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
             }
         }
+
+        // Binary
+        public static StringArray BuildArray(IEnumerable<string> values)
+        {
+            return new StringArray.Builder().AppendRange(values).Build();
+        }
+
+        public static BinaryArray BuildArray(IEnumerable<byte[]> values)
+        {
+            return new BinaryArray.Builder().AppendRange(values).Build();
+        }
+
+        // Boolean
+        public static BooleanArray BuildArray(IEnumerable<bool> values)
+        {
+            return new BooleanArray.Builder().AppendRange(values).Build();
+        }
+
+        public static BooleanArray BuildArray(IEnumerable<bool?> values)
+        {
+            return new BooleanArray.Builder().AppendRange(values).Build();
+        }
+
+        // Integers
+        public static Int8Array BuildArray(IEnumerable<sbyte> values)
+        {
+            return new Int8Array.Builder().AppendRange(values).Build();
+        }
+
+        public static Int8Array BuildArray(IEnumerable<sbyte?> values)
+        {
+            return new Int8Array.Builder().AppendRange(values).Build();
+        }
+
+        public static Int16Array BuildArray(IEnumerable<short> values)
+        {
+            return new Int16Array.Builder().AppendRange(values).Build();
+        }
+
+        public static Int16Array BuildArray(IEnumerable<short?> values)
+        {
+            return new Int16Array.Builder().AppendRange(values).Build();
+        }
+
+        public static Int32Array BuildArray(IEnumerable<int> values)
+        {
+            return new Int32Array.Builder().AppendRange(values).Build();
+        }
+
+        public static Int32Array BuildArray(IEnumerable<int?> values)
+        {
+            return new Int32Array.Builder().AppendRange(values).Build();
+        }
+
+        public static Int64Array BuildArray(IEnumerable<long> values)
+        {
+            return new Int64Array.Builder().AppendRange(values).Build();
+        }
+
+        public static Int64Array BuildArray(IEnumerable<long?> values)
+        {
+            return new Int64Array.Builder().AppendRange(values).Build();
+        }
+
+        // Unsigned Integers
+        public static UInt16Array BuildArray(IEnumerable<ushort> values)

Review Comment:
   What about UInt8Array?



##########
csharp/src/Apache.Arrow/Field.Builder.cs:
##########
@@ -52,6 +55,131 @@ public Builder DataType(IArrowType type)
                 return this;
             }
 
+            // Build DataType from C# Type
+            public Builder DataType(System.Type valueType, string timezone = null)
+            {
+                System.Type[] genericArgs;
+                System.Type child = System.Nullable.GetUnderlyingType(valueType);
+                Type elementType;
+
+                if (child != null)
+                {
+                    // Nullable Type
+                    DataType(child, timezone);
+                    Nullable(true);
+                    return this;
+                }
+
+                // Not Nullable Type
+                Nullable(false);
+
+                switch (valueType)
+                {
+                    // Binary
+                    case var _ when valueType == typeof(byte):
+                        DataType(new BinaryType());
+                        break;
+                    case var _ when valueType == typeof(IEnumerable<byte>):

Review Comment:
   You used `IEnumerable<byte>` here but `byte[]` above in the `BuildArray` method.



##########
csharp/src/Apache.Arrow/Field.Builder.cs:
##########
@@ -52,6 +55,131 @@ public Builder DataType(IArrowType type)
                 return this;
             }
 
+            // Build DataType from C# Type
+            public Builder DataType(System.Type valueType, string timezone = null)
+            {
+                System.Type[] genericArgs;
+                System.Type child = System.Nullable.GetUnderlyingType(valueType);
+                Type elementType;
+
+                if (child != null)
+                {
+                    // Nullable Type
+                    DataType(child, timezone);
+                    Nullable(true);
+                    return this;
+                }
+
+                // Not Nullable Type
+                Nullable(false);
+
+                switch (valueType)
+                {
+                    // Binary
+                    case var _ when valueType == typeof(byte):
+                        DataType(new BinaryType());
+                        break;
+                    case var _ when valueType == typeof(IEnumerable<byte>):
+                        DataType(new BinaryType());
+                        Nullable(true);
+                        break;
+                    // Boolean
+                    case var _ when valueType == typeof(bool):
+                        DataType(new BooleanType());
+                        break;
+                    // String
+                    case var _ when valueType == typeof(string):
+                        DataType(new StringType());
+                        Nullable(true);
+                        break;
+                    // Integers
+                    case var _ when valueType == typeof(sbyte):
+                        DataType(new Int8Type());
+                        break;
+                    case var _ when valueType == typeof(short):
+                        DataType(new Int16Type());
+                        break;
+                    case var _ when valueType == typeof(int):
+                        DataType(new Int32Type());
+                        break;
+                    case var _ when valueType == typeof(long):
+                        DataType(new Int64Type());
+                        break;
+                    // Unisgned Integers
+                    case var _ when valueType == typeof(ushort):

Review Comment:
   UInt8Type?



##########
csharp/src/Apache.Arrow/Field.Builder.cs:
##########
@@ -52,6 +55,131 @@ public Builder DataType(IArrowType type)
                 return this;
             }
 
+            // Build DataType from C# Type
+            public Builder DataType(System.Type valueType, string timezone = null)
+            {
+                System.Type[] genericArgs;
+                System.Type child = System.Nullable.GetUnderlyingType(valueType);
+                Type elementType;
+
+                if (child != null)
+                {
+                    // Nullable Type
+                    DataType(child, timezone);
+                    Nullable(true);
+                    return this;
+                }
+
+                // Not Nullable Type
+                Nullable(false);
+
+                switch (valueType)
+                {
+                    // Binary
+                    case var _ when valueType == typeof(byte):
+                        DataType(new BinaryType());
+                        break;
+                    case var _ when valueType == typeof(IEnumerable<byte>):
+                        DataType(new BinaryType());
+                        Nullable(true);
+                        break;
+                    // Boolean
+                    case var _ when valueType == typeof(bool):
+                        DataType(new BooleanType());
+                        break;
+                    // String
+                    case var _ when valueType == typeof(string):
+                        DataType(new StringType());
+                        Nullable(true);
+                        break;
+                    // Integers
+                    case var _ when valueType == typeof(sbyte):
+                        DataType(new Int8Type());
+                        break;
+                    case var _ when valueType == typeof(short):
+                        DataType(new Int16Type());
+                        break;
+                    case var _ when valueType == typeof(int):
+                        DataType(new Int32Type());
+                        break;
+                    case var _ when valueType == typeof(long):
+                        DataType(new Int64Type());
+                        break;
+                    // Unisgned Integers
+                    case var _ when valueType == typeof(ushort):
+                        DataType(new UInt16Type());
+                        break;
+                    case var _ when valueType == typeof(uint):
+                        DataType(new UInt32Type());
+                        break;
+                    case var _ when valueType == typeof(ulong):
+                        DataType(new UInt64Type());
+                        break;
+                    // Decimal
+                    case var _ when valueType == typeof(decimal):
+                        DataType(Decimal256Type.Default);
+                        break;
+                    // Float
+                    case var _ when valueType == typeof(float):
+                        DataType(new FloatType());
+                        break;
+                    // Double
+                    case var _ when valueType == typeof(double):
+                        DataType(new DoubleType());
+                        break;
+                    // DateTime
+                    case var _ when valueType == typeof(DateTime) || valueType == typeof(DateTimeOffset):
+                        DataType(new TimestampType(TimeUnit.Nanosecond, timezone));
+                        break;
+                    // Time
+                    case var _ when valueType == typeof(TimeSpan):
+                        DataType(new Time64Type(TimeUnit.Nanosecond));
+                        break;
+#if NETCOREAPP3_1_OR_GREATER
+                    // Dictionary: IDictionary<?,?>

Review Comment:
   I think it would be very unusual that a dictionary encoded type is represented with a .NET Dictionary.  I'm pretty sure they are two separate things.  A .NET Dictionary would probably be closer to `MapArray` but I don't see that in the .NET impl currently.



##########
csharp/src/Apache.Arrow/Schema.Builder.cs:
##########
@@ -57,6 +58,20 @@ public Builder Field(Action<Field.Builder> fieldBuilderAction)
                 return this;
             }
 
+#if NETCOREAPP3_1_OR_GREATER
+            public Builder Struct(Type structure, string timezone = null)
+            {
+                PropertyInfo[] properties = structure.GetProperties(BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic);

Review Comment:
   Why `NonPublic`?



##########
csharp/src/Apache.Arrow/Field.Builder.cs:
##########
@@ -52,6 +55,131 @@ public Builder DataType(IArrowType type)
                 return this;
             }
 
+            // Build DataType from C# Type
+            public Builder DataType(System.Type valueType, string timezone = null)
+            {
+                System.Type[] genericArgs;
+                System.Type child = System.Nullable.GetUnderlyingType(valueType);
+                Type elementType;
+
+                if (child != null)
+                {
+                    // Nullable Type
+                    DataType(child, timezone);
+                    Nullable(true);
+                    return this;
+                }
+
+                // Not Nullable Type
+                Nullable(false);
+
+                switch (valueType)
+                {
+                    // Binary
+                    case var _ when valueType == typeof(byte):
+                        DataType(new BinaryType());
+                        break;
+                    case var _ when valueType == typeof(IEnumerable<byte>):
+                        DataType(new BinaryType());
+                        Nullable(true);
+                        break;
+                    // Boolean
+                    case var _ when valueType == typeof(bool):
+                        DataType(new BooleanType());
+                        break;
+                    // String
+                    case var _ when valueType == typeof(string):
+                        DataType(new StringType());
+                        Nullable(true);
+                        break;
+                    // Integers
+                    case var _ when valueType == typeof(sbyte):
+                        DataType(new Int8Type());
+                        break;
+                    case var _ when valueType == typeof(short):
+                        DataType(new Int16Type());
+                        break;
+                    case var _ when valueType == typeof(int):
+                        DataType(new Int32Type());
+                        break;
+                    case var _ when valueType == typeof(long):
+                        DataType(new Int64Type());
+                        break;
+                    // Unisgned Integers
+                    case var _ when valueType == typeof(ushort):
+                        DataType(new UInt16Type());
+                        break;
+                    case var _ when valueType == typeof(uint):
+                        DataType(new UInt32Type());
+                        break;
+                    case var _ when valueType == typeof(ulong):
+                        DataType(new UInt64Type());
+                        break;
+                    // Decimal
+                    case var _ when valueType == typeof(decimal):
+                        DataType(Decimal256Type.Default);
+                        break;
+                    // Float
+                    case var _ when valueType == typeof(float):
+                        DataType(new FloatType());
+                        break;
+                    // Double
+                    case var _ when valueType == typeof(double):
+                        DataType(new DoubleType());
+                        break;
+                    // DateTime
+                    case var _ when valueType == typeof(DateTime) || valueType == typeof(DateTimeOffset):
+                        DataType(new TimestampType(TimeUnit.Nanosecond, timezone));
+                        break;
+                    // Time
+                    case var _ when valueType == typeof(TimeSpan):
+                        DataType(new Time64Type(TimeUnit.Nanosecond));
+                        break;
+#if NETCOREAPP3_1_OR_GREATER
+                    // Dictionary: IDictionary<?,?>
+                    case var dict when typeof(IDictionary).IsAssignableFrom(valueType):
+                        genericArgs = dict.GetGenericArguments();
+
+                        DataType(new DictionaryType(
+                            new Builder().DataType(genericArgs[0], timezone)._type,
+                            new Builder().DataType(genericArgs[1], timezone)._type,
+                            false
+                        ));
+
+                        Nullable(true);
+
+                        break;
+                    // IEnumerable: List, Array, ...
+                    case var list when typeof(IEnumerable).IsAssignableFrom(valueType):
+                        elementType = list.GetGenericArguments()[0];
+
+                        DataType(new ListType(new Builder().Name("item").DataType(elementType, timezone).Build()));
+
+                        Nullable(true);
+                        break;
+                    // Struct like: get all properties
+                    case var structure when (valueType.IsValueType && !valueType.IsEnum && !valueType.IsPrimitive):
+                        if (string.IsNullOrEmpty(_name))
+                            Name(structure.Name);
+
+                        PropertyInfo[] properties = structure.GetProperties(BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic);
+
+                        DataType(new StructType(
+                            properties
+                                .Select(property => new Builder().Name(property.Name).DataType(property.PropertyType, timezone).Build())
+                                .ToArray()
+                        ));
+
+                        break;

Review Comment:
   This seems rather powerful, though also a little dangerous :)



##########
csharp/src/Apache.Arrow/Types/Decimal256Type.cs:
##########
@@ -17,14 +17,20 @@ namespace Apache.Arrow.Types
 {
     public sealed class Decimal256Type: FixedSizeBinaryType
     {
+        public static readonly Decimal256Type Default = new(38, 18, 32);

Review Comment:
   Maybe a comment here that this corresponds to the built-in C# `decimal` type's max range?



##########
csharp/src/Apache.Arrow/Field.Builder.cs:
##########
@@ -52,6 +55,131 @@ public Builder DataType(IArrowType type)
                 return this;
             }
 
+            // Build DataType from C# Type
+            public Builder DataType(System.Type valueType, string timezone = null)

Review Comment:
   The `timezone` parameter is a little odd, especially since it gets passed down into nested types.



##########
csharp/test/Apache.Arrow.Tests/FieldTests.cs:
##########
@@ -0,0 +1,189 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+    public class FieldTests
+    {
+        public class Build
+        {
+            [Fact]
+            public void DataType_Should_ThrowInvalidCastException()
+            {
+                // Arrange
+                var builder = new Field.Builder().Name("test");
+
+                // Act & Assert
+                try
+                {
+                    builder.DataType(typeof(object));
+                }
+                catch (InvalidCastException e)
+                {
+                    Assert.Equal($"Cannot convert System.Type<{typeof(object)}> to ArrowType", e.Message);
+                }
+            }
+
+            [Fact]
+            public void DataType_Should_InferDataType_From_NullableInt()
+            {
+                // Arrange
+                Field builder = new Field.Builder().Name("test").DataType(typeof(int?)).Build();
+
+                // Assert
+                Assert.Equal(typeof(Int32Type), builder.DataType.GetType());
+                Assert.True(builder.IsNullable);
+            }
+
+            [Fact]
+            public void DataType_Should_InferDataType_From_Int()
+            {
+                // Arrange
+                Field builder = new Field.Builder().Name("test").DataType(typeof(int)).Build();
+
+                // Assert
+                Assert.Equal(typeof(Int32Type), builder.DataType.GetType());
+                Assert.False(builder.IsNullable);
+            }
+
+            [Fact]
+            public void DataType_Should_InferDataType_From_NullableDecimal()
+            {
+                // Arrange
+                Field builder = new Field.Builder().Name("test").DataType(typeof(decimal?)).Build();
+                var dtype = builder.DataType as Decimal256Type;
+
+                // Assert
+                Assert.Equal(typeof(Decimal256Type), builder.DataType.GetType());
+                Assert.Equal(38, dtype.Precision);
+                Assert.Equal(18, dtype.Scale);
+                Assert.True(builder.IsNullable);
+            }
+
+            [Fact]
+            public void DataType_Should_InferDataType_From_Decimal()
+            {
+                // Arrange
+                Field builder = new Field.Builder().Name("test").DataType(typeof(decimal)).Build();
+                var dtype = builder.DataType as Decimal256Type;
+
+                // Assert
+                Assert.Equal(typeof(Decimal256Type), builder.DataType.GetType());
+                Assert.Equal(38, dtype.Precision);
+                Assert.Equal(18, dtype.Scale);
+                Assert.False(builder.IsNullable);
+            }
+
+#if NETCOREAPP3_1_OR_GREATER
+            [Fact]
+            public void DataType_Should_InferDataType_From_IEnumerable()
+            {
+                // Arrange
+                Field builder = new Field.Builder().Name("test").DataType(typeof(IEnumerable<string>)).Build();
+                var dtype = builder.DataType as ListType;
+                Field child = dtype.Fields[0];
+
+                // Assert
+                Assert.Equal(typeof(ListType), builder.DataType.GetType());
+                Assert.Equal(typeof(StringType), child.DataType.GetType());
+                Assert.Equal("item", child.Name);
+                Assert.True(dtype.Fields[0].IsNullable);
+            }
+
+            [Fact]
+            public void DataType_Should_InferDataType_From_Dictionary()
+            {
+                // Arrange
+                Field builder = new Field.Builder().Name("test").DataType(typeof(Dictionary<int, string>)).Build();
+                var dtype = builder.DataType as DictionaryType;
+                var k = dtype.IndexType as Int32Type;
+                var v = dtype.ValueType as StringType;
+
+                // Assert
+                Assert.Equal(typeof(Int32Type), k.GetType());
+                Assert.Equal(typeof(StringType), v.GetType());
+            }
+
+            [Fact]
+            public void DataType_Should_InferDataType_From_Structure()
+            {
+                // Arrange
+                Field field = new Field.Builder().DataType(typeof(TestStruct)).Build();
+                var dtype = field.DataType as StructType;
+                Field Name = dtype.Fields[0];
+                Field value = dtype.Fields[1];
+                Field aBc = dtype.Fields[2];

Review Comment:
   It's a bit odd that the variable name's casing matches the property name.  Maybe just stick with `name, value, abc`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org