You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "lidavidm (via GitHub)" <gi...@apache.org> on 2023/06/06 19:46:39 UTC

[GitHub] [arrow-adbc] lidavidm commented on a diff in pull request #697: feat(csharp): adding C# functionality

lidavidm commented on code in PR #697:
URL: https://github.com/apache/arrow-adbc/pull/697#discussion_r1220194997


##########
csharp/src/Apache.Arrow.Adbc.FlightSql/Apache - Backup.Arrow.Adbc.FlightSql.csproj:
##########


Review Comment:
   Did you mean to include this 'backup' file?



##########
csharp/src/Apache.Arrow.Adbc/PartitionDescriptor.cs:
##########
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Apache.Arrow.Adbc
+{
+    public struct PartitionDescriptor : IEquatable<PartitionDescriptor>
+    {
+        readonly byte[] _descriptor;
+
+        public PartitionDescriptor(byte[] descriptor)
+        {
+            _descriptor = descriptor;
+        }
+
+        public override bool Equals(object obj)
+        {
+            PartitionDescriptor? other = obj as PartitionDescriptor?;
+            return other != null && Equals(other.Value);
+        }
+
+        public bool Equals(PartitionDescriptor other)
+        {
+            if (_descriptor.Length != other._descriptor.Length)
+            {
+                return false;
+            }
+            for (int i = 0; i < _descriptor.Length; i++)
+            {
+                if (_descriptor[i] != other._descriptor[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public override int GetHashCode()
+        {
+            return base.GetHashCode();
+        }

Review Comment:
   Is there a need to override this then? (Or should it take into account `_descriptor`?)



##########
csharp/src/Apache.Arrow.Adbc/AdbcStatement.cs:
##########
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using Apache.Arrow.Ipc;
+
+namespace Apache.Arrow.Adbc
+{
+    /// <summary>
+    /// Statements may represent queries or prepared statements. Statements may be used multiple times and can be reconfigured (e.g. they can be reused to execute multiple different queries).
+    /// </summary>
+    public abstract class AdbcStatement : IDisposable
+    {
+        public AdbcStatement()
+        {
+            
+        }
+
+        /// <summary>
+        /// Gets or sets a SQL query to be executed on this statement.
+        /// </summary>
+        public virtual string SqlQuery { get; set; }
+
+        /// <summary>
+        /// Gets or sets the Substrait plan.
+        /// </summary>
+        public virtual byte[] SubstraitPlan
+        {
+            get { throw new NotImplementedException(); }
+            set { throw new NotImplementedException(); }
+        }
+
+        public virtual void Bind(RecordBatch batch,Schema schema)
+        {
+            throw AdbcException.NotImplemented("Statement does not support Bind");
+        }
+
+        /// <summary>
+        /// Executes the statement and returns a tuple containing the number of records and the <see cref="IArrowArrayStream"/>.
+        /// </summary>
+        /// <returns>A <see cref="ValueTuple"/> where the first item is the number of records and the second is the <see cref="IArrowArrayStream"/>.</returns>
+        public abstract QueryResult ExecuteQuery();
+
+        /// <summary>
+        /// Executes the statement and returns a tuple containing the number of records and the <see cref="IArrowArrayStream"/>.
+        /// </summary>
+        /// <returns>A <see cref="ValueTuple"/> where the first item is the number of records and the second is the <see cref="IArrowArrayStream"/>.</returns>
+        public virtual async ValueTask<QueryResult> ExecuteQueryAsync()
+        {
+            return await Task.Run(() => ExecuteQuery());
+        }
+
+        /// <summary>
+        /// Executes an update command and returns the number of records affected.
+        /// </summary>
+        /// <returns></returns>
+        /// <exception cref="NotImplementedException"></exception>
+        public abstract UpdateResult ExecuteUpdate();
+
+        /// <summary>
+        /// Executes an update command and returns the number of records affected.
+        /// </summary>
+        /// <returns></returns>
+        /// <exception cref="NotImplementedException"></exception>
+        public virtual async Task<UpdateResult> ExecuteUpdateAsync()
+        {
+            return await Task.Run(() => ExecuteUpdate());
+        }
+
+        /// <summary>
+        /// Execute a result set-generating query and get a list of partitions of the result set.
+        /// </summary>
+        /// <returns><see cref="PartitionedResult"/></returns>
+        public virtual PartitionedResult ExecutePartitioned()
+        {
+            throw AdbcException.NotImplemented("Statement does not support ExecutePartitioned");
+        }
+
+        /// <summary>
+        /// Get the schema for bound parameters.
+        /// </summary>
+        /// <returns><see cref="Schema"/></returns>
+        public virtual Schema GetParameterSchema()
+        {
+            throw AdbcException.NotImplemented("Statement does not support GetParameterSchema");
+        }
+
+        /// <summary>
+        ///  Turn this statement into a prepared statement to be
+        ///  executed multiple times.
+        /// </summary>
+        public virtual void Prepare()
+        {
+            throw AdbcException.NotImplemented("Statement does not support Prepare");
+        }
+
+        public virtual void Dispose()
+        {
+        }
+
+        /// <summary>
+        /// Gets a value from the Arrow array at the specified index, using the Field metadata for information.
+        /// </summary>
+        /// <param name="arrowArray">The Arrow array.</param>
+        /// <param name="field">The <see cref="Field"/> from the <see cref="Schema"/> that can be used for metadata inspection.</param>
+        /// <param name="index">The index in the array to get the value from.</param>
+        /// <returns></returns>
+        public abstract object GetValue(IArrowArray arrowArray, Field field, int index);
+
+        /// <summary>
+        /// For decimals, Arrow throws an OverflowException if a value is < decimal.min or > decimal.max
+        /// So parse the numeric value and return it as a string, if possible
+        /// </summary>
+        /// <param name="oex"></param>
+        /// <returns>A string value of the decimal that threw the exception or rethrows the OverflowException.</returns>
+        /// <exception cref="ArgumentNullException"></exception>
+        public virtual string ParseDecimalValueFromOverflowException(OverflowException oex)

Review Comment:
   This still doesn't feel like it belongs in the public API, at least not as part of a Statement object



##########
csharp/src/Apache.Arrow.Adbc/AdbcConnection.cs:
##########
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Apache.Arrow.Ipc;
+
+namespace Apache.Arrow.Adbc
+{
+    /// <summary>
+    /// Provides methods for query execution, managing prepared statements, using transactions, and so on.
+    /// </summary>
+    public abstract class AdbcConnection : IDisposable
+    {
+        private bool _autoCommit = true;
+        private bool _readOnly = false;
+        private IsolationLevel _isolationLevel = IsolationLevel.Default;

Review Comment:
   These might have to be `protected` for implementors?



##########
csharp/src/Apache.Arrow.Adbc.FlightSql/FlightSqlStatement.cs:
##########
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Threading.Tasks;
+using Apache.Arrow.Adbc.Core;
+using Apache.Arrow.Flight;
+using Grpc.Core;
+
+namespace Apache.Arrow.Adbc.FlightSql
+{
+    /// <summary>
+    /// A Flight SQL implementation of <see cref="AdbcStatement"/>.
+    /// </summary>
+    public class FlightSqlStatement : AdbcStatement
+    {
+        private FlightSqlConnection flightSqlConnection;
+        
+        public FlightSqlStatement(FlightSqlConnection flightSqlConnection)
+        {
+            this.flightSqlConnection = flightSqlConnection;
+        }
+
+        public override async ValueTask<QueryResult> ExecuteQueryAsync()
+        {
+            FlightInfo info = await GetInfo(this.SqlQuery, this.flightSqlConnection.Metadata);
+
+            return new QueryResult(info.TotalRecords, new FlightSqlResult(this.flightSqlConnection, info));
+        }
+
+        public override QueryResult ExecuteQuery()
+        {
+            return ExecuteQueryAsync().Result;
+        }
+
+        public override UpdateResult ExecuteUpdate()
+        {
+            throw new NotImplementedException();
+        }
+
+        public async ValueTask<FlightInfo> GetInfo(string query, Metadata headers)
+        {
+            FlightDescriptor commandDescripter = FlightDescriptor.CreateCommandDescriptor(query);
+
+            return await this.flightSqlConnection.FlightClient.GetInfo(commandDescripter, headers).ResponseAsync;
+        }
+
+        /// <summary>
+        /// Gets a value from the Arrow array at the specified index using the Arrow field for metadata.
+        /// </summary>
+        /// <param name="arrowArray"></param>
+        /// <param name="field"></param>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public override object GetValue(IArrowArray arrowArray, Field field, int index)

Review Comment:
   I'm still skeptical that the way to deal with this is to provide accessors for each driver, rather than having the driver convert things to the proper Arrow type in the first place.



##########
csharp/src/Apache.Arrow.Adbc.FlightSql/FlightSqlStatement.cs:
##########
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Threading.Tasks;
+using Apache.Arrow.Adbc.Core;
+using Apache.Arrow.Flight;
+using Grpc.Core;
+
+namespace Apache.Arrow.Adbc.FlightSql
+{
+    /// <summary>
+    /// A Flight SQL implementation of <see cref="AdbcStatement"/>.
+    /// </summary>
+    public class FlightSqlStatement : AdbcStatement
+    {
+        private FlightSqlConnection flightSqlConnection;
+        
+        public FlightSqlStatement(FlightSqlConnection flightSqlConnection)
+        {
+            this.flightSqlConnection = flightSqlConnection;
+        }
+
+        public override async ValueTask<QueryResult> ExecuteQueryAsync()
+        {
+            FlightInfo info = await GetInfo(this.SqlQuery, this.flightSqlConnection.Metadata);
+
+            return new QueryResult(info.TotalRecords, new FlightSqlResult(this.flightSqlConnection, info));
+        }
+
+        public override QueryResult ExecuteQuery()
+        {
+            return ExecuteQueryAsync().Result;
+        }
+
+        public override UpdateResult ExecuteUpdate()
+        {
+            throw new NotImplementedException();
+        }
+
+        public async ValueTask<FlightInfo> GetInfo(string query, Metadata headers)
+        {
+            FlightDescriptor commandDescripter = FlightDescriptor.CreateCommandDescriptor(query);
+
+            return await this.flightSqlConnection.FlightClient.GetInfo(commandDescripter, headers).ResponseAsync;
+        }
+
+        /// <summary>
+        /// Gets a value from the Arrow array at the specified index using the Arrow field for metadata.
+        /// </summary>
+        /// <param name="arrowArray"></param>
+        /// <param name="field"></param>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public override object GetValue(IArrowArray arrowArray, Field field, int index)

Review Comment:
   At least, this implementation could be used as the default implementation, and then Flight SQL shouldn't need to override anything and other drivers could use it as a starting point?



##########
csharp/src/Apache.Arrow.Adbc.FlightSql/FlightSqlStatement.cs:
##########
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Threading.Tasks;
+using Apache.Arrow.Adbc.Core;
+using Apache.Arrow.Flight;
+using Grpc.Core;
+
+namespace Apache.Arrow.Adbc.FlightSql
+{
+    /// <summary>
+    /// A Flight SQL implementation of <see cref="AdbcStatement"/>.
+    /// </summary>
+    public class FlightSqlStatement : AdbcStatement
+    {
+        private FlightSqlConnection flightSqlConnection;
+        
+        public FlightSqlStatement(FlightSqlConnection flightSqlConnection)
+        {
+            this.flightSqlConnection = flightSqlConnection;
+        }
+
+        public override async ValueTask<QueryResult> ExecuteQueryAsync()
+        {
+            FlightInfo info = await GetInfo(this.SqlQuery, this.flightSqlConnection.Metadata);
+
+            return new QueryResult(info.TotalRecords, new FlightSqlResult(this.flightSqlConnection, info));
+        }
+
+        public override QueryResult ExecuteQuery()
+        {
+            return ExecuteQueryAsync().Result;
+        }
+
+        public override UpdateResult ExecuteUpdate()
+        {
+            throw new NotImplementedException();
+        }
+
+        public async ValueTask<FlightInfo> GetInfo(string query, Metadata headers)
+        {
+            FlightDescriptor commandDescripter = FlightDescriptor.CreateCommandDescriptor(query);
+
+            return await this.flightSqlConnection.FlightClient.GetInfo(commandDescripter, headers).ResponseAsync;
+        }
+
+        /// <summary>
+        /// Gets a value from the Arrow array at the specified index using the Arrow field for metadata.
+        /// </summary>
+        /// <param name="arrowArray"></param>
+        /// <param name="field"></param>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public override object GetValue(IArrowArray arrowArray, Field field, int index)

Review Comment:
   Though it's also possible that the C♯ library lacks efficient/convenient ways to implement such conversions (as compared to Go or C++).
   
   Again at least for Snowflake the ADBC Go driver already does these conversions.



##########
csharp/src/Apache.Arrow.Adbc.FlightSql/Apache.Arrow.Adbc.FlightSql.csproj:
##########
@@ -0,0 +1,20 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFrameworks>netstandard2.0;net6.0</TargetFrameworks>
+    <AssemblyName>$(MSBuildProjectName)</AssemblyName>
+    <RootNamespace>Apache.Arrow.Adbc.FlightSql</RootNamespace>
+    <Version>0.1.0</Version>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Apache.Arrow.Flight" Version="12.0.0" />
+    <PackageReference Include="Grpc.Net.Client.Web" Version="2.53.0" />
+    <PackageReference Include="System.Net.Http.WinHttpHandler" Version="7.0.0" />

Review Comment:
   Seems this wasn't addressed?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org