Posted to commits@spark.apache.org by hv...@apache.org on 2023/02/24 01:53:17 UTC

[spark] branch branch-3.4 updated: [SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL

This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 6752ad6fa1a [SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL
6752ad6fa1a is described below

commit 6752ad6fa1ab0b3a98c45f1c490a476443781ec7
Author: Rui Wang <ru...@databricks.com>
AuthorDate: Thu Feb 23 21:52:50 2023 -0400

    [SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL
    
    ### What changes were proposed in this pull request?
    
    Support the parameterized SQL API (`sql(sqlText, args)`) in the Spark Connect Scala client.
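    
    A minimal usage sketch of the new overloads (parameter names mirror the
    golden-file test added below; the `:name` parameter-marker syntax and the
    session setup are assumed here for illustration):
    
    ```scala
    import scala.collection.JavaConverters._
    
    // A connected Spark Connect Scala client session (setup elided).
    val session: SparkSession = ???
    
    // Scala Map overload.
    session.sql(
      "SELECT * FROM range(100) WHERE id BETWEEN :minId AND :maxId",
      Map("minId" -> "7", "maxId" -> "20"))
    
    // java.util.Map overload; both build the same proto.SQL relation.
    session.sql(
      "SELECT * FROM range(100) WHERE id BETWEEN :minId AND :maxId",
      Map("minId" -> "7", "maxId" -> "20").asJava)
    ```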
    
    ### Why are the changes needed?
    
    Improves API coverage of the Spark Connect Scala client.
    
    ### Does this PR introduce _any_ user-facing change?
    
    NO
    
    ### How was this patch tested?
    
    Unit test: a new golden-file plan generation test in `PlanGenerationTestSuite`.
    
    Closes #40148 from amaliujia/parameterized_sql.
    
    Authored-by: Rui Wang <ru...@databricks.com>
    Signed-off-by: Herman van Hovell <he...@databricks.com>
    (cherry picked from commit ee22a0bf3c91a6b26d608b5fc28e9472eaca6b40)
    Signed-off-by: Herman van Hovell <he...@databricks.com>
---
 .../scala/org/apache/spark/sql/SparkSession.scala  | 39 ++++++++++++++++++++--
 .../apache/spark/sql/PlanGenerationTestSuite.scala |  4 +++
 .../explain-results/parameterized_sql.explain      |  2 ++
 .../query-tests/queries/parameterized_sql.json     |  9 +++++
 .../queries/parameterized_sql.proto.bin            |  5 +++
 5 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 1761e8ce42d..b086db09365 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -22,6 +22,7 @@ import scala.collection.JavaConverters._
 
 import org.apache.arrow.memory.RootAllocator
 
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.connect.proto
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.connect.client.{SparkConnectClient, SparkResult}
@@ -54,14 +55,48 @@ class SparkSession(private val client: SparkConnectClient, private val cleaner:
 
   private[this] val allocator = new RootAllocator()
 
+  /**
+   * Executes a SQL query substituting named parameters by the given arguments, returning the
+   * result as a `DataFrame`. This API eagerly runs DDL/DML commands, but not for SELECT queries.
+   *
+   * @param sqlText
+   *   A SQL statement with named parameters to execute.
+   * @param args
+   *   A map of parameter names to literal values.
+   *
+   * @since 3.4.0
+   */
+  @Experimental
+  def sql(sqlText: String, args: Map[String, String]): DataFrame = {
+    sql(sqlText, args.asJava)
+  }
+
+  /**
+   * Executes a SQL query substituting named parameters by the given arguments, returning the
+   * result as a `DataFrame`. This API eagerly runs DDL/DML commands, but not for SELECT queries.
+   *
+   * @param sqlText
+   *   A SQL statement with named parameters to execute.
+   * @param args
+   *   A map of parameter names to literal values.
+   *
+   * @since 3.4.0
+   */
+  @Experimental
+  def sql(sqlText: String, args: java.util.Map[String, String]): DataFrame = newDataset {
+    builder =>
+      builder
+        .setSql(proto.SQL.newBuilder().setQuery(sqlText).putAllArgs(args))
+  }
+
   /**
    * Executes a SQL query using Spark, returning the result as a `DataFrame`. This API eagerly
    * runs DDL/DML commands, but not for SELECT queries.
    *
    * @since 3.4.0
    */
-  def sql(query: String): DataFrame = newDataset { builder =>
-    builder.setSql(proto.SQL.newBuilder().setQuery(query))
+  def sql(query: String): DataFrame = {
+    sql(query, Map.empty[String, String])
   }
 
   /**
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 9d4ed0f912f..6a54cc88aec 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -223,6 +223,10 @@ class PlanGenerationTestSuite extends ConnectFunSuite with BeforeAndAfterAll wit
     session.sql("select 1")
   }
 
+  test("parameterized sql") {
+    session.sql("select 1", Map("minId" -> "7", "maxId" -> "20"))
+  }
+
   test("range") {
     session.range(1, 10, 1, 2)
   }
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain
new file mode 100644
index 00000000000..7f5aafb1943
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain
@@ -0,0 +1,2 @@
+Project [1 AS 1#0]
++- OneRowRelation
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json
new file mode 100644
index 00000000000..99268661e72
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json
@@ -0,0 +1,9 @@
+{
+  "sql": {
+    "query": "select 1",
+    "args": {
+      "minId": "7",
+      "maxId": "20"
+    }
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin
new file mode 100644
index 00000000000..fd9304b4e47
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin
@@ -0,0 +1,5 @@
+R#
+select 1
+
+minId7
+maxId20
\ No newline at end of file
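
The JSON and proto.bin golden files above capture the relation produced by the
new overloads. As a minimal sketch (the builder calls are taken from the diff;
the standalone setup around them is illustrative only), the same `proto.SQL`
message can be built directly:

```scala
import scala.collection.JavaConverters._

import org.apache.spark.connect.proto

// Mirrors what the java.util.Map overload sends: the raw query text plus the
// name -> literal-value argument map recorded in parameterized_sql.json.
val args = Map("minId" -> "7", "maxId" -> "20")
val sqlRelation = proto.SQL
  .newBuilder()
  .setQuery("select 1")
  .putAllArgs(args.asJava)
  .build()
```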


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org