You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2023/02/24 01:53:17 UTC
[spark] branch branch-3.4 updated: [SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL
This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new 6752ad6fa1a [SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL
6752ad6fa1a is described below
commit 6752ad6fa1ab0b3a98c45f1c490a476443781ec7
Author: Rui Wang <ru...@databricks.com>
AuthorDate: Thu Feb 23 21:52:50 2023 -0400
[SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL
### What changes were proposed in this pull request?
Support parameterized SQL API in Scala client.
### Why are the changes needed?
API coverage
### Does this PR introduce _any_ user-facing change?
NO
### How was this patch tested?
UT
Closes #40148 from amaliujia/parameterized_sql.
Authored-by: Rui Wang <ru...@databricks.com>
Signed-off-by: Herman van Hovell <he...@databricks.com>
(cherry picked from commit ee22a0bf3c91a6b26d608b5fc28e9472eaca6b40)
Signed-off-by: Herman van Hovell <he...@databricks.com>
---
.../scala/org/apache/spark/sql/SparkSession.scala | 39 ++++++++++++++++++++--
.../apache/spark/sql/PlanGenerationTestSuite.scala | 4 +++
.../explain-results/parameterized_sql.explain | 2 ++
.../query-tests/queries/parameterized_sql.json | 9 +++++
.../queries/parameterized_sql.proto.bin | 5 +++
5 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 1761e8ce42d..b086db09365 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -22,6 +22,7 @@ import scala.collection.JavaConverters._
import org.apache.arrow.memory.RootAllocator
+import org.apache.spark.annotation.Experimental
import org.apache.spark.connect.proto
import org.apache.spark.internal.Logging
import org.apache.spark.sql.connect.client.{SparkConnectClient, SparkResult}
@@ -54,14 +55,48 @@ class SparkSession(private val client: SparkConnectClient, private val cleaner:
private[this] val allocator = new RootAllocator()
+ /**
+ * Executes a SQL query substituting named parameters by the given arguments, returning the
+ * result as a `DataFrame`. This API eagerly runs DDL/DML commands, but not for SELECT queries.
+ *
+ * @param sqlText
+ * A SQL statement with named parameters to execute.
+ * @param args
+ * A map of parameter names to literal values.
+ *
+ * @since 3.4.0
+ */
+ @Experimental
+ def sql(sqlText: String, args: Map[String, String]): DataFrame = {
+ sql(sqlText, args.asJava)
+ }
+
+ /**
+ * Executes a SQL query substituting named parameters by the given arguments, returning the
+ * result as a `DataFrame`. This API eagerly runs DDL/DML commands, but not for SELECT queries.
+ *
+ * @param sqlText
+ * A SQL statement with named parameters to execute.
+ * @param args
+ * A map of parameter names to literal values.
+ *
+ * @since 3.4.0
+ */
+ @Experimental
+ def sql(sqlText: String, args: java.util.Map[String, String]): DataFrame = newDataset {
+ builder =>
+ builder
+ .setSql(proto.SQL.newBuilder().setQuery(sqlText).putAllArgs(args))
+ }
+
/**
* Executes a SQL query using Spark, returning the result as a `DataFrame`. This API eagerly
* runs DDL/DML commands, but not for SELECT queries.
*
* @since 3.4.0
*/
- def sql(query: String): DataFrame = newDataset { builder =>
- builder.setSql(proto.SQL.newBuilder().setQuery(query))
+ def sql(query: String): DataFrame = {
+ sql(query, Map.empty[String, String])
}
/**
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 9d4ed0f912f..6a54cc88aec 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -223,6 +223,10 @@ class PlanGenerationTestSuite extends ConnectFunSuite with BeforeAndAfterAll wit
session.sql("select 1")
}
+ test("parameterized sql") {
+ session.sql("select 1", Map("minId" -> "7", "maxId" -> "20"))
+ }
+
test("range") {
session.range(1, 10, 1, 2)
}
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain
new file mode 100644
index 00000000000..7f5aafb1943
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain
@@ -0,0 +1,2 @@
+Project [1 AS 1#0]
++- OneRowRelation
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json
new file mode 100644
index 00000000000..99268661e72
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json
@@ -0,0 +1,9 @@
+{
+ "sql": {
+ "query": "select 1",
+ "args": {
+ "minId": "7",
+ "maxId": "20"
+ }
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin
new file mode 100644
index 00000000000..fd9304b4e47
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin
@@ -0,0 +1,5 @@
+R#
+select 1
+
+minId7
+maxId20
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org