You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2023/02/16 06:51:29 UTC
[spark] branch master updated: [SPARK-42460][CONNECT] Clean-up results in ClientE2ETestSuite
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9626aef10c2 [SPARK-42460][CONNECT] Clean-up results in ClientE2ETestSuite
9626aef10c2 is described below
commit 9626aef10c29c1e405602326e481058a41211dba
Author: Herman van Hovell <he...@databricks.com>
AuthorDate: Wed Feb 15 22:51:11 2023 -0800
[SPARK-42460][CONNECT] Clean-up results in ClientE2ETestSuite
### What changes were proposed in this pull request?
Clean-up results in `ClientE2ETestSuite`.
### Why are the changes needed?
`ClientE2ETestSuite` is very noisy because we do not clean up results. This makes testing a bit annoying.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
It is a test.
Closes #40048 from hvanhovell/SPARK-42460.
Authored-by: Herman van Hovell <he...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
.../org/apache/spark/sql/ClientE2ETestSuite.scala | 29 ++++++++++------------
.../connect/client/util/RemoteSparkSession.scala | 3 +--
3 files changed, 15 insertions(+), 19 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
index 7db44907111..977c823f7c7 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2220,7 +2220,7 @@ class Dataset[T] private[sql] (val session: SparkSession, private[sql] val plan:
def collectResult(): SparkResult = session.execute(plan)
- private def withResult[E](f: SparkResult => E): E = {
+ private[sql] def withResult[E](f: SparkResult => E): E = {
val result = collectResult()
try f(result)
finally {
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index a0dd4746a8b..60ac25ab7ba 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -32,39 +32,36 @@ class ClientE2ETestSuite extends RemoteSparkSession {
// Spark Result
test("spark result schema") {
val df = spark.sql("select val from (values ('Hello'), ('World')) as t(val)")
- val schema = df.collectResult().schema
- assert(schema == StructType(StructField("val", StringType, false) :: Nil))
+ df.withResult { result =>
+ val schema = result.schema
+ assert(schema == StructType(StructField("val", StringType, nullable = false) :: Nil))
+ }
}
test("spark result array") {
val df = spark.sql("select val from (values ('Hello'), ('World')) as t(val)")
- val result = df.collectResult()
+ val result = df.collect()
assert(result.length == 2)
- val array = result.toArray
- assert(array.length == 2)
- assert(array(0).getString(0) == "Hello")
- assert(array(1).getString(0) == "World")
+ assert(result(0).getString(0) == "Hello")
+ assert(result(1).getString(0) == "World")
}
test("simple dataset") {
val df = spark.range(10).limit(3)
- val result = df.collectResult()
+ val result = df.collect()
assert(result.length == 3)
- val array = result.toArray
- assert(array(0).getLong(0) == 0)
- assert(array(1).getLong(0) == 1)
- assert(array(2).getLong(0) == 2)
+ assert(result(0).getLong(0) == 0)
+ assert(result(1).getLong(0) == 1)
+ assert(result(2).getLong(0) == 2)
}
test("simple udf") {
-
def dummyUdf(x: Int): Int = x + 5
val myUdf = udf(dummyUdf _)
val df = spark.range(5).select(myUdf(Column("id")))
-
- val result = df.collectResult()
+ val result = df.collect()
assert(result.length == 5)
- result.toArray.zipWithIndex.foreach { case (v, idx) =>
+ result.zipWithIndex.foreach { case (v, idx) =>
assert(v.getInt(0) == idx + 5)
}
}
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
index 2d9c218b2fb..753e27efac3 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
@@ -130,8 +130,7 @@ trait RemoteSparkSession
// Run a simple query to verify the server is really up and ready
val result = spark
.sql("select val from (values ('Hello'), ('World')) as t(val)")
- .collectResult()
- .toArray
+ .collect()
assert(result.length == 2)
success = true
debug("Spark Connect Server is up.")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org