You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2023/02/16 06:51:29 UTC
[spark] branch master updated: [SPARK-42460][CONNECT] Clean-up results in ClientE2ETestSuite
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9626aef10c2 [SPARK-42460][CONNECT] Clean-up results in ClientE2ETestSuite
9626aef10c2 is described below
commit 9626aef10c29c1e405602326e481058a41211dba
Author: Herman van Hovell <he...@databricks.com>
AuthorDate: Wed Feb 15 22:51:11 2023 -0800
[SPARK-42460][CONNECT] Clean-up results in ClientE2ETestSuite
### What changes were proposed in this pull request?
Clean-up results in `ClientE2ETestSuite`.
### Why are the changes needed?
`ClientE2ETestSuite` is very noisy because we do not clean up results. This makes testing a bit annoying.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
It is a test.
Closes #40048 from hvanhovell/SPARK-42460.
Authored-by: Herman van Hovell <he...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
.../org/apache/spark/sql/ClientE2ETestSuite.scala | 29 ++++++++++------------
.../connect/client/util/RemoteSparkSession.scala | 3 +--
3 files changed, 15 insertions(+), 19 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
index 7db44907111..977c823f7c7 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2220,7 +2220,7 @@ class Dataset[T] private[sql] (val session: SparkSession, private[sql] val plan:
def collectResult(): SparkResult = session.execute(plan)
- private def withResult[E](f: SparkResult => E): E = {
+ private[sql] def withResult[E](f: SparkResult => E): E = {
val result = collectResult()
try f(result)
finally {
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index a0dd4746a8b..60ac25ab7ba 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -32,39 +32,36 @@ class ClientE2ETestSuite extends RemoteSparkSession {
// Spark Result
test("spark result schema") {
val df = spark.sql("select val from (values ('Hello'), ('World')) as t(val)")
- val schema = df.collectResult().schema
- assert(schema == StructType(StructField("val", StringType, false) :: Nil))
+ df.withResult { result =>
+ val schema = result.schema
+ assert(schema == StructType(StructField("val", StringType, nullable = false) :: Nil))
+ }
}
test("spark result array") {
val df = spark.sql("select val from (values ('Hello'), ('World')) as t(val)")
- val result = df.collectResult()
+ val result = df.collect()
assert(result.length == 2)
- val array = result.toArray
- assert(array.length == 2)
- assert(array(0).getString(0) == "Hello")
- assert(array(1).getString(0) == "World")
+ assert(result(0).getString(0) == "Hello")
+ assert(result(1).getString(0) == "World")
}
test("simple dataset") {
val df = spark.range(10).limit(3)
- val result = df.collectResult()
+ val result = df.collect()
assert(result.length == 3)
- val array = result.toArray
- assert(array(0).getLong(0) == 0)
- assert(array(1).getLong(0) == 1)
- assert(array(2).getLong(0) == 2)
+ assert(result(0).getLong(0) == 0)
+ assert(result(1).getLong(0) == 1)
+ assert(result(2).getLong(0) == 2)
}
test("simple udf") {
-
def dummyUdf(x: Int): Int = x + 5
val myUdf = udf(dummyUdf _)
val df = spark.range(5).select(myUdf(Column("id")))
-
- val result = df.collectResult()
+ val result = df.collect()
assert(result.length == 5)
- result.toArray.zipWithIndex.foreach { case (v, idx) =>
+ result.zipWithIndex.foreach { case (v, idx) =>
assert(v.getInt(0) == idx + 5)
}
}
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
index 2d9c218b2fb..753e27efac3 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
@@ -130,8 +130,7 @@ trait RemoteSparkSession
// Run a simple query to verify the server is really up and ready
val result = spark
.sql("select val from (values ('Hello'), ('World')) as t(val)")
- .collectResult()
- .toArray
+ .collect()
assert(result.length == 2)
success = true
debug("Spark Connect Server is up.")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org