Posted to commits@kyuubi.apache.org by bo...@apache.org on 2023/10/25 12:00:17 UTC

[kyuubi] branch branch-1.8 updated: [KYUUBI #5380][UT] Create PySpark batch jobs tests for RESTful API

This is an automated email from the ASF dual-hosted git repository.

bowenliang pushed a commit to branch branch-1.8
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/branch-1.8 by this push:
     new e551b2367 [KYUUBI #5380][UT] Create PySpark batch jobs tests for RESTful API
e551b2367 is described below

commit e551b23673a607f38889cffe8e11886936b6e8f1
Author: weixi <we...@outlook.com>
AuthorDate: Wed Oct 25 01:08:29 2023 +0800

    [KYUUBI #5380][UT] Create PySpark batch jobs tests for RESTful API
    
    ### _Why are the changes needed?_
    
    To close #5380.
    
    As PySpark jobs have become a popular approach to data exploration and processing, we need tests covering the creation of PySpark batch jobs via the RESTful API.
    
    Following the existing Spark JAR unit tests, two PySpark job unit tests were added; both submit the simple Pi-computing job from the Spark examples.
    
    #### case1, "pyspark submit - basic batch rest client with existing resource file"
    It is almost the same as the Spark JAR job test case, except for the following two points (see the sketch after this list):
    1. The `batchType` param must be set to `PYSPARK`, not `SPARK`. Please refer to #3836 for details.
    2. For PySpark jobs, the `className` param is unused and should be set to null.
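    
    A rough sketch of the resulting request, assuming the `newBatchRequest` helper from `BatchTestHelper` takes the batch type, resource, main class, and app name in that order with defaulted conf/args (as suggested by the diff below); `piScriptPath` is a hypothetical local path:
    
    ```scala
    // Spark JAR job: batchType "SPARK", a JAR resource, and a main class.
    newBatchRequest(
      "SPARK",
      sparkBatchTestResource.get,          // the spark-examples JAR
      "org.apache.spark.examples.SparkPi",
      "Spark Pi")
    
    // PySpark job: batchType "PYSPARK", a .py resource, and no main class.
    newBatchRequest(
      "PYSPARK",
      piScriptPath,                        // hypothetical: examples/src/main/python/pi.py
      null,                                // className is unused for PySpark jobs
      "PythonPi")
    ```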
    
    #### case2, "pyspark submit - basic batch rest client with uploading resource file"
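    Same as case1, except that the local Python script is uploaded along with the request. A rough sketch of the difference, based on the `BatchRestApi.createBatch` calls in the diff below (`piScriptFile` is a hypothetical local `java.io.File` pointing at pi.py):
    
    ```scala
    // case1: the resource path inside the request must point to a file
    // that already exists where the server can read it.
    val batchA = batchRestApi.createBatch(requestObj)
    
    // case2: the local script is uploaded together with the request as
    // the batch resource.
    val batchB = batchRestApi.createBatch(requestObj, piScriptFile)
    ```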
    
    Together, these two test cases verify that simple PySpark jobs can be submitted successfully.
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    - [ ] Add screenshots for manual tests if appropriate
    - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before making a pull request
    
    ### _Was this patch authored or co-authored using generative AI tooling?_
    
    No
    
    Closes #5498 from weixi62961/unittest-batchapi-pyspark-simple.
    
    Closes #5380
    
    b693efc1b [Bowen Liang] simplify sparkBatchTestResource
    72a92b5ee [Bowen Liang] Update kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/PySparkBatchRestApiSuite.scala
    b2035a3b2 [weixi] remove no necessary wrapper object "PySparkJobPI"
    27d12e8bc [weixi] rename from BatchRestApiPySparkSuite to PySparkBatchRestApiSuite
    e680e604a [weixi] Create a dedicated batch API suite for PySpark jobs.
    dc8b6bfa1 [weixi] add 2 test cases for pyspark batch job submit.
    
    Lead-authored-by: weixi <we...@outlook.com>
    Co-authored-by: Bowen Liang <li...@gf.com.cn>
    Co-authored-by: Bowen Liang <bo...@apache.org>
    Signed-off-by: liangbowen <li...@gf.com.cn>
    (cherry picked from commit 5cff4fb98cdc8d8a8d9a1c289a17a17a52a787e5)
    Signed-off-by: liangbowen <li...@gf.com.cn>
---
 .../scala/org/apache/kyuubi/BatchTestHelper.scala  |  4 +-
 .../rest/client/PySparkBatchRestApiSuite.scala     | 79 ++++++++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/BatchTestHelper.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/BatchTestHelper.scala
index 27298dbf1..f49ebbeb1 100644
--- a/kyuubi-server/src/test/scala/org/apache/kyuubi/BatchTestHelper.scala
+++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/BatchTestHelper.scala
@@ -26,6 +26,8 @@ import org.apache.kyuubi.config.KyuubiConf
 import org.apache.kyuubi.engine.spark.SparkProcessBuilder
 
 trait BatchTestHelper {
+  val sparkBatchTestBatchType = "SPARK"
+
   val sparkBatchTestMainClass = "org.apache.spark.examples.SparkPi"
 
   val sparkBatchTestAppName = "Spark Pi" // the app name is hard coded in spark example code
@@ -56,7 +58,7 @@ trait BatchTestHelper {
       conf: Map[String, String] = Map.empty,
       args: Seq[String] = Seq.empty): BatchRequest = {
     newBatchRequest(
-      "SPARK",
+      sparkBatchTestBatchType,
       sparkBatchTestResource.get,
       sparkBatchTestMainClass,
       sparkBatchTestAppName,
diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/PySparkBatchRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/PySparkBatchRestApiSuite.scala
new file mode 100644
index 000000000..8e33eb382
--- /dev/null
+++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/PySparkBatchRestApiSuite.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.server.rest.client
+
+import java.nio.file.Paths
+
+import org.apache.kyuubi.{BatchTestHelper, RestClientTestHelper}
+import org.apache.kyuubi.client.{BatchRestApi, KyuubiRestClient}
+import org.apache.kyuubi.client.api.v1.dto.Batch
+import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.engine.spark.SparkProcessBuilder
+
+class PySparkBatchRestApiSuite extends RestClientTestHelper with BatchTestHelper {
+  override val sparkBatchTestBatchType: String = "PYSPARK"
+  override val sparkBatchTestMainClass: String = null // For PySpark, mainClass isn't needed.
+  override val sparkBatchTestAppName: String = "PythonPi"
+  override val sparkBatchTestResource: Option[String] = {
+    val sparkProcessBuilder = new SparkProcessBuilder("kyuubi", KyuubiConf())
+    val piScript =
+      Paths.get(sparkProcessBuilder.sparkHome, "examples/src/main/python/pi.py")
+    Some(piScript.toAbsolutePath.toString)
+  }
+
+  test("pyspark submit - basic batch rest client with existing resource file") {
+    val basicKyuubiRestClient: KyuubiRestClient =
+      KyuubiRestClient.builder(baseUri.toString)
+        .authHeaderMethod(KyuubiRestClient.AuthHeaderMethod.BASIC)
+        .username(ldapUser)
+        .password(ldapUserPasswd)
+        .socketTimeout(30000)
+        .build()
+    val batchRestApi: BatchRestApi = new BatchRestApi(basicKyuubiRestClient)
+
+    val requestObj = newSparkBatchRequest(
+      conf = Map("spark.master" -> "local"),
+      args = Seq("10"))
+    val batch: Batch = batchRestApi.createBatch(requestObj)
+
+    assert(batch.getKyuubiInstance === fe.connectionUrl)
+    assert(batch.getBatchType === "PYSPARK")
+    basicKyuubiRestClient.close()
+  }
+
+  test("pyspark submit - basic batch rest client with uploading resource file") {
+    val basicKyuubiRestClient: KyuubiRestClient =
+      KyuubiRestClient.builder(baseUri.toString)
+        .authHeaderMethod(KyuubiRestClient.AuthHeaderMethod.BASIC)
+        .username(ldapUser)
+        .password(ldapUserPasswd)
+        .socketTimeout(30000)
+        .build()
+    val batchRestApi: BatchRestApi = new BatchRestApi(basicKyuubiRestClient)
+
+    val requestObj = newSparkBatchRequest(
+      conf = Map("spark.master" -> "local"),
+      args = Seq("10"))
+    val resourceFile = Paths.get(sparkBatchTestResource.get).toFile
+    val batch: Batch = batchRestApi.createBatch(requestObj, resourceFile)
+
+    assert(batch.getKyuubiInstance === fe.connectionUrl)
+    assert(batch.getBatchType === "PYSPARK")
+    basicKyuubiRestClient.close()
+  }
+}