Posted to commits@kyuubi.apache.org by ch...@apache.org on 2023/01/04 12:43:14 UTC

[kyuubi] branch master updated: [KYUUBI #3851][SPARK] Support auto set up `spark.master` when Kyuubi running inside Pod

This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 5fdd44fe6 [KYUUBI #3851][SPARK] Support auto set up `spark.master` when Kyuubi running inside Pod
5fdd44fe6 is described below

commit 5fdd44fe6b20ab1a4a85b80156bbb8d0bb14d214
Author: xuefeimiaoao <12...@qq.com>
AuthorDate: Wed Jan 4 20:42:28 2023 +0800

    [KYUUBI #3851][SPARK] Support auto set up `spark.master` when Kyuubi running inside Pod
    
    ### _Why are the changes needed?_
    
    To close https://github.com/apache/incubator-kyuubi/issues/3851.
    With this change, we can deploy Spark on Kubernetes without configuring `spark.master` explicitly, even when the API server URL is not exposed to us.
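
    As a rough sketch of the idea (`inferK8sMaster` is a hypothetical name used for illustration, not the method added by this patch): Kubernetes injects `KUBERNETES_SERVICE_HOST` and `KUBERNETES_SERVICE_PORT` into every pod, so when both are present and no master was configured, the in-cluster master URL can be derived from them.

    ```scala
    // Minimal sketch, assuming only the env vars Kubernetes injects into each pod.
    def inferK8sMaster(configured: Option[String]): Option[String] =
      (configured,
        sys.env.get("KUBERNETES_SERVICE_HOST"),
        sys.env.get("KUBERNETES_SERVICE_PORT")) match {
        // Fill in the master only when none was configured explicitly.
        case (None, Some(host), Some(port)) => Some(s"k8s://https://$host:$port")
        case _ => configured
      }
    ```

    An explicitly configured master always wins; the fallback fires only when both env vars are present, which is how a process can tell it is running inside a pod.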
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [x] Add screenshots for manual tests if appropriate
    When we do not set `spark.master`:
    ![WeCom screenshot_16692948916568](https://user-images.githubusercontent.com/76927591/203790820-75373068-c407-4d31-9860-a13b136a084e.png)
    
    Kyuubi adds `spark.master` when `kyuubi.kubernetes.engine.master.override` is set to true, as sketched below.
    ![WeCom screenshot_16692948189845](https://user-images.githubusercontent.com/76927591/203790634-03fa0181-8405-4551-9af8-f394c58747a8.png)
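
    For reference, a hedged sketch of the outcome the screenshot shows; the service address below is an illustrative in-cluster default, not a value taken from the test environment:

    ```scala
    // Hypothetical in-pod values; Kubernetes typically exposes the API server
    // through the `kubernetes` service address, e.g. 10.96.0.1:443.
    val (host, port) = ("10.96.0.1", "443")
    // Mirrors the URL format constructed in SparkProcessBuilder.completeMasterUrl.
    val masterUrl = s"k8s://https://$host:$port" // k8s://https://10.96.0.1:443
    ```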
    
    - [ ] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #3852 from xuefeimiaoao/feature/override_spark_master_with_pod_env.
    
    Closes #3851
    
    f3c126d8 [xuefeimiaoao] [KYUUBI #3851] [Improvement] Support AutoComplete of `spark.master` with kubernetes environment
    a6395631 [xuefeimiaoao] [KYUUBI #4058] [IT][Test][K8S] Fix the missing of connectionConf of SparkQueryTests
    
    Authored-by: xuefeimiaoao <12...@qq.com>
    Signed-off-by: Cheng Pan <ch...@apache.org>
---
 .../engine/KubernetesApplicationOperation.scala    |  2 ++
 .../org/apache/kyuubi/engine/ProcBuilder.scala     |  8 ++++++
 .../engine/spark/SparkBatchProcessBuilder.scala    |  2 ++
 .../kyuubi/engine/spark/SparkProcessBuilder.scala  | 30 ++++++++++++++++++++++
 4 files changed, 42 insertions(+)

diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
index 81e353b84..bee69b117 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
@@ -138,6 +138,8 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging {
 object KubernetesApplicationOperation extends Logging {
   val LABEL_KYUUBI_UNIQUE_KEY = "kyuubi-unique-tag"
   val SPARK_APP_ID_LABEL = "spark-app-selector"
+  val KUBERNETES_SERVICE_HOST = "KUBERNETES_SERVICE_HOST"
+  val KUBERNETES_SERVICE_PORT = "KUBERNETES_SERVICE_PORT"
 
   def toApplicationState(state: String): ApplicationState = state match {
     // https://github.com/kubernetes/kubernetes/blob/master/pkg/apis/core/types.go#L2396
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
index 7dc295ce5..5b69b02f5 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
@@ -106,6 +106,14 @@ trait ProcBuilder {
 
   protected val extraEngineLog: Option[OperationLog]
 
+  /**
+   * Set `engine.master` if KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT
+   * are defined, so the engine can be deployed on Kubernetes without setting
+   * `engine.master` explicitly when Kyuubi servers run inside pods; this also
+   * helps when the API server is not exposed to us.
+   */
+  protected def completeMasterUrl(conf: KyuubiConf) = {}
+
   protected val workingDir: Path = {
     env.get("KYUUBI_WORK_DIR_ROOT").map { root =>
       val workingRoot = Paths.get(root).toAbsolutePath
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala
index e7de6baa4..98f9ea5a3 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala
@@ -45,6 +45,8 @@ class SparkBatchProcessBuilder(
     }
 
     val batchKyuubiConf = new KyuubiConf(false)
+    // complete `spark.master` if absent on kubernetes
+    completeMasterUrl(batchKyuubiConf)
     batchConf.foreach(entry => { batchKyuubiConf.set(entry._1, entry._2) })
     // tag batch application
     KyuubiApplicationManager.tagApplication(batchId, "spark", clusterManager(), batchKyuubiConf)
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
index 41fd9b2ad..874a36c00 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
@@ -28,6 +28,7 @@ import org.apache.hadoop.security.UserGroupInformation
 import org.apache.kyuubi._
 import org.apache.kyuubi.config.KyuubiConf
 import org.apache.kyuubi.engine.{KyuubiApplicationManager, ProcBuilder}
+import org.apache.kyuubi.engine.KubernetesApplicationOperation.{KUBERNETES_SERVICE_HOST, KUBERNETES_SERVICE_PORT}
 import org.apache.kyuubi.ha.HighAvailabilityConf
 import org.apache.kyuubi.ha.client.AuthTypes
 import org.apache.kyuubi.operation.log.OperationLog
@@ -55,6 +56,32 @@ class SparkProcessBuilder(
 
   override def mainClass: String = "org.apache.kyuubi.engine.spark.SparkSQLEngine"
 
+  /**
+   * Set `spark.master` if KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT
+   * are defined, so Spark can be deployed on Kubernetes without setting
+   * `spark.master` explicitly when Kyuubi servers run inside pods; this also
+   * helps when the API server is not exposed to us.
+   */
+  override protected def completeMasterUrl(conf: KyuubiConf): Unit = {
+    try {
+      (
+        clusterManager(),
+        sys.env.get(KUBERNETES_SERVICE_HOST),
+        sys.env.get(KUBERNETES_SERVICE_PORT)) match {
+        case (None, Some(kubernetesServiceHost), Some(kubernetesServicePort)) =>
+          // According to "https://kubernetes.io/docs/concepts/architecture/control-plane-
+          // node-communication/#node-to-control-plane", the API server is configured to listen
+          // for remote connections on a secure HTTPS port (typically 443), so we set https here.
+          val masterURL = s"k8s://https://${kubernetesServiceHost}:${kubernetesServicePort}"
+          conf.set(MASTER_KEY, masterURL)
+        case _ =>
+      }
+    } catch {
+      case e: Exception =>
+        warn("Failed when setting up spark.master with kubernetes environment automatically.", e)
+    }
+  }
+
   /**
    * Converts kyuubi config key so that Spark could identify.
    * - If the key is start with `spark.`, keep it AS IS as it is a Spark Conf
@@ -72,6 +99,9 @@ class SparkProcessBuilder(
   }
 
   override protected val commands: Array[String] = {
+    // complete `spark.master` if absent on kubernetes
+    completeMasterUrl(conf)
+
     KyuubiApplicationManager.tagApplication(engineRefId, shortName, clusterManager(), conf)
     val buffer = new ArrayBuffer[String]()
     buffer += executable