You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ya...@apache.org on 2022/04/17 12:31:56 UTC

[incubator-kyuubi] branch master updated: [KYUUBI #2360] [Subtask] Configuring Hive engine heap memory and java opts

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 26d52faa7 [KYUUBI #2360] [Subtask] Configuring Hive engine heap memory and java opts
26d52faa7 is described below

commit 26d52faa7aea9d4e3aafac79ba699cf1a829bf79
Author: Min Zhao <zh...@163.com>
AuthorDate: Sun Apr 17 20:31:45 2022 +0800

    [KYUUBI #2360] [Subtask] Configuring Hive engine heap memory and java opts
    
    ### _Why are the changes needed?_
    
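    Make the Hive engine's heap memory, extra Java options and extra classpath configurable via `kyuubi.engine.hive.memory`, `kyuubi.engine.hive.java.options` and `kyuubi.engine.hive.extra.classpath`.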
    
    ### _How was this patch tested?_
    - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [ ] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #2371 from zhaomin1423/2360.
    
    Closes #2360
    
    fc372571 [Min Zhao] [KYUUBI #2360] [Subtask] Configuring Hive engine heap memory and java opts
    52547d9c [Min Zhao] add unit tests
    
    Authored-by: Min Zhao <zh...@163.com>
    Signed-off-by: Kent Yao <ya...@apache.org>
---
 docs/deployment/settings.md                        |  3 ++
 .../org/apache/kyuubi/config/KyuubiConf.scala      | 22 +++++++++++++++
 .../kyuubi/engine/hive/HiveProcessBuilder.scala    | 21 +++++++++-----
 .../engine/hive/HiveProcessBuilderSuite.scala      | 33 ++++++++++++++++++++--
 4 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md
index 304e6441e..366e9ff31 100644
--- a/docs/deployment/settings.md
+++ b/docs/deployment/settings.md
@@ -197,6 +197,9 @@ Key | Default | Meaning | Type | Since
 <code>kyuubi.engine.deregister.job.max.failures</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>4</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>Number of failures of job before deregistering the engine.</div>|<div style='width: 30pt'>int</div>|<div style='width: 20pt'>1.2.0</div>
 <code>kyuubi.engine.event.json.log.path</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>file:///tmp/kyuubi/events</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>The location of all the engine events go for the builtin JSON logger.<ul><li>Local Path: start with 'file://'</li><li>HDFS Path: start with 'hdfs://'</li></ul></div>|<div style='width: 30pt'>string</div>|<div style='width: 20pt'>1.3.0</div>
 <code>kyuubi.engine.event.loggers</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>SPARK</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>A comma separated list of engine history loggers, where engine/session/operation etc events go. We use spark logger by default.<ul> <li>SPARK: the events will be written to the spark listener bus.</li> <li>JSON: the events will be written to the location of kyuubi.engine.event.json.log.path</li> <li>JDB [...]
+<code>kyuubi.engine.hive.extra.classpath</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>&lt;undefined&gt;</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>The extra classpath for the hive query engine, for configuring location of hadoop client jars, etc</div>|<div style='width: 30pt'>string</div>|<div style='width: 20pt'>1.6.0</div>
+<code>kyuubi.engine.hive.java.options</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>&lt;undefined&gt;</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>The extra java options for the hive query engine</div>|<div style='width: 30pt'>string</div>|<div style='width: 20pt'>1.6.0</div>
+<code>kyuubi.engine.hive.memory</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>1g</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>The heap memory for the hive query engine</div>|<div style='width: 30pt'>string</div>|<div style='width: 20pt'>1.6.0</div>
 <code>kyuubi.engine.initialize.sql</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>SHOW DATABASES</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver.</div>|<div style='width: 30pt'>seq</div> [...]
 <code>kyuubi.engine.operation.log.dir.root</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>engine_operation_logs</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>Root directory for query operation log at engine-side.</div>|<div style='width: 30pt'>string</div>|<div style='width: 20pt'>1.4.0</div>
 <code>kyuubi.engine.pool.name</code>|<div style='width: 65pt;word-wrap: break-word;white-space: normal'>engine-pool</div>|<div style='width: 170pt;word-wrap: break-word;white-space: normal'>The name of engine pool.</div>|<div style='width: 30pt'>string</div>|<div style='width: 20pt'>1.5.0</div>
diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
index d202f761e..4b36d1263 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
@@ -1389,4 +1389,26 @@ object KyuubiConf {
       .version("1.6.0")
       .stringConf
       .createOptional
+
+  val ENGINE_HIVE_MEMORY: ConfigEntry[String] =
+    buildConf("kyuubi.engine.hive.memory")
+      .doc("The heap memory for the hive query engine")
+      .version("1.6.0")
+      .stringConf
+      .createWithDefault("1g")
+
+  val ENGINE_HIVE_JAVA_OPTIONS: OptionalConfigEntry[String] =
+    buildConf("kyuubi.engine.hive.java.options")
+      .doc("The extra java options for the hive query engine")
+      .version("1.6.0")
+      .stringConf
+      .createOptional
+
+  val ENGINE_HIVE_EXTRA_CLASSPATH: OptionalConfigEntry[String] =
+    buildConf("kyuubi.engine.hive.extra.classpath")
+      .doc("The extra classpath for the hive query engine, for configuring location" +
+        " of hadoop client jars, etc")
+      .version("1.6.0")
+      .stringConf
+      .createOptional
 }
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
index 974959dd7..5799734cd 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
@@ -26,6 +26,7 @@ import scala.collection.mutable.ArrayBuffer
 
 import org.apache.kyuubi._
 import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.config.KyuubiConf.{ENGINE_HIVE_EXTRA_CLASSPATH, ENGINE_HIVE_JAVA_OPTIONS, ENGINE_HIVE_MEMORY}
 import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY
 import org.apache.kyuubi.engine.ProcBuilder
 import org.apache.kyuubi.operation.log.OperationLog
@@ -52,7 +53,12 @@ class HiveProcessBuilder(
     // or just leave it, because we can handle it at operation layer
     buffer += s"-D$KYUUBI_SESSION_USER_KEY=$proxyUser"
 
-    // TODO: add Kyuubi.engineEnv.HIVE_ENGINE_MEMORY or kyuubi.engine.hive.memory to configure
+    val memory = conf.get(ENGINE_HIVE_MEMORY)
+    buffer += s"-Xmx$memory"
+    val javaOptions = conf.get(ENGINE_HIVE_JAVA_OPTIONS)
+    if (javaOptions.isDefined) {
+      buffer += javaOptions.get
+    }
     // -Xmx5g
     // java options
     for ((k, v) <- conf.getAll) {
@@ -75,20 +81,21 @@ class HiveProcessBuilder(
     env.get("YARN_CONF_DIR").foreach(classpathEntries.add)
     // jars from hive distribution
     classpathEntries.add(s"$hiveHome${File.separator}lib${File.separator}*")
-    val hadoopCp = env.get("HIVE_HADOOP_CLASSPATH").orElse(env.get("HADOOP_CLASSPATH"))
-    hadoopCp.foreach(path => classpathEntries.add(s"$path${File.separator}*"))
-    if (hadoopCp.isEmpty) {
-      warn(s"HIVE_HADOOP_CLASSPATH or HADOOP_CLASSPATH don't export.")
+    val extraCp = conf.get(ENGINE_HIVE_EXTRA_CLASSPATH)
+    extraCp.foreach(classpathEntries.add)
+    if (extraCp.isEmpty) {
+      warn(s"The conf of kyuubi.engine.hive.extra.classpath is empty.")
       mainResource.foreach { path =>
         val devHadoopJars = Paths.get(path).getParent
           .resolve(s"scala-$SCALA_COMPILE_VERSION")
           .resolve("jars")
         if (!Files.exists(devHadoopJars)) {
-          throw new KyuubiException(s"The path $devHadoopJars does not exists. ")
+          throw new KyuubiException(s"The path $devHadoopJars does not exists. Please set " +
+            s"kyuubi.engine.hive.extra.classpath for configuring location of " +
+            s"hadoop client jars, etc")
         }
         classpathEntries.add(s"$devHadoopJars${File.separator}*")
       }
-
     }
     buffer += classpathEntries.asScala.mkString(File.pathSeparator)
     buffer += mainClass
diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala
index 9411410e1..94aec1465 100644
--- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala
+++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala
@@ -19,12 +19,12 @@ package org.apache.kyuubi.engine.hive
 
 import org.apache.kyuubi.KyuubiFunSuite
 import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.config.KyuubiConf.{ENGINE_HIVE_EXTRA_CLASSPATH, ENGINE_HIVE_JAVA_OPTIONS, ENGINE_HIVE_MEMORY}
 
 class HiveProcessBuilderSuite extends KyuubiFunSuite {
 
-  private def conf = KyuubiConf().set("kyuubi.on", "off")
-
   test("hive process builder") {
+    val conf = KyuubiConf().set("kyuubi.on", "off")
     val builder = new HiveProcessBuilder("kyuubi", conf)
     val commands = builder.toString.split('\n')
     assert(commands.head.endsWith("bin/java"), "wrong exec")
@@ -33,4 +33,33 @@ class HiveProcessBuilderSuite extends KyuubiFunSuite {
     assert(commands.exists(ss => ss.contains("kyuubi-hive-sql-engine")), "wrong classpath")
   }
 
+  test("default engine memory") {
+    val conf = KyuubiConf()
+    val builder = new HiveProcessBuilder("kyuubi", conf)
+    val commands = builder.toString.split('\n')
+    assert(commands.contains("-Xmx1g"))
+  }
+
+  test("set engine memory") {
+    val conf = KyuubiConf().set(ENGINE_HIVE_MEMORY, "5g")
+    val builder = new HiveProcessBuilder("kyuubi", conf)
+    val commands = builder.toString.split('\n')
+    assert(commands.contains("-Xmx5g"))
+  }
+
+  test("set engine java opts") {
+    val conf = KyuubiConf().set(
+      ENGINE_HIVE_JAVA_OPTIONS,
+      "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")
+    val builder = new HiveProcessBuilder("kyuubi", conf)
+    val commands = builder.toString.split('\n')
+    assert(commands.contains("-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005"))
+  }
+
+  test("set engine extra classpath") {
+    val conf = KyuubiConf().set(ENGINE_HIVE_EXTRA_CLASSPATH, "/dummy_classpath/*")
+    val builder = new HiveProcessBuilder("kyuubi", conf)
+    val commands = builder.toString
+    assert(commands.contains("/dummy_classpath/*"))
+  }
 }