You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ul...@apache.org on 2021/10/08 08:01:48 UTC
[incubator-kyuubi] branch master updated: [KYUUBI #1039] Kill yarn
job when engine initialize timeout and yarnApplicationState is ACCEPTED
This is an automated email from the ASF dual-hosted git repository.
ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 2ae6acd [KYUUBI #1039] Kill yarn job when engine initialize timeout and yarnApplicationState is ACCEPTED
2ae6acd is described below
commit 2ae6acda65d4c5217eb59f9c3a2edcce30ad668d
Author: simon <zh...@cvte.com>
AuthorDate: Fri Oct 8 16:01:08 2021 +0800
[KYUUBI #1039] Kill yarn job when engine initialize timeout and yarnApplicationState is ACCEPTED
Kill the YARN job when engine initialization times out and the yarnApplicationState is ACCEPTED.
#1039
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #1119 from simon824/killyarnjob.
Closes #1039
7face4db [simon] get KYUUBI_HOME
da8c3ec4 [Simon] Merge branch 'apache:master' into killyarnjob
95ae8f59 [simon] add appId
8a6ddcd1 [simon] kill application by script
e0cd2af8 [simon] kill application by script
d15d38c5 [simon] kill application by script
1fd1373b [simon] mv sparklauncher to sparkProcessBuilder
5e806426 [simon] fix codestyle
a9cc4505 [simon] fix option No value Exception
9c7ca2f8 [simon] fix option No value Exception
bbfe8e25 [simon] add set sparkHome
04f23c8a [simon] fix codestyle
75599233 [simon] add spark-launcher dep
ef4b2706 [simon] implement by sparkLauncher
59c25b7c [simon] kill yarn application by restful api
49921a48 [simon] fix ut
a31d8f6a [simon] fix ut
1abc6665 [simon] rename killApplication
43a95c1d [simon] Merge branch 'master' into killyarnjob
3d9c12d3 [simon] Merge remote-tracking branch 'upstream/master'
9eaeb16d [simon] fix ut
64ee1b11 [simon] code style
5b905dbd [simon] Merge remote-tracking branch 'upstream/master'
139f3b79 [Simon] Merge branch 'apache:master' into killyarnjob
1a52401d [simon] #1039
Lead-authored-by: simon <zh...@cvte.com>
Co-authored-by: Simon <36...@qq.com>
Signed-off-by: ulysses-you <ul...@apache.org>
---
bin/stop-application.sh | 29 ++++++++++++++++++++++
.../scala/org/apache/kyuubi/engine/EngineRef.scala | 3 ++-
.../org/apache/kyuubi/engine/ProcBuilder.scala | 24 ++++++++++++++++++
3 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/bin/stop-application.sh b/bin/stop-application.sh
new file mode 100755
index 0000000..3fadde4
--- /dev/null
+++ b/bin/stop-application.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [[ $# -lt 1 ]]; then  # -lt compares numerically; '<' inside [[ ]] compares strings
+  echo "USAGE: $0 <application_id>"
+  exit 1
+fi
+# The yarn CLI is resolved relative to HADOOP_HOME, so refuse to run without it.
+if [[ -z "${HADOOP_HOME}" ]]; then
+  echo "Error: HADOOP_HOME IS NOT SET! CANNOT PROCEED."
+  exit 1
+fi
+# Quote both expansions so a path or id containing whitespace is passed through intact.
+"${HADOOP_HOME}/bin/yarn" application -kill "$1"
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
index 20bb6db..8abcdaf 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
@@ -182,10 +182,11 @@ private[kyuubi] class EngineRef(
}
}
if (started + timeout <= System.currentTimeMillis()) {
+ val killMessage = builder.killApplication()
process.destroyForcibly()
MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
throw KyuubiSQLException(
- s"Timeout($timeout ms) to launched Spark with $builder",
+ s"Timeout($timeout ms) to launched Spark with $builder. $killMessage",
builder.getError)
}
engineRef = getEngineByRefId(zkClient, engineSpace, engineRefId)
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
index 6e48170..62e3255 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
@@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path}
import scala.collection.JavaConverters._
+import scala.util.matching.Regex
import org.apache.commons.lang3.StringUtils.containsIgnoreCase
@@ -148,6 +149,29 @@ trait ProcBuilder {
proc
}
+  /** Matches a YARN application id such as `application_1633680000000_0001`. */
+  val YARN_APP_NAME_REGEX: Regex = "application_\\d+_\\d+".r
+
+  /**
+   * Kills the YARN application whose id appears in `line` by running the
+   * `bin/stop-application.sh` script found under the configured KYUUBI_HOME.
+   *
+   * @param line text to search for an application id; defaults to `lastRowOfLog`
+   * @return a human-readable outcome message, or an empty string when `line`
+   *         contains no application id
+   */
+  def killApplication(line: String = lastRowOfLog): String =
+    YARN_APP_NAME_REGEX.findFirstIn(line) match {
+      case Some(appId) =>
+        env.get(KyuubiConf.KYUUBI_HOME) match {
+          case Some(kyuubiHome) =>
+            // NOTE(review): stop-application.sh declares a bash shebang and uses `[[ ]]`, yet it
+            // is launched through /bin/sh here -- confirm /bin/sh is bash-compatible on hosts.
+            val pb = new ProcessBuilder("/bin/sh", s"$kyuubiHome/bin/stop-application.sh", appId)
+            pb.environment().putAll(env.asJava)
+            // Append instead of redirecting to the plain file: a plain file redirect discards
+            // whatever engineLog already contains before the script writes to it.
+            pb.redirectError(ProcessBuilder.Redirect.appendTo(engineLog))
+            pb.redirectOutput(ProcessBuilder.Redirect.appendTo(engineLog))
+            // Block until the script exits; a non-zero exit code means the kill failed.
+            if (pb.start().waitFor() != 0) {
+              s"Failed to kill Application $appId, please kill it manually. "
+            } else {
+              s"Killed Application $appId successfully. "
+            }
+          case None =>
+            s"KYUUBI_HOME is not set! Failed to kill Application $appId, please kill it manually."
+        }
+      case None => ""
+    }
+
def close(): Unit = {
if (logCaptureThread != null) {
logCaptureThread.interrupt()