You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ch...@apache.org on 2023/05/03 12:25:15 UTC
[kyuubi] branch master updated: [KYUUBI #4780] Get engine application info with interval to prevent frequent call to resource manager
This is an automated email from the ASF dual-hosted git repository.
chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 66de0ad8a [KYUUBI #4780] Get engine application info with interval to prevent frequent call to resource manager
66de0ad8a is described below
commit 66de0ad8a040b809908bfb44e70502650e736de0
Author: fwang12 <fw...@ebay.com>
AuthorDate: Wed May 3 20:24:59 2023 +0800
[KYUUBI #4780] Get engine application info with interval to prevent frequent call to resource manager
### _Why are the changes needed?_
To prevent frequent calls to the resource manager.
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly, including negative and positive cases, if possible
- [ ] Add screenshots for manual tests if appropriate
- [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #4780 from turboFei/engine_ref.
Closes #4780
09f67c699 [fwang12] re-order
88c1cb33c [fwang12] sleep
Authored-by: fwang12 <fw...@ebay.com>
Signed-off-by: Cheng Pan <ch...@apache.org>
---
.../scala/org/apache/kyuubi/engine/EngineRef.scala | 39 +++++++++++++---------
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
index b2b3ce909..765f36949 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
@@ -206,10 +206,11 @@ private[kyuubi] class EngineRef(
builder.validateConf
val process = builder.start
var exitValue: Option[Int] = None
+ var lastApplicationInfo: Option[ApplicationInfo] = None
while (engineRef.isEmpty) {
if (exitValue.isEmpty && process.waitFor(1, TimeUnit.SECONDS)) {
exitValue = Some(process.exitValue())
- if (exitValue.get != 0) {
+ if (exitValue != Some(0)) {
val error = builder.getError
MetricsSystem.tracing { ms =>
ms.incCount(MetricRegistry.name(ENGINE_FAIL, appUser))
@@ -219,14 +220,31 @@ private[kyuubi] class EngineRef(
}
}
+ if (started + timeout <= System.currentTimeMillis()) {
+ val killMessage = engineManager.killApplication(builder.clusterManager(), engineRefId)
+ process.destroyForcibly()
+ MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
+ throw KyuubiSQLException(
+ s"Timeout($timeout ms, you can modify ${ENGINE_INIT_TIMEOUT.key} to change it) to" +
+ s" launched $engineType engine with $redactedCmd. $killMessage",
+ builder.getError)
+ }
+ engineRef = discoveryClient.getEngineByRefId(engineSpace, engineRefId)
+
// even the submit process succeeds, the application might meet failure when initializing,
// check the engine application state from engine manager and fast fail on engine terminate
- if (exitValue == Some(0)) {
+ if (engineRef.isEmpty && exitValue == Some(0)) {
Option(engineManager).foreach { engineMgr =>
- engineMgr.getApplicationInfo(
+ if (lastApplicationInfo.isDefined) {
+ TimeUnit.SECONDS.sleep(1)
+ }
+
+ val applicationInfo = engineMgr.getApplicationInfo(
builder.clusterManager(),
engineRefId,
- Some(started)).foreach { appInfo =>
+ Some(started))
+
+ applicationInfo.foreach { appInfo =>
if (ApplicationState.isTerminated(appInfo.state)) {
MetricsSystem.tracing { ms =>
ms.incCount(MetricRegistry.name(ENGINE_FAIL, appUser))
@@ -240,19 +258,10 @@ private[kyuubi] class EngineRef(
builder.getError)
}
}
- }
- }
- if (started + timeout <= System.currentTimeMillis()) {
- val killMessage = engineManager.killApplication(builder.clusterManager(), engineRefId)
- process.destroyForcibly()
- MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
- throw KyuubiSQLException(
- s"Timeout($timeout ms, you can modify ${ENGINE_INIT_TIMEOUT.key} to change it) to" +
- s" launched $engineType engine with $redactedCmd. $killMessage",
- builder.getError)
+ lastApplicationInfo = applicationInfo
+ }
}
- engineRef = discoveryClient.getEngineByRefId(engineSpace, engineRefId)
}
engineRef.get
} finally {