You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ch...@apache.org on 2023/05/03 12:25:15 UTC

[kyuubi] branch master updated: [KYUUBI #4780] Get engine application info with interval to prevent frequent call to resource manager

This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 66de0ad8a [KYUUBI #4780] Get engine application info with interval to prevent frequent call to resource manager
66de0ad8a is described below

commit 66de0ad8a040b809908bfb44e70502650e736de0
Author: fwang12 <fw...@ebay.com>
AuthorDate: Wed May 3 20:24:59 2023 +0800

    [KYUUBI #4780] Get engine application info with interval to prevent frequent call to resource manager
    
    ### _Why are the changes needed?_
    
    To prevent frequent calls to the resource manager.
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #4780 from turboFei/engine_ref.
    
    Closes #4780
    
    09f67c699 [fwang12] re-order
    88c1cb33c [fwang12] sleep
    
    Authored-by: fwang12 <fw...@ebay.com>
    Signed-off-by: Cheng Pan <ch...@apache.org>
---
 .../scala/org/apache/kyuubi/engine/EngineRef.scala | 39 +++++++++++++---------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
index b2b3ce909..765f36949 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
@@ -206,10 +206,11 @@ private[kyuubi] class EngineRef(
       builder.validateConf
       val process = builder.start
       var exitValue: Option[Int] = None
+      var lastApplicationInfo: Option[ApplicationInfo] = None
       while (engineRef.isEmpty) {
         if (exitValue.isEmpty && process.waitFor(1, TimeUnit.SECONDS)) {
           exitValue = Some(process.exitValue())
-          if (exitValue.get != 0) {
+          if (exitValue != Some(0)) {
             val error = builder.getError
             MetricsSystem.tracing { ms =>
               ms.incCount(MetricRegistry.name(ENGINE_FAIL, appUser))
@@ -219,14 +220,31 @@ private[kyuubi] class EngineRef(
           }
         }
 
+        if (started + timeout <= System.currentTimeMillis()) {
+          val killMessage = engineManager.killApplication(builder.clusterManager(), engineRefId)
+          process.destroyForcibly()
+          MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
+          throw KyuubiSQLException(
+            s"Timeout($timeout ms, you can modify ${ENGINE_INIT_TIMEOUT.key} to change it) to" +
+              s" launched $engineType engine with $redactedCmd. $killMessage",
+            builder.getError)
+        }
+        engineRef = discoveryClient.getEngineByRefId(engineSpace, engineRefId)
+
         // even the submit process succeeds, the application might meet failure when initializing,
         // check the engine application state from engine manager and fast fail on engine terminate
-        if (exitValue == Some(0)) {
+        if (engineRef.isEmpty && exitValue == Some(0)) {
           Option(engineManager).foreach { engineMgr =>
-            engineMgr.getApplicationInfo(
+            if (lastApplicationInfo.isDefined) {
+              TimeUnit.SECONDS.sleep(1)
+            }
+
+            val applicationInfo = engineMgr.getApplicationInfo(
               builder.clusterManager(),
               engineRefId,
-              Some(started)).foreach { appInfo =>
+              Some(started))
+
+            applicationInfo.foreach { appInfo =>
               if (ApplicationState.isTerminated(appInfo.state)) {
                 MetricsSystem.tracing { ms =>
                   ms.incCount(MetricRegistry.name(ENGINE_FAIL, appUser))
@@ -240,19 +258,10 @@ private[kyuubi] class EngineRef(
                   builder.getError)
               }
             }
-          }
-        }
 
-        if (started + timeout <= System.currentTimeMillis()) {
-          val killMessage = engineManager.killApplication(builder.clusterManager(), engineRefId)
-          process.destroyForcibly()
-          MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
-          throw KyuubiSQLException(
-            s"Timeout($timeout ms, you can modify ${ENGINE_INIT_TIMEOUT.key} to change it) to" +
-              s" launched $engineType engine with $redactedCmd. $killMessage",
-            builder.getError)
+            lastApplicationInfo = applicationInfo
+          }
         }
-        engineRef = discoveryClient.getEngineByRefId(engineSpace, engineRefId)
       }
       engineRef.get
     } finally {