You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@griffin.apache.org by gu...@apache.org on 2018/09/30 07:09:24 UTC

incubator-griffin git commit: [GRIFFIN-197] Treat non-existing YARN app as FAILED

Repository: incubator-griffin
Updated Branches:
  refs/heads/master 18fc4cf4c -> 3dda6b345


[GRIFFIN-197] Treat non-existing YARN app as FAILED

This prevents jobs from getting stuck in the UNKNOWN state on the Service side.
It also improves logging for YARN client errors.

Author: Nikolay Sokolov <ch...@gmail.com>

Closes #421 from chemikadze/GRIFFIN-197.


Project: http://git-wip-us.apache.org/repos/asf/incubator-griffin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-griffin/commit/3dda6b34
Tree: http://git-wip-us.apache.org/repos/asf/incubator-griffin/tree/3dda6b34
Diff: http://git-wip-us.apache.org/repos/asf/incubator-griffin/diff/3dda6b34

Branch: refs/heads/master
Commit: 3dda6b3459d2b3d9f091545e49e156ca5f230e2d
Parents: 18fc4cf
Author: Nikolay Sokolov <ch...@gmail.com>
Authored: Sun Sep 30 15:09:16 2018 +0800
Committer: William Guo <gu...@apache.org>
Committed: Sun Sep 30 15:09:16 2018 +0800

----------------------------------------------------------------------
 .../org/apache/griffin/core/util/YarnNetUtil.java   | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/3dda6b34/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java b/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java
index f935aad..71308ab 100644
--- a/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java
+++ b/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java
@@ -21,14 +21,17 @@ package org.apache.griffin.core.util;
 
 import com.google.gson.JsonObject;
 import com.google.gson.JsonParser;
-
 import org.apache.commons.lang.StringUtils;
 import org.apache.griffin.core.job.entity.JobInstanceBean;
 import org.apache.griffin.core.job.entity.LivySessionStates;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.springframework.http.HttpStatus;
+import org.springframework.web.client.HttpClientErrorException;
 import org.springframework.web.client.RestTemplate;
 
+import static org.apache.griffin.core.job.entity.LivySessionStates.State.DEAD;
+
 public class YarnNetUtil {
     private static final Logger LOGGER = LoggerFactory
             .getLogger(YarnNetUtil.class);
@@ -42,6 +45,9 @@ public class YarnNetUtil {
                                 + appId + "/state",
                         "{\"state\": \"KILLED\"}");
             }
+        } catch (HttpClientErrorException e) {
+            LOGGER.warn("client error {} from yarn: {}",
+                    e.getMessage(), e.getResponseBodyAsString());
         } catch (Exception e) {
             LOGGER.error("delete exception happens by yarn. {}", e);
         }
@@ -56,6 +62,14 @@ public class YarnNetUtil {
                 instance.setState(LivySessionStates.toLivyState(state));
             }
             return true;
+        } catch (HttpClientErrorException e) {
+            LOGGER.warn("client error {} from yarn: {}",
+                    e.getMessage(), e.getResponseBodyAsString());
+            if (e.getStatusCode() == HttpStatus.NOT_FOUND) {
+                // in sync with Livy behavior, see com.cloudera.livy.utils.SparkYarnApp
+                instance.setState(DEAD);
+                return true;
+            }
         } catch (Exception e) {
             LOGGER.error("update exception happens by yarn. {}", e);
         }