You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2017/02/07 03:32:07 UTC

hive git commit: HIVE-15827: LLAP: status tool breaks out of watch mode when live instances is 0 (Prasanth Jayachandran reviewed by Sergey Shelukhin, Siddharth Seth)

Repository: hive
Updated Branches:
  refs/heads/master b978c074d -> 95d0ce722


HIVE-15827: LLAP: status tool breaks out of watch mode when live instances is 0 (Prasanth Jayachandran reviewed by Sergey Shelukhin, Siddharth Seth)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/95d0ce72
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/95d0ce72
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/95d0ce72

Branch: refs/heads/master
Commit: 95d0ce722457eca996d3736bd06f5d95d16bc471
Parents: b978c07
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Mon Feb 6 19:31:58 2017 -0800
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Mon Feb 6 19:31:58 2017 -0800

----------------------------------------------------------------------
 .../hive/llap/cli/LlapStatusServiceDriver.java  | 41 ++++++++++++++++++--
 1 file changed, 38 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/95d0ce72/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
index b30f837..1b9eba6 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
@@ -497,7 +497,7 @@ public class LlapStatusServiceDriver {
     Collection<ServiceInstance> serviceInstances;
     try {
       serviceInstances = llapRegistry.getInstances(watchTimeoutMs).getAll();
-    } catch (IOException e) {
+    } catch (Exception e) {
       throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e);
     }
 
@@ -541,7 +541,11 @@ public class LlapStatusServiceDriver {
           LOG.warn("Found more entries in LLAP registry, as compared to desired entries");
         }
       } else {
-        appStatusBuilder.setState(State.RUNNING_PARTIAL);
+        if (validatedInstances.size() > 0) {
+          appStatusBuilder.setState(State.RUNNING_PARTIAL);
+        } else {
+          appStatusBuilder.setState(State.LAUNCHING);
+        }
       }
 
       // At this point, everything that can be consumed from AppStatusBuilder has been consumed.
@@ -575,6 +579,8 @@ public class LlapStatusServiceDriver {
     private Long appStartTime;
     private Long appFinishTime;
 
+    private boolean runningThresholdAchieved = false;
+
     private final List<LlapInstance> llapInstances = new LinkedList<>();
 
     private transient Map<String, LlapInstance> containerToInstanceMap = new HashMap<>();
@@ -625,6 +631,11 @@ public class LlapStatusServiceDriver {
       return this;
     }
 
+    public AppStatusBuilder setRunningThresholdAchieved(boolean thresholdAchieved) {
+      this.runningThresholdAchieved = thresholdAchieved;
+      return this;
+    }
+
     public LlapInstance removeAndgetLlapInstanceForContainer(String containerIdString) {
       return containerToInstanceMap.remove(containerIdString);
     }
@@ -683,6 +694,10 @@ public class LlapStatusServiceDriver {
       return llapInstances;
     }
 
+    public boolean isRunningThresholdAchieved() {
+      return runningThresholdAchieved;
+    }
+
     @JsonIgnore
     public AmInfo maybeCreateAndGetAmInfo() {
       if (amInfo == null) {
@@ -994,7 +1009,7 @@ public class LlapStatusServiceDriver {
               // we have reached RUNNING state, now check if running nodes threshold is met
               final int liveInstances = statusServiceDriver.appStatusBuilder.getLiveInstances();
               final int desiredInstances = statusServiceDriver.appStatusBuilder.getDesiredInstances();
-              if (liveInstances > 0 && desiredInstances > 0) {
+              if (desiredInstances > 0) {
                 final float ratio = (float) liveInstances / (float) desiredInstances;
                 if (ratio < runningNodesThreshold) {
                   LOG.warn("Waiting until running nodes threshold is reached. Current: {} Desired: {}." +
@@ -1006,9 +1021,29 @@ public class LlapStatusServiceDriver {
                   continue;
                 } else {
                   desiredStateAttained = true;
+                  statusServiceDriver.appStatusBuilder.setRunningThresholdAchieved(true);
                 }
+              } else {
+                numAttempts--;
+                continue;
               }
             }
+          } else if (ret == ExitCode.YARN_ERROR.getInt() && watchMode) {
+            LOG.warn("Watch mode enabled and got YARN error. Retrying..");
+            numAttempts--;
+            continue;
+          } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_CREATE_FAILED.getInt() && watchMode) {
+            LOG.warn("Watch mode enabled and slider client creation failed. Retrying..");
+            numAttempts--;
+            continue;
+          } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_OTHER.getInt() && watchMode) {
+            LOG.warn("Watch mode enabled and got slider client error. Retrying..");
+            numAttempts--;
+            continue;
+          } else if (ret == ExitCode.LLAP_REGISTRY_ERROR.getInt() && watchMode) {
+            LOG.warn("Watch mode enabled and got LLAP registry error. Retrying..");
+            numAttempts--;
+            continue;
           }
           break;
         } finally {