You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2015/10/23 00:44:32 UTC

hive git commit: HIVE-12204: Tez queries stopped running with ApplicationNotRunningException (Vikram Dixit K via Gunther Hagleitner)

Repository: hive
Updated Branches:
  refs/heads/master 54116481e -> d6d10129e


HIVE-12204: Tez queries stopped running with ApplicationNotRunningException (Vikram Dixit K via Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d6d10129
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d6d10129
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d6d10129

Branch: refs/heads/master
Commit: d6d10129e6f4159c083ed7391cbb5c36482eb537
Parents: 5411648
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Thu Oct 22 15:42:25 2015 -0700
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Thu Oct 22 15:42:25 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/exec/tez/TezSessionPoolManager.java | 15 ++++++++
 .../apache/hadoop/hive/ql/exec/tez/TezTask.java | 39 ++++++++++++++------
 .../hive/ql/exec/tez/TestTezSessionPool.java    | 14 +++++++
 3 files changed, 57 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d6d10129/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
index b1e9235..c5539ff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
@@ -213,6 +213,21 @@ public class TezSessionPoolManager {
     }
   }
 
+  /**
+   * This is called only in extreme cases where even our retry of submit fails. This method
+   * closes the given session, even if it is a default session, and removes it from openSessions.
+   *
+   * @param tezSessionState
+   *          the session to be closed
+   * @throws Exception
+   */
+  public void destroySession(TezSessionState tezSessionState) throws Exception {
+    LOG.warn("We are closing a " + (tezSessionState.isDefault() ? "default" : "non-default")
+        + " session because of retry failure.");
+    tezSessionState.close(false);
+    openSessions.remove(tezSessionState);
+  }
+
   protected TezSessionState createSession(String sessionId) {
     return new TezSessionState(sessionId);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/d6d10129/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 2cf990c..032a9e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.tez;
 
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
@@ -422,17 +423,33 @@ public class TezTask extends Task<TezWork> {
     }
 
     try {
-      // ready to start execution on the cluster
-      sessionState.getSession().addAppMasterLocalFiles(resourceMap);
-      dagClient = sessionState.getSession().submitDAG(dag);
-    } catch (SessionNotRunning nr) {
-      console.printInfo("Tez session was closed. Reopening...");
-
-      // close the old one, but keep the tmp files around
-      TezSessionPoolManager.getInstance().closeAndOpen(sessionState, this.conf, inputOutputJars, true);
-      console.printInfo("Session re-established.");
-
-      dagClient = sessionState.getSession().submitDAG(dag);
+      try {
+        // ready to start execution on the cluster
+        sessionState.getSession().addAppMasterLocalFiles(resourceMap);
+        dagClient = sessionState.getSession().submitDAG(dag);
+      } catch (SessionNotRunning nr) {
+        console.printInfo("Tez session was closed. Reopening...");
+
+        // close the old one, but keep the tmp files around
+        TezSessionPoolManager.getInstance().closeAndOpen(sessionState, this.conf, inputOutputJars,
+            true);
+        console.printInfo("Session re-established.");
+
+        dagClient = sessionState.getSession().submitDAG(dag);
+      }
+    } catch (Exception e) {
+      // On any other exception, retry once. If that also fails, the retry's exception is rethrown.
+      try {
+        TezSessionPoolManager.getInstance().closeAndOpen(sessionState, this.conf, inputOutputJars,
+            true);
+        console.printInfo("Dag submit failed due to " + e.getMessage() + " stack trace: "
+            + Arrays.toString(e.getStackTrace()) + " retrying...");
+        dagClient = sessionState.getSession().submitDAG(dag);
+      } catch (Exception retryException) {
+        // we failed to submit after retrying. Destroy session and bail.
+        TezSessionPoolManager.getInstance().destroySession(sessionState);
+        throw retryException;
+      }
     }
 
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_DAG);

http://git-wip-us.apache.org/repos/asf/hive/blob/d6d10129/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
index c148aae..3354219 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
@@ -20,17 +20,22 @@ package org.apache.hadoop.hive.ql.exec.tez;
 
 import static org.junit.Assert.*;
 
+import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
 
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 
 public class TestTezSessionPool {
 
+  private static final Log LOG = LogFactory.getLog(TestTezSessionPoolManager.class);
   HiveConf conf;
   Random random;
   private TezSessionPoolManager poolManager;
@@ -214,6 +219,15 @@ public class TestTezSessionPool {
   }
 
   @Test
+  public void testSessionDestroy() throws Exception {
+    poolManager = new TestTezSessionPoolManager();
+    TezSessionState session = Mockito.mock(TezSessionState.class);
+    Mockito.when(session.isDefault()).thenReturn(false);
+
+    poolManager.destroySession(session);
+  }
+
+  @Test
   public void testCloseAndOpenWithResources() throws Exception {
     poolManager = new TestTezSessionPoolManager();
     TezSessionState session = Mockito.mock(TezSessionState.class);