You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2015/10/23 00:44:32 UTC
hive git commit: HIVE-12204: Tez queries stopped running with ApplicationNotRunningException (Vikram Dixit K via Gunther Hagleitner)
Repository: hive
Updated Branches:
refs/heads/master 54116481e -> d6d10129e
HIVE-12204: Tez queries stopped running with ApplicationNotRunningException (Vikram Dixit K via Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d6d10129
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d6d10129
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d6d10129
Branch: refs/heads/master
Commit: d6d10129e6f4159c083ed7391cbb5c36482eb537
Parents: 5411648
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Thu Oct 22 15:42:25 2015 -0700
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Thu Oct 22 15:42:25 2015 -0700
----------------------------------------------------------------------
.../hive/ql/exec/tez/TezSessionPoolManager.java | 15 ++++++++
.../apache/hadoop/hive/ql/exec/tez/TezTask.java | 39 ++++++++++++++------
.../hive/ql/exec/tez/TestTezSessionPool.java | 14 +++++++
3 files changed, 57 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/d6d10129/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
index b1e9235..c5539ff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
@@ -213,6 +213,21 @@ public class TezSessionPoolManager {
}
}
+ /**
+ * This is called only in extreme cases where even our retry of submit fails. This method would
+ * close even default sessions and remove it from the queue.
+ *
+ * @param tezSessionState
+ * the session to be closed
+ * @throws Exception
+ */
+ public void destroySession(TezSessionState tezSessionState) throws Exception {
+ LOG.warn("We are closing a " + (tezSessionState.isDefault() ? "default" : "non-default")
+ + " session because of retry failure.");
+ tezSessionState.close(false);
+ openSessions.remove(tezSessionState);
+ }
+
protected TezSessionState createSession(String sessionId) {
return new TezSessionState(sessionId);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/d6d10129/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 2cf990c..032a9e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.tez;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
@@ -422,17 +423,33 @@ public class TezTask extends Task<TezWork> {
}
try {
- // ready to start execution on the cluster
- sessionState.getSession().addAppMasterLocalFiles(resourceMap);
- dagClient = sessionState.getSession().submitDAG(dag);
- } catch (SessionNotRunning nr) {
- console.printInfo("Tez session was closed. Reopening...");
-
- // close the old one, but keep the tmp files around
- TezSessionPoolManager.getInstance().closeAndOpen(sessionState, this.conf, inputOutputJars, true);
- console.printInfo("Session re-established.");
-
- dagClient = sessionState.getSession().submitDAG(dag);
+ try {
+ // ready to start execution on the cluster
+ sessionState.getSession().addAppMasterLocalFiles(resourceMap);
+ dagClient = sessionState.getSession().submitDAG(dag);
+ } catch (SessionNotRunning nr) {
+ console.printInfo("Tez session was closed. Reopening...");
+
+ // close the old one, but keep the tmp files around
+ TezSessionPoolManager.getInstance().closeAndOpen(sessionState, this.conf, inputOutputJars,
+ true);
+ console.printInfo("Session re-established.");
+
+ dagClient = sessionState.getSession().submitDAG(dag);
+ }
+ } catch (Exception e) {
+ // In case of any other exception, retry. If this also fails, report original error and exit.
+ try {
+ TezSessionPoolManager.getInstance().closeAndOpen(sessionState, this.conf, inputOutputJars,
+ true);
+ console.printInfo("Dag submit failed due to " + e.getMessage() + " stack trace: "
+ + Arrays.toString(e.getStackTrace()) + " retrying...");
+ dagClient = sessionState.getSession().submitDAG(dag);
+ } catch (Exception retryException) {
+ // we failed to submit after retrying. Destroy session and bail.
+ TezSessionPoolManager.getInstance().destroySession(sessionState);
+ throw retryException;
+ }
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_DAG);
http://git-wip-us.apache.org/repos/asf/hive/blob/d6d10129/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
index c148aae..3354219 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezSessionPool.java
@@ -20,17 +20,22 @@ package org.apache.hadoop.hive.ql.exec.tez;
import static org.junit.Assert.*;
+import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
public class TestTezSessionPool {
+ private static final Log LOG = LogFactory.getLog(TestTezSessionPoolManager.class);
HiveConf conf;
Random random;
private TezSessionPoolManager poolManager;
@@ -214,6 +219,15 @@ public class TestTezSessionPool {
}
@Test
+ public void testSessionDestroy() throws Exception {
+ poolManager = new TestTezSessionPoolManager();
+ TezSessionState session = Mockito.mock(TezSessionState.class);
+ Mockito.when(session.isDefault()).thenReturn(false);
+
+ poolManager.destroySession(session);
+ }
+
+ @Test
public void testCloseAndOpenWithResources() throws Exception {
poolManager = new TestTezSessionPoolManager();
TezSessionState session = Mockito.mock(TezSessionState.class);