You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@hive.apache.org by ek...@apache.org on 2015/12/04 04:29:31 UTC
[1/3] hive git commit: HIVE-12567 Enhance TxnHandler retry logic to
handle ORA-08176 (Eugene Koifman, reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/master 75b851bda -> 22fc39796
HIVE-12567 Enhance TxnHandler retry logic to handle ORA-08176 (Eugene Koifman, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5c4efb76
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5c4efb76
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5c4efb76
Branch: refs/heads/master
Commit: 5c4efb7603fa909dcf9299c0514792117d7bd8c6
Parents: 75b851b
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Thu Dec 3 18:50:15 2015 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Thu Dec 3 18:50:15 2015 -0800
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5c4efb76/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index ca37bf0..4c395c3 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -2255,7 +2255,10 @@ public class TxnHandler {
//in MSSQL this means Communication Link Failure
return true;
}
- //see https://issues.apache.org/jira/browse/HIVE-9938
+ if("ORA-08176".equalsIgnoreCase(sqlException.getSQLState())) {
+ return true;
+ }
+ //see also https://issues.apache.org/jira/browse/HIVE-9938
}
return false;
}
[3/3] hive git commit: HIVE-12444 Global Limit optimization on ACID
table without base directory may throw exception ADDENDUM (Wei Zheng via
Eugene Koifman)
Posted by ek...@apache.org.
HIVE-12444 Global Limit optimization on ACID table without base directory may throw exception ADDENDUM (Wei Zheng via Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/22fc3979
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/22fc3979
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/22fc3979
Branch: refs/heads/master
Commit: 22fc397968fb494920598692a11e435860237143
Parents: fbb5667
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Thu Dec 3 18:53:19 2015 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Thu Dec 3 18:53:19 2015 -0800
----------------------------------------------------------------------
ql/src/test/results/clientpositive/acid_globallimit.q.out | 4 ++--
ql/src/test/results/clientpositive/tez/acid_globallimit.q.out | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/22fc3979/ql/src/test/results/clientpositive/acid_globallimit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_globallimit.q.out b/ql/src/test/results/clientpositive/acid_globallimit.q.out
index 6a2a792..783e41b 100644
--- a/ql/src/test/results/clientpositive/acid_globallimit.q.out
+++ b/ql/src/test/results/clientpositive/acid_globallimit.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: acidtest1
- Statistics: Num rows: 9173 Data size: 101822 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9173 Data size: 101613 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: UDFToString(c1) (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 9173 Data size: 101822 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9173 Data size: 101613 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 110 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/22fc3979/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out b/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
index fdbddb0..d4f8e34 100644
--- a/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
+++ b/ql/src/test/results/clientpositive/tez/acid_globallimit.q.out
@@ -46,11 +46,11 @@ Stage-0
Statistics:Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
Select Operator [SEL_1]
outputColumnNames:["_col0"]
- Statistics:Num rows: 9173 Data size: 100794 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 9173 Data size: 100540 Basic stats: COMPLETE Column stats: NONE
TableScan [TS_0]
ACID table:true
alias:acidtest1
- Statistics:Num rows: 9173 Data size: 100794 Basic stats: COMPLETE Column stats: NONE
+ Statistics:Num rows: 9173 Data size: 100540 Basic stats: COMPLETE Column stats: NONE
PREHOOK: query: select cast (c1 as string) from acidtest1 limit 10
PREHOOK: type: QUERY
[2/3] hive git commit: HIVE-12529 HiveTxnManager.acquireLocks()
should not block forever (Eugene Koifman, reviewed by Alan Gates)
Posted by ek...@apache.org.
HIVE-12529 HiveTxnManager.acquireLocks() should not block forever (Eugene Koifman, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fbb5667b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fbb5667b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fbb5667b
Branch: refs/heads/master
Commit: fbb5667b525ecc6b0c013a48b86610b6ab1a1ce3
Parents: 5c4efb7
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Thu Dec 3 18:51:32 2015 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Thu Dec 3 18:51:32 2015 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 8 +-
.../hadoop/hive/metastore/txn/TxnHandler.java | 42 +++----
.../org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 109 ++++++++++---------
.../hadoop/hive/ql/lockmgr/DbLockManager.java | 69 ++++++++++--
.../hadoop/hive/ql/lockmgr/DbTxnManager.java | 2 +-
.../hadoop/hive/ql/lockmgr/HiveTxnManager.java | 4 +-
.../hive/ql/lockmgr/TestDbTxnManager.java | 8 +-
.../hive/ql/lockmgr/TestDbTxnManager2.java | 32 ++++++
9 files changed, 187 insertions(+), 89 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4d881ba..803d52b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1458,8 +1458,8 @@ public class HiveConf extends Configuration {
HIVE_UNLOCK_NUMRETRIES("hive.unlock.numretries", 10,
"The number of times you want to retry to do one unlock"),
HIVE_LOCK_SLEEP_BETWEEN_RETRIES("hive.lock.sleep.between.retries", "60s",
- new TimeValidator(TimeUnit.SECONDS),
- "The sleep time between various retries"),
+ new TimeValidator(TimeUnit.SECONDS, 0L, false, Long.MAX_VALUE, false),
+ "The maximum sleep time between various retries"),
HIVE_LOCK_MAPRED_ONLY("hive.lock.mapred.only.operation", false,
"This param is to control whether or not only do lock on queries\n" +
"that need to execute at least one mapred job."),
@@ -1503,6 +1503,10 @@ public class HiveConf extends Configuration {
"no transactions."),
HIVE_TXN_TIMEOUT("hive.txn.timeout", "300s", new TimeValidator(TimeUnit.SECONDS),
"time after which transactions are declared aborted if the client has not sent a heartbeat."),
+ TXN_MGR_DUMP_LOCK_STATE_ON_ACQUIRE_TIMEOUT("hive.txn.manager.dump.lock.state.on.acquire.timeout", false,
+ "Set this to true so that when attempt to acquire a lock on resource times out, the current state" +
+ " of the lock manager is dumped to log file. This is for debugging. See also " +
+ "hive.lock.numretries and hive.lock.sleep.between.retries."),
HIVE_TXN_MAX_OPEN_BATCH("hive.txn.max.open.batch", 1000,
"Maximum number of transactions that can be fetched in one call to open_txns().\n" +
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 4c395c3..4c5043b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -518,12 +518,12 @@ public class TxnHandler {
// Heartbeat on the lockid first, to assure that our lock is still valid.
// Then look up the lock info (hopefully in the cache). If these locks
// are associated with a transaction then heartbeat on that as well.
- Long txnid = getTxnIdFromLockId(dbConn, extLockId);
- if(txnid == null) {
+ LockInfo info = getTxnIdFromLockId(dbConn, extLockId);
+ if(info == null) {
throw new NoSuchLockException("No such lock " + JavaUtils.lockIdToString(extLockId));
}
- if (txnid > 0) {
- heartbeatTxn(dbConn, txnid);
+ if (info.txnId > 0) {
+ heartbeatTxn(dbConn, info.txnId);
}
else {
heartbeatLock(dbConn, extLockId);
@@ -570,28 +570,29 @@ public class TxnHandler {
dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
stmt = dbConn.createStatement();
//hl_txnid <> 0 means it's associated with a transaction
- String s = "delete from HIVE_LOCKS where hl_lock_ext_id = " + extLockId + " AND hl_txnid = 0";
+ String s = "delete from HIVE_LOCKS where hl_lock_ext_id = " + extLockId + " AND (hl_txnid = 0 OR" +
+ " (hl_txnid <> 0 AND hl_lock_state = '" + LOCK_WAITING + "'))";
LOG.debug("Going to execute update <" + s + ">");
int rc = stmt.executeUpdate(s);
if (rc < 1) {
LOG.debug("Going to rollback");
dbConn.rollback();
- Long txnid = getTxnIdFromLockId(dbConn, extLockId);
- if(txnid == null) {
- LOG.error("No lock found for unlock(" + rqst + ")");
+ LockInfo info = getTxnIdFromLockId(dbConn, extLockId);
+ if(info == null) {
+ //didn't find any lock with extLockId but at ReadCommitted there is a possibility that
+ //it existed when above delete ran but it didn't have the expected state.
+ LOG.error("No lock in " + LOCK_WAITING + " mode found for unlock(" + rqst + ")");
throw new NoSuchLockException("No such lock " + JavaUtils.lockIdToString(extLockId));
}
- if(txnid != 0) {
- String msg = "Unlocking locks associated with transaction" +
- " not permitted. Lockid " + JavaUtils.lockIdToString(extLockId) + " is associated with " +
- "transaction " + JavaUtils.txnIdToString(txnid);
+ if(info.txnId != 0) {
+ String msg = "Unlocking locks associated with transaction not permitted. " + info;
LOG.error(msg);
throw new TxnOpenException(msg);
}
- if(txnid == 0) {
+ if(info.txnId == 0) {
//we didn't see this lock when running DELETE stmt above but now it showed up
//so should "should never happen" happened...
- String msg = "Found lock " + JavaUtils.lockIdToString(extLockId) + " with " + JavaUtils.txnIdToString(txnid);
+ String msg = "Found lock in unexpected state " + info;
LOG.error(msg);
throw new MetaException(msg);
}
@@ -1910,22 +1911,23 @@ public class TxnHandler {
}
}
- private Long getTxnIdFromLockId(Connection dbConn, long extLockId)
+ private LockInfo getTxnIdFromLockId(Connection dbConn, long extLockId)
throws NoSuchLockException, MetaException, SQLException {
Statement stmt = null;
ResultSet rs = null;
try {
stmt = dbConn.createStatement();
- String s = "select hl_txnid from HIVE_LOCKS where hl_lock_ext_id = " +
- extLockId;
+ String s = "select hl_lock_ext_id, hl_lock_int_id, hl_db, hl_table, " +
+ "hl_partition, hl_lock_state, hl_lock_type, hl_txnid from HIVE_LOCKS where " +
+ "hl_lock_ext_id = " + extLockId;
LOG.debug("Going to execute query <" + s + ">");
rs = stmt.executeQuery(s);
if (!rs.next()) {
return null;
}
- long txnid = rs.getLong(1);
- LOG.debug("getTxnIdFromLockId(" + extLockId + ") Return " + JavaUtils.txnIdToString(txnid));
- return txnid;
+ LockInfo info = new LockInfo(rs);
+ LOG.debug("getTxnIdFromLockId(" + extLockId + ") Return " + JavaUtils.txnIdToString(info.txnId));
+ return info;
} finally {
close(rs);
closeStmt(stmt);
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 8a47605..9d9dd53 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -424,6 +424,8 @@ public enum ErrorMsg {
CTAS_LOCATION_NONEMPTY(10304, "CREATE-TABLE-AS-SELECT cannot create table with location to a non-empty directory."),
CTAS_CREATES_VOID_TYPE(10305, "CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the type, near field: "),
TBL_SORTED_NOT_BUCKETED(10306, "Destination table {0} found to be sorted but not bucketed.", true),
+ //{2} should be lockid
+ LOCK_ACQUIRE_TIMEDOUT(10307, "Lock acquisition for {0} timed out after {1}ms. {2}", true),
//========================== 20000 range starts here ========================//
SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index a210b95..8762e12 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -2520,7 +2520,60 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
return 0;
}
-
+ public static void dumpLockInfo(DataOutputStream os, ShowLocksResponse rsp) throws IOException {
+ // Write a header
+ os.writeBytes("Lock ID");
+ os.write(separator);
+ os.writeBytes("Database");
+ os.write(separator);
+ os.writeBytes("Table");
+ os.write(separator);
+ os.writeBytes("Partition");
+ os.write(separator);
+ os.writeBytes("State");
+ os.write(separator);
+ os.writeBytes("Type");
+ os.write(separator);
+ os.writeBytes("Transaction ID");
+ os.write(separator);
+ os.writeBytes("Last Hearbeat");
+ os.write(separator);
+ os.writeBytes("Acquired At");
+ os.write(separator);
+ os.writeBytes("User");
+ os.write(separator);
+ os.writeBytes("Hostname");
+ os.write(terminator);
+
+ List<ShowLocksResponseElement> locks = rsp.getLocks();
+ if (locks != null) {
+ for (ShowLocksResponseElement lock : locks) {
+ os.writeBytes(Long.toString(lock.getLockid()));
+ os.write(separator);
+ os.writeBytes(lock.getDbname());
+ os.write(separator);
+ os.writeBytes((lock.getTablename() == null) ? "NULL" : lock.getTablename());
+ os.write(separator);
+ os.writeBytes((lock.getPartname() == null) ? "NULL" : lock.getPartname());
+ os.write(separator);
+ os.writeBytes(lock.getState().toString());
+ os.write(separator);
+ os.writeBytes(lock.getType().toString());
+ os.write(separator);
+ os.writeBytes((lock.getTxnid() == 0) ? "NULL" : Long.toString(lock.getTxnid()));
+ os.write(separator);
+ os.writeBytes(Long.toString(lock.getLastheartbeat()));
+ os.write(separator);
+ os.writeBytes((lock.getAcquiredat() == 0) ? "NULL" : Long.toString(lock.getAcquiredat()));
+ os.write(separator);
+ os.writeBytes(lock.getUser());
+ os.write(separator);
+ os.writeBytes(lock.getHostname());
+ os.write(separator);
+ os.write(terminator);
+ }
+ }
+ }
private int showLocksNewFormat(ShowLocksDesc showLocks, HiveLockManager lm)
throws HiveException {
@@ -2535,59 +2588,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
// write the results in the file
DataOutputStream os = getOutputStream(showLocks.getResFile());
try {
- // Write a header
- os.writeBytes("Lock ID");
- os.write(separator);
- os.writeBytes("Database");
- os.write(separator);
- os.writeBytes("Table");
- os.write(separator);
- os.writeBytes("Partition");
- os.write(separator);
- os.writeBytes("State");
- os.write(separator);
- os.writeBytes("Type");
- os.write(separator);
- os.writeBytes("Transaction ID");
- os.write(separator);
- os.writeBytes("Last Hearbeat");
- os.write(separator);
- os.writeBytes("Acquired At");
- os.write(separator);
- os.writeBytes("User");
- os.write(separator);
- os.writeBytes("Hostname");
- os.write(terminator);
-
- List<ShowLocksResponseElement> locks = rsp.getLocks();
- if (locks != null) {
- for (ShowLocksResponseElement lock : locks) {
- os.writeBytes(Long.toString(lock.getLockid()));
- os.write(separator);
- os.writeBytes(lock.getDbname());
- os.write(separator);
- os.writeBytes((lock.getTablename() == null) ? "NULL" : lock.getTablename());
- os.write(separator);
- os.writeBytes((lock.getPartname() == null) ? "NULL" : lock.getPartname());
- os.write(separator);
- os.writeBytes(lock.getState().toString());
- os.write(separator);
- os.writeBytes(lock.getType().toString());
- os.write(separator);
- os.writeBytes((lock.getTxnid() == 0) ? "NULL" : Long.toString(lock.getTxnid()));
- os.write(separator);
- os.writeBytes(Long.toString(lock.getLastheartbeat()));
- os.write(separator);
- os.writeBytes((lock.getAcquiredat() == 0) ? "NULL" : Long.toString(lock.getAcquiredat()));
- os.write(separator);
- os.writeBytes(lock.getUser());
- os.write(separator);
- os.writeBytes(lock.getHostname());
- os.write(separator);
- os.write(terminator);
- }
-
- }
+ dumpLockInfo(os, rsp);
} catch (FileNotFoundException e) {
LOG.warn("show function: " + stringifyException(e));
return 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
index 42616ac..7d58622 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.lockmgr;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.DDLTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.JavaUtils;
@@ -29,10 +31,15 @@ import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.thrift.TException;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.Objects;
import java.util.Set;
+import java.util.concurrent.TimeUnit;
/**
* An implementation of HiveLockManager for use with {@link org.apache.hadoop.hive.ql.lockmgr.DbTxnManager}.
@@ -44,20 +51,20 @@ public class DbLockManager implements HiveLockManager{
static final private String CLASS_NAME = DbLockManager.class.getName();
static final private Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
- private static final long MAX_SLEEP = 15000;
- private HiveLockManagerCtx context;
+ private long MAX_SLEEP;
private Set<DbHiveLock> locks;
private IMetaStoreClient client;
private long nextSleep = 50;
+ private final HiveConf conf;
- DbLockManager(IMetaStoreClient client) {
+ DbLockManager(IMetaStoreClient client, HiveConf conf) {
locks = new HashSet<>();
this.client = client;
+ this.conf = conf;
}
@Override
public void setContext(HiveLockManagerCtx ctx) throws LockException {
- context = ctx;
}
@Override
@@ -81,6 +88,15 @@ public class DbLockManager implements HiveLockManager{
* @return the result of the lock attempt
*/
LockState lock(LockRequest lock, String queryId, boolean isBlocking, List<HiveLock> acquiredLocks) throws LockException {
+ Objects.requireNonNull(queryId, "queryId cannot be null");
+ nextSleep = 50;
+ /*
+ * get from conf to pick up changes; make sure not to set too low and kill the metastore
+ * MAX_SLEEP is the max time each backoff() will wait for, thus the total time to wait for
+ * successful lock acquisition is approximately (see backoff()) maxNumWaits * MAX_SLEEP.
+ */
+ MAX_SLEEP = Math.max(15000, conf.getTimeVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES, TimeUnit.MILLISECONDS));
+ int maxNumWaits = Math.max(0, conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES));
try {
LOG.info("Requesting: queryId=" + queryId + " " + lock);
LockResponse res = client.lock(lock);
@@ -91,15 +107,33 @@ public class DbLockManager implements HiveLockManager{
return LockState.WAITING;
}
}
- while (res.getState() == LockState.WAITING) {
+ int numRetries = 0;
+ long startRetry = System.currentTimeMillis();
+ while (res.getState() == LockState.WAITING && numRetries++ < maxNumWaits) {
backoff();
res = client.checkLock(res.getLockid());
}
+ long retryDuration = System.currentTimeMillis() - startRetry;
DbHiveLock hl = new DbHiveLock(res.getLockid());
locks.add(hl);
if (res.getState() != LockState.ACQUIRED) {
- throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
+ if(res.getState() == LockState.WAITING) {
+ /**
+ * the {@link #unlock(HiveLock)} here is more about future proofing when support for
+ * multi-statement txns is added. In that case it's reasonable for the client
+ * to retry this part of txn or try something else w/o aborting the whole txn.
+ * Also for READ_COMMITTED (when and if that is supported).
+ */
+ unlock(hl);//remove the locks in Waiting state
+ LockException le = new LockException(null, ErrorMsg.LOCK_ACQUIRE_TIMEDOUT,
+ lock.toString(), Long.toString(retryDuration), res.toString());
+ if(conf.getBoolVar(HiveConf.ConfVars.TXN_MGR_DUMP_LOCK_STATE_ON_ACQUIRE_TIMEOUT)) {
+ showLocksNewFormat(le.getMessage());
+ }
+ throw le;
+ }
+ throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg() + " " + res);
}
acquiredLocks.add(hl);
@@ -114,8 +148,8 @@ public class DbLockManager implements HiveLockManager{
return res.getState();
} catch (NoSuchTxnException e) {
- LOG.error("Metastore could not find txnid " + lock.getTxnid());
- throw new LockException(ErrorMsg.TXNMGR_NOT_INSTANTIATED.getMsg(), e);
+ LOG.error("Metastore could not find " + JavaUtils.txnIdToString(lock.getTxnid()));
+ throw new LockException(e, ErrorMsg.TXN_NO_SUCH_TRANSACTION, JavaUtils.txnIdToString(lock.getTxnid()));
} catch (TxnAbortedException e) {
LOG.error("Transaction " + JavaUtils.txnIdToString(lock.getTxnid()) + " already aborted.");
throw new LockException(e, ErrorMsg.TXN_ABORTED, JavaUtils.txnIdToString(lock.getTxnid()));
@@ -124,6 +158,21 @@ public class DbLockManager implements HiveLockManager{
e);
}
}
+ private void showLocksNewFormat(String preamble) throws LockException {
+ ShowLocksResponse rsp = getLocks();
+
+ // write the results in the file
+ ByteArrayOutputStream baos = new ByteArrayOutputStream(1024*2);
+ DataOutputStream os = new DataOutputStream(baos);
+ try {
+ DDLTask.dumpLockInfo(os, rsp);
+ os.flush();
+ LOG.info(baos.toString());
+ }
+ catch(IOException ex) {
+ LOG.error("Dumping lock info for " + preamble + " failed: " + ex.getMessage(), ex);
+ }
+ }
/**
* Used to make another attempt to acquire a lock (in Waiting state)
* @param extLockId
@@ -259,8 +308,8 @@ public class DbLockManager implements HiveLockManager{
/**
* Clear the memory of the locks in this object. This won't clear the locks from the database.
* It is for use with
- * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient).commitTxn} and
- * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient).rollbackTxn}.
+ * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient, org.apache.hadoop.hive.conf.HiveConf)} .commitTxn} and
+ * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient, org.apache.hadoop.hive.conf.HiveConf)} .rollbackTxn}.
*/
void clearLocalLockRecords() {
locks.clear();
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
index 97d2282..552367c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
@@ -96,7 +96,7 @@ public class DbTxnManager extends HiveTxnManagerImpl {
public HiveLockManager getLockManager() throws LockException {
init();
if (lockMgr == null) {
- lockMgr = new DbLockManager(client);
+ lockMgr = new DbLockManager(client, conf);
}
return lockMgr;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
index 74512d7..2bfc732 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
@@ -57,10 +57,12 @@ public interface HiveTxnManager {
* A list of acquired locks will be stored in the
* {@link org.apache.hadoop.hive.ql.Context} object and can be retrieved
* via {@link org.apache.hadoop.hive.ql.Context#getHiveLocks}.
+ *
* @param plan query plan
* @param ctx Context for this query
* @param username name of the user for this query
- * @throws LockException if there is an error getting the locks
+ * @throws LockException if there is an error getting the locks. Use {@link LockException#getCanonicalErrorMsg()}
+ * to get more info on how to handle the exception.
*/
void acquireLocks(QueryPlan plan, Context ctx, String username) throws LockException;
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
index c1e5c81..b7d1d18 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
@@ -378,12 +378,14 @@ public class TestDbTxnManager {
private static class MockQueryPlan extends QueryPlan {
private final HashSet<ReadEntity> inputs;
private final HashSet<WriteEntity> outputs;
-
+ private final String queryId;
+
MockQueryPlan(TestDbTxnManager test) {
HashSet<ReadEntity> r = test.readEntities;
HashSet<WriteEntity> w = test.writeEntities;
inputs = (r == null) ? new HashSet<ReadEntity>() : r;
outputs = (w == null) ? new HashSet<WriteEntity>() : w;
+ queryId = makeQueryId();
}
@Override
@@ -395,6 +397,10 @@ public class TestDbTxnManager {
public HashSet<WriteEntity> getOutputs() {
return outputs;
}
+ @Override
+ public String getQueryId() {
+ return queryId;
+ }
}
private Table newTable(boolean isPartitioned) {
http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
index c6a7fcb..3bdcc21 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
@@ -211,6 +212,34 @@ public class TestDbTxnManager2 {
checkCmdOnDriver(cpr);
}
+ @Test
+ public void testLockRetryLimit() throws Exception {
+ conf.setIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES, 2);
+ conf.setBoolVar(HiveConf.ConfVars.TXN_MGR_DUMP_LOCK_STATE_ON_ACQUIRE_TIMEOUT, true);
+ HiveTxnManager otherTxnMgr = new DbTxnManager();
+ ((DbTxnManager)otherTxnMgr).setHiveConf(conf);
+ CommandProcessorResponse cpr = driver.run("create table T9(a int)");
+ checkCmdOnDriver(cpr);
+ cpr = driver.compileAndRespond("select * from T9");
+ checkCmdOnDriver(cpr);
+ txnMgr.acquireLocks(driver.getPlan(), ctx, "Vincent Vega");
+ List<ShowLocksResponseElement> locks = getLocks(txnMgr);
+ Assert.assertEquals("Unexpected lock count", 1, locks.size());
+ checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T9", null, locks.get(0));
+
+ cpr = driver.compileAndRespond("drop table T9");
+ checkCmdOnDriver(cpr);
+ try {
+ otherTxnMgr.acquireLocks(driver.getPlan(), ctx, "Winston Winnfield");
+ }
+ catch(LockException ex) {
+ Assert.assertEquals("Got wrong lock exception", ErrorMsg.LOCK_ACQUIRE_TIMEDOUT, ex.getCanonicalErrorMsg());
+ }
+ locks = getLocks(txnMgr);
+ Assert.assertEquals("Unexpected lock count", 1, locks.size());
+ checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T9", null, locks.get(0));
+ otherTxnMgr.closeTxnManager();
+ }
private void checkLock(LockType type, LockState state, String db, String table, String partition, ShowLocksResponseElement l) {
Assert.assertEquals(l.toString(),l.getType(), type);
@@ -226,6 +255,9 @@ public class TestDbTxnManager2 {
return s == null ? null : s.toLowerCase();
}
private List<ShowLocksResponseElement> getLocks() throws Exception {
+ return getLocks(this.txnMgr);
+ }
+ private List<ShowLocksResponseElement> getLocks(HiveTxnManager txnMgr) throws Exception {
ShowLocksResponse rsp = ((DbLockManager)txnMgr.getLockManager()).getLocks();
return rsp.getLocks();
}