You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2015/12/04 04:29:32 UTC

[2/3] hive git commit: HIVE-12529 HiveTxnManager.acquireLocks() should not block forever (Eugene Koifman, reviewed by Alan Gates)

HIVE-12529 HiveTxnManager.acquireLocks() should not block forever (Eugene Koifman, reviewed by Alan Gates)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fbb5667b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fbb5667b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fbb5667b

Branch: refs/heads/master
Commit: fbb5667b525ecc6b0c013a48b86610b6ab1a1ce3
Parents: 5c4efb7
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Thu Dec 3 18:51:32 2015 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Thu Dec 3 18:51:32 2015 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   8 +-
 .../hadoop/hive/metastore/txn/TxnHandler.java   |  42 +++----
 .../org/apache/hadoop/hive/ql/ErrorMsg.java     |   2 +
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 109 ++++++++++---------
 .../hadoop/hive/ql/lockmgr/DbLockManager.java   |  69 ++++++++++--
 .../hadoop/hive/ql/lockmgr/DbTxnManager.java    |   2 +-
 .../hadoop/hive/ql/lockmgr/HiveTxnManager.java  |   4 +-
 .../hive/ql/lockmgr/TestDbTxnManager.java       |   8 +-
 .../hive/ql/lockmgr/TestDbTxnManager2.java      |  32 ++++++
 9 files changed, 187 insertions(+), 89 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4d881ba..803d52b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1458,8 +1458,8 @@ public class HiveConf extends Configuration {
     HIVE_UNLOCK_NUMRETRIES("hive.unlock.numretries", 10,
         "The number of times you want to retry to do one unlock"),
     HIVE_LOCK_SLEEP_BETWEEN_RETRIES("hive.lock.sleep.between.retries", "60s",
-        new TimeValidator(TimeUnit.SECONDS),
-        "The sleep time between various retries"),
+        new TimeValidator(TimeUnit.SECONDS, 0L, false, Long.MAX_VALUE, false),
+        "The maximum sleep time between various retries"),
     HIVE_LOCK_MAPRED_ONLY("hive.lock.mapred.only.operation", false,
         "This param is to control whether or not only do lock on queries\n" +
         "that need to execute at least one mapred job."),
@@ -1503,6 +1503,10 @@ public class HiveConf extends Configuration {
         "no transactions."),
     HIVE_TXN_TIMEOUT("hive.txn.timeout", "300s", new TimeValidator(TimeUnit.SECONDS),
         "time after which transactions are declared aborted if the client has not sent a heartbeat."),
+    TXN_MGR_DUMP_LOCK_STATE_ON_ACQUIRE_TIMEOUT("hive.txn.manager.dump.lock.state.on.acquire.timeout", false,
+      "Set this to true so that when attempt to acquire a lock on resource times out, the current state" +
+        " of the lock manager is dumped to log file.  This is for debugging.  See also " +
+        "hive.lock.numretries and hive.lock.sleep.between.retries."),
 
     HIVE_TXN_MAX_OPEN_BATCH("hive.txn.max.open.batch", 1000,
         "Maximum number of transactions that can be fetched in one call to open_txns().\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 4c395c3..4c5043b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -518,12 +518,12 @@ public class TxnHandler {
         // Heartbeat on the lockid first, to assure that our lock is still valid.
         // Then look up the lock info (hopefully in the cache).  If these locks
         // are associated with a transaction then heartbeat on that as well.
-        Long txnid = getTxnIdFromLockId(dbConn, extLockId);
-        if(txnid == null) {
+        LockInfo info = getTxnIdFromLockId(dbConn, extLockId);
+        if(info == null) {
           throw new NoSuchLockException("No such lock " + JavaUtils.lockIdToString(extLockId));
         }
-        if (txnid > 0) {
-          heartbeatTxn(dbConn, txnid);
+        if (info.txnId > 0) {
+          heartbeatTxn(dbConn, info.txnId);
         }
         else {
           heartbeatLock(dbConn, extLockId);
@@ -570,28 +570,29 @@ public class TxnHandler {
         dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
         stmt = dbConn.createStatement();
         //hl_txnid <> 0 means it's associated with a transaction
-        String s = "delete from HIVE_LOCKS where hl_lock_ext_id = " + extLockId + " AND hl_txnid = 0";
+        String s = "delete from HIVE_LOCKS where hl_lock_ext_id = " + extLockId + " AND (hl_txnid = 0 OR" +
+          " (hl_txnid <> 0 AND hl_lock_state = '" + LOCK_WAITING + "'))";
         LOG.debug("Going to execute update <" + s + ">");
         int rc = stmt.executeUpdate(s);
         if (rc < 1) {
           LOG.debug("Going to rollback");
           dbConn.rollback();
-          Long txnid = getTxnIdFromLockId(dbConn, extLockId);
-          if(txnid == null) {
-            LOG.error("No lock found for unlock(" + rqst + ")");
+          LockInfo info = getTxnIdFromLockId(dbConn, extLockId);
+          if(info == null) {
+            //didn't find any lock with extLockId but at ReadCommitted there is a possibility that
+            //it existed when above delete ran but it didn't have the expected state.
+            LOG.error("No lock in " + LOCK_WAITING + " mode found for unlock(" + rqst + ")");
             throw new NoSuchLockException("No such lock " + JavaUtils.lockIdToString(extLockId));
           }
-          if(txnid != 0) {
-            String msg = "Unlocking locks associated with transaction" +
-              " not permitted.  Lockid " + JavaUtils.lockIdToString(extLockId) + " is associated with " +
-              "transaction " + JavaUtils.txnIdToString(txnid);
+          if(info.txnId != 0) {
+            String msg = "Unlocking locks associated with transaction not permitted.  " + info;
             LOG.error(msg);
             throw new TxnOpenException(msg);
           }
-          if(txnid == 0) {
+          if(info.txnId == 0) {
             //we didn't see this lock when running DELETE stmt above but now it showed up
             //so should "should never happen" happened...
-            String msg = "Found lock " + JavaUtils.lockIdToString(extLockId) + " with " + JavaUtils.txnIdToString(txnid);
+            String msg = "Found lock in unexpected state " + info;
             LOG.error(msg);
             throw new MetaException(msg);
           }
@@ -1910,22 +1911,23 @@ public class TxnHandler {
     }
   }
 
-  private Long getTxnIdFromLockId(Connection dbConn, long extLockId)
+  private LockInfo getTxnIdFromLockId(Connection dbConn, long extLockId)
     throws NoSuchLockException, MetaException, SQLException {
     Statement stmt = null;
     ResultSet rs = null;
     try {
       stmt = dbConn.createStatement();
-      String s = "select hl_txnid from HIVE_LOCKS where hl_lock_ext_id = " +
-        extLockId;
+      String s = "select hl_lock_ext_id, hl_lock_int_id, hl_db, hl_table, " +
+        "hl_partition, hl_lock_state, hl_lock_type, hl_txnid from HIVE_LOCKS where " +
+        "hl_lock_ext_id = " + extLockId;
       LOG.debug("Going to execute query <" + s + ">");
       rs = stmt.executeQuery(s);
       if (!rs.next()) {
         return null;
       }
-      long txnid = rs.getLong(1);
-      LOG.debug("getTxnIdFromLockId(" + extLockId + ") Return " + JavaUtils.txnIdToString(txnid));
-      return txnid;
+      LockInfo info = new LockInfo(rs);
+      LOG.debug("getTxnIdFromLockId(" + extLockId + ") Return " + JavaUtils.txnIdToString(info.txnId));
+      return info;
     } finally {
       close(rs);
       closeStmt(stmt);

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 8a47605..9d9dd53 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -424,6 +424,8 @@ public enum ErrorMsg {
   CTAS_LOCATION_NONEMPTY(10304, "CREATE-TABLE-AS-SELECT cannot create table with location to a non-empty directory."),
   CTAS_CREATES_VOID_TYPE(10305, "CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the type, near field: "),
   TBL_SORTED_NOT_BUCKETED(10306, "Destination table {0} found to be sorted but not bucketed.", true),
+  //{2} should be lockid
+  LOCK_ACQUIRE_TIMEDOUT(10307, "Lock acquisition for {0} timed out after {1}ms.  {2}", true),
   //========================== 20000 range starts here ========================//
   SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index a210b95..8762e12 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -2520,7 +2520,60 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
     }
     return 0;
   }
-
+  public static void dumpLockInfo(DataOutputStream os, ShowLocksResponse rsp) throws IOException {
+    // Write a header
+    os.writeBytes("Lock ID");
+    os.write(separator);
+    os.writeBytes("Database");
+    os.write(separator);
+    os.writeBytes("Table");
+    os.write(separator);
+    os.writeBytes("Partition");
+    os.write(separator);
+    os.writeBytes("State");
+    os.write(separator);
+    os.writeBytes("Type");
+    os.write(separator);
+    os.writeBytes("Transaction ID");
+    os.write(separator);
+    os.writeBytes("Last Hearbeat");
+    os.write(separator);
+    os.writeBytes("Acquired At");
+    os.write(separator);
+    os.writeBytes("User");
+    os.write(separator);
+    os.writeBytes("Hostname");
+    os.write(terminator);
+
+    List<ShowLocksResponseElement> locks = rsp.getLocks();
+    if (locks != null) {
+      for (ShowLocksResponseElement lock : locks) {
+        os.writeBytes(Long.toString(lock.getLockid()));
+        os.write(separator);
+        os.writeBytes(lock.getDbname());
+        os.write(separator);
+        os.writeBytes((lock.getTablename() == null) ? "NULL" : lock.getTablename());
+        os.write(separator);
+        os.writeBytes((lock.getPartname() == null) ? "NULL" : lock.getPartname());
+        os.write(separator);
+        os.writeBytes(lock.getState().toString());
+        os.write(separator);
+        os.writeBytes(lock.getType().toString());
+        os.write(separator);
+        os.writeBytes((lock.getTxnid() == 0) ? "NULL" : Long.toString(lock.getTxnid()));
+        os.write(separator);
+        os.writeBytes(Long.toString(lock.getLastheartbeat()));
+        os.write(separator);
+        os.writeBytes((lock.getAcquiredat() == 0) ? "NULL" : Long.toString(lock.getAcquiredat()));
+        os.write(separator);
+        os.writeBytes(lock.getUser());
+        os.write(separator);
+        os.writeBytes(lock.getHostname());
+        os.write(separator);
+        os.write(terminator);
+      }
+    }
+  }
   private int showLocksNewFormat(ShowLocksDesc showLocks, HiveLockManager lm)
       throws  HiveException {
 
@@ -2535,59 +2588,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
     // write the results in the file
     DataOutputStream os = getOutputStream(showLocks.getResFile());
     try {
-      // Write a header
-      os.writeBytes("Lock ID");
-      os.write(separator);
-      os.writeBytes("Database");
-      os.write(separator);
-      os.writeBytes("Table");
-      os.write(separator);
-      os.writeBytes("Partition");
-      os.write(separator);
-      os.writeBytes("State");
-      os.write(separator);
-      os.writeBytes("Type");
-      os.write(separator);
-      os.writeBytes("Transaction ID");
-      os.write(separator);
-      os.writeBytes("Last Hearbeat");
-      os.write(separator);
-      os.writeBytes("Acquired At");
-      os.write(separator);
-      os.writeBytes("User");
-      os.write(separator);
-      os.writeBytes("Hostname");
-      os.write(terminator);
-
-      List<ShowLocksResponseElement> locks = rsp.getLocks();
-      if (locks != null) {
-        for (ShowLocksResponseElement lock : locks) {
-          os.writeBytes(Long.toString(lock.getLockid()));
-          os.write(separator);
-          os.writeBytes(lock.getDbname());
-          os.write(separator);
-          os.writeBytes((lock.getTablename() == null) ? "NULL" : lock.getTablename());
-          os.write(separator);
-          os.writeBytes((lock.getPartname() == null) ? "NULL" : lock.getPartname());
-          os.write(separator);
-          os.writeBytes(lock.getState().toString());
-          os.write(separator);
-          os.writeBytes(lock.getType().toString());
-          os.write(separator);
-          os.writeBytes((lock.getTxnid() == 0) ? "NULL" : Long.toString(lock.getTxnid()));
-          os.write(separator);
-          os.writeBytes(Long.toString(lock.getLastheartbeat()));
-          os.write(separator);
-          os.writeBytes((lock.getAcquiredat() == 0) ? "NULL" : Long.toString(lock.getAcquiredat()));
-          os.write(separator);
-          os.writeBytes(lock.getUser());
-          os.write(separator);
-          os.writeBytes(lock.getHostname());
-          os.write(separator);
-          os.write(terminator);
-        }
-
-      }
+      dumpLockInfo(os, rsp);
     } catch (FileNotFoundException e) {
       LOG.warn("show function: " + stringifyException(e));
       return 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
index 42616ac..7d58622 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.lockmgr;
 
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.DDLTask;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.JavaUtils;
@@ -29,10 +31,15 @@ import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.thrift.TException;
 
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 
 /**
  * An implementation of HiveLockManager for use with {@link org.apache.hadoop.hive.ql.lockmgr.DbTxnManager}.
@@ -44,20 +51,20 @@ public class DbLockManager implements HiveLockManager{
   static final private String CLASS_NAME = DbLockManager.class.getName();
   static final private Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
 
-  private static final long MAX_SLEEP = 15000;
-  private HiveLockManagerCtx context;
+  private long MAX_SLEEP;
   private Set<DbHiveLock> locks;
   private IMetaStoreClient client;
   private long nextSleep = 50;
+  private final HiveConf conf;
 
-  DbLockManager(IMetaStoreClient client) {
+  DbLockManager(IMetaStoreClient client, HiveConf conf) {
     locks = new HashSet<>();
     this.client = client;
+    this.conf = conf;
   }
 
   @Override
   public void setContext(HiveLockManagerCtx ctx) throws LockException {
-    context = ctx;
   }
 
   @Override
@@ -81,6 +88,15 @@ public class DbLockManager implements HiveLockManager{
    * @return the result of the lock attempt
    */
   LockState lock(LockRequest lock, String queryId, boolean isBlocking, List<HiveLock> acquiredLocks) throws LockException {
+    Objects.requireNonNull(queryId, "queryId cannot be null");
+    nextSleep = 50;
+    /*
+     * get from conf to pick up changes; make sure not to set too low and kill the metastore
+     * MAX_SLEEP is the max time each backoff() will wait for, thus the total time to wait for
+     * successful lock acquisition is approximately (see backoff()) maxNumWaits * MAX_SLEEP.
+     */
+    MAX_SLEEP = Math.max(15000, conf.getTimeVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES, TimeUnit.MILLISECONDS));
+    int maxNumWaits = Math.max(0, conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES));
     try {
       LOG.info("Requesting: queryId=" + queryId + " " + lock);
       LockResponse res = client.lock(lock);
@@ -91,15 +107,33 @@ public class DbLockManager implements HiveLockManager{
           return LockState.WAITING;
         }
       }
-      while (res.getState() == LockState.WAITING) {
+      int numRetries = 0;
+      long startRetry = System.currentTimeMillis();
+      while (res.getState() == LockState.WAITING && numRetries++ < maxNumWaits) {
         backoff();
         res = client.checkLock(res.getLockid());
 
       }
+      long retryDuration = System.currentTimeMillis() - startRetry;
       DbHiveLock hl = new DbHiveLock(res.getLockid());
       locks.add(hl);
       if (res.getState() != LockState.ACQUIRED) {
-        throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
+        if(res.getState() == LockState.WAITING) {
+          /**
+           * the {@link #unlock(HiveLock)} here is more about future proofing when support for
+           * multi-statement txns is added.  In that case it's reasonable for the client
+           * to retry this part of txn or try something else w/o aborting the whole txn.
+           * Also for READ_COMMITTED (when and if that is supported).
+           */
+          unlock(hl);//remove the locks in Waiting state
+          LockException le = new LockException(null, ErrorMsg.LOCK_ACQUIRE_TIMEDOUT,
+            lock.toString(), Long.toString(retryDuration), res.toString());
+          if(conf.getBoolVar(HiveConf.ConfVars.TXN_MGR_DUMP_LOCK_STATE_ON_ACQUIRE_TIMEOUT)) {
+            showLocksNewFormat(le.getMessage());
+          }
+          throw le;
+        }
+        throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg() + " " + res);
       }
       acquiredLocks.add(hl);
 
@@ -114,8 +148,8 @@ public class DbLockManager implements HiveLockManager{
 
       return res.getState();
     } catch (NoSuchTxnException e) {
-      LOG.error("Metastore could not find txnid " + lock.getTxnid());
-      throw new LockException(ErrorMsg.TXNMGR_NOT_INSTANTIATED.getMsg(), e);
+      LOG.error("Metastore could not find " + JavaUtils.txnIdToString(lock.getTxnid()));
+      throw new LockException(e, ErrorMsg.TXN_NO_SUCH_TRANSACTION, JavaUtils.txnIdToString(lock.getTxnid()));
     } catch (TxnAbortedException e) {
       LOG.error("Transaction " + JavaUtils.txnIdToString(lock.getTxnid()) + " already aborted.");
       throw new LockException(e, ErrorMsg.TXN_ABORTED, JavaUtils.txnIdToString(lock.getTxnid()));
@@ -124,6 +158,21 @@ public class DbLockManager implements HiveLockManager{
           e);
     }
   }
+  private void showLocksNewFormat(String preamble) throws LockException {
+    ShowLocksResponse rsp = getLocks();
+
+    // render the lock table into an in-memory buffer so it can be written to the log
+    ByteArrayOutputStream baos = new ByteArrayOutputStream(1024*2);
+    DataOutputStream os = new DataOutputStream(baos);
+    try {
+      DDLTask.dumpLockInfo(os, rsp);
+      os.flush();
+      LOG.info(baos.toString());
+    }
+    catch(IOException ex) {
+      LOG.error("Dumping lock info for " + preamble + " failed: " + ex.getMessage(), ex);
+    }
+  }
   /**
    * Used to make another attempt to acquire a lock (in Waiting state)
    * @param extLockId
@@ -259,8 +308,8 @@ public class DbLockManager implements HiveLockManager{
   /**
    * Clear the memory of the locks in this object.  This won't clear the locks from the database.
    * It is for use with
-   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient).commitTxn} and
-   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient).rollbackTxn}.
+   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient, org.apache.hadoop.hive.conf.HiveConf)} .commitTxn} and
+   * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient, org.apache.hadoop.hive.conf.HiveConf)} .rollbackTxn}.
    */
   void clearLocalLockRecords() {
     locks.clear();

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
index 97d2282..552367c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
@@ -96,7 +96,7 @@ public class DbTxnManager extends HiveTxnManagerImpl {
   public HiveLockManager getLockManager() throws LockException {
     init();
     if (lockMgr == null) {
-      lockMgr = new DbLockManager(client);
+      lockMgr = new DbLockManager(client, conf);
     }
     return lockMgr;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
index 74512d7..2bfc732 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/HiveTxnManager.java
@@ -57,10 +57,12 @@ public interface HiveTxnManager {
    * A list of acquired locks will be stored in the
    * {@link org.apache.hadoop.hive.ql.Context} object and can be retrieved
    * via {@link org.apache.hadoop.hive.ql.Context#getHiveLocks}.
+   *
    * @param plan query plan
    * @param ctx Context for this query
    * @param username name of the user for this query
-   * @throws LockException if there is an error getting the locks
+   * @throws LockException if there is an error getting the locks.  Use {@link LockException#getCanonicalErrorMsg()}
+   * to get more info on how to handle the exception.
    */
   void acquireLocks(QueryPlan plan, Context ctx, String username) throws LockException;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
index c1e5c81..b7d1d18 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
@@ -378,12 +378,14 @@ public class TestDbTxnManager {
   private static class MockQueryPlan extends QueryPlan {
     private final HashSet<ReadEntity> inputs;
     private final HashSet<WriteEntity> outputs;
-
+    private final String queryId;
+    
     MockQueryPlan(TestDbTxnManager test) {
       HashSet<ReadEntity> r = test.readEntities;
       HashSet<WriteEntity> w = test.writeEntities;
       inputs = (r == null) ? new HashSet<ReadEntity>() : r;
       outputs = (w == null) ? new HashSet<WriteEntity>() : w;
+      queryId = makeQueryId();
     }
 
     @Override
@@ -395,6 +397,10 @@ public class TestDbTxnManager {
     public HashSet<WriteEntity> getOutputs() {
       return outputs;
     }
+    @Override
+    public String getQueryId() {
+      return queryId;
+    }
   }
 
   private Table newTable(boolean isPartitioned) {

http://git-wip-us.apache.org/repos/asf/hive/blob/fbb5667b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
index c6a7fcb..3bdcc21 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
 import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -211,6 +212,34 @@ public class TestDbTxnManager2 {
     checkCmdOnDriver(cpr);
   }
 
+  @Test
+  public void testLockRetryLimit() throws Exception {
+    conf.setIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES, 2);
+    conf.setBoolVar(HiveConf.ConfVars.TXN_MGR_DUMP_LOCK_STATE_ON_ACQUIRE_TIMEOUT, true);
+    HiveTxnManager otherTxnMgr = new DbTxnManager(); 
+    ((DbTxnManager)otherTxnMgr).setHiveConf(conf);
+    CommandProcessorResponse cpr = driver.run("create table T9(a int)");
+    checkCmdOnDriver(cpr);
+    cpr = driver.compileAndRespond("select * from T9");
+    checkCmdOnDriver(cpr);
+    txnMgr.acquireLocks(driver.getPlan(), ctx, "Vincent Vega");
+    List<ShowLocksResponseElement> locks = getLocks(txnMgr);
+    Assert.assertEquals("Unexpected lock count", 1, locks.size());
+    checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T9", null, locks.get(0));
+    
+    cpr = driver.compileAndRespond("drop table T9");
+    checkCmdOnDriver(cpr);
+    try {
+      otherTxnMgr.acquireLocks(driver.getPlan(), ctx, "Winston Winnfield");
+    }
+    catch(LockException ex) {
+      Assert.assertEquals("Got wrong lock exception", ErrorMsg.LOCK_ACQUIRE_TIMEDOUT, ex.getCanonicalErrorMsg());
+    }
+    locks = getLocks(txnMgr);
+    Assert.assertEquals("Unexpected lock count", 1, locks.size());
+    checkLock(LockType.SHARED_READ, LockState.ACQUIRED, "default", "T9", null, locks.get(0));
+    otherTxnMgr.closeTxnManager();
+  }
 
   private void checkLock(LockType type, LockState state, String db, String table, String partition, ShowLocksResponseElement l) {
     Assert.assertEquals(l.toString(),l.getType(), type);
@@ -226,6 +255,9 @@ public class TestDbTxnManager2 {
     return s == null ? null : s.toLowerCase();
   }
   private List<ShowLocksResponseElement> getLocks() throws Exception {
+    return getLocks(this.txnMgr);
+  }
+  private List<ShowLocksResponseElement> getLocks(HiveTxnManager txnMgr) throws Exception {
     ShowLocksResponse rsp = ((DbLockManager)txnMgr.getLockManager()).getLocks();
     return rsp.getLocks();
   }