You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2015/10/01 02:42:49 UTC

[1/3] hive git commit: HIVE-11934 Transaction lock retry logic results in infinite loop(Eugene Koifman, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/branch-1 e9bc9d9f5 -> f20a359d7


HIVE-11934 Transaction lock retry logic results in infinite loop(Eugene Koifman, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a911348
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a911348
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a911348

Branch: refs/heads/branch-1
Commit: 6a91134894d3aaa83ba71c0d12e008743984f3e0
Parents: e9bc9d9
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 17:18:06 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 17:18:06 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/metastore/txn/TxnHandler.java   | 117 +++++++++----------
 1 file changed, 57 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6a911348/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 52cbda6..aa8bf8a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -91,8 +91,8 @@ public class TxnHandler {
   /**
    * Number of consecutive deadlocks we have seen
    */
-  protected int deadlockCnt;
-  private long deadlockRetryInterval;
+  private int deadlockCnt;
+  private final long deadlockRetryInterval;
   protected HiveConf conf;
   protected DatabaseProduct dbProduct;
 
@@ -115,10 +115,8 @@ public class TxnHandler {
   //
   // All public methods that write to the database have to check for deadlocks when a SQLException
   // comes back and handle it if they see one.  This has to be done with the connection pooling
-  // in mind.  To do this they should call detectDeadlock AFTER rolling back the db transaction,
-  // and then in an outer loop they should catch DeadlockException.  In the catch for this they
-  // should increment the deadlock counter and recall themselves.  See commitTxn for an example.
-  // the connection has been closed and returned to the pool.
+  // in mind.  To do this they should call checkRetryable() AFTER rolling back the db transaction,
+  // and then they should catch RetryException and call themselves recursively. See commitTxn for an example.
 
   public TxnHandler(HiveConf conf) {
     this.conf = conf;
@@ -135,7 +133,6 @@ public class TxnHandler {
     }
 
     timeout = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_TIMEOUT, TimeUnit.MILLISECONDS);
-    deadlockCnt = 0;
     buildJumpTable();
     retryInterval = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HMSHANDLERINTERVAL,
         TimeUnit.MILLISECONDS);
@@ -280,7 +277,6 @@ public class TxnHandler {
   }
 
   public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException {
-    deadlockCnt = 0;  // Reset deadlock count since this is a new transaction
     int numTxns = rqst.getNum_txns();
     try {
       Connection dbConn = null;
@@ -420,7 +416,6 @@ public class TxnHandler {
 
   public LockResponse lock(LockRequest rqst)
     throws NoSuchTxnException, TxnAbortedException, MetaException {
-    deadlockCnt = 0;
     try {
       Connection dbConn = null;
       try {
@@ -636,8 +631,6 @@ public class TxnHandler {
       }
     } catch (RetryException e) {
       heartbeat(ids);
-    } finally {
-      deadlockCnt = 0;
     }
   }
 
@@ -903,14 +896,14 @@ public class TxnHandler {
 
   void rollbackDBConn(Connection dbConn) {
     try {
-      if (dbConn != null) dbConn.rollback();
+      if (dbConn != null && !dbConn.isClosed()) dbConn.rollback();
     } catch (SQLException e) {
       LOG.warn("Failed to rollback db connection " + getMessage(e));
     }
   }
   protected void closeDbConn(Connection dbConn) {
     try {
-      if (dbConn != null) dbConn.close();
+      if (dbConn != null && !dbConn.isClosed()) dbConn.close();
     } catch (SQLException e) {
       LOG.warn("Failed to close db connection " + getMessage(e));
     }
@@ -922,7 +915,7 @@ public class TxnHandler {
    */
   protected void closeStmt(Statement stmt) {
     try {
-      if (stmt != null) stmt.close();
+      if (stmt != null && !stmt.isClosed()) stmt.close();
     } catch (SQLException e) {
       LOG.warn("Failed to close statement " + getMessage(e));
     }
@@ -952,15 +945,14 @@ public class TxnHandler {
     closeDbConn(dbConn);
   }
   /**
-   * Determine if an exception was such that it makse sense to retry.  Unfortunately there is no standard way to do
+   * Determine if an exception was such that it makes sense to retry.  Unfortunately there is no standard way to do
    * this, so we have to inspect the error messages and catch the telltale signs for each
-   * different database.
+   * different database.  This method will throw {@code RetryException}
+   * if the error is retry-able.
    * @param conn database connection
    * @param e exception that was thrown.
-   * @param caller name of the method calling this
-   * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when deadlock
-   * detected and retry count has not been exceeded.
-   * TODO: make "caller" more elaborate like include lockId for example
+   * @param caller name of the method calling this (and other info useful to log)
+   * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when the operation should be retried
    */
   protected void checkRetryable(Connection conn,
                                 SQLException e,
@@ -973,53 +965,57 @@ public class TxnHandler {
     // so I've tried to capture the different error messages (there appear to be fewer different
     // error messages than SQL states).
     // Derby and newer MySQL driver use the new SQLTransactionRollbackException
-    if (dbProduct == null && conn != null) {
-      determineDatabaseProduct(conn);
-    }
-    if (e instanceof SQLTransactionRollbackException ||
-      ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
-        dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
-      (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
-      (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
-        || e.getMessage().contains("can't serialize access for this transaction")))) {
-      if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
-        long waitInterval = deadlockRetryInterval * deadlockCnt;
-        LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
-          "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
-        // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
-        try {
-          Thread.sleep(waitInterval);
-        } catch (InterruptedException ie) {
-          // NOP
-        }
-        throw new RetryException();
-      } else {
-        LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
-        deadlockCnt = 0;
+    boolean sendRetrySignal = false;
+    try {
+      if (dbProduct == null && conn != null) {
+        determineDatabaseProduct(conn);
       }
-    }
-    else if(isRetryable(e)) {
-      //in MSSQL this means Communication Link Failure
-      if(retryNum++ < retryLimit) {
-        LOG.warn("Retryable error detected in " + caller + ".  Will wait " + retryInterval +
-          "ms and retry up to " + (retryLimit - retryNum + 1) + " times.  Error: " + getMessage(e));
-        try {
-          Thread.sleep(retryInterval);
+      if (e instanceof SQLTransactionRollbackException ||
+        ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
+          dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
+        (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
+        (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
+          || e.getMessage().contains("can't serialize access for this transaction")))) {
+        if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
+          long waitInterval = deadlockRetryInterval * deadlockCnt;
+          LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
+            "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
+          // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
+          try {
+            Thread.sleep(waitInterval);
+          } catch (InterruptedException ie) {
+            // NOP
+          }
+          sendRetrySignal = true;
+        } else {
+          LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
         }
-        catch(InterruptedException ex) {
-          //
+      } else if (isRetryable(e)) {
+        //in MSSQL this means Communication Link Failure
+        if (retryNum++ < retryLimit) {
+          LOG.warn("Retryable error detected in " + caller + ".  Will wait " + retryInterval +
+            "ms and retry up to " + (retryLimit - retryNum + 1) + " times.  Error: " + getMessage(e));
+          try {
+            Thread.sleep(retryInterval);
+          } catch (InterruptedException ex) {
+            //
+          }
+          sendRetrySignal = true;
+        } else {
+          LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
         }
-        throw new RetryException();
       }
-      else {
-        LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
+    }
+    finally {
+      /*if this method ends with anything except a retry signal, the caller should fail the operation
+      and propagate the error up to the its caller (Metastore client); thus must reset retry counters*/
+      if(!sendRetrySignal) {
+        deadlockCnt = 0;
         retryNum = 0;
       }
     }
-    else {
-      //if here, we got something that will propagate the error (rather than retry), so reset counters
-      deadlockCnt = 0;
-      retryNum = 0;
+    if(sendRetrySignal) {
+      throw new RetryException();
     }
   }
 
@@ -2089,6 +2085,7 @@ public class TxnHandler {
         //in MSSQL this means Communication Link Failure
         return true;
       }
+      //see https://issues.apache.org/jira/browse/HIVE-9938
     }
     return false;
   }


[3/3] hive git commit: HIVE-11883 'transactional' table property for ACID should be case insensitive (Eugene Koifman, reviewed by Ashutosh Chauhan)

Posted by ek...@apache.org.
HIVE-11883 'transactional' table property for ACID should be case insensitive (Eugene Koifman, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f20a359d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f20a359d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f20a359d

Branch: refs/heads/branch-1
Commit: f20a359d77230b6c57120d8bb71ad700b1a7d4c2
Parents: 43e08a4
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 17:19:49 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 17:19:49 2015 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java   | 3 +++
 ql/src/test/queries/clientpositive/update_all_types.q            | 2 +-
 ql/src/test/results/clientpositive/tez/update_all_types.q.out    | 4 ++--
 ql/src/test/results/clientpositive/update_all_types.q.out        | 4 ++--
 4 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5864c35..6e82f41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12212,6 +12212,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     if (!SessionState.get().getTxnMgr().supportsAcid()) return false;
     String tableIsTransactional =
         tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
+    if(tableIsTransactional == null) {
+      tableIsTransactional = tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase());
+    }
     return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/test/queries/clientpositive/update_all_types.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/update_all_types.q b/ql/src/test/queries/clientpositive/update_all_types.q
index 262a304..0229845 100644
--- a/ql/src/test/queries/clientpositive/update_all_types.q
+++ b/ql/src/test/queries/clientpositive/update_all_types.q
@@ -17,7 +17,7 @@ create table acid_uat(ti tinyint,
                  s string,
                  vc varchar(128),
                  ch char(36),
-                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE');
 
 insert into table acid_uat
     select ctinyint,

http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/test/results/clientpositive/tez/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/update_all_types.q.out b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/tez/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
                  s string,
                  vc varchar(128),
                  ch char(36),
-                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
                  s string,
                  vc varchar(128),
                  ch char(36),
-                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@acid_uat

http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/test/results/clientpositive/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/update_all_types.q.out b/ql/src/test/results/clientpositive/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
                  s string,
                  vc varchar(128),
                  ch char(36),
-                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
                  s string,
                  vc varchar(128),
                  ch char(36),
-                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+                 b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@acid_uat


[2/3] hive git commit: HIVE-11916 TxnHandler.getOpenTxnsInfo() and getOpenTxns() may produce inconsistent result (Eugene Koifman, reviewed by Ashutosh Chauhan)

Posted by ek...@apache.org.
HIVE-11916 TxnHandler.getOpenTxnsInfo() and getOpenTxns() may produce inconsistent result (Eugene Koifman, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/43e08a4e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/43e08a4e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/43e08a4e

Branch: refs/heads/branch-1
Commit: 43e08a4ee1ddddfd6d9c718341ee2e8a49d56818
Parents: 6a91134
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 17:19:01 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 17:19:01 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/metastore/txn/TxnHandler.java      | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/43e08a4e/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index aa8bf8a..82f0dc0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -47,8 +47,9 @@ import java.util.concurrent.TimeUnit;
  * A handler to answer transaction related calls that come into the metastore
  * server.
  *
- * Note on log messages:  Please include txnid:X and lockid info
- * {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
+ * Note on log messages:  Please include txnid:X and lockid info using
+ * {@link org.apache.hadoop.hive.common.JavaUtils#txnIdToString(long)}
+ * and {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
  * The txnid:X and lockid:Y matches how Thrift object toString() methods are generated,
  * so keeping the format consistent makes grep'ing the logs much easier.
  */
@@ -166,7 +167,8 @@ public class TxnHandler {
         }
 
         List<TxnInfo> txnInfo = new ArrayList<TxnInfo>();
-        s = "select txn_id, txn_state, txn_user, txn_host from TXNS";
+        //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+        s = "select txn_id, txn_state, txn_user, txn_host from TXNS where txn_id <= " + hwm;
         LOG.debug("Going to execute query<" + s + ">");
         rs = stmt.executeQuery(s);
         while (rs.next()) {
@@ -230,7 +232,8 @@ public class TxnHandler {
         }
 
         Set<Long> openList = new HashSet<Long>();
-        s = "select txn_id from TXNS";
+        //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+        s = "select txn_id from TXNS where txn_id <= " + hwm;
         LOG.debug("Going to execute query<" + s + ">");
         rs = stmt.executeQuery(s);
         while (rs.next()) {
@@ -1459,7 +1462,7 @@ public class TxnHandler {
     LockResponse response = new LockResponse();
     response.setLockid(extLockId);
 
-    LOG.debug("checkLock(): Setting savepoint. extLockId=" + extLockId);
+    LOG.debug("checkLock(): Setting savepoint. extLockId=" + JavaUtils.lockIdToString(extLockId));
     Savepoint save = dbConn.setSavepoint();
     StringBuilder query = new StringBuilder("select hl_lock_ext_id, " +
       "hl_lock_int_id, hl_db, hl_table, hl_partition, hl_lock_state, " +
@@ -1685,7 +1688,7 @@ public class TxnHandler {
     if (rc < 1) {
       LOG.debug("Going to rollback");
       dbConn.rollback();
-      throw new NoSuchLockException("No such lock: (" + extLockId + "," +
+      throw new NoSuchLockException("No such lock: (" + JavaUtils.lockIdToString(extLockId) + "," +
         + intLockId + ")");
     }
     // We update the database, but we don't commit because there may be other
@@ -1710,7 +1713,7 @@ public class TxnHandler {
       if (rc < 1) {
         LOG.debug("Going to rollback");
         dbConn.rollback();
-        throw new NoSuchLockException("No such lock: " + extLockId);
+        throw new NoSuchLockException("No such lock: " + JavaUtils.lockIdToString(extLockId));
       }
       LOG.debug("Going to commit");
       dbConn.commit();