You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2015/10/01 02:42:49 UTC
[1/3] hive git commit: HIVE-11934 Transaction lock retry logic
results in infinite loop(Eugene Koifman, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/branch-1 e9bc9d9f5 -> f20a359d7
HIVE-11934 Transaction lock retry logic results in infinite loop(Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a911348
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a911348
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a911348
Branch: refs/heads/branch-1
Commit: 6a91134894d3aaa83ba71c0d12e008743984f3e0
Parents: e9bc9d9
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 17:18:06 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 17:18:06 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 117 +++++++++----------
1 file changed, 57 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6a911348/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 52cbda6..aa8bf8a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -91,8 +91,8 @@ public class TxnHandler {
/**
* Number of consecutive deadlocks we have seen
*/
- protected int deadlockCnt;
- private long deadlockRetryInterval;
+ private int deadlockCnt;
+ private final long deadlockRetryInterval;
protected HiveConf conf;
protected DatabaseProduct dbProduct;
@@ -115,10 +115,8 @@ public class TxnHandler {
//
// All public methods that write to the database have to check for deadlocks when a SQLException
// comes back and handle it if they see one. This has to be done with the connection pooling
- // in mind. To do this they should call detectDeadlock AFTER rolling back the db transaction,
- // and then in an outer loop they should catch DeadlockException. In the catch for this they
- // should increment the deadlock counter and recall themselves. See commitTxn for an example.
- // the connection has been closed and returned to the pool.
+ // in mind. To do this they should call checkRetryable() AFTER rolling back the db transaction,
+ // and then they should catch RetryException and call themselves recursively. See commitTxn for an example.
public TxnHandler(HiveConf conf) {
this.conf = conf;
@@ -135,7 +133,6 @@ public class TxnHandler {
}
timeout = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_TIMEOUT, TimeUnit.MILLISECONDS);
- deadlockCnt = 0;
buildJumpTable();
retryInterval = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HMSHANDLERINTERVAL,
TimeUnit.MILLISECONDS);
@@ -280,7 +277,6 @@ public class TxnHandler {
}
public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException {
- deadlockCnt = 0; // Reset deadlock count since this is a new transaction
int numTxns = rqst.getNum_txns();
try {
Connection dbConn = null;
@@ -420,7 +416,6 @@ public class TxnHandler {
public LockResponse lock(LockRequest rqst)
throws NoSuchTxnException, TxnAbortedException, MetaException {
- deadlockCnt = 0;
try {
Connection dbConn = null;
try {
@@ -636,8 +631,6 @@ public class TxnHandler {
}
} catch (RetryException e) {
heartbeat(ids);
- } finally {
- deadlockCnt = 0;
}
}
@@ -903,14 +896,14 @@ public class TxnHandler {
void rollbackDBConn(Connection dbConn) {
try {
- if (dbConn != null) dbConn.rollback();
+ if (dbConn != null && !dbConn.isClosed()) dbConn.rollback();
} catch (SQLException e) {
LOG.warn("Failed to rollback db connection " + getMessage(e));
}
}
protected void closeDbConn(Connection dbConn) {
try {
- if (dbConn != null) dbConn.close();
+ if (dbConn != null && !dbConn.isClosed()) dbConn.close();
} catch (SQLException e) {
LOG.warn("Failed to close db connection " + getMessage(e));
}
@@ -922,7 +915,7 @@ public class TxnHandler {
*/
protected void closeStmt(Statement stmt) {
try {
- if (stmt != null) stmt.close();
+ if (stmt != null && !stmt.isClosed()) stmt.close();
} catch (SQLException e) {
LOG.warn("Failed to close statement " + getMessage(e));
}
@@ -952,15 +945,14 @@ public class TxnHandler {
closeDbConn(dbConn);
}
/**
- * Determine if an exception was such that it makse sense to retry. Unfortunately there is no standard way to do
+ * Determine if an exception was such that it makes sense to retry. Unfortunately there is no standard way to do
* this, so we have to inspect the error messages and catch the telltale signs for each
- * different database.
+ * different database. This method will throw {@code RetryException}
+ * if the error is retry-able.
* @param conn database connection
* @param e exception that was thrown.
- * @param caller name of the method calling this
- * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when deadlock
- * detected and retry count has not been exceeded.
- * TODO: make "caller" more elaborate like include lockId for example
+ * @param caller name of the method calling this (and other info useful to log)
+ * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when the operation should be retried
*/
protected void checkRetryable(Connection conn,
SQLException e,
@@ -973,53 +965,57 @@ public class TxnHandler {
// so I've tried to capture the different error messages (there appear to be fewer different
// error messages than SQL states).
// Derby and newer MySQL driver use the new SQLTransactionRollbackException
- if (dbProduct == null && conn != null) {
- determineDatabaseProduct(conn);
- }
- if (e instanceof SQLTransactionRollbackException ||
- ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
- dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
- (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
- (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
- || e.getMessage().contains("can't serialize access for this transaction")))) {
- if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
- long waitInterval = deadlockRetryInterval * deadlockCnt;
- LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
- "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
- // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
- try {
- Thread.sleep(waitInterval);
- } catch (InterruptedException ie) {
- // NOP
- }
- throw new RetryException();
- } else {
- LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
- deadlockCnt = 0;
+ boolean sendRetrySignal = false;
+ try {
+ if (dbProduct == null && conn != null) {
+ determineDatabaseProduct(conn);
}
- }
- else if(isRetryable(e)) {
- //in MSSQL this means Communication Link Failure
- if(retryNum++ < retryLimit) {
- LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval +
- "ms and retry up to " + (retryLimit - retryNum + 1) + " times. Error: " + getMessage(e));
- try {
- Thread.sleep(retryInterval);
+ if (e instanceof SQLTransactionRollbackException ||
+ ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
+ dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
+ (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
+ (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
+ || e.getMessage().contains("can't serialize access for this transaction")))) {
+ if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
+ long waitInterval = deadlockRetryInterval * deadlockCnt;
+ LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
+ "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
+ // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
+ try {
+ Thread.sleep(waitInterval);
+ } catch (InterruptedException ie) {
+ // NOP
+ }
+ sendRetrySignal = true;
+ } else {
+ LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
}
- catch(InterruptedException ex) {
- //
+ } else if (isRetryable(e)) {
+ //in MSSQL this means Communication Link Failure
+ if (retryNum++ < retryLimit) {
+ LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval +
+ "ms and retry up to " + (retryLimit - retryNum + 1) + " times. Error: " + getMessage(e));
+ try {
+ Thread.sleep(retryInterval);
+ } catch (InterruptedException ex) {
+ //
+ }
+ sendRetrySignal = true;
+ } else {
+ LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
}
- throw new RetryException();
}
- else {
- LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
+ }
+ finally {
+ /*if this method ends with anything except a retry signal, the caller should fail the operation
+ and propagate the error up to the its caller (Metastore client); thus must reset retry counters*/
+ if(!sendRetrySignal) {
+ deadlockCnt = 0;
retryNum = 0;
}
}
- else {
- //if here, we got something that will propagate the error (rather than retry), so reset counters
- deadlockCnt = 0;
- retryNum = 0;
+ if(sendRetrySignal) {
+ throw new RetryException();
}
}
@@ -2089,6 +2085,7 @@ public class TxnHandler {
//in MSSQL this means Communication Link Failure
return true;
}
+ //see https://issues.apache.org/jira/browse/HIVE-9938
}
return false;
}
[3/3] hive git commit: HIVE-11883 'transactional' table property for
ACID should be case insensitive (Eugene Koifman, reviewed by Ashutosh Chauhan)
Posted by ek...@apache.org.
HIVE-11883 'transactional' table property for ACID should be case insensitive (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f20a359d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f20a359d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f20a359d
Branch: refs/heads/branch-1
Commit: f20a359d77230b6c57120d8bb71ad700b1a7d4c2
Parents: 43e08a4
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 17:19:49 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 17:19:49 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 3 +++
ql/src/test/queries/clientpositive/update_all_types.q | 2 +-
ql/src/test/results/clientpositive/tez/update_all_types.q.out | 4 ++--
ql/src/test/results/clientpositive/update_all_types.q.out | 4 ++--
4 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5864c35..6e82f41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12212,6 +12212,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (!SessionState.get().getTxnMgr().supportsAcid()) return false;
String tableIsTransactional =
tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
+ if(tableIsTransactional == null) {
+ tableIsTransactional = tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase());
+ }
return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/test/queries/clientpositive/update_all_types.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/update_all_types.q b/ql/src/test/queries/clientpositive/update_all_types.q
index 262a304..0229845 100644
--- a/ql/src/test/queries/clientpositive/update_all_types.q
+++ b/ql/src/test/queries/clientpositive/update_all_types.q
@@ -17,7 +17,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE');
insert into table acid_uat
select ctinyint,
http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/test/results/clientpositive/tez/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/update_all_types.q.out b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/tez/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@acid_uat
http://git-wip-us.apache.org/repos/asf/hive/blob/f20a359d/ql/src/test/results/clientpositive/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/update_all_types.q.out b/ql/src/test/results/clientpositive/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@acid_uat
[2/3] hive git commit: HIVE-11916 TxnHandler.getOpenTxnsInfo() and
getOpenTxns() may produce inconsistent result (Eugene Koifman,
reviewed by Ashutosh Chauhan)
Posted by ek...@apache.org.
HIVE-11916 TxnHandler.getOpenTxnsInfo() and getOpenTxns() may produce inconsistent result (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/43e08a4e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/43e08a4e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/43e08a4e
Branch: refs/heads/branch-1
Commit: 43e08a4ee1ddddfd6d9c718341ee2e8a49d56818
Parents: 6a91134
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 17:19:01 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 17:19:01 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/43e08a4e/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index aa8bf8a..82f0dc0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -47,8 +47,9 @@ import java.util.concurrent.TimeUnit;
* A handler to answer transaction related calls that come into the metastore
* server.
*
- * Note on log messages: Please include txnid:X and lockid info
- * {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
+ * Note on log messages: Please include txnid:X and lockid info using
+ * {@link org.apache.hadoop.hive.common.JavaUtils#txnIdToString(long)}
+ * and {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
* The txnid:X and lockid:Y matches how Thrift object toString() methods are generated,
* so keeping the format consistent makes grep'ing the logs much easier.
*/
@@ -166,7 +167,8 @@ public class TxnHandler {
}
List<TxnInfo> txnInfo = new ArrayList<TxnInfo>();
- s = "select txn_id, txn_state, txn_user, txn_host from TXNS";
+ //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+ s = "select txn_id, txn_state, txn_user, txn_host from TXNS where txn_id <= " + hwm;
LOG.debug("Going to execute query<" + s + ">");
rs = stmt.executeQuery(s);
while (rs.next()) {
@@ -230,7 +232,8 @@ public class TxnHandler {
}
Set<Long> openList = new HashSet<Long>();
- s = "select txn_id from TXNS";
+ //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+ s = "select txn_id from TXNS where txn_id <= " + hwm;
LOG.debug("Going to execute query<" + s + ">");
rs = stmt.executeQuery(s);
while (rs.next()) {
@@ -1459,7 +1462,7 @@ public class TxnHandler {
LockResponse response = new LockResponse();
response.setLockid(extLockId);
- LOG.debug("checkLock(): Setting savepoint. extLockId=" + extLockId);
+ LOG.debug("checkLock(): Setting savepoint. extLockId=" + JavaUtils.lockIdToString(extLockId));
Savepoint save = dbConn.setSavepoint();
StringBuilder query = new StringBuilder("select hl_lock_ext_id, " +
"hl_lock_int_id, hl_db, hl_table, hl_partition, hl_lock_state, " +
@@ -1685,7 +1688,7 @@ public class TxnHandler {
if (rc < 1) {
LOG.debug("Going to rollback");
dbConn.rollback();
- throw new NoSuchLockException("No such lock: (" + extLockId + "," +
+ throw new NoSuchLockException("No such lock: (" + JavaUtils.lockIdToString(extLockId) + "," +
+ intLockId + ")");
}
// We update the database, but we don't commit because there may be other
@@ -1710,7 +1713,7 @@ public class TxnHandler {
if (rc < 1) {
LOG.debug("Going to rollback");
dbConn.rollback();
- throw new NoSuchLockException("No such lock: " + extLockId);
+ throw new NoSuchLockException("No such lock: " + JavaUtils.lockIdToString(extLockId));
}
LOG.debug("Going to commit");
dbConn.commit();