You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 06:37:39 UTC
svn commit: r1077814 -
/hadoop/common/branches/branch-0.20-security-202/src/core/org/apache/hadoop/ipc/Client.java
Author: omalley
Date: Fri Mar 4 05:37:39 2011
New Revision: 1077814
URL: http://svn.apache.org/viewvc?rev=1077814&view=rev
Log:
commit 0ff7e541042f52ab5f9877f5526d593618a34547
Author: Devaraj Das <dd...@yahoo-inc.com>
Date: Mon Jan 3 16:50:23 2011 -0800
. RPC handles SocketTimeoutException during SASL negotiation.
+++ b/YAHOO-CHANGES.txt
+ . RPC handles SocketTimeoutException during SASL negotiation.
+ (ddas)
+
+ core-site.xml (Krishna Ramachandran)
Modified:
hadoop/common/branches/branch-0.20-security-202/src/core/org/apache/hadoop/ipc/Client.java
Modified: hadoop/common/branches/branch-0.20-security-202/src/core/org/apache/hadoop/ipc/Client.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-202/src/core/org/apache/hadoop/ipc/Client.java?rev=1077814&r1=1077813&r2=1077814&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-202/src/core/org/apache/hadoop/ipc/Client.java (original)
+++ hadoop/common/branches/branch-0.20-security-202/src/core/org/apache/hadoop/ipc/Client.java Fri Mar 4 05:37:39 2011
@@ -401,14 +401,23 @@ public class Client {
}
}
/**
- * If multiple clients with the same principal try to connect
- * to the same server at the same time, the server assumes a
- * replay attack is in progress. This is a feature of kerberos.
+ * Three failures are handled here -
+ * 1) SocketTimeout failures. We just retry after sleeping for some time.
+ * 2) Kerberos replay attack failure. If multiple clients with the same
+ * principal try to connect to the same server at the same time, the server
+ * assumes a replay attack is in progress. This is a feature of kerberos.
* In order to work around this, what is done is that the client
* backs off randomly and tries to initiate the connection
* again.
- * The other problem is to do with ticket expiry. To handle that,
+ * 3) The third problem is to do with ticket expiry. To handle that,
* a relogin is attempted.
+ * Failure scenarios:
+ * (1) Client authenticates over kerberos and the
+ * connection fails due to a kerberos replay attack, or because the ticket
+ * has expired or does not exist. The connection can also fail due to a timeout.
+ * (2) Client authenticates over DIGEST and the connection fails due
+ * to timeout.
+ * For MapReduce tasks, assuming the token is valid, only (2) can happen.
*/
private synchronized void handleSaslConnectionFailure(
final int currRetries,
@@ -417,8 +426,30 @@ public class Client {
throws IOException, InterruptedException{
ugi.doAs(new PrivilegedExceptionAction<Object>() {
public Object run() throws IOException, InterruptedException {
- final short MAX_BACKOFF = 5000;
+ final short maxBackoff = 5000;
closeConnection();
+ disposeSasl();
+ //TODO: currRetries is overloaded here. For both kerberos and timeout
+ //failures, currRetries is used to maintain the number of failures
+ //seen so far. Should fix this to distinguish between the two.
+ //TODO: Refactor the method to remove duplicated code (e.g., the logic
+ //inside "currRetries < maxRetries" could be factored out to apply
+ //to both SocketTimeoutException and Kerberos exceptions).
+ if (ex instanceof SocketTimeoutException) {
+ if (currRetries < maxRetries) {
+ LOG.warn("Encountered " + ex + " while trying to establish" +
+ " SASL connection to the server. Will retry SASL connection"+
+ " to server with principal " +
+ serverPrincipal);
+ //we are sleeping with the Connection lock held but since this
+ //connection instance is being used for connecting to the server
+ //in question, it is okay
+ Thread.sleep((rand.nextInt(maxBackoff) + 1));
+ return null;
+ } else {
+ throw new IOException(ex);
+ }
+ }
if (shouldAuthenticateOverKrb()) {
if (currRetries < maxRetries) {
LOG.debug("Exception encountered while connecting to " +
@@ -429,12 +460,11 @@ public class Client {
} else {
UserGroupInformation.getLoginUser().reloginFromTicketCache();
}
- disposeSasl();
//have granularity of milliseconds
//we are sleeping with the Connection lock held but since this
//connection instance is being used for connecting to the server
//in question, it is okay
- Thread.sleep((rand.nextInt(MAX_BACKOFF) + 1));
+ Thread.sleep((rand.nextInt(maxBackoff) + 1));
return null;
} else {
String msg = "Couldn't setup connection for " +
@@ -467,7 +497,7 @@ public class Client {
LOG.debug("Connecting to "+server);
}
short numRetries = 0;
- final short MAX_RETRIES = 5;
+ final short maxRetries = 15;
Random rand = null;
while (true) {
setupConnection();
@@ -496,7 +526,7 @@ public class Client {
if (rand == null) {
rand = new Random();
}
- handleSaslConnectionFailure(numRetries++, MAX_RETRIES, ex, rand,
+ handleSaslConnectionFailure(numRetries++, maxRetries, ex, rand,
ticket);
continue;
}