You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by ji...@apache.org on 2013/10/11 23:49:02 UTC

svn commit: r1531436 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/security/token/delegation/ src/main/java/org/apache/hadoop/hdfs/server/namenode/ src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/

Author: jing9
Date: Fri Oct 11 21:49:01 2013
New Revision: 1531436

URL: http://svn.apache.org/r1531436
Log:
HDFS-5322. HDFS delegation token not found in cache errors seen on secure HA clusters. Contributed by Jing Zhao.

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1531436&r1=1531435&r2=1531436&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Oct 11 21:49:01 2013
@@ -369,6 +369,9 @@ Release 2.2.1 - UNRELEASED
     HDFS-5335. Hive query failed with possible race in dfs output stream.
     (Haohui Mai via suresh)
 
+    HDFS-5322. HDFS delegation token not found in cache errors seen on secure HA 
+    clusters. (jing9)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java?rev=1531436&r1=1531435&r2=1531436&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java Fri Oct 11 21:49:01 2013
@@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.SecurityUtil;
@@ -115,6 +116,24 @@ public class DelegationTokenSecretManage
     return super.retrievePassword(identifier);
   }
   
+  @Override
+  public byte[] retriableRetrievePassword(DelegationTokenIdentifier identifier)
+      throws InvalidToken, StandbyException, RetriableException, IOException {
+    namesystem.checkOperation(OperationCategory.READ);
+    try {
+      return super.retrievePassword(identifier);
+    } catch (InvalidToken it) {
+      if (namesystem.inTransitionToActive()) {
+        // if the namesystem is currently in the middle of transitioning to
+        // active state, let the client retry since the corresponding editlog
+        // may not have been applied yet
+        throw new RetriableException(it);
+      } else {
+        throw it;
+      }
+    }
+  }
+  
   /**
    * Returns expiry time of a token given its identifier.
    * 

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1531436&r1=1531435&r2=1531436&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Oct 11 21:49:01 2013
@@ -455,6 +455,11 @@ public class FSNamesystem implements Nam
   private HAContext haContext;
 
   private final boolean haEnabled;
+  
+  /**
+   * Whether the namenode is in the middle of starting the active service
+   */
+  private volatile boolean startingActiveService = false;
     
   private INodeId inodeId;
   
@@ -903,6 +908,7 @@ public class FSNamesystem implements Nam
    * @throws IOException
    */
   void startActiveServices() throws IOException {
+    startingActiveService = true;
     LOG.info("Starting services required for active state");
     writeLock();
     try {
@@ -957,8 +963,19 @@ public class FSNamesystem implements Nam
       nnrmthread.start();
     } finally {
       writeUnlock();
+      startingActiveService = false;
     }
   }
+  
+  /**
+   * @return Whether the namenode is transitioning to active state and is in the
+   *         middle of {@link #startActiveServices()}
+   */
+  public boolean inTransitionToActive() {
+    return haEnabled && haContext != null
+        && haContext.getState().getServiceState() == HAServiceState.ACTIVE
+        && startingActiveService;
+  }
 
   private boolean shouldUseDelegationTokens() {
     return UserGroupInformation.isSecurityEnabled() ||
@@ -6460,11 +6477,17 @@ public class FSNamesystem implements Nam
    * Verifies that the given identifier and password are valid and match.
    * @param identifier Token identifier.
    * @param password Password in the token.
-   * @throws InvalidToken
    */
   public synchronized void verifyToken(DelegationTokenIdentifier identifier,
-      byte[] password) throws InvalidToken {
-    getDelegationTokenSecretManager().verifyToken(identifier, password);
+      byte[] password) throws InvalidToken, RetriableException {
+    try {
+      getDelegationTokenSecretManager().verifyToken(identifier, password);
+    } catch (InvalidToken it) {
+      if (inTransitionToActive()) {
+        throw new RetriableException(it);
+      }
+      throw it;
+    }
   }
   
   @Override
@@ -6482,6 +6505,11 @@ public class FSNamesystem implements Nam
   }
   
   @VisibleForTesting
+  public void setEditLogTailerForTests(EditLogTailer tailer) {
+    this.editLogTailer = tailer;
+  }
+  
+  @VisibleForTesting
   void setFsLockForTests(ReentrantReadWriteLock lock) {
     this.fsLock = lock;
   }

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java?rev=1531436&r1=1531435&r2=1531436&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java Fri Oct 11 21:49:01 2013
@@ -39,6 +39,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.AbstractFileSystem;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HAUtil;
@@ -47,19 +48,22 @@ import org.apache.hadoop.hdfs.MiniDFSNNT
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.ipc.RetriableException;
+import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.SecurityUtilTestHelper;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.test.GenericTestUtils;
-import org.junit.AfterClass;
+import org.junit.After;
 import org.junit.Before;
-import org.junit.BeforeClass;
 import org.junit.Test;
+import org.mockito.internal.util.reflection.Whitebox;
 
 import com.google.common.base.Joiner;
 
@@ -78,8 +82,12 @@ public class TestDelegationTokensWithHA 
   private static DelegationTokenSecretManager dtSecretManager;
   private static DistributedFileSystem dfs;
 
-  @BeforeClass
-  public static void setupCluster() throws Exception {
+  private volatile boolean catchup = false;
+  
+  @Before
+  public void setupCluster() throws Exception {
+    SecurityUtilTestHelper.setTokenServiceUseIp(true);
+    
     conf.setBoolean(
         DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
     conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL,
@@ -101,18 +109,12 @@ public class TestDelegationTokensWithHA 
         nn0.getNamesystem());
   }
 
-  @AfterClass
-  public static void shutdownCluster() throws IOException {
+  @After
+  public void shutdownCluster() throws IOException {
     if (cluster != null) {
       cluster.shutdown();
     }
   }
-
-
-  @Before
-  public void prepTest() {
-    SecurityUtilTestHelper.setTokenServiceUseIp(true);
-  }
   
   @Test
   public void testDelegationTokenDFSApi() throws Exception {
@@ -155,6 +157,96 @@ public class TestDelegationTokensWithHA 
     doRenewOrCancel(token, clientConf, TokenTestAction.CANCEL);
   }
   
+  private class EditLogTailerForTest extends EditLogTailer {
+    public EditLogTailerForTest(FSNamesystem namesystem, Configuration conf) {
+      super(namesystem, conf);
+    }
+    
+    public void catchupDuringFailover() throws IOException {
+      synchronized (TestDelegationTokensWithHA.this) {
+        while (!catchup) {
+          try {
+            LOG.info("The editlog tailer is waiting to catchup...");
+            TestDelegationTokensWithHA.this.wait();
+          } catch (InterruptedException e) {}
+        }
+      }
+      super.catchupDuringFailover();
+    }
+  }
+  
+  /**
+   * Test whether the correct exception (StandbyException or RetriableException)
+   * is thrown during NN failover.
+   */
+  @Test
+  public void testDelegationTokenDuringNNFailover() throws Exception {
+    EditLogTailer editLogTailer = nn1.getNamesystem().getEditLogTailer();
+    // stop the editLogTailer of nn1
+    editLogTailer.stop();
+    Configuration conf = (Configuration) Whitebox.getInternalState(
+        editLogTailer, "conf");
+    nn1.getNamesystem().setEditLogTailerForTests(
+        new EditLogTailerForTest(nn1.getNamesystem(), conf));
+    
+    // create token
+    final Token<DelegationTokenIdentifier> token =
+        getDelegationToken(fs, "JobTracker");
+    DelegationTokenIdentifier identifier = new DelegationTokenIdentifier();
+    byte[] tokenId = token.getIdentifier();
+    identifier.readFields(new DataInputStream(
+             new ByteArrayInputStream(tokenId)));
+
+    // Ensure that it's present in the nn0 secret manager and can
+    // be renewed directly from there.
+    LOG.info("A valid token should have non-null password, " +
+        "and should be renewed successfully");
+    assertTrue(null != dtSecretManager.retrievePassword(identifier));
+    dtSecretManager.renewToken(token, "JobTracker");
+    
+    // transition nn0 to standby
+    cluster.transitionToStandby(0);
+    
+    try {
+      cluster.getNameNodeRpc(0).renewDelegationToken(token);
+      fail("StandbyException is expected since nn0 is in standby state");
+    } catch (StandbyException e) {
+      GenericTestUtils.assertExceptionContains(
+          HAServiceState.STANDBY.toString(), e);
+    }
+    
+    new Thread() {
+      @Override
+      public void run() {
+        try {
+          cluster.transitionToActive(1);
+        } catch (Exception e) {
+          LOG.error("Transition nn1 to active failed", e);
+        }    
+      }
+    }.start();
+    
+    Thread.sleep(1000);
+    try {
+      nn1.getNamesystem().verifyToken(token.decodeIdentifier(),
+          token.getPassword());
+      fail("RetriableException/StandbyException is expected since nn1 is in transition");
+    } catch (IOException e) {
+      assertTrue(e instanceof StandbyException
+          || e instanceof RetriableException);
+      LOG.info("Got expected exception", e);
+    }
+    
+    catchup = true;
+    synchronized (this) {
+      this.notifyAll();
+    }
+    
+    Configuration clientConf = dfs.getConf();
+    doRenewOrCancel(token, clientConf, TokenTestAction.RENEW);
+    doRenewOrCancel(token, clientConf, TokenTestAction.CANCEL);
+  }
+  
   @SuppressWarnings("deprecation")
   @Test
   public void testDelegationTokenWithDoAs() throws Exception {