Posted to common-commits@hadoop.apache.org by zj...@apache.org on 2015/03/30 21:37:11 UTC

[05/20] hadoop git commit: HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs. Contributed by Gautam Gopalakrishnan.

HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs. Contributed by Gautam Gopalakrishnan.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7d4d6150
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7d4d6150
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7d4d6150

Branch: refs/heads/YARN-2928
Commit: 7d4d6150f8c81a242f7676e27d65db9f31136007
Parents: 74e941d
Author: Harsh J <ha...@cloudera.com>
Authored: Sun Mar 29 00:45:01 2015 +0530
Committer: Zhijie Shen <zj...@apache.org>
Committed: Mon Mar 30 12:10:47 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../hdfs/server/namenode/FSNamesystem.java      |  2 +-
 .../namenode/metrics/TestNameNodeMetrics.java   | 84 ++++++++++++++++++++
 3 files changed, 88 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/7d4d6150/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f7cc2bc..496db06 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -351,6 +351,9 @@ Release 2.8.0 - UNRELEASED
 
   BUG FIXES
 
+    HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.
+    (Gautam Gopalakrishnan via harsh)
+
     HDFS-5356. MiniDFSCluster should close all open FileSystems when shutdown()
     (Rakesh R via vinayakumarb)
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7d4d6150/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index d0999b8..0e0f484 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -4784,7 +4784,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   @Metric({"TransactionsSinceLastCheckpoint",
       "Number of transactions since last checkpoint"})
   public long getTransactionsSinceLastCheckpoint() {
-    return getEditLog().getLastWrittenTxId() -
+    return getFSImage().getLastAppliedOrWrittenTxId() -
         getFSImage().getStorage().getMostRecentCheckpointTxId();
   }
   

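Why the original expression could go negative on a Standby NameNode (SBN):
the SBN's edit log is not open for writes -- tailed edits are applied, not
written -- so getEditLog().getLastWrittenTxId() does not advance, while each
standby checkpoint advances getMostRecentCheckpointTxId() past it. A minimal
sketch of the arithmetic, using made-up txid values (the numbers below are
illustrative only, not taken from the commit):

    // Illustrative txids on an SBN; the values are hypothetical.
    long lastWrittenTxId    = 0;   // getEditLog().getLastWrittenTxId():
                                   // stale on an SBN, which applies tailed
                                   // edits rather than writing them.
    long lastAppliedTxId    = 102; // getFSImage().getLastAppliedOrWrittenTxId():
                                   // advances as the edit tailer applies txns.
    long lastCheckpointTxId = 100; // getFSImage().getStorage()
                                   //     .getMostRecentCheckpointTxId():
                                   // advances on every SBN checkpoint.

    long before = lastWrittenTxId - lastCheckpointTxId; // -100: the bug
    long after  = lastAppliedTxId - lastCheckpointTxId; //    2: the fix

On an active NameNode the edit log is open for writes, so the
applied-or-written txid falls back to the written txid and the metric's
value there should be unchanged by this patch.
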
http://git-wip-us.apache.org/repos/asf/hadoop/blob/7d4d6150/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
index 011db3c..64ea1e4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
@@ -22,12 +22,16 @@ import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
 import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
 import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.DataInputStream;
 import java.io.IOException;
 import java.util.Random;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Files;
 
+import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
@@ -39,6 +43,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
@@ -47,7 +52,9 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.apache.hadoop.hdfs.server.namenode.top.TopAuditLogger;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.metrics2.MetricsSource;
@@ -69,6 +76,7 @@ public class TestNameNodeMetrics {
     new Path("/testNameNodeMetrics");
   private static final String NN_METRICS = "NameNodeActivity";
   private static final String NS_METRICS = "FSNamesystem";
+  public static final Log LOG = LogFactory.getLog(TestNameNodeMetrics.class);
   
   // Number of datanodes in the cluster
   private static final int DATANODE_COUNT = 3; 
@@ -400,6 +408,82 @@ public class TestNameNodeMetrics {
   }
   
   /**
+   * Testing TransactionsSinceLastCheckpoint. Need a new cluster as
+   * the other tests in here don't use HA. See HDFS-7501.
+   */
+  @Test(timeout = 300000)
+  public void testTransactionSinceLastCheckpointMetrics() throws Exception {
+    Random random = new Random();
+    int retryCount = 0;
+    while (retryCount < 5) {
+      try {
+        int basePort = 10060 + random.nextInt(100) * 2;
+        MiniDFSNNTopology topology = new MiniDFSNNTopology()
+            .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
+            .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(basePort))
+            .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(basePort + 1)));
+
+        HdfsConfiguration conf2 = new HdfsConfiguration();
+        // Lower the checkpoint condition for purpose of testing.
+        conf2.setInt(
+            DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+            100);
+        // Check for checkpoint condition very often, for purpose of testing.
+        conf2.setInt(
+            DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY,
+            1);
+        // Poll and follow ANN txns very often, for purpose of testing.
+        conf2.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+        MiniDFSCluster cluster2 = new MiniDFSCluster.Builder(conf2)
+            .nnTopology(topology).numDataNodes(1).build();
+        cluster2.waitActive();
+        DistributedFileSystem fs2 = cluster2.getFileSystem(0);
+        NameNode nn0 = cluster2.getNameNode(0);
+        NameNode nn1 = cluster2.getNameNode(1);
+        cluster2.transitionToActive(0);
+        fs2.mkdirs(new Path("/tmp-t1"));
+        fs2.mkdirs(new Path("/tmp-t2"));
+        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+        // Test to ensure tracking works before the first-ever
+        // checkpoint.
+        assertEquals("SBN failed to track 2 transactions pre-checkpoint.",
+            4L, // 2 txns added further when catch-up is called.
+            cluster2.getNameNode(1).getNamesystem()
+              .getTransactionsSinceLastCheckpoint());
+        // Complete up to the boundary required for
+        // an auto-checkpoint. Using 94 to expect fsimage
+        // rounded at 100, as 4 + 94 + 2 (catch-up call) = 100.
+        for (int i = 1; i <= 94; i++) {
+          fs2.mkdirs(new Path("/tmp-" + i));
+        }
+        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+        // Assert 100 transactions in checkpoint.
+        HATestUtil.waitForCheckpoint(cluster2, 1, ImmutableList.of(100));
+        // Test to ensure number tracks the right state of
+        // uncheckpointed edits, and does not go negative
+        // (as fixed in HDFS-7501).
+        assertEquals("Should be zero right after the checkpoint.",
+            0L,
+            cluster2.getNameNode(1).getNamesystem()
+              .getTransactionsSinceLastCheckpoint());
+        fs2.mkdirs(new Path("/tmp-t3"));
+        fs2.mkdirs(new Path("/tmp-t4"));
+        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+        // Test to ensure we track the right numbers after
+        // the checkpoint resets it to zero again.
+        assertEquals("SBN failed to track 2 added txns after the ckpt.",
+            4L,
+            cluster2.getNameNode(1).getNamesystem()
+              .getTransactionsSinceLastCheckpoint());
+        cluster2.shutdown();
+        break;
+      } catch (Exception e) {
+        LOG.warn("Unable to set up HA cluster, exception thrown: " + e);
+        retryCount++;
+      }
+    }
+  }
+  /**
    * Test NN checkpoint and transaction-related metrics.
    */
   @Test
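
For reference, the expected gauge values asserted in the new test above work
out as follows (a sketch reconstructed from the test's own inline comments;
the "2 txns per catch-up" accounting for each
HATestUtil.waitForStandbyToCatchUp() edit-log roll is taken from those
comments, not verified independently here):

    // Pre-checkpoint assertion (expects 4):
    //     2 mkdirs ("/tmp-t1", "/tmp-t2")
    //   + 2 txns added by the waitForStandbyToCatchUp() log roll
    //   = 4 transactions since the (not yet taken) first checkpoint.
    //
    // Checkpoint boundary (dfs.namenode.checkpoint.txns lowered to 100):
    //     4 (above) + 94 mkdirs + 2 (second catch-up roll) = 100,
    //   which triggers the automatic checkpoint asserted via
    //   waitForCheckpoint(cluster2, 1, ImmutableList.of(100)), after
    //   which the gauge must read 0, not a negative value (HDFS-7501).
    //
    // Post-checkpoint assertion (expects 4 again):
    //     2 mkdirs ("/tmp-t3", "/tmp-t4") + 2 (third roll) = 4.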