You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aa...@apache.org on 2020/05/19 00:53:07 UTC
[hadoop] branch branch-3.3.0 updated: HDFS-15293. Relax the
condition for accepting a fsimage when receiving a checkpoint. Contributed
by Chen Liang
This is an automated email from the ASF dual-hosted git repository.
aajisaka pushed a commit to branch branch-3.3.0
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3.0 by this push:
new 940a422 HDFS-15293. Relax the condition for accepting a fsimage when receiving a checkpoint. Contributed by Chen Liang
940a422 is described below
commit 940a422525258514165628ec93011507ba1ed5d1
Author: Chen Liang <va...@apache.org>
AuthorDate: Mon May 18 10:58:52 2020 -0700
HDFS-15293. Relax the condition for accepting a fsimage when receiving a checkpoint. Contributed by Chen Liang
(cherry picked from commit 7bb902bc0d0c62d63a8960db444de3abb0a6ad22)
(cherry picked from commit e452163a06daa6bbebc571127754962d8776a925)
---
.../hadoop/hdfs/server/namenode/ImageServlet.java | 39 ++++++++++++----
.../hdfs/server/namenode/TestCheckpoint.java | 53 +++++++++++++++++++++-
2 files changed, 81 insertions(+), 11 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
index 91f24dd..a9c2a09 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
@@ -99,6 +99,19 @@ public class ImageServlet extends HttpServlet {
"recent.image.check.enabled";
public static final boolean RECENT_IMAGE_CHECK_ENABLED_DEFAULT = true;
+ /*
+ * Ratio applied to the configured checkpoint period before the time delta
+ * check, so the effective threshold is period * ratio. The relaxation keeps
+ * minor time differences (e.g. image upload delay or machine clock skew)
+ * from causing the ANN to reject an fsImage too aggressively.
+ */
+ private static double recentImageCheckTimePrecision = 0.75;
+
+ @VisibleForTesting
+ static void setRecentImageCheckTimePrecision(double ratio) {
+ recentImageCheckTimePrecision = ratio;
+ }
+
@Override
public void doGet(final HttpServletRequest request,
final HttpServletResponse response) throws ServletException, IOException {
@@ -592,6 +605,9 @@ public class ImageServlet extends HttpServlet {
long checkpointPeriod =
conf.getTimeDuration(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY,
DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT, TimeUnit.SECONDS);
+ checkpointPeriod = Math.round(
+ checkpointPeriod * recentImageCheckTimePrecision);
+
long checkpointTxnCount =
conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
@@ -612,21 +628,24 @@ public class ImageServlet extends HttpServlet {
// a new fsImage
// 1. most recent image's txid is too far behind
// 2. last checkpoint time was too old
- response.sendError(HttpServletResponse.SC_CONFLICT,
- "Most recent checkpoint is neither too far behind in "
- + "txid, nor too old. New txnid cnt is "
- + (txid - lastCheckpointTxid)
- + ", expecting at least " + checkpointTxnCount
- + " unless too long since last upload.");
+ String message = "Rejecting a fsimage due to small time delta "
+ + "and txnid delta. Time since previous checkpoint is "
+ + timeDelta + " expecting at least " + checkpointPeriod
+ + " txnid delta since previous checkpoint is " +
+ (txid - lastCheckpointTxid) + " expecting at least "
+ + checkpointTxnCount;
+ LOG.info(message);
+ response.sendError(HttpServletResponse.SC_CONFLICT, message);
return null;
}
try {
if (nnImage.getStorage().findImageFile(nnf, txid) != null) {
- response.sendError(HttpServletResponse.SC_CONFLICT,
- "Either current namenode has checkpointed or "
- + "another checkpointer already uploaded an "
- + "checkpoint for txid " + txid);
+ String message = "Either current namenode has checkpointed or "
+ + "another checkpointer already uploaded an "
+ + "checkpoint for txid " + txid;
+ LOG.info(message);
+ response.sendError(HttpServletResponse.SC_CONFLICT, message);
return null;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
index 572ad8b..fabe8c5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
@@ -2464,7 +2464,7 @@ public class TestCheckpoint {
}
@Test(timeout = 300000)
- public void testActiveRejectSmallerDeltaImage() throws Exception {
+ public void testActiveRejectSmallerTxidDeltaImage() throws Exception {
MiniDFSCluster cluster = null;
Configuration conf = new HdfsConfiguration();
// Set the delta txid threshold to 10
@@ -2517,6 +2517,57 @@ public class TestCheckpoint {
}
}
+ /**
+ * Test that, even with txid and time delta thresholds configured, relaxing
+ * the time delta check lets a SecondaryNameNode upload an image to the ANN.
+ *
+ * @throws Exception if any step of the test fails
+ */
+ @Test
+ public void testActiveImageWithTimeDeltaRelaxation() throws Exception {
+ Configuration conf = new HdfsConfiguration();
+ // Set the txid delta threshold to an arbitrarily large value, so that
+ // it does not trigger a checkpoint during this test.
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1000000);
+ // Set the time delta threshold to an arbitrarily large value, so that
+ // it does not trigger a checkpoint during this test.
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 900000);
+ // A relaxation of 0 accepts any time delta, effectively disabling small
+ // delta rejection. NOTE(review): static override is not restored here.
+ ImageServlet.setRecentImageCheckTimePrecision(0);
+
+ SecondaryNameNode secondary = null;
+
+ try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+ .numDataNodes(0).format(true).build()) {
+ // Enable the recent image check, i.e. small delta rejection.
+ NameNode active = cluster.getNameNode();
+ active.httpServer.getHttpServer()
+ .setAttribute(RECENT_IMAGE_CHECK_ENABLED, true);
+
+ secondary = startSecondaryNameNode(conf);
+
+ FileSystem fs = cluster.getFileSystem();
+ assertEquals(0, active.getNamesystem().getFSImage()
+ .getMostRecentCheckpointTxId());
+
+ // Create 5 directories.
+ for (int i = 0; i < 5; i++) {
+ fs.mkdirs(new Path("dir-" + i));
+ }
+
+ // First checkpoint.
+ secondary.doCheckpoint();
+ // At this point, although the change is small w.r.t. both txid and time
+ // delta, the image is still accepted because the relaxation is set to 0,
+ // so the most recent checkpoint txid on the active NameNode advances.
+ assertEquals(9, active.getNamesystem().getFSImage()
+ .getMostRecentCheckpointTxId());
+ } finally {
+ cleanup(secondary);
+ }
+ }
+
private static void cleanup(SecondaryNameNode snn) {
if (snn != null) {
try {
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org