You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xk...@apache.org on 2022/11/30 15:44:31 UTC
[hadoop] branch trunk updated: HDFS-16550. Allow JN edit cache size to be set as a fraction of heap memory (#4209)
This is an automated email from the ASF dual-hosted git repository.
xkrogen pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 2067fcb6463 HDFS-16550. Allow JN edit cache size to be set as a fraction of heap memory (#4209)
2067fcb6463 is described below
commit 2067fcb64638df2dde8100f44cce431baa23ceb8
Author: litao <to...@gmail.com>
AuthorDate: Wed Nov 30 23:44:21 2022 +0800
HDFS-16550. Allow JN edit cache size to be set as a fraction of heap memory (#4209)
---
.../java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 5 +++-
.../hdfs/qjournal/server/JournaledEditsCache.java | 28 ++++++++++++++++------
.../src/main/resources/hdfs-default.xml | 18 +++++++++++++-
.../site/markdown/HDFSHighAvailabilityWithQJM.md | 10 ++++++++
.../src/site/markdown/ObserverNameNode.md | 18 ++++++++++++++
.../qjournal/server/TestJournaledEditsCache.java | 21 ++++++++++++++++
6 files changed, 91 insertions(+), 9 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index f766c48d7c5..0e10bc61c99 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -1424,7 +1424,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final long DFS_JOURNALNODE_SYNC_INTERVAL_DEFAULT = 2*60*1000L;
public static final String DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY =
"dfs.journalnode.edit-cache-size.bytes";
- public static final int DFS_JOURNALNODE_EDIT_CACHE_SIZE_DEFAULT = 1024 * 1024;
+
+ public static final String DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY =
+ "dfs.journalnode.edit-cache-size.fraction";
+ public static final float DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_DEFAULT = 0.5f;
// Journal-node related configs for the client side.
public static final String DFS_QJOURNAL_QUEUE_SIZE_LIMIT_KEY = "dfs.qjournal.queued-edits.limit.mb";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java
index 65f54609ef3..339b7fa7b68 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournaledEditsCache.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp;
import org.apache.hadoop.util.AutoCloseableLock;
+import org.apache.hadoop.util.Preconditions;
/**
* An in-memory cache of edits in their serialized form. This is used to serve
@@ -121,12 +122,18 @@ class JournaledEditsCache {
// ** End lock-protected fields **
JournaledEditsCache(Configuration conf) {
+ float fraction = conf.getFloat(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY,
+ DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_DEFAULT);
+ Preconditions.checkArgument((fraction > 0 && fraction < 1.0f),
+ String.format("Cache config %s is set at %f, it should be a positive float value, " +
+ "less than 1.0. The recommended value is less than 0.9.",
+ DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, fraction));
capacity = conf.getInt(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY,
- DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_DEFAULT);
+ (int) (Runtime.getRuntime().maxMemory() * fraction));
if (capacity > 0.9 * Runtime.getRuntime().maxMemory()) {
Journal.LOG.warn(String.format("Cache capacity is set at %d bytes but " +
"maximum JVM memory is only %d bytes. It is recommended that you " +
- "decrease the cache size or increase the heap size.",
+ "decrease the cache size/fraction or increase the heap size.",
capacity, Runtime.getRuntime().maxMemory()));
}
Journal.LOG.info("Enabling the journaled edits cache with a capacity " +
@@ -277,11 +284,12 @@ class JournaledEditsCache {
initialize(INVALID_TXN_ID);
Journal.LOG.warn(String.format("A single batch of edits was too " +
"large to fit into the cache: startTxn = %d, endTxn = %d, " +
- "input length = %d. The capacity of the cache (%s) must be " +
+ "input length = %d. The cache size (%s) or cache fraction (%s) must be " +
"increased for it to work properly (current capacity %d)." +
"Cache is now empty.",
newStartTxn, newEndTxn, inputData.length,
- DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, capacity));
+ DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY,
+ DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, capacity));
return;
}
if (dataMap.isEmpty()) {
@@ -388,10 +396,11 @@ class JournaledEditsCache {
} else {
return new CacheMissException(lowestTxnId - requestedTxnId,
"Oldest txn ID available in the cache is %d, but requested txns " +
- "starting at %d. The cache size (%s) may need to be increased " +
- "to hold more transactions (currently %d bytes containing %d " +
+ "starting at %d. The cache size (%s) or cache fraction (%s) may need to be " +
+ "increased to hold more transactions (currently %d bytes containing %d " +
"transactions)", lowestTxnId, requestedTxnId,
- DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, capacity,
+ DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY,
+ DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, capacity,
highestTxnId - lowestTxnId + 1);
}
}
@@ -414,4 +423,9 @@ class JournaledEditsCache {
}
+ @VisibleForTesting
+ int getCapacity() {
+ return capacity;
+ }
+
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 2a0a4945faa..bf7f99aa1fa 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4945,7 +4945,7 @@
<property>
<name>dfs.journalnode.edit-cache-size.bytes</name>
- <value>1048576</value>
+ <value></value>
<description>
The size, in bytes, of the in-memory cache of edits to keep on the
JournalNode. This cache is used to serve edits for tailing via the RPC-based
@@ -4955,6 +4955,22 @@
</description>
</property>
+<property>
+ <name>dfs.journalnode.edit-cache-size.fraction</name>
+ <value>0.5f</value>
+ <description>
+ The fraction of the JVM's maximum heap memory used to calculate the size of the
+ edits cache that is kept in the JournalNode's memory. This config is an alternative
+ to dfs.journalnode.edit-cache-size.bytes; if dfs.journalnode.edit-cache-size.bytes
+ is set, this parameter will not take effect. The cache is used to serve edits for
+ tailing via the RPC-based mechanism, and is only enabled when
+ dfs.ha.tail-edits.in-progress is true. Transactions range in size but are around
+ 200 bytes on average, so every 1MB of cache can store around 5000 transactions;
+ choose a reasonable value based on the maximum heap memory. The value must be
+ greater than 0 and less than 1.0. The recommended value is less than 0.9.
+ </description>
+</property>
+
<property>
<name>dfs.journalnode.kerberos.internal.spnego.principal</name>
<value></value>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
index 5591f4f2245..b6b408db8b4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
@@ -502,6 +502,16 @@ lag time will be much longer. The relevant configurations are:
the oldest data in the cache was at transaction ID 20, a value of 10 would be added to the
average.
+* **dfs.journalnode.edit-cache-size.fraction** - The fraction of the JVM's maximum heap
+ memory used to calculate the size of the edits cache that is kept in the JournalNode's
+ memory. This config is an alternative to dfs.journalnode.edit-cache-size.bytes; if
+ dfs.journalnode.edit-cache-size.bytes is set, this parameter will not take effect.
+ The cache is used to serve edits for tailing via the RPC-based mechanism, and is only
+ enabled when dfs.ha.tail-edits.in-progress is true. Transactions range in size but are
+ around 200 bytes on average, so every 1MB of cache can store around 5000 transactions;
+ choose a reasonable value based on the maximum heap memory. The value must be greater
+ than 0 and less than 1.0. The recommended value is less than 0.9.
+
This feature is primarily useful in conjunction with the Standby/Observer Read feature. Using this
feature, read requests can be serviced from non-active NameNodes; thus tailing in-progress edits
provides these nodes with the ability to serve requests with data which is much more fresh. See the
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md
index 00aeb5bd2e0..74026ec8625 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ObserverNameNode.md
@@ -194,6 +194,24 @@ few configurations to your **hdfs-site.xml**:
<value>1048576</value>
</property>
+* **dfs.journalnode.edit-cache-size.fraction** - the fraction of the
+ JVM's maximum heap memory to use for the JournalNode's edits cache.
+
+ Used to calculate the size of the edits cache that
+ is kept in the JournalNode's memory.
+ This config is an alternative to dfs.journalnode.edit-cache-size.bytes;
+ if dfs.journalnode.edit-cache-size.bytes is set, this parameter will not take effect.
+ The cache is used to serve edits for tailing via the RPC-based mechanism, and is only
+ enabled when dfs.ha.tail-edits.in-progress is true. Transactions range in size but
+ are around 200 bytes on average, so every 1MB of cache can store around 5000 transactions;
+ choose a reasonable value based on the maximum heap memory. The value must be greater
+ than 0 and less than 1.0. The recommended value is less than 0.9.
+
+ <property>
+ <name>dfs.journalnode.edit-cache-size.fraction</name>
+ <value>0.5f</value>
+ </property>
+
* **dfs.namenode.accesstime.precision** -- whether to enable access
time for HDFS file.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java
index 2a178a1547e..82b8b587694 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournaledEditsCache.java
@@ -221,6 +221,27 @@ public class TestJournaledEditsCache {
cache.retrieveEdits(-1, 10, new ArrayList<>());
}
+ @Test
+ public void testCacheSizeConfigs() {
+ // Assert the default configs.
+ Configuration config = new Configuration();
+ cache = new JournaledEditsCache(config);
+ assertEquals((int) (Runtime.getRuntime().maxMemory() * 0.5f), cache.getCapacity());
+
+ // Set dfs.journalnode.edit-cache-size.bytes.
+ Configuration config1 = new Configuration();
+ config1.setInt(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY, 1);
+ config1.setFloat(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, 0.1f);
+ cache = new JournaledEditsCache(config1);
+ assertEquals(1, cache.getCapacity());
+
+ // Don't set dfs.journalnode.edit-cache-size.bytes.
+ Configuration config2 = new Configuration();
+ config2.setFloat(DFSConfigKeys.DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_KEY, 0.1f);
+ cache = new JournaledEditsCache(config2);
+ assertEquals((int) (Runtime.getRuntime().maxMemory() * 0.1f), cache.getCapacity());
+ }
+
private void storeEdits(int startTxn, int endTxn) throws Exception {
cache.storeEdits(createTxnData(startTxn, endTxn - startTxn + 1), startTxn,
endTxn, NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org