You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 05:21:18 UTC
svn commit: r1077501 - in
/hadoop/common/branches/branch-0.20-security-patches/src:
hdfs/org/apache/hadoop/hdfs/ hdfs/org/apache/hadoop/hdfs/server/namenode/
hdfs/org/apache/hadoop/hdfs/util/
test/org/apache/hadoop/hdfs/server/namenode/
Author: omalley
Date: Fri Mar 4 04:21:18 2011
New Revision: 1077501
URL: http://svn.apache.org/viewvc?rev=1077501&view=rev
Log:
commit ab4ef4b0c24c4b6a386efb9da55ee7d01e84228b
Author: Suresh Srinivas <su...@yahoo-inc.com>
Date: Fri Jun 11 11:07:33 2010 -0700
HDFS-1110 from https://issues.apache.org/jira/secure/attachment/12446772/hdfs-1110.y20.patch
+++ b/YAHOO-CHANGES.txt
+ HDFS-1110. Reuses objects for commonly used file names in namenode to
+ reduce the heap usage. (suresh)
+
Added:
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java
hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java
Modified:
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1077501&r1=1077500&r2=1077501&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java Fri Mar 4 04:21:18 2011
@@ -202,4 +202,6 @@ public class DFSConfigKeys extends Commo
public static final String DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY = "dfs.secondary.namenode.keytab.file";
public static final String DFS_SECONDARY_NAMENODE_USER_NAME_KEY = "dfs.secondary.namenode.kerberos.principal";
public static final String DFS_SECONDARY_NAMENODE_KRB_HTTPS_USER_NAME_KEY = "dfs.secondary.namenode.kerberos.https.principal";
+ public static final String DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY = "dfs.namenode.name.cache.threshold";
+ public static final int DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT = 10;
}
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java?rev=1077501&r1=1077500&r2=1077501&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java Fri Mar 4 04:21:18 2011
@@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.H
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
+import org.apache.hadoop.hdfs.util.ByteArray;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
/*************************************************
@@ -55,6 +56,12 @@ class FSDirectory implements FSConstants
private MetricsRecord directoryMetrics = null;
private final int lsLimit; // max list limit
+ /**
+ * Caches frequently used file names used in {@link INode} to reuse
+ * byte[] objects and reduce heap usage.
+ */
+ private final NameCache<ByteArray> nameCache;
+
/** Access an existing dfs name directory. */
FSDirectory(FSNamesystem ns, Configuration conf) {
this(new FSImage(), ns, conf);
@@ -72,6 +79,14 @@ class FSDirectory implements FSConstants
DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
this.lsLimit = configuredLimit>0 ?
configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
+
+ int threshold = conf.getInt(
+ DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
+ DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
+ NameNode.LOG.info("Caching file names occuring more than " + threshold
+ + " times ");
+ nameCache = new NameCache<ByteArray>(threshold);
+
initialize(conf);
}
@@ -105,6 +120,7 @@ class FSDirectory implements FSConstants
}
synchronized (this) {
this.ready = true;
+ this.nameCache.initialized();
this.notifyAll();
}
}
@@ -242,6 +258,7 @@ class FSDirectory implements FSConstants
synchronized (rootDir) {
try {
newParent = rootDir.addToParent(src, newNode, parentINode, false);
+ cacheName(newNode);
} catch (FileNotFoundException e) {
return null;
}
@@ -1013,7 +1030,9 @@ class FSDirectory implements FSConstants
long childDiskspace, boolean inheritPermission)
throws QuotaExceededException {
byte[][] components = INode.getPathComponents(src);
- child.setLocalName(components[components.length-1]);
+ byte[] path = components[components.length-1];
+ child.setLocalName(path);
+ cacheName(child);
INode[] inodes = new INode[components.length];
synchronized (rootDir) {
rootDir.getExistingPathINodes(components, inodes);
@@ -1377,4 +1396,20 @@ class FSDirectory implements FSConstants
node.getGroupName(),
path);
}
+
+ /**
+ * Caches frequently used file names to reuse file name objects and
+ * reduce heap size.
+ */
+ void cacheName(INode inode) {
+ // Name is cached only for files
+ if (inode.isDirectory()) {
+ return;
+ }
+ ByteArray name = new ByteArray(inode.getLocalNameBytes());
+ name = nameCache.put(name);
+ if (name != null) {
+ inode.setLocalName(name.getBytes());
+ }
+ }
}
Added: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java?rev=1077501&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java Fri Mar 4 04:21:18 2011
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Caches frequently used names to facilitate reuse.
+ * (example: byte[] representation of the file name in {@link INode}).
+ *
+ * This class is used by initially adding all the file names. Cache
+ * tracks the number of times a name is used in a transient map. It promotes
+ * a name used more than {@code useThreshold} to the cache.
+ *
+ * One all the names are added, {@link #initialized()} should be called to
+ * finish initialization. The transient map where use count is tracked is
+ * discarded and cache is ready for use.
+ *
+ * <p>
+ * This class must be synchronized externally.
+ *
+ * @param <K> name to be added to the cache
+ */
+class NameCache<K> {
+ /**
+ * Class for tracking use count of a name
+ */
+ private class UseCount {
+ int count;
+ final K value; // Internal value for the name
+
+ UseCount(final K value) {
+ count = 1;
+ this.value = value;
+ }
+
+ void increment() {
+ count++;
+ }
+
+ int get() {
+ return count;
+ }
+ }
+
+ static final Log LOG = LogFactory.getLog(NameCache.class.getName());
+
+ /** indicates initialization is in progress */
+ private boolean initialized = false;
+
+ /** names used more than {@code useThreshold} is added to the cache */
+ private final int useThreshold;
+
+ /** of times a cache look up was successful */
+ private int lookups = 0;
+
+ /** Cached names */
+ final HashMap<K, K> cache = new HashMap<K, K>();
+
+ /** Names and with number of occurrences tracked during initialization */
+ Map<K, UseCount> transientMap = new HashMap<K, UseCount>();
+
+ /**
+ * Constructor
+ * @param useThreshold names occurring more than this is promoted to the
+ * cache
+ */
+ NameCache(int useThreshold) {
+ this.useThreshold = useThreshold;
+ }
+
+ /**
+ * Add a given name to the cache or track use count.
+ * exist. If the name already exists, then the internal value is returned.
+ *
+ * @param name name to be looked up
+ * @return internal value for the name if found; otherwise null
+ */
+ K put(final K name) {
+ K internal = cache.get(name);
+ if (internal != null) {
+ lookups++;
+ return internal;
+ }
+
+ // Track the usage count only during initialization
+ if (!initialized) {
+ UseCount useCount = transientMap.get(name);
+ if (useCount != null) {
+ useCount.increment();
+ if (useCount.get() >= useThreshold) {
+ promote(name);
+ }
+ return useCount.value;
+ }
+ useCount = new UseCount(name);
+ transientMap.put(name, useCount);
+ }
+ return null;
+ }
+
+ /**
+ * Lookup count when a lookup for a name returned cached object
+ * @return number of successful lookups
+ */
+ int getLookupCount() {
+ return lookups;
+ }
+
+ /**
+ * Size of the cache
+ * @return Number of names stored in the cache
+ */
+ int size() {
+ return cache.size();
+ }
+
+ /**
+ * Mark the name cache as initialized. The use count is no longer tracked
+ * and the transient map used for initializing the cache is discarded to
+ * save heap space.
+ */
+ void initialized() {
+ LOG.info("initialized with " + size() + " entries " + lookups + " lookups");
+ this.initialized = true;
+ transientMap.clear();
+ transientMap = null;
+ }
+
+ /** Promote a frequently used name to the cache */
+ private void promote(final K name) {
+ transientMap.remove(name);
+ cache.put(name, name);
+ lookups += useThreshold;
+ }
+}
Added: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java?rev=1077501&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java Fri Mar 4 04:21:18 2011
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.util;
+
+import java.util.Arrays;
+
+/**
+ * Wrapper for byte[] to use byte[] as key in HashMap
+ */
+public class ByteArray {
+ private int hash = 0; // cache the hash code
+ private final byte[] bytes;
+
+ public ByteArray(byte[] bytes) {
+ this.bytes = bytes;
+ }
+
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ @Override
+ public int hashCode() {
+ if (hash == 0) {
+ hash = Arrays.hashCode(bytes);
+ }
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof ByteArray)) {
+ return false;
+ }
+ return Arrays.equals(bytes, ((ByteArray)o).bytes);
+ }
+}
Added: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java?rev=1077501&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java Fri Mar 4 04:21:18 2011
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+/**
+ * Test for {@link NameCache} class
+ */
+public class TestNameCache {
+ @Test
+ public void testDictionary() throws Exception {
+ // Create dictionary with useThreshold 2
+ NameCache<String> cache =
+ new NameCache<String>(2);
+ String[] matching = {"part1", "part10000000", "fileabc", "abc", "filepart"};
+ String[] notMatching = {"spart1", "apart", "abcd", "def"};
+
+ for (String s : matching) {
+ // Add useThreshold times so the names are promoted to dictionary
+ cache.put(s);
+ assertTrue(s == cache.put(s));
+ }
+ for (String s : notMatching) {
+ // Add < useThreshold times so the names are not promoted to dictionary
+ cache.put(s);
+ }
+
+ // Mark dictionary as initialized
+ cache.initialized();
+
+ for (String s : matching) {
+ verifyNameReuse(cache, s, true);
+ }
+ // Check dictionary size
+ assertEquals(matching.length, cache.size());
+
+ for (String s : notMatching) {
+ verifyNameReuse(cache, s, false);
+ }
+ }
+
+ private void verifyNameReuse(NameCache<String> cache, String s, boolean reused) {
+ cache.put(s);
+ int lookupCount = cache.getLookupCount();
+ if (reused) {
+ // Dictionary returns non null internal value
+ assertNotNull(cache.put(s));
+ // Successful lookup increments lookup count
+ assertEquals(lookupCount + 1, cache.getLookupCount());
+ } else {
+ // Dictionary returns null - since name is not in the dictionary
+ assertNull(cache.put(s));
+ // Lookup count remains the same
+ assertEquals(lookupCount, cache.getLookupCount());
+ }
+ }
+}