You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 05:21:18 UTC

svn commit: r1077501 - in /hadoop/common/branches/branch-0.20-security-patches/src: hdfs/org/apache/hadoop/hdfs/ hdfs/org/apache/hadoop/hdfs/server/namenode/ hdfs/org/apache/hadoop/hdfs/util/ test/org/apache/hadoop/hdfs/server/namenode/

Author: omalley
Date: Fri Mar  4 04:21:18 2011
New Revision: 1077501

URL: http://svn.apache.org/viewvc?rev=1077501&view=rev
Log:
commit ab4ef4b0c24c4b6a386efb9da55ee7d01e84228b
Author: Suresh Srinivas <su...@yahoo-inc.com>
Date:   Fri Jun 11 11:07:33 2010 -0700

    HDFS-1110 from https://issues.apache.org/jira/secure/attachment/12446772/hdfs-1110.y20.patch
    
    +++ b/YAHOO-CHANGES.txt
    +    HDFS-1110. Reuses objects for commonly used file names in namenode to
    +    reduce the heap usage. (suresh)
    +

Added:
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java
    hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java
Modified:
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
    hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1077501&r1=1077500&r2=1077501&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java Fri Mar  4 04:21:18 2011
@@ -202,4 +202,6 @@ public class DFSConfigKeys extends Commo
   public static final String  DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY = "dfs.secondary.namenode.keytab.file";
   public static final String  DFS_SECONDARY_NAMENODE_USER_NAME_KEY = "dfs.secondary.namenode.kerberos.principal";
   public static final String  DFS_SECONDARY_NAMENODE_KRB_HTTPS_USER_NAME_KEY = "dfs.secondary.namenode.kerberos.https.principal";
+  public static final String  DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY = "dfs.namenode.name.cache.threshold";
+  public static final int     DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT = 10;
 }

Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java?rev=1077501&r1=1077500&r2=1077501&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java Fri Mar  4 04:21:18 2011
@@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.H
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
+import org.apache.hadoop.hdfs.util.ByteArray;
 import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
 
 /*************************************************
@@ -55,6 +56,12 @@ class FSDirectory implements FSConstants
   private MetricsRecord directoryMetrics = null;
   private final int lsLimit;  // max list limit
 
+  /**
+   * Caches frequently used file names used in {@link INode} to reuse 
+   * byte[] objects and reduce heap usage.
+   */
+  private final NameCache<ByteArray> nameCache;
+
   /** Access an existing dfs name directory. */
   FSDirectory(FSNamesystem ns, Configuration conf) {
     this(new FSImage(), ns, conf);
@@ -72,6 +79,14 @@ class FSDirectory implements FSConstants
         DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
     this.lsLimit = configuredLimit>0 ? 
         configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT;
+    
+    int threshold = conf.getInt(
+        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY,
+        DFSConfigKeys.DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT);
+    NameNode.LOG.info("Caching file names occuring more than " + threshold
+        + " times ");
+    nameCache = new NameCache<ByteArray>(threshold);
+
     initialize(conf);
   }
     
@@ -105,6 +120,7 @@ class FSDirectory implements FSConstants
     }
     synchronized (this) {
       this.ready = true;
+      this.nameCache.initialized();
       this.notifyAll();
     }
   }
@@ -242,6 +258,7 @@ class FSDirectory implements FSConstants
     synchronized (rootDir) {
       try {
         newParent = rootDir.addToParent(src, newNode, parentINode, false);
+        cacheName(newNode);
       } catch (FileNotFoundException e) {
         return null;
       }
@@ -1013,7 +1030,9 @@ class FSDirectory implements FSConstants
         long childDiskspace, boolean inheritPermission) 
   throws QuotaExceededException {
     byte[][] components = INode.getPathComponents(src);
-    child.setLocalName(components[components.length-1]);
+    byte[] path = components[components.length-1];
+    child.setLocalName(path);
+    cacheName(child);
     INode[] inodes = new INode[components.length];
     synchronized (rootDir) {
       rootDir.getExistingPathINodes(components, inodes);
@@ -1377,4 +1396,20 @@ class FSDirectory implements FSConstants
         node.getGroupName(),
         path);
   }
+  
+  /**
+   * Caches frequently used file names to reuse file name objects and
+   * reduce heap size.
+   */
+  void cacheName(INode inode) {
+    // Name is cached only for files
+    if (inode.isDirectory()) {
+      return;
+    }
+    ByteArray name = new ByteArray(inode.getLocalNameBytes());
+    name = nameCache.put(name);
+    if (name != null) {
+      inode.setLocalName(name.getBytes());
+    }
+  }
 }

Added: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java?rev=1077501&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameCache.java Fri Mar  4 04:21:18 2011
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Caches frequently used names to facilitate reuse.
+ * (example: byte[] representation of the file name in {@link INode}).
+ * 
+ * This class is used by initially adding all the file names. Cache
+ * tracks the number of times a name is used in a transient map. It promotes 
+ * a name used more than {@code useThreshold} to the cache.
+ * 
+ * One all the names are added, {@link #initialized()} should be called to
+ * finish initialization. The transient map where use count is tracked is
+ * discarded and cache is ready for use.
+ * 
+ * <p>
+ * This class must be synchronized externally.
+ * 
+ * @param <K> name to be added to the cache
+ */
+class NameCache<K> {
+  /**
+   * Class for tracking use count of a name
+   */
+  private class UseCount {
+    int count;
+    final K value;  // Internal value for the name
+
+    UseCount(final K value) {
+      count = 1;
+      this.value = value;
+    }
+    
+    void increment() {
+      count++;
+    }
+    
+    int get() {
+      return count;
+    }
+  }
+
+  static final Log LOG = LogFactory.getLog(NameCache.class.getName());
+
+  /** indicates initialization is in progress */
+  private boolean initialized = false;
+
+  /** names used more than {@code useThreshold} is added to the cache */
+  private final int useThreshold;
+
+  /** of times a cache look up was successful */
+  private int lookups = 0;
+
+  /** Cached names */
+  final HashMap<K, K> cache = new HashMap<K, K>();
+
+  /** Names and with number of occurrences tracked during initialization */
+  Map<K, UseCount> transientMap = new HashMap<K, UseCount>();
+
+  /**
+   * Constructor
+   * @param useThreshold names occurring more than this is promoted to the
+   *          cache
+   */
+  NameCache(int useThreshold) {
+    this.useThreshold = useThreshold;
+  }
+  
+  /**
+   * Add a given name to the cache or track use count.
+   * exist. If the name already exists, then the internal value is returned.
+   * 
+   * @param name name to be looked up
+   * @return internal value for the name if found; otherwise null
+   */
+  K put(final K name) {
+    K internal = cache.get(name);
+    if (internal != null) {
+      lookups++;
+      return internal;
+    }
+
+    // Track the usage count only during initialization
+    if (!initialized) {
+      UseCount useCount = transientMap.get(name);
+      if (useCount != null) {
+        useCount.increment();
+        if (useCount.get() >= useThreshold) {
+          promote(name);
+        }
+        return useCount.value;
+      }
+      useCount = new UseCount(name);
+      transientMap.put(name, useCount);
+    }
+    return null;
+  }
+  
+  /**
+   * Lookup count when a lookup for a name returned cached object
+   * @return number of successful lookups
+   */
+  int getLookupCount() {
+    return lookups;
+  }
+
+  /**
+   * Size of the cache
+   * @return Number of names stored in the cache
+   */
+  int size() {
+    return cache.size();
+  }
+
+  /**
+   * Mark the name cache as initialized. The use count is no longer tracked
+   * and the transient map used for initializing the cache is discarded to
+   * save heap space.
+   */
+  void initialized() {
+    LOG.info("initialized with " + size() + " entries " + lookups + " lookups");
+    this.initialized = true;
+    transientMap.clear();
+    transientMap = null;
+  }
+  
+  /** Promote a frequently used name to the cache */
+  private void promote(final K name) {
+    transientMap.remove(name);
+    cache.put(name, name);
+    lookups += useThreshold;
+  }
+}

Added: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java?rev=1077501&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/util/ByteArray.java Fri Mar  4 04:21:18 2011
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.util;
+
+import java.util.Arrays;
+
+/** 
+ * Wrapper for byte[] to use byte[] as key in HashMap
+ */
+public class ByteArray {
+  private int hash = 0; // cache the hash code
+  private final byte[] bytes;
+  
+  public ByteArray(byte[] bytes) {
+    this.bytes = bytes;
+  }
+  
+  public byte[] getBytes() {
+    return bytes;
+  }
+  
+  @Override
+  public int hashCode() {
+    if (hash == 0) {
+      hash = Arrays.hashCode(bytes);
+    }
+    return hash;
+  }
+  
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof ByteArray)) {
+      return false;
+    }
+    return Arrays.equals(bytes, ((ByteArray)o).bytes);
+  }
+}

Added: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java?rev=1077501&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java (added)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/namenode/TestNameCache.java Fri Mar  4 04:21:18 2011
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+/**
+ * Test for {@link NameCache} class
+ */
+public class TestNameCache {
+  @Test
+  public void testDictionary() throws Exception {
+    // Create dictionary with useThreshold 2
+    NameCache<String> cache = 
+      new NameCache<String>(2);
+    String[] matching = {"part1", "part10000000", "fileabc", "abc", "filepart"};
+    String[] notMatching = {"spart1", "apart", "abcd", "def"};
+
+    for (String s : matching) {
+      // Add useThreshold times so the names are promoted to dictionary
+      cache.put(s);
+      assertTrue(s == cache.put(s));
+    }
+    for (String s : notMatching) {
+      // Add < useThreshold times so the names are not promoted to dictionary
+      cache.put(s);
+    }
+    
+    // Mark dictionary as initialized
+    cache.initialized();
+    
+    for (String s : matching) {
+      verifyNameReuse(cache, s, true);
+    }
+    // Check dictionary size
+    assertEquals(matching.length, cache.size());
+    
+    for (String s : notMatching) {
+      verifyNameReuse(cache, s, false);
+    }
+  }
+
+  private void verifyNameReuse(NameCache<String> cache, String s, boolean reused) {
+    cache.put(s);
+    int lookupCount = cache.getLookupCount();
+    if (reused) {
+      // Dictionary returns non null internal value
+      assertNotNull(cache.put(s));
+      // Successful lookup increments lookup count
+      assertEquals(lookupCount + 1, cache.getLookupCount());
+    } else {
+      // Dictionary returns null - since name is not in the dictionary
+      assertNull(cache.put(s));
+      // Lookup count remains the same
+      assertEquals(lookupCount, cache.getLookupCount());
+    }
+  }
+}