Posted to common-commits@hadoop.apache.org by da...@apache.org on 2013/08/09 21:52:20 UTC

svn commit: r1512470 - in /hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common: CHANGES.txt src/main/java/org/apache/hadoop/fs/HarFileSystem.java src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java

Author: daryn
Date: Fri Aug  9 19:52:20 2013
New Revision: 1512470

URL: http://svn.apache.org/r1512470
Log:
HADOOP-9757. Har metadata cache can grow without limit (Cristina Abad via daryn)

Modified:
    hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
    hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
    hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java

Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1512470&r1=1512469&r2=1512470&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt Fri Aug  9 19:52:20 2013
@@ -17,6 +17,8 @@ Release 0.23.10 - UNRELEASED
 
   BUG FIXES
 
+    HADOOP-9757. Har metadata cache can grow without limit (Cristina Abad via daryn)
+
 Release 0.23.9 - 2013-07-08
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java?rev=1512470&r1=1512469&r2=1512470&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java Fri Aug  9 19:52:20 2013
@@ -24,11 +24,12 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URLDecoder;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.HashMap;
-import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -56,10 +57,12 @@ public class HarFileSystem extends Filte
 
   private static final Log LOG = LogFactory.getLog(HarFileSystem.class);
 
+  public static final String METADATA_CACHE_ENTRIES_KEY = "fs.har.metadatacache.entries";
+  public static final int METADATA_CACHE_ENTRIES_DEFAULT = 10;
+
   public static final int VERSION = 3;
 
-  private static final Map<URI, HarMetaData> harMetaCache =
-      new ConcurrentHashMap<URI, HarMetaData>();
+  private static Map<URI, HarMetaData> harMetaCache;
 
   // uri representation of this Har filesystem
   private URI uri;
@@ -87,7 +90,14 @@ public class HarFileSystem extends Filte
   public HarFileSystem(FileSystem fs) {
     super(fs);
   }
-  
+ 
+  private synchronized void initializeMetadataCache(Configuration conf) {
+    if (harMetaCache == null) {
+      int cacheSize = conf.getInt(METADATA_CACHE_ENTRIES_KEY, METADATA_CACHE_ENTRIES_DEFAULT);
+      harMetaCache = Collections.synchronizedMap(new LruCache<URI, HarMetaData>(cacheSize));
+    }
+  }
+ 
   /**
    * Initialize a Har filesystem per har archive. The 
    * archive home directory is the top level directory
@@ -102,6 +112,9 @@ public class HarFileSystem extends Filte
    * to be used in case not specified.
    */
   public void initialize(URI name, Configuration conf) throws IOException {
+    // initialize the metadata cache, if needed
+    initializeMetadataCache(conf);
+
     // decode the name
     URI underLyingURI = decodeHarURI(name, conf);
     // we got the right har Path- now check if this is 
@@ -1066,4 +1079,18 @@ public class HarFileSystem extends Filte
   HarMetaData getMetadata() {
     return metadata;
   }
+
+  private static class LruCache<K, V> extends LinkedHashMap<K, V> {
+    private final int MAX_ENTRIES;
+
+    public LruCache(int maxEntries) {
+        super(maxEntries + 1, 1.0f, true);
+        MAX_ENTRIES = maxEntries;
+    }
+
+    @Override
+    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
+        return size() > MAX_ENTRIES;
+    }
+  }
 }
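
For readers new to the technique: the patch bounds the cache with java.util.LinkedHashMap's access-order mode, in which removeEldestEntry() is consulted on every put(). Below is a minimal, self-contained sketch of that pattern (class and variable names are illustrative, not part of the patch), including the Collections.synchronizedMap wrapper the patch uses for thread safety:

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

public class LruCacheDemo {

  // Same idea as the patch's LruCache: accessOrder=true keeps the map
  // ordered by most-recent access, and removeEldestEntry() evicts the
  // least-recently-used entry once the size limit is exceeded.
  private static class LruCache<K, V> extends LinkedHashMap<K, V> {
    private final int maxEntries;

    LruCache(int maxEntries) {
      super(maxEntries + 1, 1.0f, true); // capacity, load factor, accessOrder
      this.maxEntries = maxEntries;
    }

    @Override
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
      return size() > maxEntries; // invoked by put(); true drops the eldest
    }
  }

  public static void main(String[] args) {
    // LinkedHashMap is not thread safe, so wrap it as the patch does.
    Map<String, Integer> cache =
        Collections.synchronizedMap(new LruCache<String, Integer>(2));
    cache.put("a", 1);
    cache.put("b", 2);
    cache.get("a");    // touching "a" makes "b" the eldest entry
    cache.put("c", 3); // evicts "b"
    System.out.println(cache.keySet()); // prints [a, c]
  }
}

One caveat on the design choice: the synchronizedMap wrapper protects individual calls only; iterating over the map would still require external synchronization, though a cache that is only read and written entry-by-entry avoids that concern.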

Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java?rev=1512470&r1=1512469&r2=1512470&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java Fri Aug  9 19:52:20 2013
@@ -82,7 +82,7 @@ public class TestHarFileSystemBasics {
     localFileSystem.createNewFile(masterIndexPath);
     assertTrue(localFileSystem.exists(masterIndexPath));
 
-    writeVersionToMasterIndexImpl(HarFileSystem.VERSION);
+    writeVersionToMasterIndexImpl(HarFileSystem.VERSION, masterIndexPath);
 
     final HarFileSystem harFileSystem = new HarFileSystem(localFileSystem);
     final URI uri = new URI("har://" + harPath.toString());
@@ -90,8 +90,25 @@ public class TestHarFileSystemBasics {
     return harFileSystem;
   }
 
-  private void writeVersionToMasterIndexImpl(int version) throws IOException {
-    final Path masterIndexPath = new Path(harPath, "_masterindex");
+  private HarFileSystem createHarFileSystem(final Configuration conf, Path aHarPath)
+      throws Exception {
+    localFileSystem.mkdirs(aHarPath);
+    final Path indexPath = new Path(aHarPath, "_index");
+    final Path masterIndexPath = new Path(aHarPath, "_masterindex");
+    localFileSystem.createNewFile(indexPath);
+    assertTrue(localFileSystem.exists(indexPath));
+    localFileSystem.createNewFile(masterIndexPath);
+    assertTrue(localFileSystem.exists(masterIndexPath));
+
+    writeVersionToMasterIndexImpl(HarFileSystem.VERSION, masterIndexPath);
+
+    final HarFileSystem harFileSystem = new HarFileSystem(localFileSystem);
+    final URI uri = new URI("har://" + aHarPath.toString());
+    harFileSystem.initialize(uri, conf);
+    return harFileSystem;
+  }
+
+  private void writeVersionToMasterIndexImpl(int version, Path masterIndexPath) throws IOException {
     // write Har version into the master index:
     final FSDataOutputStream fsdos = localFileSystem.create(masterIndexPath);
     try {
@@ -173,6 +190,29 @@ public class TestHarFileSystemBasics {
   }
 
   @Test
+  public void testPositiveLruMetadataCacheFs() throws Exception {
+    // Init 2nd har file system on the same underlying FS, so the
+    // metadata gets reused:
+    HarFileSystem hfs = new HarFileSystem(localFileSystem);
+    URI uri = new URI("har://" + harPath.toString());
+    hfs.initialize(uri, new Configuration());
+    // the metadata should be reused from cache:
+    assertTrue(hfs.getMetadata() == harFileSystem.getMetadata());
+
+    // Create more hars, until the cache is full + 1; the last creation should evict the first entry from the cache
+    for (int i = 0; i <= hfs.METADATA_CACHE_ENTRIES_DEFAULT; i++) {
+      Path p = new Path(rootPath, "path1/path2/my" + i +".har");
+      createHarFileSystem(conf, p);
+    }
+
+    // The first entry should not be in the cache anymore:
+    hfs = new HarFileSystem(localFileSystem);
+    uri = new URI("har://" + harPath.toString());
+    hfs.initialize(uri, new Configuration());
+    assertTrue(hfs.getMetadata() != harFileSystem.getMetadata());
+  }
+
+  @Test
   public void testPositiveInitWithoutUnderlyingFS() throws Exception {
     // Init HarFS with no constructor arg, so that the underlying FS object
     // is created on demand or got from cache in #initialize() method.
@@ -218,7 +258,7 @@ public class TestHarFileSystemBasics {
     // time with 1 second accuracy:
     Thread.sleep(1000);
     // write an unsupported version:
-    writeVersionToMasterIndexImpl(7777);
+    writeVersionToMasterIndexImpl(7777, new Path(harPath, "_masterindex"));
     // init the Har:
     final HarFileSystem hfs = new HarFileSystem(localFileSystem);
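
For deployments that touch many .har archives, the new fs.har.metadatacache.entries key raises the bound. A hedged usage sketch follows (the har:// path is illustrative, not from the patch); note that because the cache is a static field sized lazily on the first HarFileSystem.initialize() in the JVM, the key must be set before the first har:// access:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HarFileSystem;

public class HarCacheConfigDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Cache metadata for up to 50 archives instead of the default 10
    // (METADATA_CACHE_ENTRIES_DEFAULT). This must happen before any
    // har:// filesystem is initialized, since the static cache is
    // sized once, on first use.
    conf.setInt(HarFileSystem.METADATA_CACHE_ENTRIES_KEY, 50);

    // Illustrative archive location; substitute a real .har path.
    FileSystem fs = FileSystem.get(new URI("har:///user/foo/data.har"), conf);
    System.out.println("initialized " + fs.getUri());
  }
}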