You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2012/09/20 01:41:00 UTC

svn commit: r1387817 - in /hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/qjournal/server/ src/test/java/org/apache/hadoop/hdfs/qjournal/client/

Author: todd
Date: Wed Sep 19 23:40:59 2012
New Revision: 1387817

URL: http://svn.apache.org/viewvc?rev=1387817&view=rev
Log:
HDFS-3956. QJM: purge temporary files when no longer within retention period. Contributed by Todd Lipcon.

Modified:
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt?rev=1387817&r1=1387816&r2=1387817&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt Wed Sep 19 23:40:59 2012
@@ -80,3 +80,5 @@ HDFS-3943. QJM: remove currently-unused 
 HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd)
 
 HDFS-3955. QJM: Make acceptRecovery() atomic. (todd)
+
+HDFS-3956. QJM: purge temporary files when no longer within retention period (todd)

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java?rev=1387817&r1=1387816&r2=1387817&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java Wed Sep 19 23:40:59 2012
@@ -19,7 +19,11 @@ package org.apache.hadoop.hdfs.qjournal.
 
 import java.io.File;
 import java.io.IOException;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage;
@@ -28,6 +32,8 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.namenode.NNStorage;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 
+import com.google.common.collect.ImmutableList;
+
 /**
  * A {@link Storage} implementation for the {@link JournalNode}.
  * 
@@ -39,6 +45,15 @@ class JNStorage extends Storage {
   private final FileJournalManager fjm;
   private final StorageDirectory sd;
   private StorageState state;
+  
+
+  private static final List<Pattern> CURRENT_DIR_PURGE_REGEXES =
+      ImmutableList.of(
+        Pattern.compile("edits_\\d+-(\\d+)"),
+        Pattern.compile("edits_inprogress_(\\d+)(?:\\..*)?"));
+  
+  private static final List<Pattern> PAXOS_DIR_PURGE_REGEXES = 
+      ImmutableList.of(Pattern.compile("(\\d+)"));
 
   /**
    * @param logDir the path to the directory in which data will be stored
@@ -111,6 +126,48 @@ class JNStorage extends Storage {
   File getPaxosDir() {
     return new File(sd.getCurrentDir(), "paxos");
   }
+  
+  /**
+   * Purge edit log files in the current directory, and files in the paxos
+   * directory, whose transaction ID is lower than <code>minTxIdToKeep</code>.
+   */
+  void purgeDataOlderThan(long minTxIdToKeep) throws IOException {
+    File currentDir = sd.getCurrentDir();
+    purgeMatching(currentDir, CURRENT_DIR_PURGE_REGEXES, minTxIdToKeep);
+    purgeMatching(getPaxosDir(), PAXOS_DIR_PURGE_REGEXES, minTxIdToKeep);
+  }
+  
+  /**
+   * Purge files in the given directory which match any of the set of patterns.
+   * The patterns must have a single numeric capture group which determines
+   * the associated transaction ID of the file. Only those files for which
+   * the transaction ID is less than the <code>minTxIdToKeep</code> parameter
+   * are removed. A failed delete is logged as a warning rather than thrown.
+   */
+  private static void purgeMatching(File dir, List<Pattern> patterns,
+      long minTxIdToKeep) throws IOException {
+
+    for (File f : FileUtil.listFiles(dir)) {
+      if (!f.isFile()) continue;
+      
+      for (Pattern p : patterns) {
+        Matcher matcher = p.matcher(f.getName());
+        if (matcher.matches()) {
+          // group(1) is /\d+/ in the regex itself, so this parse succeeds
+          // for any transaction ID that fits in a long.
+          long txid = Long.parseLong(matcher.group(1));
+          if (txid < minTxIdToKeep) {
+            LOG.info("Purging no-longer needed file " + f);
+            if (!f.delete()) {
+              LOG.warn("Unable to delete no-longer-needed data " +
+                  f);
+            }
+            break;
+          }
+        }
+      }
+    }
+  }
 
   void format(NamespaceInfo nsInfo) throws IOException {
     setStorageInfo(nsInfo);

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java?rev=1387817&r1=1387816&r2=1387817&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java Wed Sep 19 23:40:59 2012
@@ -27,6 +27,8 @@ import java.net.URL;
 import java.security.PrivilegedExceptionAction;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -595,8 +597,7 @@ class Journal implements Closeable {
     checkFormatted();
     checkRequest(reqInfo);
     
-    fjm.purgeLogsOlderThan(minTxIdToKeep);
-    purgePaxosDecisionsOlderThan(minTxIdToKeep);
+    storage.purgeDataOlderThan(minTxIdToKeep);
   }
   
   /**
@@ -614,30 +615,6 @@ class Journal implements Closeable {
     }
   }
 
-  private void purgePaxosDecisionsOlderThan(long minTxIdToKeep)
-      throws IOException {
-    File dir = storage.getPaxosDir();
-    for (File f : FileUtil.listFiles(dir)) {
-      if (!f.isFile()) continue;
-      
-      long txid;
-      try {
-        txid = Long.valueOf(f.getName());
-      } catch (NumberFormatException nfe) {
-        LOG.warn("Unexpected non-numeric file name for " + f.getAbsolutePath());
-        continue;
-      }
-      
-      if (txid < minTxIdToKeep) {
-        if (!f.delete()) {
-          LOG.warn("Unable to delete no-longer-needed paxos decision record " +
-              f);
-        }
-      }
-    }
-  }
-
-
   /**
    * @see QJournalProtocol#getEditLogManifest(String, long)
    */

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java?rev=1387817&r1=1387816&r2=1387817&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java Wed Sep 19 23:40:59 2012
@@ -234,10 +234,6 @@ public class TestQJMWithFaults {
         
         QuorumJournalManager qjm = createRandomFaultyQJM(cluster, r);
         try {
-          if (txid > 100) {
-            qjm.purgeLogsOlderThan(txid - 100);
-          }
-  
           long recovered;
           try {
             recovered = QJMTestUtil.recoverAndReturnLastTxn(qjm);
@@ -252,6 +248,12 @@ public class TestQJMWithFaults {
           
           txid = recovered + 1;
           
+          // Periodically purge old data on disk so it's easier to look
+          // at failure cases.
+          if (txid > 100 && i % 10 == 1) {
+            qjm.purgeLogsOlderThan(txid - 100);
+          }
+
           Holder<Throwable> thrown = new Holder<Throwable>(null);
           for (int j = 0; j < SEGMENTS_PER_WRITER; j++) {
             lastAcked = writeSegmentUntilCrash(cluster, qjm, txid, 4, thrown);

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java?rev=1387817&r1=1387816&r2=1387817&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java Wed Sep 19 23:40:59 2012
@@ -854,6 +854,12 @@ public class TestQuorumJournalManager {
     GenericTestUtils.assertGlobEquals(paxosDir, "\\d+",
         "1", "3");
     
+    // Create some temporary files of the sort that are used during recovery.
+    assertTrue(new File(curDir,
+        "edits_inprogress_0000000000000000001.epoch=140").createNewFile());
+    assertTrue(new File(curDir,
+        "edits_inprogress_0000000000000000002.empty").createNewFile());
+    
     qjm.purgeLogsOlderThan(3);
     
     // Log purging is asynchronous, so we have to wait for the calls