Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2012/09/10 20:53:42 UTC

svn commit: r1383041 - in /hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs: CHANGES.HDFS-3077.txt src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java

Author: todd
Date: Mon Sep 10 18:53:41 2012
New Revision: 1383041

URL: http://svn.apache.org/viewvc?rev=1383041&view=rev
Log:
HDFS-3900. QJM: avoid validating log segments on log rolls. Contributed by Todd Lipcon.
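
For context, the gist of the change: when a writer finalizes the segment it
was itself just writing, the JournalNode already knows the last transaction
it has synced (nextTxId), so the full validation scan of the segment file can
be skipped. A minimal sketch of the resulting control flow, abbreviated from
the Journal.java diff below (message strings elided, needsValidation flag
folded into the branch):

    if (startTxId == curSegmentTxId) {
      // Fast path: trust the in-memory write position, O(1).
      checkSync(nextTxId == endTxId + 1, "...");
    } else if (elf.isInProgress()) {
      // Recovery path: scan the segment file on disk, O(segment length).
      elf.validateLog();
      checkSync(elf.getLastTxId() == endTxId, "...");
    }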

Modified:
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
    hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt?rev=1383041&r1=1383040&r2=1383041&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt Mon Sep 10 18:53:41 2012
@@ -56,3 +56,5 @@ HDFS-3897. QJM: TestBlockToken fails aft
 HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
 
 HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
+
+HDFS-3900. QJM: avoid validating log segments on log rolls (todd)

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java?rev=1383041&r1=1383040&r2=1383041&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java Mon Sep 10 18:53:41 2012
@@ -144,17 +144,18 @@ class Journal implements Closeable {
   }
   
   /**
-   * Iterate over the edit logs stored locally, and set
-   * {@link #curSegmentTxId} to refer to the most recently written
-   * one.
+   * Scan the local storage directory, and return the segment containing
+   * the highest transaction ID.
+   * @return the EditLogFile with the highest transaction ID, or null
+   * if no files exist.
    */
-  private synchronized void scanStorage() throws IOException {
+  private synchronized EditLogFile scanStorageForLatestEdits() throws IOException {
     if (!fjm.getStorageDirectory().getCurrentDir().exists()) {
-      return;
+      return null;
     }
+    
     LOG.info("Scanning storage " + fjm);
     List<EditLogFile> files = fjm.getLogFiles(0);
-    curSegmentTxId = HdfsConstants.INVALID_TXID;
     
     while (!files.isEmpty()) {
       EditLogFile latestLog = files.remove(files.size() - 1);
@@ -166,10 +167,12 @@ class Journal implements Closeable {
             "moving it aside and looking for previous log");
         latestLog.moveAsideEmptyFile();
       } else {
-        curSegmentTxId = latestLog.getFirstTxId();
-        break;
+        return latestLog;
       }
     }
+    
+    LOG.info("No files in " + fjm);
+    return null;
   }
 
   /**
@@ -248,25 +251,30 @@ class Journal implements Closeable {
     }
     
     lastPromisedEpoch.set(epoch);
-    if (curSegment != null) {
-      curSegment.close();
-      curSegment = null;
-      curSegmentTxId = HdfsConstants.INVALID_TXID;
-    }
+    abortCurSegment();
     
     NewEpochResponseProto.Builder builder =
         NewEpochResponseProto.newBuilder();
 
-    // TODO: we only need to do this once, not on writer switchover.
-    scanStorage();
+    EditLogFile latestFile = scanStorageForLatestEdits();
 
-    if (curSegmentTxId != HdfsConstants.INVALID_TXID) {
-      builder.setLastSegmentTxId(curSegmentTxId);
+    if (latestFile != null) {
+      builder.setLastSegmentTxId(latestFile.getFirstTxId());
     }
     
     return builder.build();
   }
 
+  private void abortCurSegment() throws IOException {
+    if (curSegment == null) {
+      return;
+    }
+    
+    curSegment.abort();
+    curSegment = null;
+    curSegmentTxId = HdfsConstants.INVALID_TXID;
+  }
+
   /**
    * Write a batch of edits to the journal.
    * {@see QJournalProtocol#journal(RequestInfo, long, long, int, byte[])}
@@ -287,11 +295,11 @@ class Journal implements Closeable {
       // instead of rolling to a new one, which breaks one of the
       // invariants in the design. If it happens, abort the segment
       // and throw an exception.
-      curSegment.abort();
-      curSegment = null;
-      throw new IllegalStateException(
+      JournalOutOfSyncException e = new JournalOutOfSyncException(
           "Writer out of sync: it thinks it is writing segment " + segmentTxId
           + " but current segment is " + curSegmentTxId);
+      abortCurSegment();
+      throw e;
     }
       
     checkSync(nextTxId == firstTxnId,
@@ -410,8 +418,7 @@ class Journal implements Closeable {
       // The writer may have lost a connection to us and is now
       // re-connecting after the connection came back.
       // We should abort our own old segment.
-      curSegment.abort();
-      curSegment = null;
+      abortCurSegment();
     }
 
     // Paranoid sanity check: we should never overwrite a finalized log file.
@@ -459,11 +466,23 @@ class Journal implements Closeable {
     checkFormatted();
     checkRequest(reqInfo);
 
+    boolean needsValidation = true;
+
+    // Finalizing the segment that the writer was just writing to.
     if (startTxId == curSegmentTxId) {
       if (curSegment != null) {
         curSegment.close();
         curSegment = null;
+        curSegmentTxId = HdfsConstants.INVALID_TXID;
       }
+      
+      checkSync(nextTxId == endTxId + 1,
+          "Trying to finalize in-progress log segment %s to end at " +
+          "txid %s but only written up to txid %s",
+          startTxId, endTxId, nextTxId - 1);
+      // No need to validate the edit log if the client is finalizing
+      // the log segment that it was just writing to.
+      needsValidation = false;
     }
     
     FileJournalManager.EditLogFile elf = fjm.getLogFile(startTxId);
@@ -473,15 +492,16 @@ class Journal implements Closeable {
     }
 
     if (elf.isInProgress()) {
-      // TODO: this is slow to validate when in non-recovery cases
-      // we already know the length here!
-
-      LOG.info("Validating log about to be finalized: " + elf);
-      elf.validateLog();
-
-      checkSync(elf.getLastTxId() == endTxId,
-          "Trying to finalize log %s-%s, but current state of log " +
-          "is %s", startTxId, endTxId, elf);
+      if (needsValidation) {
+        LOG.info("Validating log segment " + elf.getFile() + " about to be " +
+            "finalized");
+        elf.validateLog();
+  
+        checkSync(elf.getLastTxId() == endTxId,
+            "Trying to finalize in-progress log segment %s to end at " +
+            "txid %s but log %s on disk only contains up to txid %s",
+            startTxId, endTxId, elf.getFile(), elf.getLastTxId());
+      }
       fjm.finalizeLogSegment(startTxId, endTxId);
     } else {
       Preconditions.checkArgument(endTxId == elf.getLastTxId(),
@@ -599,6 +619,8 @@ class Journal implements Closeable {
     checkFormatted();
     checkRequest(reqInfo);
     
+    abortCurSegment();
+    
     PrepareRecoveryResponseProto.Builder builder =
         PrepareRecoveryResponseProto.newBuilder();
 

Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java?rev=1383041&r1=1383040&r2=1383041&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java Mon Sep 10 18:53:41 2012
@@ -222,7 +222,7 @@ public class TestJournal {
       fail("did not fail to finalize");
     } catch (JournalOutOfSyncException e) {
       GenericTestUtils.assertExceptionContains(
-          "but current state of log is", e);
+          "but only written up to txid 3", e);
     }
     
     // Check that, even if we re-construct the journal by scanning the
@@ -235,7 +235,7 @@ public class TestJournal {
       fail("did not fail to finalize");
     } catch (JournalOutOfSyncException e) {
       GenericTestUtils.assertExceptionContains(
-          "but current state of log is", e);
+          "disk only contains up to txid 3", e);
     }
   }
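
The two assertion updates above simply track the new checkSync() message
text: "but only written up to txid ..." on the fast path, and "disk only
contains up to txid ..." on the validation path. For reference,
GenericTestUtils.assertExceptionContains amounts to a substring check on the
exception message, roughly:

    static void assertExceptionContains(String expected, Throwable t) {
      String msg = t.getMessage();
      if (msg == null || !msg.contains(expected)) {
        throw new AssertionError(
            "Expected to find '" + expected + "' but got: " + msg);
      }
    }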