You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by to...@apache.org on 2012/09/10 20:53:42 UTC
svn commit: r1383041 - in
/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs:
CHANGES.HDFS-3077.txt
src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java
Author: todd
Date: Mon Sep 10 18:53:41 2012
New Revision: 1383041
URL: http://svn.apache.org/viewvc?rev=1383041&view=rev
Log:
HDFS-3900. QJM: avoid validating log segments on log rolls. Contributed by Todd Lipcon.
Modified:
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt?rev=1383041&r1=1383040&r2=1383041&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3077.txt Mon Sep 10 18:53:41 2012
@@ -56,3 +56,5 @@ HDFS-3897. QJM: TestBlockToken fails aft
HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
+
+HDFS-3900. QJM: avoid validating log segments on log rolls (todd)
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java?rev=1383041&r1=1383040&r2=1383041&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java Mon Sep 10 18:53:41 2012
@@ -144,17 +144,18 @@ class Journal implements Closeable {
}
/**
- * Iterate over the edit logs stored locally, and set
- * {@link #curSegmentTxId} to refer to the most recently written
- * one.
+ * Scan the local storage directory, and return the segment containing
+ * the highest transaction.
+ * @return the EditLogFile with the highest transactions, or null
+ * if no files exist.
*/
- private synchronized void scanStorage() throws IOException {
+ private synchronized EditLogFile scanStorageForLatestEdits() throws IOException {
if (!fjm.getStorageDirectory().getCurrentDir().exists()) {
- return;
+ return null;
}
+
LOG.info("Scanning storage " + fjm);
List<EditLogFile> files = fjm.getLogFiles(0);
- curSegmentTxId = HdfsConstants.INVALID_TXID;
while (!files.isEmpty()) {
EditLogFile latestLog = files.remove(files.size() - 1);
@@ -166,10 +167,12 @@ class Journal implements Closeable {
"moving it aside and looking for previous log");
latestLog.moveAsideEmptyFile();
} else {
- curSegmentTxId = latestLog.getFirstTxId();
- break;
+ return latestLog;
}
}
+
+ LOG.info("No files in " + fjm);
+ return null;
}
/**
@@ -248,25 +251,30 @@ class Journal implements Closeable {
}
lastPromisedEpoch.set(epoch);
- if (curSegment != null) {
- curSegment.close();
- curSegment = null;
- curSegmentTxId = HdfsConstants.INVALID_TXID;
- }
+ abortCurSegment();
NewEpochResponseProto.Builder builder =
NewEpochResponseProto.newBuilder();
- // TODO: we only need to do this once, not on writer switchover.
- scanStorage();
+ EditLogFile latestFile = scanStorageForLatestEdits();
- if (curSegmentTxId != HdfsConstants.INVALID_TXID) {
- builder.setLastSegmentTxId(curSegmentTxId);
+ if (latestFile != null) {
+ builder.setLastSegmentTxId(latestFile.getFirstTxId());
}
return builder.build();
}
+ private void abortCurSegment() throws IOException {
+ if (curSegment == null) {
+ return;
+ }
+
+ curSegment.abort();
+ curSegment = null;
+ curSegmentTxId = HdfsConstants.INVALID_TXID;
+ }
+
/**
* Write a batch of edits to the journal.
* {@see QJournalProtocol#journal(RequestInfo, long, long, int, byte[])}
@@ -287,11 +295,11 @@ class Journal implements Closeable {
// instead of rolling to a new one, which breaks one of the
// invariants in the design. If it happens, abort the segment
// and throw an exception.
- curSegment.abort();
- curSegment = null;
- throw new IllegalStateException(
+ JournalOutOfSyncException e = new JournalOutOfSyncException(
"Writer out of sync: it thinks it is writing segment " + segmentTxId
+ " but current segment is " + curSegmentTxId);
+ abortCurSegment();
+ throw e;
}
checkSync(nextTxId == firstTxnId,
@@ -410,8 +418,7 @@ class Journal implements Closeable {
// The writer may have lost a connection to us and is now
// re-connecting after the connection came back.
// We should abort our own old segment.
- curSegment.abort();
- curSegment = null;
+ abortCurSegment();
}
// Paranoid sanity check: we should never overwrite a finalized log file.
@@ -459,11 +466,23 @@ class Journal implements Closeable {
checkFormatted();
checkRequest(reqInfo);
+ boolean needsValidation = true;
+
+ // Finalizing the log that the writer was just writing.
if (startTxId == curSegmentTxId) {
if (curSegment != null) {
curSegment.close();
curSegment = null;
+ curSegmentTxId = HdfsConstants.INVALID_TXID;
}
+
+ checkSync(nextTxId == endTxId + 1,
+ "Trying to finalize in-progress log segment %s to end at " +
+ "txid %s but only written up to txid %s",
+ startTxId, endTxId, nextTxId - 1);
+ // No need to validate the edit log if the client is finalizing
+ // the log segment that it was just writing to.
+ needsValidation = false;
}
FileJournalManager.EditLogFile elf = fjm.getLogFile(startTxId);
@@ -473,15 +492,16 @@ class Journal implements Closeable {
}
if (elf.isInProgress()) {
- // TODO: this is slow to validate when in non-recovery cases
- // we already know the length here!
-
- LOG.info("Validating log about to be finalized: " + elf);
- elf.validateLog();
-
- checkSync(elf.getLastTxId() == endTxId,
- "Trying to finalize log %s-%s, but current state of log " +
- "is %s", startTxId, endTxId, elf);
+ if (needsValidation) {
+ LOG.info("Validating log segment " + elf.getFile() + " about to be " +
+ "finalized");
+ elf.validateLog();
+
+ checkSync(elf.getLastTxId() == endTxId,
+ "Trying to finalize in-progress log segment %s to end at " +
+ "txid %s but log %s on disk only contains up to txid %s",
+ startTxId, endTxId, elf.getFile(), elf.getLastTxId());
+ }
fjm.finalizeLogSegment(startTxId, endTxId);
} else {
Preconditions.checkArgument(endTxId == elf.getLastTxId(),
@@ -599,6 +619,8 @@ class Journal implements Closeable {
checkFormatted();
checkRequest(reqInfo);
+ abortCurSegment();
+
PrepareRecoveryResponseProto.Builder builder =
PrepareRecoveryResponseProto.newBuilder();
Modified: hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java?rev=1383041&r1=1383040&r2=1383041&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java (original)
+++ hadoop/common/branches/HDFS-3077/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java Mon Sep 10 18:53:41 2012
@@ -222,7 +222,7 @@ public class TestJournal {
fail("did not fail to finalize");
} catch (JournalOutOfSyncException e) {
GenericTestUtils.assertExceptionContains(
- "but current state of log is", e);
+ "but only written up to txid 3", e);
}
// Check that, even if we re-construct the journal by scanning the
@@ -235,7 +235,7 @@ public class TestJournal {
fail("did not fail to finalize");
} catch (JournalOutOfSyncException e) {
GenericTestUtils.assertExceptionContains(
- "but current state of log is", e);
+ "disk only contains up to txid 3", e);
}
}